21 re (Regular Expression) - module
1. Using regular expressions we can find a string based on pattern.
2. Using re module we can do regular expression operations
# Regular Expression Patterns
\d ==> Matches a digit: [0-9]
\D ==> Matches a non-digit: [^0-9]
\s ==> Matches a whitespace character
\S ==> Matches a non-whitespace character
\w ==> Matches a single word character: [A-Za-z0-9_]
\W ==> Matches a single non-word character: [^A-Za-z0-9_]
\n ==> Matches a newline character
+ ==> Matches 1 or more occurrences of the preceding expression.
* ==> Matches 0 or more occurrences of the preceding expression.
? ==> Matches 0 or 1 occurrence of the preceding expression.
^ ==> Matches beginning of line.
$ ==> Matches end of line.
| ==> To do OR operation
. ==> Matches any single character except newline.
\ ==> Escapes a special character so that it is interpreted literally.
() ==> Parentheses create groups.
[] ==> Square brackets "[]" match any single character listed inside the brackets.
{n} ==> Matches exactly n occurrences of the preceding expression.
1) match() function
'''
1. match() looks for the pattern only at the starting position of the string.
'''
import re
# re.match() looks for the pattern only at the very start of the subject.
# The subject is named `text` rather than `str` so that the built-in
# str type is not shadowed for the rest of the script.
text = 'umamahesh'
m = re.match('uma', text)
# Printing the Match object shows its span and the matched text.
print(m)
----Match object methods and attributes----
--span()--: Returns a tuple containing the start and end positions of the match.
--string--: An attribute (not a method) holding the string that was passed into the function.
--group()--: Returns the part of the string where the match was found.
import re
# Inspect the Match object returned by a successful re.match().
# The subject is named `text` rather than `str` so that the built-in
# str type is not shadowed.
text = 'hi how r u man'
m = re.match('hi', text)
print(m.span())    # (start, end) positions of the match
print(m.group())   # the matched text
print(m.string)    # `string` is an attribute, not a method: the subject passed in
print(m)
#To match uma string using group()
import re
# Match the literal 'uma' at the start of the subject and print the matched text.
name = 'umamahesh'
mo = re.compile('uma').match(name)
matched_text = mo.group(0)
print(matched_text)
#to match not from starting
import re
# re.match() only tries the pattern at position 0. 'he' occurs later in the
# subject, so match() returns None here; calling .group() on None would raise
# AttributeError, so the result is checked first.
name = 'umamahesh'
mo = re.match('he', name)
if mo is None:
    print('No match at the start of the string')
else:
    print(mo.group())
2) Search function
'''
1. Search is used to find a pattern anywhere in the string
'''
import re
# Scan the whole subject for the first occurrence of 'he' and print it.
name = 'umamahesh'
mo = re.compile('he').search(name)
print(mo[0])
# apply pattern with search
import re
name = 'umamahesh'
# The pattern is a raw string so that each \w reaches the regex engine as
# written; in a normal string literal '\w' is an invalid escape sequence
# and triggers a warning on modern Python versions.
mo = re.search(r'\w\w\w', name)
# Three consecutive word characters: the first three letters of the subject.
print(mo.group())
3) pattern w and d
import re
# '+' matches 1 or more occurrences of the preceding expression.
name = 'umamahesh'
# \w alone is one word character; \w+ consumes the whole run of them.
# Raw string: '\w' is not a valid escape in a normal string literal.
mo = re.match(r'\w+', name)
print(mo.group())
# '*' matches 0 or more occurrences of the preceding expression.
name = 'umamahesh123'
# The subject starts with a letter, so \d* matches zero digits at position 0:
# the match succeeds but is the empty string (an empty line is printed).
# Raw string: '\d' is not a valid escape in a normal string literal.
mo = re.match(r'\d*', name)
print(mo.group())
# '?' matches 0 or 1 occurrence of the preceding expression.
name = 'umamahesh123'
# No digit at position 0, so \d? matches zero digits: an empty (but successful) match.
# Raw string: '\d' is not a valid escape in a normal string literal.
mo = re.match(r'\d?', name)
print(mo.group())
4) pattern start and end
import re
# '^' anchors the pattern to the beginning of the line.
name = 'umamahesh'
mo = re.match(pattern='^uma', string=name)
print(mo.group(0))
# '$' anchors the pattern to the end of the line.
name = 'umamahesh123'
# re.search() is required here: re.match() only tries position 0, where
# 'sh123' does not occur, so it would return None and .group() would fail.
mo = re.search('sh123$', name)
print(mo.group())
5) pattern or
import re
# '|' is the OR operator: the pattern matches if either alternative matches.
name = 'umamahesh'
either_name = re.compile('mahesh|uma')
mo = either_name.match(name)
# Only 'uma' can match at the start of the subject, so that is what is printed.
print(mo[0])
# '.' matches any single character except a newline, so '.*' stops at '\n'.
name = 'umamahesh\n123'
up_to_newline = re.compile('.*')
mo = up_to_newline.match(name)
print(mo.group(0))
# A backslash escapes a special character so that it is matched literally.
# Patterns are raw strings so the backslashes reach the regex engine intact;
# '\.' and '\+' are invalid escapes in a normal string literal.
mo = re.match(r'sri\.ram', 'sri.ram')    # literal dot
mo1 = re.match(r'sri\+ram', 'sri+ram')   # literal plus sign
# A literal backslash is written as '\\' inside the pattern.
print(re.search(r'sri\\ram', r'sri\ram').group())
print(mo.group())
print(mo1.group())
6) grouping
'''
1. Using parentheses we can create groups
'''
import re
name = 'uma mahesh'
# Two parenthesised groups separated by one whitespace character.
# Raw string: '\w' and '\s' are not valid escapes in a normal string literal.
mo = re.match(r'(\w+)\s(\w+)', name)
# to print all groups
print(' All groups are :', mo.groups())
# to print first group
print(' Surname is :', mo.group(1))
# to print second group
print(' first name is :', mo.group(2))
# The pattern defines only two groups, so asking for a third one raises
# IndexError; handle it instead of letting the script crash.
try:
    print(' Second name is :', mo.group(3))
except IndexError:
    print(' There is no third group in this pattern')
7) pattern [ ]
'''
1. Square brackets "[]" match any single character listed inside the brackets.
'''
import re
# Match a capital or small letter 'u' in the [] position.
name = 'Umamahesh'
# The rest of the pattern must spell out the remainder of the subject
# ('mamahesh'); the earlier pattern '[uU]mahesh' never matched, so
# re.match() returned None and .group() failed.
mo = re.match('[uU]mamahesh', name)
print(mo.group())
# Match any small letter "a to z" in the [] position.
# Pattern and subject now agree ('u' followed by 'mamahesh'); previously the
# subject was just 'sh', so re.match() returned None and .group() failed.
mo = re.match('[a-z]mamahesh', 'umamahesh')
print(mo.group())
# Match any capital letter "A to E" in the [] position.
# The subject must continue with 'riram' after the capital letter; the earlier
# subject 'E34ok' did not, so re.match() returned None and .group() failed.
mo = re.match('[A-E]riram', 'Eriram')
print(mo.group())
# After 'sri', accept exactly one character out of the set "akd43$.".
subject = 'sri$'
mo = re.match('sri[akd43$.]', subject)
print(mo.group(0))
# Between 's' and 'i', accept exactly one letter (either case) or digit.
alnum_middle = re.compile('s[a-zA-Z0-9]i')
mo = alnum_middle.match('sri')
print(mo[0])
# A caret (^) as the first character inside [] negates the class:
# here the middle character may be anything except a small letter a to z.
not_lowercase_middle = re.compile('s[^a-z]i')
mo = not_lowercase_middle.match('s8i')
print(mo.group(0))
# Match any one character except 'r' and 'a' in the [] position.
# The leading caret (^) negates the class; without it the class accepted only
# 'r' or 'a', so 's8i' did not match and .group() failed on None.
mo = re.match('s[^ra]i', 's8i')
print(mo.group())
Comments
Post a Comment