21 re (Regular Expression) - module

 


1. Using regular expressions we can find a string based on a pattern.

2. Using the re module we can perform regular expression operations.


# Regular Expression Patterns

\d  ==>  Matches a digit: [0-9]

\D  ==>  Matches a non-digit: [^0-9]

\s  ==>  Matches a whitespace character

\S  ==>  Matches a non-whitespace character

\w  ==>  Matches a single word character: [A-Za-z0-9_]

\W  ==>  Matches a single non-word character: [^A-Za-z0-9_]

\n  ==>  Matches a newline character


+   ==>  Matches 1 or more occurrences of the preceding expression.

*   ==>  Matches 0 or more occurrences of the preceding expression.

?   ==>  Matches 0 or 1 occurrence of the preceding expression.

^   ==>  Matches beginning of line.

$  ==>  Matches end of line.

|   ==>  To do OR operation

.   ==>  Matches any single character except newline.

\   ==>  Used to escape a special character so that it is interpreted literally


()  ==>  Parentheses create a group.

[]  ==>  Square brackets "[]" match any single character listed inside the brackets.

{}  ==>  {n} matches exactly n occurrences of the preceding expression; {m,n} matches between m and n occurrences.


1) match() function

'''

1. match() is used to find a pattern only at the starting position of the string

'''

import re

# Named `text` rather than `str` so the built-in str type is not shadowed.
text = 'umamahesh'

# re.match() only succeeds when the pattern matches at index 0 of the string.
m = re.match('uma', text)

# Prints the Match object itself (its repr shows the span and matched text).
print(m)


----Match object methods----

--span()--: Returns a tuple containing the start and end positions of the match.

--string--: An attribute (not a method) holding the string that was passed into the function.

--group()--: Returns the part of the string where the match was found.

 

import re

# Named `text` rather than `str` so the built-in str type is not shadowed.
text = 'hi how r u man'

m = re.match('hi', text)

# span() returns the (start, end) indexes of the match.
print(m.span())

# group() returns the matched text.
print(m.group())

# string is an attribute, not a method: calling m.string() raises TypeError.
# It holds the original string that was passed to re.match().
print(m.string)

print(m)


# Match the literal 'uma' at the start of the name and show the matched text
# through group().
import re

name = 'umamahesh'
prefix_pattern = re.compile('uma')
mo = prefix_pattern.match(name)
print(mo.group(0))


# Trying to match a pattern that does not occur at the start of the string.
# re.match() only tries the pattern at index 0, so although 'he' occurs later
# in the name, the call returns None.

import re

name = 'umamahesh'

mo = re.match('he', name)

# Guard against None: calling .group() on a failed match raises AttributeError.
print(mo.group() if mo else 'No match at the start of the string')


2) Search function


'''

1. Search is used to find a pattern anywhere in the string 

'''

import re

# search() scans the whole string, so 'he' is found even though it is not
# at the start of the name.
name = 'umamahesh'
he_pattern = re.compile('he')
mo = he_pattern.search(name)
print(mo.group(0))



# Apply a pattern with search(): three consecutive word characters.

import re


name = 'umamahesh'

# Raw string: in a normal string literal '\w' is an invalid escape sequence
# (a SyntaxWarning on recent Python versions).
mo = re.search(r'\w\w\w', name)

# The first three word characters found are 'uma'.
print(mo.group())


3) pattern w and d

import re


# "+" matches 1 or more occurrences of the preceding expression.
# \w matches one word character, so \w+ consumes the whole name.
# Patterns are raw strings so the backslash reaches the regex engine as-is.

name = 'umamahesh'

mo = re.match(r'\w+', name)

print(mo.group())



# "*" matches 0 or more occurrences of the preceding expression.
# The name starts with a letter, so \d* matches zero digits at index 0
# and group() is the empty string.

name = 'umamahesh123'

mo = re.match(r'\d*', name)

print(mo.group())



# "?" matches 0 or 1 occurrence of the preceding expression.
# There is no digit at index 0, so the match is again the empty string.

name = 'umamahesh123'

mo = re.match(r'\d?', name)

print(mo.group())

4) pattern start and end

import re


# "^" anchors the pattern to the beginning of the line.

name = 'umamahesh'

mo = re.match(r'^uma', name)

print(mo.group())



# "$" anchors the pattern to the end of the line.
# re.search() is required here: re.match() only tries the pattern at index 0,
# so it would return None for a pattern that sits at the end of the string.

name = 'umamahesh123'

mo = re.search(r'sh123$', name)

print(mo.group())


5) pattern or

import re


# "|" performs an OR between alternatives.
# 'mahesh' does not match at index 0, so the second alternative 'uma' is used.

name = 'umamahesh'

mo = re.match('mahesh|uma', name)

print(mo.group())



# "." matches any single character except newline,
# so '.*' stops just before the '\n' in the name.

name = 'umamahesh\n123'

mo = re.match('.*', name)

print(mo.group())



# "\" escapes a special character so it is matched literally.
# Raw strings keep the backslash intact for the regex engine.

mo = re.match(r'sri\.ram', 'sri.ram')

mo1 = re.match(r'sri\+ram', 'sri+ram')

# A literal backslash in the text needs '\\' in the pattern.
print(re.search(r'sri\\ram', r'sri\ram').group())

print(mo.group())


6) grouping

'''

1. Using parentheses we can create groups

'''

import re


name = 'uma mahesh'

# Two groups of word characters separated by one whitespace character.
# Raw string so '\w' and '\s' are passed to the regex engine unchanged.
mo = re.match(r'(\w+)\s(\w+)', name)


# to print all groups

print(' All groups are  :', mo.groups())


# to print first group

print(' Surname is      :', mo.group(1))


# to print second group

print(' first name  is  :', mo.group(2))


# The pattern defines only two groups, so mo.group(3) raises
# IndexError ("no such group"). group(0) returns the whole match instead.

print(' Full match is   :', mo.group(0))

6) pattern [ ]

'''

1. Square brackets "[]" match any single character listed inside the brackets

'''

import re


# [uU] accepts either a capital or a small "u" in that position.
# The rest of the pattern must spell out the remainder of the name,
# otherwise match() returns None and .group() raises AttributeError.

name = 'Umamahesh'

mo = re.match('[uU]mamahesh', name)

print(mo.group())


# [a-z] accepts any one small letter in that position.

mo = re.match('[a-z]riram', 'sriram')

print(mo.group())


# [A-E] accepts any one capital letter from A to E in that position.

mo = re.match('[A-E]riram', 'Eriram')

print(mo.group())


# To match any one character in "akd43$." in [] position
# ("$" and "." are ordinary characters inside a character class).

mo = re.match('sri[akd43$.]', 'sri$')

print(mo.group())


# To match any one character a to z , A to Z, 0-9 in [] position

mo = re.match('s[a-zA-Z0-9]i', 'sri')

print(mo.group())


# To match any one character except a to z in [] position

# A caret (^) as the first character inside [] negates the class:
# it matches anything except the given characters.

mo = re.match('s[^a-z]i', 's8i')

print(mo.group())


# To match any one character except r and a in [] position
# (the leading ^ is what makes the class mean "except").

mo = re.match('s[^ra]i', 's8i')

print(mo.group())

7) pattern with range
'''
1. Using curly brace "{}" Matches exactly 
n number of occurrences of preceding expression.
'''
import re

# Patterns and labels are raw strings: '\w' in a normal string literal is an
# invalid escape sequence. The printed text is unchanged.

# To match exactly 3 characters
name = 'Umamahesh'
mo = re.match(r'\w{3}', name)
print(r' Pattren \w{3} ', mo.group())

# To match a minimum of 3 and a maximum of 6 characters
name = 'Umamahesh'
mo = re.match(r'\w{3,6}', name)
print(r' Pattren \w{3,6} ', mo.group())

# To match a minimum of 0 and a maximum of 6 characters
name = 'Umamahesh'
mo = re.match(r'\w{,6}', name)
print(r' Pattren \w{,6} ', mo.group())

# To match a minimum of 3 characters with no upper limit
name = 'Umamahesh'
mo = re.match(r'\w{3,}', name)
print(r' Pattren \w{3,} ', mo.group())

8) flags
'''
1. Flags modify the meaning of the given regex pattern.

'''

import re

## re.I or re.IGNORECASE makes the match case-insensitive
#name = 'SRIram'
#mo = re.search('sriram', name, re.I)
#print(mo.group())


## re.S or re.DOTALL makes a period (dot) match any character, including newline.
## Flags are bit flags, so they are combined with bitwise OR (|) rather than "+".
name = 'SRI\nram'
mo = re.search('sri.ram', name, re.I | re.S)
print(mo.group())

9) findall
'''
1. Using findall we can search for all matching patterns in a string
2. It returns all matching values as a list
'''
import re

# Collect every run of lowercase letters found between the digits.
s = '345uma89mahesh99yes9678'

v = [found.group() for found in re.finditer('[a-z]+', s)]
print(v)

10) sub
'''
1. sub is used to replace substrings based on a pattern and
returns the resulting string
'''

import re

# To replace a number with $
# name = '5A9BC 123'
# v = re.sub(r'\d', '$', name)
# print(' After replace :', v)


# To remove all numbers
# name = '6AB9C 123'
# v = re.sub(r'\d', '', name)
# print(' After removing numbers  :', v)


# To replace only the first 3 occurrences
name = '6AB9C 123'
# Raw string for the pattern; count is passed by keyword because passing it
# positionally is deprecated and easy to confuse with the flags argument.
v = re.sub(r'\d', '@', name, count=3)
# Digits are replaced here, not removed, so the label says "replace".
print(' After replace :', v)
# No exit() call: the script ends here anyway, and exit() is a helper meant
# for the interactive interpreter that would abort anything importing this code.






Comments

Popular posts from this blog

1 PYTHON PROGRAMMING

4 Tuple data types

2 String data types