21 re (Regular Expression)

1. Using regular expressions we can find text in a string based on a pattern.

2. Using the re module we can perform regular expression operations.

# Regular Expression Patterns

'\d ==> Match a digit : [0-9]

'\D ==> Match a nondigit: [^0-9]

'\s ==> Match a whitespace character

'\S ==> Match a nonwhitespace character

'\w ==> Match a single word character : [A-Za-z0-9_]

'\W ==> Match a single nonword character: [^A-Za-z0-9_]

'\n ==> Match a new line character

+ ==> Matches 1 or more occurrence of preceding expression.

* ==> Matches 0 or more occurrence of preceding expression.

? ==> Matches 0 or 1 occurrence of preceding expression.

^ ==> Matches beginning of line.

$ ==> Matches end of line.

| ==> To do OR operation

. ==> Matches any single character except newline.

'\' ==> Used to escape any special character so that it is interpreted literally

() ==> Using parentheses we can create a groups

[] ==> Using square brackets "[]" Matches any single character in brackets.

{} ==> {n} matches exactly n occurrences of the preceding expression; {m,n} matches between m and n occurrences.

1) match() function

'''

1. Match is used to find a pattern at the starting position of the string

'''

import re

# Named `text` rather than `str` so the built-in str type is not shadowed.
text = 'umamahesh'

# re.match() succeeds only when the pattern matches at the start of the string.
m = re.match(r'uma', text)

# Prints the Match object, or None when the pattern did not match.
print(m)

----Match object methods----

--span()--: It returns the tuple containing the starting and end position of the match.

--string--: An attribute (not a method) holding the string that was passed into the function.

--group()--: The part of the string is returned where the match is found.

import re

# Named `text` rather than `str` so the built-in str type is not shadowed.
text = 'hi how r u man'
m = re.match(r'hi', text)

# span() returns the (start, end) positions of the match.
print(m.span())

# group() returns the part of the string that matched.
print(m.group())

# string is an attribute, not a method: calling it raised TypeError.
print(m.string)

print(m)

# Show group(): the text that the pattern matched at the start of the string.
import re

name = 'umamahesh'
mo = re.compile('uma').match(name)
print(mo[0])

# re.match() only looks at the start of the string, so a pattern that occurs
# later ('he' inside 'umamahesh') does not match and None is returned.
import re

name = 'umamahesh'
mo = re.match('he', name)

# Guard the None result: calling .group() on it raised AttributeError.
print(mo.group() if mo is not None else None)

2) Search function

'''

1. Search is used to find a pattern anywhere in the string

'''

import re

# search() scans the whole string and reports the first place the pattern fits.
name = 'umamahesh'
mo = re.compile('he').search(name)
print(mo[0])

# Apply a pattern with search(): three consecutive word characters.
import re

name = 'umamahesh'

# Raw string keeps the backslashes intact; '\w' in a normal string literal is
# an invalid escape sequence (SyntaxWarning on Python 3.12+).
mo = re.search(r'\w\w\w', name)

print(mo.group())

3) pattern w and d

import re

# "+" matches 1 or more occurrences of the preceding expression.
name = 'umamahesh'

# \w is one word character, so \w+ greedily takes the whole run of them.
# Raw string: '\w' in a normal literal is an invalid escape sequence.
mo = re.match(r'\w+', name)

print(mo.group())

# "*" matches 0 or more occurrences of the preceding expression.
name = 'umamahesh123'

# match() starts at position 0, where there are no digits, so \d* succeeds
# with zero digits and group() is the empty string.
# Raw string: '\d' in a normal literal is an invalid escape sequence.
mo = re.match(r'\d*', name)

print(mo.group())

# "?" matches 0 or 1 occurrence of the preceding expression.
name = 'umamahesh123'

# The string starts with a letter, so \d? matches zero digits at position 0
# and group() is the empty string.
# Raw string: '\d' in a normal literal is an invalid escape sequence.
mo = re.match(r'\d?', name)

print(mo.group())

4) pattern start and end

import re

# "^" anchors the pattern to the beginning of the line.
name = 'umamahesh'
mo = re.compile('^uma').match(name)
print(mo[0])

# "$" anchors the pattern to the end of the line.
name = 'umamahesh123'

# search() is required here: match() only tries position 0, so it returned
# None for a pattern that sits at the end of the string and .group() crashed.
mo = re.search(r'sh123$', name)

print(mo.group())

5) pattern or

import re

# "|" separates alternatives; either side may match.
name = 'umamahesh'
mo = re.compile('mahesh|uma').match(name)
print(mo[0])

# By default "." does not consume a newline, so ".*" stops at the end of the
# first line of the subject.
name = 'umamahesh\n123'
mo = re.compile('.*').match(name)
print(mo[0])

# A backslash escapes a special character so that it is matched literally.
# Raw strings are used throughout: '\.' and '\+' in normal string literals are
# invalid escape sequences (SyntaxWarning on Python 3.12+).

# \. matches a literal dot.
mo = re.match(r'sri\.ram', 'sri.ram')

# \+ matches a literal plus sign.
mo1 = re.match(r'sri\+ram', 'sri+ram')

# \\ matches a single literal backslash.
print(re.search(r'sri\\ram', r'sri\ram').group())

print(mo.group())

6) grouping

'''

1. Using parentheses we can create groups

'''

import re

name = 'uma mahesh'

# Two capture groups: the word before and the word after the whitespace.
# Raw string: '\w' and '\s' in a normal literal are invalid escape sequences.
mo = re.match(r'(\w+)\s(\w+)', name)

# groups() returns all captured groups as a tuple.
print(' All groups are :', mo.groups())

# group(1) is the first parenthesised group.
print(' Surname is :', mo.group(1))

# group(2) is the second parenthesised group.
print(' first name is :', mo.group(2))

# The pattern defines only two groups, so group(3) raised IndexError.
# group(0) is the whole match.
print(' Full match is :', mo.group(0))

6) pattern [ ]

'''

1. Using square brackets "[]" matches any single character listed in the brackets.

'''

import re

# Match a name whose first letter is either a capital or a small "u".
name = 'Umamahesh'

# The pattern must spell out the rest of the name: '[uU]mahesh' did not match
# 'Umamahesh', so match() returned None and .group() crashed.
mo = re.match(r'[uU]mamahesh', name)

print(mo.group())

# [a-z] matches any one small letter in that position.
# The original pattern/subject pair ('[a-z]mahe' against 'sh') could never
# match, so match() returned None and .group() crashed.
mo = re.match(r'[a-z]mamahesh', 'umamahesh')

print(mo.group())

# [A-E] matches any one capital letter from A to E in that position.
# The original subject 'E34ok' could never match '[A-E]riram', so match()
# returned None and .group() crashed.
mo = re.match(r'[A-E]riram', 'Eriram')

print(mo.group())

# A bracket set lists the allowed characters one by one; here any single
# character out of "akd43$." may follow "sri".
mo = re.compile('sri[akd43$.]').match('sri$')
print(mo[0])

# Several ranges can share one bracket set: a-z, A-Z and 0-9 together accept
# any single letter or digit between "s" and "i".
mo = re.compile('s[a-zA-Z0-9]i').match('sri')
print(mo[0])

# A caret (^) as the first character inside [] negates the set: the position
# accepts any one character except those listed (here, anything but a-z).
mo = re.compile('s[^a-z]i').match('s8i')
print(mo[0])

# Match any one character except "r" and "a" in the [] position.
# The caret was missing, so '[ra]' required r or a, match() returned None for
# 's8i' and .group() crashed.
mo = re.match(r's[^ra]i', 's8i')

print(mo.group())

7) pattern with range

'''

1. Using curly brace "{}" Matches exactly

n number of occurrences of preceding expression.

'''

import re

# {3} matches exactly 3 occurrences of the preceding expression.
name = 'Umamahesh'

# Raw strings: '\w' in a normal literal is an invalid escape sequence
# (SyntaxWarning on Python 3.12+). The printed text is unchanged.
mo = re.match(r'\w{3}', name)

print(r' Pattren \w{3} ', mo.group())

# {3,6} matches at least 3 and at most 6 occurrences (as many as possible).
name = 'Umamahesh'

# Raw strings: '\w' in a normal literal is an invalid escape sequence
# (SyntaxWarning on Python 3.12+). The printed text is unchanged.
mo = re.match(r'\w{3,6}', name)

print(r' Pattren \w{3,6} ', mo.group())

# {,6} matches at least 0 and at most 6 occurrences (as many as possible).
name = 'Umamahesh'

# Raw strings: '\w' in a normal literal is an invalid escape sequence
# (SyntaxWarning on Python 3.12+). The printed text is unchanged.
mo = re.match(r'\w{,6}', name)

print(r' Pattren \w{,6} ', mo.group())

# {3,} matches at least 3 occurrences with no upper limit.
name = 'Umamahesh'

# Raw strings: '\w' in a normal literal is an invalid escape sequence
# (SyntaxWarning on Python 3.12+). The printed text is unchanged.
mo = re.match(r'\w{3,}', name)

print(r' Pattren \w{3,} ', mo.group())

8) flags

'''

1. Flags modify the meaning of the given regex pattern.

'''

import re

## re.I or re.IGNORECASE makes the match case-insensitive
#name = 'SRIram'
#mo = re.search('sriram', name, re.I)
#print(mo.group())

## re.S or re.DOTALL makes a period (dot) match any character, newline included
name = 'SRI\nram'

# Flags are bit flags and are combined with bitwise OR. Adding them with "+"
# gives a different flag whenever the same flag is listed twice.
mo = re.search('sri.ram', name, re.I | re.S)

print(mo.group())

9) findall

'''

1. Using findall we can search for all matching patterns in a string

2. It will return all matching values in list format

'''

import re

# Collect every run of small letters found in the string, in order, as a list.
s = '345uma89mahesh99yes9678'
v = re.compile('[a-z]+').findall(s)
print(v)

10) sub

'''

1. sub is used to replace substrings based on a pattern and

return the replaced string

'''

import re

# To replace every digit with $
# name = '5A9BC 123'
# v = re.sub(r'\d', '$', name)
# print(' After replace :', v)

# To remove all digits
# name = '6AB9C 123'
# v = re.sub(r'\d', '', name)
# print(' After removing numbers :', v)

# To replace only the first 3 digits: count caps the number of substitutions.
name = '6AB9C 123'

# count is passed by keyword (passing it positionally is deprecated since
# Python 3.13); raw string because '\d' is an invalid escape in a normal literal.
v = re.sub(r'\d', '@', name, count=3)

# The label now says what the call does: digits are replaced, not removed.
print(' After replacing first 3 numbers :', v)

# Ends the script at this point by raising SystemExit.
# NOTE(review): exit() is the helper the site module provides for interactive
# use; confirm whether sys.exit() was intended here.
exit()

Search This Blog

Python Programming

21 re (Regular Expression) - module

Comments

Post a Comment

Popular posts from this blog

39 abstract method in python

4 Tuple data types

30 class in python