# Table of contents (stray article heading, commented out so it is not evaluated as a name)
# -*- coding: utf-8 -*-
# @Author:姜磊
# 人间烟火气,最抚凡人心
# -*- coding: UTF-8 -*-
import re
# Sample text shared by the regex demos below.  Note that the "sm\nll" entry
# contains a real newline character between "sm" and "ll".
string = 'small smell sm.ll smll smaall sm3ll smAll smaaaall sm\nll sm ll small smbll smbbll sm..ll sm**ll smaaaaaall'
# print(re.search("small", string))

# --- Matching a single character (examples kept disabled) -------------------
# The patterns are written as raw strings so that a backslash such as the one
# in r"sm\.ll" reaches the regex engine instead of being treated as a
# (deprecated) string escape.
# print(re.findall(r"small", string))            # every occurrence of the literal "small"
# print(re.findall(r"small|smell", string))      # pipe: "small" or "smell"
# print(re.findall(r"sm.ll", string))            # "." matches any one character except a newline
# print(re.findall(r"sm[abcd]ll", string))       # [] restricts the character to one of a, b, c, d
# print(re.findall(r"sm[a-z]ll", string))        # any single lowercase letter
# print(re.findall(r"sm[a-zA-Z0-9]ll", string))  # a lowercase letter, uppercase letter or digit
# print(re.findall(r"sm\.ll", string))           # escaped dot: a literal "."
# print(re.findall(r"sm[^a]ll", string))         # any single character other than "a"

# --- Quantifiers (examples kept disabled) -----------------------------------
# print(re.findall(r"sm.{2}ll", string))         # exactly two characters of any kind
# print(re.findall(r"sm[a-z]{2,4}ll", string))   # two to four lowercase letters
# print(re.findall(r"sm[a-z]{2,}ll", string))    # two or more lowercase letters
# print(re.findall(r"sm[a-z]?ll", string))       # "?" is shorthand for {0,1}
# print(re.findall(r"sm[a-z]+ll", string))       # "+" is shorthand for {1,}
# print(re.findall(r"sm[a-z]*ll", string))       # "*" is shorthand for {0,}
# Capture-group demo: pull the name out of a fixed sentence.
eg = "My name is jianglei."
# Raw strings keep "\." as a regex escape for a literal dot; in a normal
# string literal "\." is an invalid escape sequence and triggers a warning.
name0 = r"My name is .+\."
# With exactly one (...) group in the pattern, re.findall returns the text
# captured by that group instead of the whole match.
name = r"My name is (.+)\."
print(re.findall(name0, eg))
print(re.findall(name, eg)[0])
# Record-per-line layout of the sample data, kept for reference only.  A
# single-quoted literal cannot span lines; the live `rawdata` below holds the
# same records concatenated on one line.
#   555-1239
#   Moe Szyslak
#   (636) 555-0113
#   Burns, C.Montgomery
#   555-6542
#   Rev. Timothy Lovejoy
#   555 8904
#   Ned Flanders
#   636-555-3226
#   Simpson,Homer
#   5553642
#   Dr. Julius Hibbert
rawdata = '555-1239Moe Szyslak(636) 555-0113Burns, C.Montgomery555-6542Rev. Timothy Lovejoy555 8904Ned Flanders636-555-3226Simpson,Homer5553642Dr. Julius Hibbert'

# Phone number: optional area code (optionally parenthesised), an optional
# "-" or " " separator, three digits, an optional "-" or " ", four digits.
# The separator after the area code must accept "-" as well as " "; otherwise
# "636-555-3226" is truncated to "555-3226".
tel_pattern = r'\(?[0-9]{0,3}\)?[- ]?[0-9]{3}[- ]?[0-9]{4}'
tel = re.findall(tel_pattern, rawdata)
# print(tel)

# Name: up to five leading letters (title or start of a surname), an optional
# "," or ".", an optional space, one to seven letters, exactly one of
# ".", " " or ",", then one to ten letters.
name_pattern = r'[a-zA-Z]{0,5}[,\.]?[ ]?[a-zA-Z]{1,7}[\. ,][a-zA-Z]{1,10}'
name = re.findall(name_pattern, rawdata)
# print(name)

# pandas is only needed to tabulate the result.
import pandas as pd

# The DataFrame constructor requires both columns to have the same length;
# for this sample data each pattern yields six matches.
ans = pd.DataFrame({'name': name, 'tel': tel})
print(ans)