# -*- coding: utf-8 -*-
# Python 2.7.6
# urllib: sends the request and receives the response
# BeautifulSoup: parses the response body (HTML)
from bs4 import BeautifulSoup
from urllib import urlopen
import urllib

# Card numbers for which the lookup found nothing.
list_no_results = []
# Card numbers for which the lookup succeeded.
list_yes_results = []

# Parse the response body and store the extracted fields in a dict.
def parseData(htmls, code):
    """Extract the card details from a lookup page and return them as a dict.

    htmls -- response body (HTML) of the lookup page.
    code  -- layout selector: '00' reads the first three <td> cells with
             CSS class STYLE2 and no ``align`` attribute; '01' reads the
             4th, 5th and 6th <li> elements.

    Returns a dict with the keys 'id' (card number), 'lt' (home location)
    and 'type' (card type). Any other ``code`` yields an empty dict.
    Raises IndexError when the page holds fewer elements than expected.
    """
    fields = ('id', 'lt', 'type')
    soup = BeautifulSoup(htmls)

    if code == '00':
        cells = soup.find_all('td', 'STYLE2', align=False)
        values = [cells[pos].string for pos in (0, 1, 2)]
        return dict(zip(fields, values))

    if code == '01':
        items = soup.find_all('li')
        # Join each item's text fragments with '|' and keep the second
        # fragment.
        values = [items[pos].get_text('|').split('|')[1] for pos in (3, 4, 5)]
        return dict(zip(fields, values))

    return {}


# Query sites 00 and 01 for the card's details, fetch the response body,
# hand it to parseData, and return the resulting dict.
def _readUrl(url, data=None):
    """Open ``url`` (sending ``data`` as the request body when given) and
    return the response body, always closing the connection afterwards."""
    # Function-scope import keeps this block self-contained.
    from contextlib import closing
    # closing() is used because the object returned by Python 2's urlopen
    # cannot be used directly in a ``with`` statement.
    with closing(urlopen(url, data=data)) as response:
        return response.read()


def getInfoFromWeb00(cardNumStr):
    """Look up a card on www.6wm.cn.

    cardNumStr -- the card number as a string.

    Returns the dict built by parseData(body, '00') when the card page
    exists. Otherwise the card number is submitted to bank.php and the
    page is fetched once more; if it is still missing, cardNumStr is
    appended to list_no_results and False is returned.
    """
    # e.g. http://www.6wm.cn/card/6222020200094043425.html
    url_get = 'http://www.6wm.cn/card/' + cardNumStr + '.html'
    body = _readUrl(url_get)
    if '404 Not Found' in body:
        # Submit the card number to the query form, then retry the page.
        # NOTE(review): presumably the POST makes the site generate the
        # card page -- confirm against the site's behaviour.
        data = urllib.urlencode({'cardNum': cardNumStr})
        _readUrl('http://www.6wm.cn/bank.php', data)  # body is not needed
        body = _readUrl(url_get)
        if '404 Not Found' in body:
            list_no_results.append(cardNumStr)
            return False
    # parse data
    return parseData(body, '00')


def getInfoFromWeb01(cardNumStr):
    """Look up a card on www.cha789.com.

    cardNumStr -- the card number as a string.

    Returns the dict built by parseData(body, '01'), or False (after
    appending cardNumStr to list_no_results) when the page does not
    contain the 'cxContext' marker.
    """
    # e.g. http://www.cha789.com/bank_6228212028001510771.html
    url_get = 'http://www.cha789.com/bank_' + cardNumStr + '.html'
    body = _readUrl(url_get)
    if 'cxContext' not in body:
        list_no_results.append(cardNumStr)
        return False
    return parseData(body, '01')
if __name__=='__main__':
list_card=['……','……']#list of card string
……