
The list my crawler gets back is empty


Pretend10 2019-03-19 21:31:11
from urllib import request
import re


class Spider():
    url = 'https://www.huya.com/g/lol'
    # Each pattern needs a capture group and a non-greedy [\s\S]*?,
    # otherwise re.findall returns an empty list. If findall still
    # comes back empty, re-check the pattern against the live page
    # source — the site's markup may have changed.
    root_pattern = r'<span class="txt">([\s\S]*?)</span>'
    name_pattern = r'<i class="nick" title="[^"]*">([\s\S]*?)</i>'
    number_pattern = r'<span class="num">([\s\S]*?)</span>'

    def __fetch_content(self):
        r = request.urlopen(Spider.url)
        htmls = r.read()
        htmls = str(htmls, encoding='utf-8')
        return htmls

    def __analysis(self, htmls):
        root_html = re.findall(Spider.root_pattern, htmls)
        anchors = []
        for html in root_html:
            name = re.findall(Spider.name_pattern, html)
            number = re.findall(Spider.number_pattern, html)
            # The original dict only stored 'name'; __refine and
            # __sort also need 'number'.
            anchor = {'name': name, 'number': number}
            anchors.append(anchor)
        return anchors

    def __refine(self, anchors):
        l = lambda anchor: {
            'name': anchor['name'][0].strip(),
            'number': anchor['number'][0]
        }
        return map(l, anchors)

    def __sort(self, anchors):
        anchors = sorted(anchors, key=self.__sort_seed, reverse=True)
        return anchors

    def __sort_seed(self, anchor):
        # '\d+' grabs the whole run of digits; the original '\d'
        # only kept the first digit of the viewer count.
        r = re.findall(r'\d+', anchor['number'])
        number = float(r[0])
        if '万' in anchor['number']:
            number *= 10000
        return number

    def __show(self, anchors):
        for rank in range(0, len(anchors)):
            # The original put the '+' continuations on separate
            # statements, so only 'rank N' was ever printed.
            print('rank ' + str(rank + 1)
                  + '  :  ' + anchors[rank]['name']
                  + '     ' + anchors[rank]['number'])

    def go(self):
        htmls = self.__fetch_content()
        anchors = self.__analysis(htmls)
        anchors = list(self.__refine(anchors))
        anchors = self.__sort(anchors)
        self.__show(anchors)


spider = Spider()
spider.go()
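The regex side can be checked without hitting the network at all. A minimal sketch, assuming a made-up snippet that stands in for Huya's markup (the real page's class names and nesting may differ, so re-check them in the browser):

```python
import re

# Made-up stand-in for one streamer card on the page.
sample = ('<span class="txt">'
          '<i class="nick" title="PlayerOne">PlayerOne</i>'
          '<span class="num">12.3万</span>'
          '</span>')

# Corrected patterns: each has a capture group and a non-greedy
# [\s\S]*? — without both, findall returns an empty list.
name_pattern = r'<i class="nick" title="[^"]*">([\s\S]*?)</i>'
number_pattern = r'<span class="num">([\s\S]*?)</span>'

print(re.findall(name_pattern, sample))    # ['PlayerOne']
print(re.findall(number_pattern, sample))  # ['12.3万']
```

If these prints are non-empty against a saved copy of the real page but the spider still returns nothing, the problem is in the fetch, not the regex.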

3 Answers

pardon110

Has contributed 1038 experience points and earned over 227 likes

The error can only be in one of two places:

  1. The target page was not fetched at all

  2. The regular expression is wrong and does not match the target content
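The two failure modes can be told apart mechanically. A minimal sketch, where the `diagnose` helper and its inputs are made up for illustration rather than part of the original spider:

```python
import re

def diagnose(htmls, pattern):
    # Failure mode 1: nothing came back from the fetch.
    if not htmls:
        return 'page not fetched'
    # Failure mode 2: the page is there, but the regex never matches.
    if not re.findall(pattern, htmls):
        return 'pattern does not match'
    return 'ok'

print(diagnose('', r'<i>([\s\S]*?)</i>'))           # page not fetched
print(diagnose('<b>hi</b>', r'<i>([\s\S]*?)</i>'))  # pattern does not match
print(diagnose('<i>hi</i>', r'<i>([\s\S]*?)</i>'))  # ok
```

Running this check on the real `htmls` string (e.g. by printing `len(htmls)` inside `__fetch_content`) tells you which of the two to fix first.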

Replied 2019-03-19
  • 3 Answers
  • 0 Followers
  • 3140 Views