调用 soup.find() 时总是提示 SyntaxError: non-default argument follows default argument
from bs4 import BeautifulSoup
import re
import urllib.parse
from urllib.parse import urljoin
import urllib.request
class HtmlParser(object):
    """Pull follow-up links and title/summary data out of a downloaded page.

    ``parse`` is the entry point; the underscore-prefixed methods are helpers
    that work on an already-built BeautifulSoup tree.
    """

    # Only hrefs containing /view/<digits>.htm are collected as new URLs.
    # Compiled once here instead of on every _get_new_urls call.
    _VIEW_LINK_RE = re.compile(r"/view/\d+\.htm")

    def _get_new_urls(self, page_url, soup):
        """Return the set of absolute URLs of all matching <a> tags in *soup*.

        Each href is resolved against *page_url*, so relative links become
        absolute ones. Duplicates collapse because the result is a set.
        """
        print("get_new_urls")
        links = soup.find_all('a', href=self._VIEW_LINK_RE)
        # The earlier version called .add() on the href string itself rather
        # than on the result set, which raised AttributeError on the first link.
        return {urllib.parse.urljoin(page_url, link['href']) for link in links}

    def _extract_data(self, page_url, soup):
        """Build the ``{'url', 'title', 'summary'}`` dict shared by both getters.

        Raises:
            AttributeError: if the title <dd>/<h1> or the summary <div> is
                not present in *soup* (``find`` then yields ``None``).
        """
        # <dd class="lemmaWgt-lemmaTitle-title"><h1>Python</h1>
        title_node = soup.find('dd', class_="lemmaWgt-lemmaTitle-title").find('h1')
        # <div class="lemma-summary" label-module="lemmaSummary">
        summary_node = soup.find('div', class_="lemma-summary")
        return {
            'url': page_url,
            'title': title_node.get_text(),
            'summary': summary_node.get_text(),
        }

    def _get_new_dataa(self, page_url, soup):
        """Older, misspelled twin of ``_get_new_data``; kept so the name still exists.

        Returns the same dict as ``_get_new_data`` and differs only in the
        debug messages it prints.
        """
        print("in parse def _get_new_data")
        res_data = self._extract_data(page_url, soup)
        print ("get_over")
        return res_data

    def _get_new_data(self, page_url, soup):
        """Return a dict with the page's ``url``, ``title`` and ``summary`` text.

        Raises:
            AttributeError: if the expected title or summary node is missing.
        """
        print("get_new_data")
        return self._extract_data(page_url, soup)

    def parse(self, page_url, html_cont):
        """Parse *html_cont* and return ``(new_urls, new_data)``.

        Returns ``None`` (not a tuple) when either argument is ``None``, so
        callers that unpack the result must check for that case first.
        """
        print("parse")
        if page_url is None or html_cont is None:
            return None
        soup = BeautifulSoup(html_cont, 'html.parser', from_encoding='utf-8')
        new_urls = self._get_new_urls(page_url, soup)
        new_data = self._get_new_data(page_url, soup)
        return new_urls, new_data
# Text that followed the code in the original post: "and then I get this error"
C:\Users\Administrator\AppData\Local\Programs\Python\Python35\python.exe D:/要用的/python/text/spider_main.py
Traceback (most recent call last):
File "D:/要用的/python/text/spider_main.py", line 1, in <module>
from text import url_manager, html_downloader, html_parser, html_outputer
File "D:\要用的\python\text\html_parser.py", line 1, in <module>
from bs4 import BeautifulSoup
File "C:\Users\Administrator\AppData\Roaming\Python\Python35\site-packages\bs4\__init__.py", line 35, in <module>
from .builder import builder_registry, ParserRejectedMarkup
File "C:\Users\Administrator\AppData\Roaming\Python\Python35\site-packages\bs4\builder\__init__.py", line 7, in <module>
from bs4.element import (
File "C:\Users\Administrator\AppData\Roaming\Python\Python35\site-packages\bs4\element.py", line 1273
def find(self, name: object = None, attrs: object = {}, recursive: object = True, text: object = None,
^
SyntaxError: non-default argument follows default argument

我不知道我的 find 函数哪里错了，它提示我要添加变量，但我不知道该怎么处理。有没有人可以帮我一下？麻烦了，很急。