from bs4 import BeautifulSoup
from urllib.request import urlopen
import re
import pymysql.cursors

# Fetch the Chinese Wikipedia main page and parse it
url = "https://zh.wikipedia.org/wiki/Wikipedia:%E9%A6%96%E9%A1%B5"
resp = urlopen(url).read().decode("UTF-8")
soup = BeautifulSoup(resp, "html.parser")

# Collect every internal /wiki/ link on the page
listurls = soup.findAll('a', href=re.compile("^/wiki/"))

for url in listurls:
    # Skip image links (.jpg / .JPG)
    if not re.search(r"\.(jpg|JPG)$", url["href"]):
        print(url.get_text(), "<---->", "https://zh.wikipedia.org" + url["href"])

        # Open a connection and insert one row per link
        connection = pymysql.connect(host='localhost',
                                     user='root',
                                     password='',
                                     db='wiki',
                                     charset='utf8mb4')
        try:
            with connection.cursor() as cursor:
                sql = "insert into `wikiurls`(`urlname`,`urlhref`) values (%s,%s)"
                cursor.execute(sql, (url.get_text(), "https://zh.wikipedia.org" + url["href"]))
                connection.commit()
        finally:
            connection.close()
# print(soup)

Running the script raises the following error:
Traceback (most recent call last):
  File "C:\Program Files (x86)\Python36-32\lib\site-packages\pymysql\connections.py", line 920, in connect
    **kwargs)
  File "C:\Program Files (x86)\Python36-32\lib\socket.py", line 704, in create_connection
    for res in getaddrinfo(host, port, 0, SOCK_STREAM):
  File "C:\Program Files (x86)\Python36-32\lib\socket.py", line 745, in getaddrinfo
    for res in _socket.getaddrinfo(host, port, family, type, proto, flags):
socket.gaierror: [Errno 11001] getaddrinfo failed

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "D:\Program Files\eclipse2017\HelloPython\test\wikitosave.py", line 18, in <module>
    charset = 'utf8mb4')
  File "C:\Program Files (x86)\Python36-32\lib\site-packages\pymysql\__init__.py", line 90, in Connect
    return Connection(*args, **kwargs)
  File "C:\Program Files (x86)\Python36-32\lib\site-packages\pymysql\connections.py", line 699, in __init__
    self.connect()
  File "C:\Program Files (x86)\Python36-32\lib\site-packages\pymysql\connections.py", line 967, in connect
    raise exc
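
For context, [Errno 11001] getaddrinfo failed means the hostname passed to pymysql.connect() could not be resolved at all; the failure happens before any MySQL handshake. Below is a minimal connectivity check kept separate from the crawler, a sketch under the assumptions that the server is reachable at 127.0.0.1 on the default port 3306 with the same root / empty-password credentials and an existing wiki database; those values are assumptions to adjust, not taken from the original post.

import pymysql

# Minimal connectivity check, independent of the scraping code.
# Assumed settings: MySQL on 127.0.0.1:3306, user 'root', empty password,
# database 'wiki' already created.
try:
    connection = pymysql.connect(host='127.0.0.1',   # numeric address avoids 'localhost' name resolution
                                 port=3306,
                                 user='root',
                                 password='',
                                 db='wiki',
                                 charset='utf8mb4')
    try:
        with connection.cursor() as cursor:
            cursor.execute("SELECT VERSION()")        # simple round trip to the server
            print("Connected, MySQL version:", cursor.fetchone()[0])
    finally:
        connection.close()
except pymysql.err.OperationalError as e:
    # Raised when the server is unreachable or the credentials are wrong
    print("Connection failed:", e)

If this check succeeds with 127.0.0.1 but the original script still fails with host='localhost', the problem is name resolution on the machine (e.g. the hosts file) rather than the crawler code itself.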