Python爬虫:爬取小说并存储到数据库 - 行业资讯 - 肥雀云

#!/usr/bin/python
# -*- coding: UTF-8 -*-
"""Crawl novel categories, chapter lists and chapter text from quanshu.net.

The script fetches a category page, extracts the novels linked from it,
then walks one novel's chapter list and prints each chapter's raw HTML
content. Pages on the site are served as GBK and are decoded to unicode
text before any regular expression is applied.
"""

import contextlib
import re

try:
    import urllib2  # Python 2
except ImportError:
    # Python 3 moved Request/urlopen to urllib.request with the same API.
    import urllib.request as urllib2

# Site root; chapter and novel links scraped from pages are relative to it.
domain = "http://www.quanshu.net"

# Browser-like headers sent with every request.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 6.3; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/58.0.3029.110 Safari/537.36"
    ),
}

# Compiled once at import time instead of on every call.
# (novel index path, novel title) pairs on a category page.
TYPE_LINK_RE = re.compile(r'<a href="(/book/.*?)" target="_blank">(.*?)</a>')
# (chapter file name, chapter title) pairs on a novel's index page.
CHAPTER_LINK_RE = re.compile(
    r'<li><a href="(.*?)" title=".*?">(.*?)</a></li>'
)
# Chapter text sits between the style5() and style6() script calls.
CONTENT_RE = re.compile(
    r'style5\(\);</script>(.*?)<script type="text/javascript">style6\(\)'
)


def _fetch(url):
    """Return the body of *url* decoded from GBK into unicode text."""
    req = urllib2.Request(url, headers=headers)
    # closing() guarantees the connection is released even if read() or
    # decode() raises; the Python 2 response object is not a context manager.
    with contextlib.closing(urllib2.urlopen(req)) as res:
        return res.read().decode("gbk")


def getTypeList(pn=1):
    """Return the novels listed on category page *pn*.

    Args:
        pn: Number of the category page (``/map/<pn>.html``).

    Returns:
        A list of ``(index_path, title)`` tuples, one per novel link.
    """
    return TYPE_LINK_RE.findall(_fetch("%s/map/%s.html" % (domain, pn)))


def getNovelList(url):
    """Return the chapters listed on a novel's index page.

    Args:
        url: Site-relative path of the novel index (appended to ``domain``).

    Returns:
        A list of ``(chapter_href, chapter_title)`` tuples.
    """
    return CHAPTER_LINK_RE.findall(_fetch(domain + url))


def getNovelContent(url):
    """Return the raw HTML content of one chapter.

    Args:
        url: Site-relative path of the chapter page (appended to ``domain``).

    Returns:
        The text captured between the ``style5()`` and ``style6()`` markers.

    Raises:
        IndexError: If the page does not contain the expected markers.
    """
    return CONTENT_RE.findall(_fetch(domain + url))[0]


if __name__ == "__main__":
    # NOTE(review): range(10) starts at page 0 while getTypeList defaults to
    # page 1 -- confirm which page numbers the site actually serves.
    for pn in range(10):
        for url, _title in getTypeList(pn):
            for zurl, ztitle in getNovelList(url):
                print(u"正则爬取——%s" % ztitle)
                # Chapter pages live next to the novel's index.html.
                content = getNovelContent(url.replace("index.html", zurl))
                print(content)
            # Demo run: stop after the first novel ...
            break
        # ... of the first category page.
        break

Python爬虫:爬取小说并存储到数据库

<strong>

1.1新建库:小说

Python爬虫:爬取小说并存储到数据库

1.2设计表:小说

Python爬虫:爬取小说并存储到数据库

并设置外键

Python爬虫:爬取小说并存储到数据库

<strong>

# !/usr/bin/Python 　　#,- *安康;编码:UTF-8 - * - 　　　　import urllib2再保险　　import MySQLdb 　　　　class Sql(对象): 　　,,,conn =, MySQLdb.connect(主机=192.168.19.213,端口=3306,用户=?passwd=Admin123, db=∷怠?charset=use utf8) 　　,,,def addnovels(自我,排序,novelname): 　　,,,,,,,cur =, self.conn.cursor () 　　,,,,,,,cur.execute (“insert into 小说(排序,novelname),值(% s ,, ' % s ')”, %(排序,novelname)) 　　,,,,,,,lastrowid =cur.lastrowid 　　,,,,,,,cur.close () 　　,,,,,,,self.conn.commit () 　　,,,,,,,return lastrowid 　　,,,def addchapters(自我、novelid chaptername、内容): 　　null 　　null 　　null 　　null 　　null 　　null 　　null 　　null 　　null 　　null 　　null 　　null 　　null 　　null 　　null 　　null 　　null 　　null 　　null 　　null 　　null 　　null 　　null 　　null 　　null 　　null 　　null 　　null 　　null 　　null 　　null 　　null 　　null 　　null 　　null 　　null 　　null 　　null 　　null 　　null 　　null 　　null 　　null 　　null 　　null 　　null 　　null 　　null 　　null