强>
#!/usr/bin/python
# -*- coding: UTF-8 -*-
"""Regex-based crawler for novels hosted on quanshu.net.

The crawl has three levels, each served by one function:

* ``getTypeList``     -- novels listed on a category ("map") page
* ``getNovelList``    -- chapters listed on a novel's index page
* ``getNovelContent`` -- raw HTML body of a single chapter

All pages are fetched with a browser-like User-Agent and decoded as GBK.
"""
import re

try:
    import urllib2  # Python 2
except ImportError:
    # Python 3 exposes the same Request/urlopen API under urllib.request.
    import urllib.request as urllib2

# Site root; hrefs scraped from the pages are relative to it.
domain = "http://www.quanshu.net"

# Sent with every request so the site sees an ordinary desktop browser.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 6.3; Win64; x64) "
                  "AppleWebKit/537.36 (KHTML, like Gecko) "
                  "Chrome/58.0.3029.110 Safari/537.36",
}

# Patterns are compiled once at import time instead of on every call.
# NOTE(review): the literals were recovered from a damaged listing (HTML
# entities and quote characters were mangled) -- verify them against the live
# page markup, in particular the target="_blank" attribute.
# (novel index path, novel title) on a category page.
TYPE_LINK_RE = re.compile(r'<a href="(/book/.*?)" target="_blank">(.*?)</a>')
# (chapter file name, chapter title) on a novel index page.
CHAPTER_LINK_RE = re.compile(r'<li><a href="(.*?)" title=".*?">(.*?)</a></li>')
# Chapter body sits between the style5() and style6() inline scripts.
# No DOTALL flag: a body containing raw newlines will not match.
CONTENT_RE = re.compile(
    r'style5\(\);</script>(.*?)<script type="text/javascript">style6\(\)'
)


def _fetch_html(url):
    """Download *url* with the shared headers and return it decoded from GBK."""
    req = urllib2.Request(url, headers=headers)
    res = urllib2.urlopen(req)
    try:
        return res.read().decode("gbk")
    finally:
        # The Python 2 response object is not a context manager; close by hand.
        res.close()


def getTypeList(pn=1):
    """Return the novels listed on category page *pn*.

    :param pn: category page number, substituted into ``/map/<pn>.html``.
    :returns: list of ``(index_path, title)`` tuples; empty if nothing matches.
    :raises urllib2.URLError: if the page cannot be fetched.
    """
    html = _fetch_html("http://www.quanshu.net/map/%s.html" % pn)
    return TYPE_LINK_RE.findall(html)


def getNovelList(url):
    """Return the chapters listed on a novel's index page.

    :param url: site-relative path of the index page (appended to ``domain``).
    :returns: list of ``(chapter_href, chapter_title)`` tuples.
    :raises urllib2.URLError: if the page cannot be fetched.
    """
    html = _fetch_html(domain + url)
    return CHAPTER_LINK_RE.findall(html)


def getNovelContent(url):
    """Return the raw HTML body of one chapter.

    :param url: site-relative path of the chapter page (appended to ``domain``).
    :returns: the text captured between the style5()/style6() script tags.
    :raises IndexError: if the page does not contain the expected markers.
    :raises urllib2.URLError: if the page cannot be fetched.
    """
    html = _fetch_html(domain + url)
    return CONTENT_RE.findall(html)[0]


if __name__ == "__main__":
    # NOTE(review): range(10) starts at page 0 while getTypeList defaults to
    # page 1 -- confirm that /map/0.html exists.
    for page in range(10):
        for url, title in getTypeList(page):
            for zurl, ztitle in getNovelList(url):
                print(u"正则爬取——%s" % ztitle)
                # Chapter pages live next to the novel's index.html.
                content = getNovelContent(url.replace("index.html", zurl))
                print(content)
            # Demo run: stop after the first novel ...
            break
        # ... of the first category page.
        break
强>
<强> 强>
1.1 新建库:小说
1.2 设计表:小说,并设置外键
<强> 强>
# !/usr/bin/Python #,- *安康;编码:UTF-8 - * - import urllib2再保险 import MySQLdb class Sql(对象): ,,,conn =, MySQLdb.connect(主机=192.168.19.213,端口=3306,用户=?passwd=Admin123, db=∷怠?charset=use utf8) ,,,def addnovels(自我,排序,novelname): ,,,,,,,cur =, self.conn.cursor () ,,,,,,,cur.execute (“insert into 小说(排序,novelname),值(% s ,, ' % s ')”, %(排序,novelname)) ,,,,,,,lastrowid =cur.lastrowid ,,,,,,,cur.close () ,,,,,,,self.conn.commit () ,,,,,,,return lastrowid ,,,def addchapters(自我、novelid chaptername、内容): null null null null null null null null null null null null null null null null null null null null null null null null null null null null null null null null null null null null null null null null null null null null null null null null nullPython爬虫:爬取小说并存储到数据库