利用python爬虫实现爬取网易云音乐热歌榜 - 行业资讯 - 肥雀云

　　介绍

本篇文章给大家分享的是利用 Python 爬虫爬取网易云音乐热歌榜的方法。小编觉得挺实用的，因此分享给大家学习，希望大家阅读完这篇文章后可以有所收获。话不多说，跟着小编一起来看看吧。

代码实现:

import re
import time
from urllib import request

import requests
from bs4 import BeautifulSoup
　　
　　
class Music(object):
    """Download the tracks listed on a NetEase Cloud Music chart page.

    The workflow (see ``main``) is: fetch the chart page, pull the text of
    its ``<textarea>`` elements, extract track tuples from that text with
    two regular expressions, then download each track as an ``.mp3`` file.
    """

    def __init__(self, baseurl, path):
        """Store the chart URL, the output path prefix and request headers.

        Args:
            baseurl: URL of the chart page to fetch.
            path: Prefix prepended to every output file name. It is joined
                by plain string concatenation, so it should end with a
                path separator.
        """
        head = {
            "user-agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) "
                          "AppleWebKit/537.36 (KHTML, like Gecko) "
                          "Chrome/69.0.3497.100 Safari/537.36",
        }
        self.baseurl = baseurl
        self.headers = head
        self.path = path

    def main(self):
        """Run the whole pipeline: fetch, extract, match, download."""
        html = self.askurl()
        bs4 = self.analysis(html)
        name1 = self.matching(bs4)
        self.save(name1)

    def askurl(self):
        """Fetch ``self.baseurl`` and return the body decoded as UTF-8."""
        req = request.Request(url=self.baseurl, headers=self.headers)
        response = request.urlopen(req)
        html = response.read().decode("utf-8")
        return html

    def analysis(self, html):
        """Return ``str()`` of the list of all ``<textarea>`` tags in *html*."""
        soup = BeautifulSoup(html, "html.parser")
        bs4 = soup.find_all("textarea")
        bs4 = str(bs4)
        return bs4

    def matching(self, bs4):
        """Extract track tuples from the textarea text.

        Args:
            bs4: Text produced by ``analysis``.

        Returns:
            A list of 3-tuples of strings ``(g0, g1, track_id)`` as captured
            by the second pattern; ``save`` names files ``g1 - g0`` and uses
            ``track_id`` in the download URL. Empty list when nothing
            matches.
        """
        # NOTE: the brackets are not escaped, so `[],"alias":[]` is parsed as
        # ONE character class (']', ',', '"', 'a', 'l', 'i', 's', ':', '[').
        # In effect the pattern ends at `","tns":` plus one such character.
        # Kept as in the original listing so the set of matches is unchanged.
        rule0 = re.compile('"name":"(.*?)","tns":[],"alias":[]')
        name0 = re.findall(rule0, bs4)
        # Every captured chunk is prefixed with a comma, matching the
        # original accumulate-in-a-loop behaviour (no builtin shadowing).
        joined = "".join("," + chunk for chunk in name0)
        # Normalise non-breaking spaces so they do not end up in file names.
        joined = joined.replace("\xa0", " ")
        rule1 = re.compile('jpg,(.*?),(.*?)","id":(\\d*)')
        name1 = re.findall(rule1, joined)
        return name1

    def save(self, name1):
        """Download every track in *name1* to ``self.path``.

        Args:
            name1: Iterable of ``(g0, g1, track_id)`` string tuples as
                returned by ``matching``.
        """
        for j in name1:
            label = j[1] + " - " + j[0]
            print("正在下载:" + label + "…")
            url = "http://music.163.com/song/media/outer/url?id=" + j[2]
            content = requests.get(url=url, headers=self.headers).content
            with open(self.path + label + ".mp3", "wb") as f:
                f.write(content)
            print(label + "下载完毕。\n")
            # Short pause between downloads.
            time.sleep(0.5)
　　
　　
# Script entry point: download the chart at `baseurl` into `path`.
if __name__ == "__main__":
    baseurl = "https://music.163.com/discover/toplist?id=3778678"  # URL of the hot-song chart to crawl
    path = "D:/360下载/网易云热歌榜/"  # directory prefix the files are saved under
    demo0 = Music(baseurl, path)
    demo0.main()
    print("下载完毕")

Python3实战之爬虫抓取网易云音乐的热门评论

# !/usr/bin/env python3 　　# - * -编码:utf - 8 - * 　　　　进口再保险　　进口urllib.request 　　进口urllib.error 　　进口urllib.parse 　　进口json 　　　　　　　　def get_all_hotSong(): #获取热歌榜所有歌曲名称和id 　　url=& # 39; http://music.163.com/discover/toplist& # 63; id=3778678 & # 39;#网易云云音乐热歌榜url 　　html=urllib.request.urlopen (url) .read () .decode (& # 39; use utf8 # 39;) #打开的url 　　html=str (html) #转换成str 　　pat1=" # 39; & lt; ul类=癴-hide"祝辞& lt; li> https://www.yisu.com/zixun/& lt; a href="/歌曲/& # 63;id=/d * & # 63;“rel=巴獠縩ofollow”rel=巴獠縩ofollow”>。李* ' #进行第一次筛选的正则表达式　　结果=re.compile (pat1) .findall (html) #用正则表达式进行筛选　　结果=结果[0]#获取元组的第一个元素　　　　李pat2=r ' <> (* & # 63;) ' #进行歌名筛选的正则表达式　　李pat3=r ' <> 。* & # 63; ' #进行歌id筛选的正则表达式　　hot_song_name=re.compile (pat2) .findall(结果)#获取所有热门歌曲名称　　hot_song_id=re.compile (pat3) .findall(结果)#获取所有热门歌曲对应的Id 　　　　返回hot_song_name hot_song_id 　　　　def get_hotComments (hot_song_name hot_song_id): 　　url=' http://music.163.com/weapi/v1/resource/comments/R_SO_4_ ' + hot_song_id + & # 63; csrf_token=的#歌评url 　　头={#请求头部　　“用户代理”:“Mozilla/5.0 (X11;Fedora;Linux x86_64) AppleWebKit/537.36 (KHTML,像壁虎)Chrome/58.0.3029.110 Safari/537.36” 　　} 　　#职位请求表单数据　　data={“参数”:“zC7fzWBKxxsm6TZ3PiRjd056g9iGHtbtc8vjTpBXshKIboaPnUyAXKze + KNi9QiEz/IieyRnZfNztp7yvTFyBXOlVQP JdYNZw2 + GRQDg7grOR2ZjroqoOU2z0TNhy + qDHKSV8ZXOnxUF93w3DA51ADDQHB0IngL + v6N8KthdVZeZBe0d3EsUFS8ZJltNRUJ”、“encSecKey”:“4801507 e42c326dfc6b50539395a4fe417594f7cf122cf3d061d1447372ba3aa804541a8ae3b3811c081eb0f2b71827850af59af411a10a1795f7a16a5189d163bc9f67b3d1907f5e6fac652f7ef66e5a1f12d6949be851fcf4f39a0c2379580a040dc53b306d5c807bf313cc0e8f39bf7d35de691c497cda1d436b808549acc”} 　　postdata=urllib.parse.urlencode(数据).encode (use utf8) #进行编码　　请求=urllib.request.Request (url,标题=头,data=postdata) 　　反应=urllib.request.urlopen(请求).read () .decode (use utf8) 　　json_dict=json.loads(响应)#获取json 　　hot_commit=json_dict [' hotComments '] #获取json中的热门评论　　　　　　num=0 　　fhandle=开放(的。/song_comments ', ' a ') #写入文件　　fhandle.write (hot_song_name +“:”+“/n”) 　　　　
在hot_commit:项　　num +=1 　　fhandle.write (str (num) +“。”(“内容”)+项目+/n) 　　fhandle.write ('/n==============================================/n/n ') 　　fhandle.close () 　　　　　　　　　　hot_song_name hot_song_id=get_all_hotSong() #获取热歌榜所有歌曲名称和id 　　　　num=0 　　虽然num