怎样用Python爬取京东的价格和标题及评价等商品情况 - 行业资讯

　　介绍

这篇文章将为大家详细讲解有关怎样用Python爬取京东的价格和标题及评价等商品情况,文章内容质量较高,因此小编分享给大家做个参考,希望大家阅读完这篇文章后对相关知识有一定的了解。

前言

代码实现

import requests
from lxml import etree
import time
import random
import pandas as pd
import json
from sqlalchemy import create_engine
from sqlalchemy.dialects.oracle import DATE, FLOAT, VARCHAR2
import cx_Oracle

先导入需要用的包

 def  create_table (table_name):
　　,,,,,conn =, cx_Oracle.connect(& # 39;用户/password@IP:港口/数据库# 39;),,
　　,,,,,cursor =, conn.cursor  ()
　　,,,,,create_shouji =, & # 39; & # 39; & # 39;,,,,,, CREATE  TABLE {}(,,,,,商品ID  VARCHAR2(256),,,,,,,价格,数字(19日8),,,,,,,店名,VARCHAR2(256),,,,,,,,店属性,VARCHAR2(256),,,,,,,,标题,VARCHAR2(256),,,,,,,,评论,数量(19),,,,,,,优评论,数量(19),,,,,,),,,,,,& # 39;& # 39;& # 39;.format (table_name)
　　,,,,,cursor.execute (create_shouji)
　　,,,,,cursor.close ()
　　,,,,,conn.close ()

建表

 def  mapping_df_types (df_pro):
　　,,,,,dtypedict =, {}
　　,,,,,for 我,j 拷贝zip (df_pro.columns, df_pro.dtypes):
　　,,,,,,,,,,,if “object"在

定义类型的映射

 def  sava_oracle (df_pro):
　　,,,,,engine =, create_engine(& # 39;甲骨文://用户:password@ip:港口/数据库# 39;)
　　,,,,,dtypedict =, mapping_df_types (df_pro)
　　,,,,,df_pro.to_sql (“shouji" con=引擎,指数=False, if_exists=& # 39;添加# 39;,dtype=dtypedict)

定义请求头和请求方法

# Request headers sent with every HTTP call; a browser-like User-Agent is
# supplied instead of the HTTP library's default one.
header = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
        '(KHTML, like Gecko) Chrome/83.0.4103.61 Safari/537.36 '
        'Edg/83.0.478.37'
    ),
}
def requesturl(url):
    """Fetch ``url`` with the module-level ``header`` dict and return the response.

    Args:
        url: Address of the page to download.

    Returns:
        The ``requests.Response`` for the GET request. The body is fully
        read before the session is closed, so ``.text`` remains usable.
    """
    # The context manager closes the session's pooled connections on exit.
    with requests.Session() as session:
        rep = session.get(url, headers=header)
    return rep

解析评论的url

 def  commreq (url_comm):
　　,,,,,dd_commt =, pd.DataFrame(列=[& # 39;商品id # 39;, & # 39;评论& # 39;,& # 39;优评论& # 39;])
　　,,,,,session =, requests.Session ()
　　,,,,,rep_comm =, session.get (url_comm header=头)
　　,,,,,comment =, json.loads (rep_comm.text) [& # 39; CommentsCount& # 39;】
　　,,,,,comment_list =, []
　　,,,,,for 小姐:备注:拷贝
　　,,,,,,,,,,,comment_list.append({& # 39;商品id # 39;: str(我[& # 39;ProductId& # 39;]), & # 39;评论& # 39;:我[& # 39;CommentCount& # 39;], & # 39;优评论& # 39;:我[& # 39;GoodCount& # 39;]})
　　,,,,,dd_commt =, dd_commt.append (comment_list)
　　,,,,,return  dd_commt

主体解析

 def 解析(代表):
　　,,,,,df =, pd.DataFrame(列=[& # 39;商品id # 39;, & # 39;价格& # 39;,& # 39;店名& # 39;,& # 39;店属性& # 39;,& # 39;标题& # 39;])
　　,,,,,html =, etree.HTML (rep.text)
　　,,,,,all_pro =, html.xpath (“//ul (@class=& # 39; gl-warp  clearfix& # 39;]/li")
　　,,,,,proid =, & # 39; & # 39; . join (html.xpath(“//李/@data-sku"))
　　,,,,,#,商品评价url
　　,,,,,#,referenceIds=之后到,回调之前,都是商品的id,只需要在商品列表获取商品id拼接即可
　　,,,,,#,1只评论解析
　　,,,,,url_comm =, " # 39; https://club.jd.com/comment/productCommentSummaries.action?referenceIds={} & # 39; .format (proid)
　　,,,,,dd_commt =, commreq (url_comm)
　　,,,,,#,2只商品列表信息解析
　　,,,,,pro_list =, []
　　,,,,,for  product 拷贝all_pro:
　　null
　　null
　　null
　　null
　　null
　　null
　　null
　　null
　　null
　　null
　　null
　　null
　　null
　　null
　　null
　　null
　　null
　　null
　　null
　　null
　　null
　　null怎样用Python爬取京东的价格和标题及评价等商品情况