MapReduce怎样实现TopK

  介绍

今天就跟大家聊聊有关MapReduce怎样实现TopK,可能很多人都不太了解,为了让大家更加了解,小编给大家总结了以下内容,希望大家根据这篇文章可以有所收获。

需求:找出HTTP日志文件中总流量排名前80%的记录,并按流量值降序排序

输出格式:<phoneNB, sum_flow>

HTTP日志文件:

 1363157985066,, 13726230503, 00 - fd - 07 - a4 - 72 - b8: CMCC  120.196.100.82  i02.c.aliimg.com , 24, 27, 2481, 200, 24681
  1363157995052,,13826544101,5 c-0e-8b-c7-f1-e0: CMCC  120.197.40.4 ,, 4, 0, 264, 0, 200
  1363157991076,,13926435656,20-10-7A-28-CC-0A: CMCC  120.196.100.99 ,, 2, 4, 132, 1512, 200
  1363154400022,,13926251106,5 c-0e-8b-8b-b1-50: CMCC  120.197.40.4 ,, 4, 0, 240, 0, 200
  1363157993044,,18211575961,94 - 71 ac - cd - e6 18: CMCC-EASY  120.196.100.99  iface.qiyi.com 视频网站,15,12,1527,2106,200
  1363157995074,,84138413,5 c-0e-8b-8c-e8-20:7daysinn  120.197.40.4  122.72.52.12 , 20, 16, 4116, 1432, 200
  1363157993055,,13560439658,C4-17-FE-BA-DE-D9: CMCC  120.196.100.99 ,, 18,15,1116,954,200
  1363157995033,,15920133257,5 c-0e-8b-c7-ba-20: CMCC  120.197.40.4  sug.so.360.cn 信息安全,20,20,3156,2936,200
  1363157983019,1363157983019,68 - a1 - b7 - 03 - 07 - b1: CMCC-EASY  120.196.100.82 ,, 4, 0, 240, 0, 200
  1363157984041,,13660577991,5 c - 0 - e - 8 - b - 92 - 5 - c - 20: CMCC-EASY  120.197.40.4  s19.cnzz.com 站点统计,24,9,6960,690,200
  1363157973098,,15013685858,5 c - 0 - e - 8 b - c7 - f7 - 90: CMCC  120.197.40.4  rank.ie.sogou.com 搜索引擎,28,27,3659,3538,200
  1363157986029,,15989002119,e8 - 99 - c4 - 4 - e - 93 - e0: CMCC-EASY  120.196.100.99  www.umeng.com 站点统计,3,3,1938,180,200
  1363157992093,,13560439658,C4-17-FE-BA-DE-D9: CMCC  120.196.100.99 ,, 15, 9, 918, 4938, 200
  1363157986041,,13480253104,5 c - 0 - e - 8 b - c7 - fc - 80: CMCC-EASY  120.197.40.4 ,, 3, 3, 180, 180, 200
  1363157984040,,13602846565,5 c-0e-8b-8b-b6-00: CMCC  120.197.40.4  2052. flash3-http.qq.com 综合门户,15,12,1938,2910,200
  1363157995093,,13922314466,00-FD-07-A2-EC-BA: CMCC  120.196.100.82  img.qfc.cn , 12, 12, 3008, 3720, 200
  1363157982040,,13502468823,5 c-0a-5b-6a-0b-d4: CMCC-EASY  120.196.100.99  y0.ifengimg.com 综合门户,57,102,7335,200,110349
  1363157986072,,18320173382,84-25-db-4f-10-1a: CMCC-EASY  120.196.100.99  input.shouji.sogou.com 搜索引擎,21,18,9531,2412,200
  1363157990043,,13925057413,00 - 1 - f - 64 - e1 - e6 - 9 a: CMCC  120.196.100.55  t3.baidu.com 搜索引擎,69,63,11058,200,48243
  1363157988072,,13760778710,00-FD-07-A4-7B-08: CMCC  120.196.100.82 ,, 2, 2, 120, 120, 200
  1363157985066,,13726238888,00 - fd - 07 - a4 - 72 - b8: CMCC  120.196.100.82  i02.c.aliimg.com , 24, 27, 2481, 200, 24681
  1363157993055,,13560436666,C4-17-FE-BA-DE-D9: CMCC  120.196.100.99 ,, 18,15,1116,954,200 

定义FlowBean类,该类实现WritableComparable接口

实现write()、readFields()、compareTo()方法

 public  class  FlowBean  implements  WritableComparable, {
  ,private  String  phoneNB;//,号码
  ,private  long  up_flow;//,上行流量
  ,private  long  down_flow;//,下行流量
  ,private  long  sum_flow;//,总流量
  ,public  String  getPhoneNB (), {
  return 才能;phoneNB;
  ,}
  ,public  void  setPhoneNB (String  phoneNB), {
  时间=this.phoneNB 才能;phoneNB;
  ,}
  ,public  long  getUp_flow (), {
  return 才能;up_flow;
  ,}
  ,public  void  setUp_flow (long  up_flow), {
  时间=this.up_flow 才能;up_flow;
  ,}
  ,public  long  getDown_flow (), {
  return 才能;down_flow;
  ,}
  ,public  void  setDown_flow (long  down_flow), {
  时间=this.down_flow 才能;down_flow;
  ,}
  ,public  long  getSum_flow (), {
  return 才能;sum_flow;
  ,}
  ,public  void  setSum_flow (long  sum_flow), {
  时间=this.sum_flow 才能;sum_flow;
  ,}
  ,public  FlowBean (), {
  ,}
  ,public  FlowBean (String  phoneNB, long  up_flow,, long  down_flow), {
  时间=this.phoneNB 才能;phoneNB;
  时间=this.up_flow 才能;up_flow;
  null
  null
  null
  null
  null
  null
  null
  null
  null
  null
  null
  null
  null
  null
  null
  null
  null
  null
  null
  null
  null
  null
  null
  null
  null
  null
  null
  null
  null
  null
  null
  null
  null
  null
  null
  null
  null
  null
  null
  null
  null
  null
  null
  null
  null
  null
  null
  null
  null
  null
  null
  null
  null
  null
  null
  null
  null
  null
  null
  null
  null
  null
  null
  null
  null
  null
  null
  null
  null
  null
  null
  null
  null
  null
  null
  null
  null
  null
  null
  null
  null
  null
  null
  null
  null
  null
  null
  null
  null
  null
  null
  null
  null
  null
  null
  null
  null
  null

MapReduce怎样实现TopK