介绍
今天就跟大家聊聊有关MapReduce怎样实现TopK,可能很多人都不太了解,为了让大家更加了解,小编给大家总结了以下内容,希望大家根据这篇文章可以有所收获。
需求:HTTP日志文件中全部流量前80%的记录,按流量值降序排序
输出格式:<phoneNB, sum_flow>
HTTP日志文件:
1363157985066,, 13726230503, 00 - fd - 07 - a4 - 72 - b8: CMCC 120.196.100.82 i02.c.aliimg.com , 24, 27, 2481, 200, 24681 1363157995052,,13826544101,5 c-0e-8b-c7-f1-e0: CMCC 120.197.40.4 ,, 4, 0, 264, 0, 200 1363157991076,,13926435656,20-10-7A-28-CC-0A: CMCC 120.196.100.99 ,, 2, 4, 132, 1512, 200 1363154400022,,13926251106,5 c-0e-8b-8b-b1-50: CMCC 120.197.40.4 ,, 4, 0, 240, 0, 200 1363157993044,,18211575961,94 - 71 ac - cd - e6 18: CMCC-EASY 120.196.100.99 iface.qiyi.com 视频网站,15,12,1527,2106,200 1363157995074,,84138413,5 c-0e-8b-8c-e8-20:7daysinn 120.197.40.4 122.72.52.12 , 20, 16, 4116, 1432, 200 1363157993055,,13560439658,C4-17-FE-BA-DE-D9: CMCC 120.196.100.99 ,, 18岁,15,1116,954,200 1363157995033,,15920133257,5 c-0e-8b-c7-ba-20: CMCC 120.197.40.4 sug.so.360.cn 信息安全,20,20,3156,2936,200 1363157983019,1363157983019,68 - a1 - b7 - 03 - 07 - b1: CMCC-EASY 120.196.100.82 ,, 4, 0, 240, 0, 200 1363157984041,,13660577991,5 c - 0 - e - 8 - b - 92 - 5 - c - 20: CMCC-EASY 120.197.40.4 s19.cnzz.com 站点统计,24,9,6960,690,200 1363157973098,,15013685858,5 c - 0 - e - 8 b - c7 - f7 - 90: CMCC 120.197.40.4 rank.ie.sogou.com 搜索引擎,28日,27,3659,3538,200 1363157986029,,15989002119,e8 - 99 - c4 - 4 - e - 93 - e0: CMCC-EASY 120.196.100.99 www.umeng.com 站点统计,3,3,1938,180,200 1363157992093,,13560439658,C4-17-FE-BA-DE-D9: CMCC 120.196.100.99 ,, 15, 9, 918, 4938, 200 1363157986041,,13480253104,5 c - 0 - e - 8 b - c7 - fc - 80: CMCC-EASY 120.197.40.4 ,, 3, 3, 180, 180, 200 1363157984040,,13602846565,5 c-0e-8b-8b-b6-00: CMCC 120.197.40.4 2052. 
flash3-http.qq.com 综合门户,15,12,1938,2910,200 1363157995093,,13922314466,00-FD-07-A2-EC-BA: CMCC 120.196.100.82 img.qfc.cn , 12, 12, 3008, 3720, 200 1363157982040,,13502468823,5 c-0a-5b-6a-0b-d4: CMCC-EASY 120.196.100.99 y0.ifengimg.com 综合门户,57,102,7335,200,110349 1363157986072,,84,18320173382 - 25 - db - 4 - f - 10 - 1 - a: CMCC-EASY 120.196.100.99 input.shouji.sogou.com 搜索引擎,21日,18,9531,2412,200 1363157990043,,13925057413,00 - 1 - f - 64 - e1 - e6 - 9 a: CMCC 120.196.100.55 t3.baidu.com 搜索引擎,69,63,11058,200,48243 1363157988072,,13760778710,00-FD-07-A4-7B-08: CMCC 120.196.100.82 ,, 2, 2, 120, 120, 200 1363157985066,,13726238888,00 - fd - 07 - a4 - 72 - b8: CMCC 120.196.100.82 i02.c.aliimg.com , 24, 27, 2481, 200, 24681 1363157993055,,13560436666,C4-17-FE-BA-DE-D9: CMCC 120.196.100.99 ,, 18岁,15,1116,954,200
定义FlowBean类,该类实现WritableComparable接口
实现write()、readFields()、compareTo()方法
public class FlowBean implements WritableComparable, { ,private String  phoneNB;//,号码 ,private long  up_flow;//,上行流量 ,private long  down_flow;//,下行流量 ,private long  sum_flow;//,总流量 ,public String  getPhoneNB (), { return 才能;phoneNB; ,} ,public void  setPhoneNB (String phoneNB), { 时间=this.phoneNB 才能;phoneNB; ,} ,public long  getUp_flow (), { return 才能;up_flow; ,} ,public void  setUp_flow (long up_flow), { 时间=this.up_flow 才能;up_flow; ,} ,public long  getDown_flow (), { return 才能;down_flow; ,} ,public void  setDown_flow (long down_flow), { 时间=this.down_flow 才能;down_flow; ,} ,public long  getSum_flow (), { return 才能;sum_flow; ,} ,public void  setSum_flow (long sum_flow), { 时间=this.sum_flow 才能;sum_flow; ,} ,public FlowBean (), { ,} ,public FlowBean (String phoneNB, long up_flow,, long down_flow), { 时间=this.phoneNB 才能;phoneNB; 时间=this.up_flow 才能;up_flow; null null null null null null null null null null null null null null null null null null null null null null null null null null null null null null null null null null null null null null null null null null null null null null null null null null null null null null null null null null null null null null null null null null null null null null null null null null null null null null null null null null null null null null null null null null null null null null null null null null MapReduce怎样实现TopK