Mapreduce中:
Shuffle(洗牌)阶段位于 map 和 reduce 之间,在这一阶段可以自定义排序、自定义分区和自定义分组。
MapReduce 中,map 输出的数据是键值对,默认使用 HashPartitioner 对 map 输出的数据进行分区;
除默认分区方式外,还可以使用 TotalOrderPartitioner 配合采样器进行全排序分区,可用的采样器有以下三种:
RandomSampler<文本,Text>, sampler =, ,,,,,,,,,,,,,,,,,,,,new InputSampler.RandomSampler<文本,Text>(0.5, 3000年,10); IntervalSampler<文本,Text>, sampler2 =, ,,,,,,,,,,,,,,,,,,,new InputSampler.IntervalSampler<文本,Text> (0.333, 10); SplitSampler<文本,Text>, sampler3 =, ,,,,,,,,,,,,,,,,,,,new InputSampler.SplitSampler<文本,Text> (reduceNumber);
实现和细节
public class TotalSortMR {, ,,,,, ,,,@SuppressWarnings(弃用) ,,,public static int runTotalSortJob (String [], args), throws Exception {,, ,,,,,,,Path inputPath =, new 路径(args [0]);,, ,,,,,,,Path outputPath =, new 路径(args [1]);,, ,,,,,,,Path partitionFile =, new 路径(args [2]);,, ,,,,,,,int reduceNumber =, Integer.parseInt (args [3]),,, ,,,,,,,,, ,,,,,,,//三种采样器 ,,,,,,,RandomSampler<文本,Text>, sampler =, new InputSampler.RandomSampler<文本,Text>(1), 3000年,10); ,,,,,,,IntervalSampler<文本,Text>, sampler2 =, new InputSampler.IntervalSampler<文本,Text> (0.333, 10); ,,,,,,,SplitSampler<文本,Text>, sampler3 =, new InputSampler.SplitSampler<文本,Text> (reduceNumber); ,,,,,,, ,,,,,,,//任务初始化 ,,,,,,,Configuration conf =, new 配置();,, ,,,,,,,Job Job =, Job.getInstance(设计); ,,,,,,, ,,,,,,,job.setJobName(“总排序”);,, ,,,,,,,job.setJarByClass (TotalSortMR.class);,, ,,,,,,,job.setInputFormatClass (KeyValueTextInputFormat.class);,, ,,,,,,,job.setMapOutputKeyClass (Text.class);,, ,,,,,,,job.setMapOutputValueClass (Text.class);,, ,,,,,,,job.setNumReduceTasks (reduceNumber);,, ,,,,,,,//设置所有的分区类 ,,,,,,,job.setPartitionerClass (TotalOrderPartitioner.class);,, ,,,,,,,//分区类参考的分区文件 ,,,,,,,TotalOrderPartitioner.setPartitionFile(参看,partitionFile);,, ,,,,,,,//分区使用哪种采样器 ,,,,,,,InputSampler.writePartitionFile(工作,,取样器),, ,,,,,,, ,,,,,,,//工作的输入和输出路径 ,,,,,,,FileInputFormat.setInputPaths(工作,,inputPath);,, ,,,,,,,FileOutputFormat.setOutputPath(工作,,outputPath);,, ,,,,,,,outputPath.getFileSystem(参看)delete (outputPath,,真的),,, ,,,,,,,,, ,,,,,,,return job.waitForCompletion(真正的)?,0,:,1; ,,,},, ,,,,, ,,,public static void main (String [], args), throws 异常{,, ,,,,,,,system . exit (runTotalSortJob (args)),,, ,,,} null学习日志——瓜分者和采样器