MRwordCount


    MapReduce code walkthrough: WordCount.

    Create a new project and import the following jars:

    C:\hadoop-2.7.1\share\hadoop\common (common jars):
        hadoop-common-2.7.1, plus everything under C:\hadoop-2.7.1\share\hadoop\common\lib
    C:\hadoop-2.7.1\share\hadoop\hdfs (HDFS jars):
        hadoop-hdfs-2.7.1, plus everything under C:\hadoop-2.7.1\share\hadoop\hdfs\lib
    C:\hadoop-2.7.1\share\hadoop\mapreduce (MapReduce jars):
        all standalone jars directly under C:\hadoop-2.7.1\share\hadoop\mapreduce (hadoop-mapreduce-examples-2.7.1 can be left out), plus everything under C:\hadoop-2.7.1\share\hadoop\mapreduce\lib
    C:\hadoop-2.7.1\share\hadoop\yarn (YARN jars):
        everything directly under C:\hadoop-2.7.1\share\hadoop\yarn except the server jars, plus everything under C:\hadoop-2.7.1\share\hadoop\yarn\lib

    Note on packaging: if you export a plain jar (rather than a runnable jar), the jars in the project's lib folder will not be bundled into it.

    The code is split into three parts: a Mapper, a Reducer, and a Driver.

    Mapper:

    package cn.bigdata.wordcount;

    import java.io.IOException;

    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Mapper;

    public class WordCountMapper extends Mapper<LongWritable, Text, Text, IntWritable> {

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // key is the byte offset of the line, value is one line of the input file
            String line = value.toString();
            String[] words = line.split(" ");
            for (String word : words) {
                // emit <word, 1> for every word on the line
                context.write(new Text(word), new IntWritable(1));
            }
        }
    }

    Reducer:

    package cn.bigdata.wordcount;

    import java.io.IOException;

    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Reducer;

    public class WordCountReducer extends Reducer<Text, IntWritable, Text, LongWritable> {

        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            // sum up all the 1s emitted by the mappers for this word
            long count = 0;
            for (IntWritable value : values) {
                count += value.get();
            }
            context.write(key, new LongWritable(count));
        }
    }

    Driver:

    package cn.bigdata.wordcount;

    import java.io.IOException;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
    import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

    public class WordCountDriver {

        public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
            Configuration conf = new Configuration();
            Job wcJob = Job.getInstance(conf);

            // the jar that contains the job classes
            wcJob.setJarByClass(WordCountDriver.class);

            wcJob.setMapperClass(WordCountMapper.class);
            wcJob.setReducerClass(WordCountReducer.class);

            // map output types
            wcJob.setMapOutputKeyClass(Text.class);
            wcJob.setMapOutputValueClass(IntWritable.class);
            // final (reduce) output types
            wcJob.setOutputKeyClass(Text.class);
            wcJob.setOutputValueClass(LongWritable.class);

            wcJob.setInputFormatClass(TextInputFormat.class);
            FileInputFormat.setInputPaths(wcJob, new Path("hdfs://hadoop-01-server:9000/wordcount/srcdata"));

            wcJob.setOutputFormatClass(TextOutputFormat.class);
            FileOutputFormat.setOutputPath(wcJob, new Path("hdfs://hadoop-01-server:9000/wordcount/outdata"));

            boolean res = wcJob.waitForCompletion(true);
            System.exit(res ? 0 : 1);
        }
    }
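    The driver above hardcodes the HDFS input and output paths. As a minimal sketch (not part of the original post), the same job could instead take those paths from the command line; the class name WordCountDriverArgs and the use of args[0]/args[1] below are illustrative assumptions, not part of the original code:

    package cn.bigdata.wordcount;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

    // Hypothetical variant of WordCountDriver: paths come from the command line.
    public class WordCountDriverArgs {

        public static void main(String[] args) throws Exception {
            Configuration conf = new Configuration();
            Job wcJob = Job.getInstance(conf);

            wcJob.setJarByClass(WordCountDriverArgs.class);
            wcJob.setMapperClass(WordCountMapper.class);
            wcJob.setReducerClass(WordCountReducer.class);

            // Map output types differ from the final (reduce) output types,
            // so both pairs have to be declared explicitly.
            wcJob.setMapOutputKeyClass(Text.class);
            wcJob.setMapOutputValueClass(IntWritable.class);
            wcJob.setOutputKeyClass(Text.class);
            wcJob.setOutputValueClass(LongWritable.class);

            // Input and output locations are taken from the command line instead of
            // being compiled into the class (args[0] = input, args[1] = output).
            FileInputFormat.setInputPaths(wcJob, new Path(args[0]));
            FileOutputFormat.setOutputPath(wcJob, new Path(args[1]));

            System.exit(wcJob.waitForCompletion(true) ? 0 : 1);
        }
    }

    With such a driver packaged into a jar, the job would typically be submitted from a machine with Hadoop configured using something like: hadoop jar wordcount.jar cn.bigdata.wordcount.WordCountDriverArgs <input path> <output path> (jar name and paths here are placeholders).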
