MRwordCount


    MapReduce code walkthrough: WordCount.

    Create a new project and import the following jars:

    C:\hadoop-2.7.1\share\hadoop\common (common jars):
        hadoop-common-2.7.1, plus everything under C:\hadoop-2.7.1\share\hadoop\common\lib
    C:\hadoop-2.7.1\share\hadoop\hdfs (HDFS jars):
        hadoop-hdfs-2.7.1, plus everything under C:\hadoop-2.7.1\share\hadoop\hdfs\lib
    C:\hadoop-2.7.1\share\hadoop\mapreduce (MapReduce jars):
        all standalone jars directly under C:\hadoop-2.7.1\share\hadoop\mapreduce (hadoop-mapreduce-examples-2.7.1 can be left out), plus everything under C:\hadoop-2.7.1\share\hadoop\mapreduce\lib
    C:\hadoop-2.7.1\share\hadoop\yarn (YARN jars):
        everything directly under C:\hadoop-2.7.1\share\hadoop\yarn except the server jars, plus everything under C:\hadoop-2.7.1\share\hadoop\yarn\lib

    Note on packaging: if you export a plain jar (rather than a runnable jar), the jars in the project's lib folder will not be bundled into it.

    The code is split into three parts: a Mapper, a Reducer, and a Driver.

    Mapper:

    package cn.bigdata.wordcount;

    import java.io.IOException;

    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Mapper;

    public class WordCountMapper extends Mapper<LongWritable, Text, Text, IntWritable> {

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // key is the byte offset of the line, value is one line of the input file
            String line = value.toString();
            String[] words = line.split(" ");
            for (String word : words) {
                // emit <word, 1> for every word on the line
                context.write(new Text(word), new IntWritable(1));
            }
        }
    }

    Reducer:

    package cn.bigdata.wordcount;

    import java.io.IOException;

    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Reducer;

    public class WordCountReducer extends Reducer<Text, IntWritable, Text, LongWritable> {

        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            // sum up all the 1s emitted by the mappers for this word
            long count = 0;
            for (IntWritable value : values) {
                count += value.get();
            }
            context.write(key, new LongWritable(count));
        }
    }

    Driver:

    package cn.bigdata.wordcount;

    import java.io.IOException;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
    import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

    public class WordCountDriver {

        public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
            Configuration conf = new Configuration();
            Job wcJob = Job.getInstance(conf);

            // the jar that contains the job classes
            wcJob.setJarByClass(WordCountDriver.class);

            wcJob.setMapperClass(WordCountMapper.class);
            wcJob.setReducerClass(WordCountReducer.class);

            // map output types
            wcJob.setMapOutputKeyClass(Text.class);
            wcJob.setMapOutputValueClass(IntWritable.class);
            // final (reduce) output types
            wcJob.setOutputKeyClass(Text.class);
            wcJob.setOutputValueClass(LongWritable.class);

            wcJob.setInputFormatClass(TextInputFormat.class);
            FileInputFormat.setInputPaths(wcJob, new Path("hdfs://hadoop-01-server:9000/wordcount/srcdata"));

            wcJob.setOutputFormatClass(TextOutputFormat.class);
            FileOutputFormat.setOutputPath(wcJob, new Path("hdfs://hadoop-01-server:9000/wordcount/outdata"));

            boolean res = wcJob.waitForCompletion(true);
            System.exit(res ? 0 : 1);
        }
    }
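    The driver above hardcodes the HDFS input and output paths. As a minimal sketch (not part of the original post), the same job could instead take those paths from the command line; the class name WordCountDriverArgs and the use of args[0]/args[1] below are illustrative assumptions, not part of the original code:

    package cn.bigdata.wordcount;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

    // Hypothetical variant of WordCountDriver: paths come from the command line.
    public class WordCountDriverArgs {

        public static void main(String[] args) throws Exception {
            Configuration conf = new Configuration();
            Job wcJob = Job.getInstance(conf);

            wcJob.setJarByClass(WordCountDriverArgs.class);
            wcJob.setMapperClass(WordCountMapper.class);
            wcJob.setReducerClass(WordCountReducer.class);

            // Map output types differ from the final (reduce) output types,
            // so both pairs have to be declared explicitly.
            wcJob.setMapOutputKeyClass(Text.class);
            wcJob.setMapOutputValueClass(IntWritable.class);
            wcJob.setOutputKeyClass(Text.class);
            wcJob.setOutputValueClass(LongWritable.class);

            // Input and output locations are taken from the command line instead of
            // being compiled into the class (args[0] = input, args[1] = output).
            FileInputFormat.setInputPaths(wcJob, new Path(args[0]));
            FileOutputFormat.setOutputPath(wcJob, new Path(args[1]));

            System.exit(wcJob.waitForCompletion(true) ? 0 : 1);
        }
    }

    With such a driver packaged into a jar, the job would typically be submitted from a machine with Hadoop configured using something like: hadoop jar wordcount.jar cn.bigdata.wordcount.WordCountDriverArgs <input path> <output path> (jar name and paths here are placeholders).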
