This article walks through a Hadoop MapReduce WordCount example. Hopefully it offers a useful reference for developers tackling this kind of problem; follow along to work through the steps.
1. Install the Hadoop runtime environment on Windows
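Running Hadoop locally on Windows usually means unpacking a Hadoop distribution, pointing the HADOOP_HOME environment variable at it, and placing winutils.exe under its bin directory. As a quick sanity check, and purely as a sketch that is not part of the original setup, a few lines of Java can confirm the environment is wired up:

import java.io.File;

public class HadoopEnvCheck {
    public static void main(String[] args) {
        // HADOOP_HOME should point at the unpacked Hadoop directory
        String home = System.getenv("HADOOP_HOME");
        if (home == null) {
            System.err.println("HADOOP_HOME is not set");
            return;
        }
        // The Windows local runner needs bin\winutils.exe under HADOOP_HOME
        File winutils = new File(home, "bin" + File.separator + "winutils.exe");
        System.out.println(winutils.exists()
                ? "Found " + winutils
                : "Missing " + winutils);
    }
}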
2. Create a Maven project in IDEA
Import the dependency:
<dependencies>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-client</artifactId>
        <version>3.1.3</version>
    </dependency>
</dependencies>
3. Create the WordCountMap class, extending Mapper
package com.lhh.textlhh;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

/**
 * @program: lhh
 * @description: WordCount mapper
 * @author: 华仔
 * @create: 2021-04-25 14:50
 */
public class WordCountMap extends Mapper<LongWritable, Text, Text, IntWritable> {
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // Read one line of input
        String values = value.toString();
        // \s+ matches one or more whitespace characters
        String[] split = values.split("\\s+");
        // Emit (word, 1) for each token
        for (String s : split) {
            context.write(new Text(s), new IntWritable(1));
        }
    }
}
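To make the map step concrete, here is a plain-Java sketch of the same tokenization (the sample line is made up for illustration); for each token the mapper emits the pair (word, 1):

public class SplitDemo {
    public static void main(String[] args) {
        // Same regex as the mapper: \s+ matches one or more whitespace characters
        String line = "hello  world\thello"; // hypothetical input line
        for (String word : line.split("\\s+")) {
            // The mapper would call context.write(new Text(word), new IntWritable(1))
            System.out.println(word + "\t1");
        }
    }
}

Running this prints hello, world, and hello, each paired with 1, which is exactly the stream of key/value pairs the shuffle phase then groups by word.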
4. Create the WordCountReduce class, extending Reducer
package com.lhh.textlhh;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

/**
 * @program: lhh
 * @description: WordCount reducer
 * @author: 华仔
 * @create: 2021-04-25 14:51
 */
public class WordCountReduce extends Reducer<Text, IntWritable, Text, IntWritable> {
    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
        // Sum the counts for this word
        int i = 0;
        for (IntWritable value : values) {
            i += value.get(); // each value is a partial count (1 from the mapper)
        }
        // Emit (word, total count)
        context.write(key, new IntWritable(i));
    }
}
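Because the reducer simply sums its input values, the same class can also be registered as a combiner, which pre-aggregates counts on the map side and cuts shuffle traffic. This is optional and not part of the original tutorial; if desired, add one line in the driver:

// Optional: reuse the reducer as a map-side combiner
job.setCombinerClass(WordCountReduce.class);

Note this is only safe because reduce accumulates value.get() rather than assuming every value equals 1.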
5. Create the main driver class
package com.lhh.textlhh;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

/**
 * @program: lhh
 * @description: WordCount driver
 * @author: 华仔
 * @create: 2021-04-25 14:51
 */
public class WordCountJob {
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        // Create the job
        Configuration configured = new Configuration();
        Job job = Job.getInstance(configured);
        // Set the mapper and reducer classes
        job.setMapperClass(WordCountMap.class);
        job.setReducerClass(WordCountReduce.class);
        // Set the map output key/value types
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        // Set the final output key/value types
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        // Set the input and output paths (the output directory must not already exist)
        FileInputFormat.setInputPaths(job, new Path("D:/test/in/123.txt"));
        FileOutputFormat.setOutputPath(job, new Path("D:/test/out/test123"));
        // Wait for the job to finish
        boolean b = job.waitForCompletion(true);
        if (b) {
            System.out.println("Job completed");
        } else {
            System.out.println("Job failed");
        }
        // Exit with the job status
        System.exit(b ? 0 : 1);
    }
}
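As a concrete illustration (the file contents here are hypothetical), if D:/test/in/123.txt contained:

hello world
hello hadoop

the job would write D:/test/out/test123/part-r-00000 with one tab-separated word count per line, keys sorted lexicographically:

hadoop	1
hello	2
world	1

Keep in mind that FileOutputFormat refuses to overwrite an existing directory, so delete D:/test/out/test123 before re-running the job.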
That wraps up this article on the Hadoop MapReduce WordCount example; we hope it proves helpful to fellow developers!