本文主要是介绍MapReduce(五):表关联,希望对大家解决编程问题提供一定的参考价值,需要的开发者们随着小编来一起学习吧!
给出一张 child-parent(孩子-父母)关系表,每行格式为"孩子 父母",以空格分隔:
Tom Lucy
Tom Jack
Jone Lucy
Jone Jack
Lucy Mary
Lucy Ben
Jack Alice
Jack Jesse
输出对应的 grandchild-grandparent(孙辈-祖辈)关系表,每行格式为"孙辈 祖辈"(实际输出的行顺序可能不同):
Tom Alice
Tom Jesse
Jone Alice
Jone Jesse
Tom Mary
Tom Ben
Jone Mary
Jone Ben
源代码如下:
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Pattern;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;public class SingleTable_Join {public static class STMapper extends Mapper<Object,Text,Text,Text>{private Text k1=new Text();private Text v1=new Text();private Text k2=new Text();private Text v2=new Text();@Overrideprotected void map(Object key, Text value,Context context)throws IOException, InterruptedException {String line=value.toString();String[]str=line.split(" ");String a=str[0];String b=str[1];String relationType=new String();//建立左表relationType="1";k1.set(b);v1.set(relationType+" "+a);context.write(k1, v1);//建立右表relationType="2";k2.set(a);v2.set(relationType+" "+b);context.write(k2, v2);}}public static class STReducer extends Reducer<Text,Text,Text,Text>{private Text k=new Text();private Text v=new Text();@Overrideprotected void reduce(Text key, Iterable<Text> values,Context context)throws IOException, InterruptedException {String[]grandChild=new String[10];String[]grandParent=new String[10];int grandChildNum=0;int grandParentNum=0;for(Text t:values){String str=t.toString();String[] s=str.split(" ");if(s.length==0)continue;if(s[0].compareTo("1")==0)grandChild[grandChildNum++]=s[1];elsegrandParent[grandParentNum++]=s[1];}if(grandChildNum!=0&&grandParentNum!=0){for(int i=0;i<grandChildNum;i++){k.set(grandChild[i]);for(int j=0;j<grandParentNum;j++){v.set(grandParent[j]);context.write(k,v);}}}}}public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {Configuration conf=new Configuration();Job job=Job.getInstance(conf,"SingleTable_Join");job.setJarByClass(SingleTable_Join.class);job.setMapperClass(STMapper.class);job.setMapOutputKeyClass(Text.class);job.setMapOutputValueClass(Text.class);job.setReducerClass(STReducer.class);job.setOutputKeyClass(Text.class);job.setOutputValueClass(Text.class);FileInputFormat.setInputPaths(job, new Path(args[0]));FileOutputFormat.setOutputPath(job, new Path(args[1]));System.exit(job.waitForCompletion(true)? 0:1);}}
注:多表关联思路也一样!
这篇关于MapReduce(五):表关联的文章就介绍到这儿,希望对各位程序员有所帮助!