需求:单表关联问题。从文件中孩子和父母的关系挖掘出孙子和爷奶关系
样例输入:child-parent.txt(每行格式为“孩子 父母”,两个字段以空格分隔)
xiaoming daxiong
daxiong alice
daxiong jack
输出:xiaoming alice
xiaoming jack
分析设计:
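mapper部分设计:
1、逐行读取输入,将每一行拆分为 child 和 parent 两个字段。
2、左表:以 parent 为 key,输出 value 为 "1 child"(标记 1 表示该值是孙辈候选)。
3、右表:以 child 为 key,输出 value 为 "2 parent"(标记 2 表示该值是祖辈候选)。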
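reduce部分设计:
4、对同一个 key,按 value 的标记把记录分别放入 grandChild 列表(标记 1)和 grandParent 列表(标记 2)。
5、对 grandChild 和 grandParent 两个列表求笛卡尔积,输出每一对(孙子, 祖父母)。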
程序部分:
SingletonTableJoinMapper类
package com.cn.singletonTableJoin;import java.io.IOException; import java.util.StringTokenizer;import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Mapper;public class SingletonTableJoinMapper extends Mapper
SingletonTableJoinReduce类:
package com.cn.singletonTableJoin;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

/**
 * Reduce side of the single-table (self) join.
 *
 * <p>Each incoming value is a space-separated record {@code "<tag> <name>"}.
 * Records tagged {@code "1"} are collected as grandchildren, records tagged
 * {@code "2"} as grandparents, and the Cartesian product of the two lists is
 * written out as (grandchild, grandparent) pairs.
 *
 * <p>NOTE(review): this assumes the mapper keys each record by the person who
 * links the two generations and emits exactly this tag format; confirm against
 * SingletonTableJoinMapper.
 */
public class SingletonTableJoinReduce extends Reducer<Text, Text, Text, Text> {

    /** Tag of a value that names a grandchild candidate. */
    private static final String GRANDCHILD_TAG = "1";

    /** Tag of a value that names a grandparent candidate. */
    private static final String GRANDPARENT_TAG = "2";

    /**
     * Joins the grandchild and grandparent records that share {@code key}.
     *
     * @param key     the join key shared by all values in this call
     * @param values  tagged records of the form {@code "<tag> <name>"}
     * @param context sink for the (grandchild, grandparent) output pairs
     * @throws IOException          if writing an output pair fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void reduce(Text key, Iterable<Text> values, Context context)
            throws IOException, InterruptedException {
        List<String> grandChildren = new ArrayList<String>();
        List<String> grandParents = new ArrayList<String>();

        for (Text value : values) {
            // Convert to String right away: only the text content is kept, never the Text object.
            String[] record = value.toString().split(" ");
            // A record without both a tag and a name is malformed; skip it
            // instead of failing on record[1].
            if (record.length < 2) {
                continue;
            }
            if (GRANDCHILD_TAG.equals(record[0])) {
                grandChildren.add(record[1]);
            } else if (GRANDPARENT_TAG.equals(record[0])) {
                grandParents.add(record[1]);
            }
        }

        // Cartesian product; nothing is written when either list is empty.
        for (String grandChild : grandChildren) {
            for (String grandParent : grandParents) {
                context.write(new Text(grandChild), new Text(grandParent));
            }
        }
    }
}
SingletonTableJoin类
package com.cn.singletonTableJoin;import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; import org.apache.hadoop.util.GenericOptionsParser;/*** 单表关联* @author root**/ public class SingletonTableJoin {public static void main(String[] args) throws Exception {Configuration conf = new Configuration();String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();if (otherArgs.length != 2) {System.err.println("Usage: SingletonTableJoin ");System.exit(2);}//创建一个jobJob job = new Job(conf, "SingletonTableJoin");job.setJarByClass(SingletonTableJoin.class);//设置文件的输入输出路径FileInputFormat.addInputPath(job, new Path(otherArgs[0]));FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));//设置mapper和reduce处理类job.setMapperClass(SingletonTableJoinMapper.class);job.setReducerClass(SingletonTableJoinReduce.class);//设置输出key-value数据类型job.setOutputKeyClass(Text.class);job.setOutputValueClass(Text.class);//提交作业并等待它完成System.exit(job.waitForCompletion(true) ? 0 : 1);} }
把总结当成一种习惯。