- 主页 > 生活百科 > >
package cn.mr.dedup;
import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
/**
 * Map stage of the de-duplication job.
 *
 * <p>Each input record's value is emitted unchanged as the output key, paired
 * with a {@link NullWritable} placeholder value. The incoming key is ignored.
 */
public class DedupMapper extends Mapper<LongWritable, Text, Text, NullWritable> {

    // Example input pairs: <0,2018-3-3 c> <11,2018-3-4 d>

    /**
     * Emits the record's value as the output key with a null value.
     *
     * @param key     input key of the record (unused here)
     * @param value   the record content; becomes the output key
     * @param context task context used to emit the output pair
     * @throws IOException          if writing the output pair fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // Write the incoming Text directly: no shared static holder is needed,
        // which avoids mutable state shared between mapper instances.
        context.write(value, NullWritable.get());
    }

    // Example output pairs: <2018-3-3 c,null> <2018-3-4 d,null>
}
package cn.mr.dedup;
import java.io.IOException;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
public class DedupReducer extends Reducer<Text, NullWritable, Text, NullWritable> {
// <2018-3-3 c,null> <2018-3-4 d,null><2018-3-4 d,null>
@Override
protected void reduce(Text key, Iterable<NullWritable> values, Context context)
【Hadoop数据去重】throws IOException, InterruptedException {
context.write(key, NullWritable.get());
}
}
package cn.mr.dedup;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class DedupRunner {
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
Configuration conf = new Configuration();
Job job = Job.getInstance(conf);
job.setJarByClass(DedupRunner.class);
job.setMapperClass(DedupMapper.class);
job.setReducerClass(DedupReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(NullWritable.class);
FileInputFormat.setInputPaths(job, new Path("D:\Dedup\input"));
// 指定处理完成之后的结果所保存的位置
FileOutputFormat.setOutputPath(job, new Path("D:\Dedup\output"));
job.waitForCompletion(true);
}
}
推荐阅读
-
北青网综合|平台叫卖明星祝福视频产业链:最高售价二十万元
-
饭局上同事为巴结领导,让你买单,用3招让他自作自受
-
工作|打防结合,潮南公安机关对盗抢等侵财犯罪围追堵截
-
【瓣瓣同心·协同五年谱新篇】天津滨海中关村科技园打造京津
-
琉璃娃娃的梦|吴冠希徘徊在十字路口:为张常宁会选择京苏,为事业将会首选广东
-
-
-
显卡|RTX 3080暴跌只剩零头?下单其实是中圈套
-
-
锐龙|AYANEO国产掌机年底升级锐龙6000:小厂太难了
-
|瓜帅执教曼城以来18次单场进球5+,该数据同时期英超第1
-
距退休年龄还有五年,要求提前离岗,退出实职,合乎规定吗
-
-
-
一花一世界一叶一菩提是什么意思 一花一世界一叶一菩提
-
-
ACI环保资讯|17人受伤!5月以来全国发生10起污水事故21人死亡,辽宁阜新一污水厂爆炸
-
『菊子美食记TB』开学了,推荐这12种适合孩子吃的早餐饼,省时好做,每天不重样
-
懂车周报|明年乘用车销量预计增7%,长城打造高端品牌“灵魂”
-