What is the summarization pattern?
Summarization groups similar data together and then performs follow-up analysis such as statistical calculation, index generation, or simple counting.
What kinds of summarization patterns are there?
(1) Numerical summarization, (2) inverted index summarization, (3) counting with counters, and so on.
Numerical Summarization
When a MapReduce job writes its result, it fails with a "file already exists" error if the output directory is already there. Deleting that directory by hand before every run is tedious, so it is handy to write a small utility class that does the cleanup.
import java.io.File;

/**
 * @Author bluesnail95
 * @Date 2019/7/14 23:31
 * @Description Utility for removing a local output directory before a job runs
 */
public class FileUtil {

    /**
     * Delete a file or directory (recursively).
     * @param fileName path of the file or directory to delete
     */
    public static void deleteFile(String fileName) {
        File file = new File(fileName);
        if (!file.exists()) {
            return;
        }
        if (file.isFile()) {
            file.delete();
        } else if (file.isDirectory()) {
            File[] fileList = file.listFiles();
            for (int i = 0; i < fileList.length; i++) {
                // recurse so that nested directories are emptied before deletion
                deleteFile(fileList[i].getAbsolutePath());
            }
            file.delete();
        }
    }
}
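Note that java.io.File only touches the local file system. If the job output lives on HDFS (the usual case), the path has to be removed through Hadoop's FileSystem API instead. A minimal sketch, with the class name HdfsFileUtil chosen here purely for illustration:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class HdfsFileUtil {

    /**
     * Delete an HDFS path (recursively) if it exists, so the job can recreate it.
     */
    public static void deleteIfExists(Configuration conf, String pathName) throws Exception {
        FileSystem fs = FileSystem.get(conf);
        Path path = new Path(pathName);
        if (fs.exists(path)) {
            // second argument = true requests a recursive delete of the whole directory
            fs.delete(path, true);
        }
    }
}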
1. Maximum / minimum / count
import org.apache.hadoop.io.Writable;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.Date;

/**
 * @Author bluesnail95
 * @Date 2019/7/14 9:57
 * @Description
 */
public class MinMaxCountData implements Writable {

    // creation date of the record
    private Date createDate;
    // user identifier
    private String userId;

    private final static SimpleDateFormat frmt = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS");

    public MinMaxCountData() {}

    public MinMaxCountData(Date createDate, String userId) {
        this.createDate = createDate;
        this.userId = userId;
    }

    public Date getCreateDate() { return createDate; }

    public void setCreateDate(Date createDate) { this.createDate = createDate; }

    public String getUserId() { return userId; }

    public void setUserId(String userId) { this.userId = userId; }

    public void write(DataOutput dataOutput) throws IOException {
        dataOutput.writeLong(createDate.getTime());
        // writeUTF records the string length, so readFields can restore it reliably
        dataOutput.writeUTF(userId);
    }

    public void readFields(DataInput dataInput) throws IOException {
        createDate = new Date(dataInput.readLong());
        userId = dataInput.readUTF();
    }

    @Override
    public String toString() {
        return "MinMaxCountData{" +
                "createDate=" + createDate +
                ", userId='" + userId + '\'' +
                '}';
    }
}
import org.apache.hadoop.io.Writable;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.Date;

/**
 * @Author bluesnail95
 * @Date 2019/7/14 9:36
 * @Description
 */
public class MinMaxCountTuple implements Writable {

    // earliest date
    private Date min = null;
    // latest date
    private Date max = null;
    // number of records
    private long count = 0;

    private final static SimpleDateFormat frmt = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS");

    public Date getMin() { return min; }

    public void setMin(Date min) { this.min = min; }

    public Date getMax() { return max; }

    public void setMax(Date max) { this.max = max; }

    public long getCount() { return count; }

    public void setCount(long count) { this.count = count; }

    public void write(DataOutput dataOutput) throws IOException {
        dataOutput.writeLong(min.getTime());
        dataOutput.writeLong(max.getTime());
        dataOutput.writeLong(count);
    }

    public void readFields(DataInput dataInput) throws IOException {
        min = new Date(dataInput.readLong());
        max = new Date(dataInput.readLong());
        count = dataInput.readLong();
    }

    public String toString() {
        return frmt.format(min) + "\t" + frmt.format(max) + "\t" + count;
    }
}
import file.FileUtil;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.codehaus.jackson.map.ObjectMapper;

import java.text.SimpleDateFormat;

/**
 * @Author bluesnail95
 * @Date 2019/7/14 10:02
 * @Description
 */
public class MinMaxCountMain {

    public static class MinMaxCountMapper extends Mapper<Object, Text, Text, MinMaxCountTuple> {
        private Text userId = new Text();
        private MinMaxCountTuple minMaxCountTuple = new MinMaxCountTuple();
        private final static SimpleDateFormat frmt = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS");

        public void map(Object key, Text value, Context context) {
            try {
                // each input line is one JSON record
                ObjectMapper objectMapper = new ObjectMapper();
                objectMapper.setDateFormat(frmt);
                MinMaxCountData minMaxCountData = objectMapper.readValue(value.toString(), MinMaxCountData.class);
                // for a single record, min and max are both its creation date
                minMaxCountTuple.setCount(1);
                minMaxCountTuple.setMin(minMaxCountData.getCreateDate());
                minMaxCountTuple.setMax(minMaxCountData.getCreateDate());
                userId.set(minMaxCountData.getUserId());
                context.write(userId, minMaxCountTuple);
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    }

    public static class MinMaxCountReducer extends Reducer<Text, MinMaxCountTuple, Text, MinMaxCountTuple> {
        private MinMaxCountTuple minMaxCountTuple = new MinMaxCountTuple();

        public void reduce(Text key, Iterable<MinMaxCountTuple> values, Context context) {
            try {
                // the tuple is reused across keys, so reset it for every key
                minMaxCountTuple.setMin(null);
                minMaxCountTuple.setMax(null);
                long sum = 0;
                for (MinMaxCountTuple value : values) {
                    if (minMaxCountTuple.getMin() == null || value.getMin().compareTo(minMaxCountTuple.getMin()) < 0) {
                        minMaxCountTuple.setMin(value.getMin());
                    }
                    if (minMaxCountTuple.getMax() == null || value.getMax().compareTo(minMaxCountTuple.getMax()) > 0) {
                        minMaxCountTuple.setMax(value.getMax());
                    }
                    sum += value.getCount();
                }
                minMaxCountTuple.setCount(sum);
                context.write(key, minMaxCountTuple);
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    }

    public static void main(String[] args) {
        Configuration conf = new Configuration();
        try {
            Job job = Job.getInstance(conf, "NumericalSummarization:MinMaxCount");
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(MinMaxCountTuple.class);
            job.setJarByClass(MinMaxCountMain.class);
            job.setMapperClass(MinMaxCountMapper.class);
            job.setCombinerClass(MinMaxCountReducer.class);
            job.setReducerClass(MinMaxCountReducer.class);
            FileInputFormat.addInputPath(job, new Path(args[0]));
            // clear the previous output directory, otherwise the job fails
            FileUtil.deleteFile(args[1]);
            FileOutputFormat.setOutputPath(job, new Path(args[1]));
            System.exit(job.waitForCompletion(true) ? 0 : 1);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
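As a quick sanity check, here is the kind of input the mapper expects and the output the job produces, using made-up sample records (the JSON field names must match the MinMaxCountData properties):

Input (one JSON object per line):
{"createDate":"2019-07-01T10:15:30.000","userId":"u001"}
{"createDate":"2019-07-03T08:00:00.000","userId":"u001"}
{"createDate":"2019-07-02T12:30:00.000","userId":"u002"}

Output (userId, earliest date, latest date, count, tab separated):
u001	2019-07-01T10:15:30.000	2019-07-03T08:00:00.000	2
u002	2019-07-02T12:30:00.000	2019-07-02T12:30:00.000	1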
2. Average
import org.apache.hadoop.io.Writable;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.Date;

/**
 * @Author bluesnail95
 * @Date 2019/7/14 21:51
 * @Description
 */
public class CountAverageData implements Writable {

    // creation date of the record
    private Date creationDate;
    // comment text
    private String text;

    private final static SimpleDateFormat frmt = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS");

    public CountAverageData() {}

    public CountAverageData(Date creationDate, String text) {
        this.creationDate = creationDate;
        this.text = text;
    }

    public Date getCreationDate() { return creationDate; }

    public void setCreationDate(Date creationDate) { this.creationDate = creationDate; }

    public String getText() { return text; }

    public void setText(String text) { this.text = text; }

    public void write(DataOutput dataOutput) throws IOException {
        // store the date as a timestamp and the text with a length prefix,
        // so both fields can be read back unambiguously
        dataOutput.writeLong(creationDate.getTime());
        dataOutput.writeUTF(text);
    }

    public void readFields(DataInput dataInput) throws IOException {
        creationDate = new Date(dataInput.readLong());
        text = dataInput.readUTF();
    }

    @Override
    public String toString() {
        return "{" +
                "creationDate=" + creationDate +
                ", text='" + text + '\'' +
                '}';
    }
}
import org.apache.hadoop.io.Writable;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

/**
 * @Author bluesnail95
 * @Date 2019/7/14 21:36
 * @Description
 */
public class CountAverageTuple implements Writable {

    // number of records
    private long count;
    // average comment length
    private float average;

    public long getCount() { return count; }

    public void setCount(long count) { this.count = count; }

    public float getAverage() { return average; }

    public void setAverage(float average) { this.average = average; }

    public void write(DataOutput dataOutput) throws IOException {
        dataOutput.writeLong(count);
        dataOutput.writeFloat(average);
    }

    public void readFields(DataInput dataInput) throws IOException {
        count = dataInput.readLong();
        average = dataInput.readFloat();
    }

    @Override
    public String toString() {
        return "{" +
                "count=" + count +
                ", average=" + average +
                '}';
    }
}
import file.FileUtil;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.codehaus.jackson.map.ObjectMapper;

import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.Date;

/**
 * @Author bluesnail95
 * @Date 2019/7/14 21:40
 * @Description
 */
public class CountAverageMain {

    public static class CountAverageMapper extends Mapper<Object, Text, IntWritable, CountAverageTuple> {
        private IntWritable outHour = new IntWritable();
        private CountAverageTuple countAverageTuple = new CountAverageTuple();
        private final static SimpleDateFormat frmt = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS");

        public void map(Object key, Text value, Context context) {
            ObjectMapper objectMapper = new ObjectMapper();
            objectMapper.setDateFormat(frmt);
            try {
                CountAverageData countAverageData = objectMapper.readValue(value.toString(), CountAverageData.class);
                // key: the hour of day the comment was created in
                Calendar calendar = Calendar.getInstance();
                Date creationDate = countAverageData.getCreationDate();
                calendar.setTime(creationDate);
                int hour = calendar.get(Calendar.HOUR_OF_DAY);
                outHour.set(hour);
                // value: a single record counts as 1 with "average" equal to its own text length
                countAverageTuple.setAverage(countAverageData.getText().length());
                countAverageTuple.setCount(1);
                context.write(outHour, countAverageTuple);
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    }

    public static class CountAverageReducer extends Reducer<IntWritable, CountAverageTuple, IntWritable, CountAverageTuple> {
        private CountAverageTuple result = new CountAverageTuple();

        public void reduce(IntWritable key, Iterable<CountAverageTuple> values, Context context) {
            float sum = 0;
            long count = 0;
            // recombine partial (count, average) pairs as a count-weighted mean
            for (CountAverageTuple countAverageTuple : values) {
                count += countAverageTuple.getCount();
                sum += countAverageTuple.getCount() * countAverageTuple.getAverage();
            }
            result.setAverage(sum / count);
            result.setCount(count);
            try {
                context.write(key, result);
            } catch (IOException e) {
                e.printStackTrace();
            } catch (InterruptedException e) {
                e.printStackTrace();
            }
        }
    }

    public static void main(String[] args) {
        Configuration configuration = new Configuration();
        try {
            Job job = Job.getInstance(configuration, "CountAverage");
            job.setJarByClass(CountAverageMain.class);
            job.setMapperClass(CountAverageMapper.class);
            job.setCombinerClass(CountAverageReducer.class);
            job.setReducerClass(CountAverageReducer.class);
            job.setOutputKeyClass(IntWritable.class);
            job.setOutputValueClass(CountAverageTuple.class);
            FileInputFormat.addInputPath(job, new Path(args[0]));
            FileUtil.deleteFile(args[1]);
            FileOutputFormat.setOutputPath(job, new Path(args[1]));
            System.exit(job.waitForCompletion(true) ? 0 : 1);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
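The reducer can double as the combiner only because each value carries the pair (count, average) rather than a bare average: partial results are merged as a count-weighted mean, which gives the same answer no matter how the records are grouped. With made-up numbers, if one combiner emits (count = 2, average = 4) for the lengths 3 and 5, and another emits (count = 1, average = 10), the final reducer computes (2 * 4 + 1 * 10) / (2 + 1) = 6, exactly the average of 3, 5, and 10. Emitting an average without its count would make this recombination impossible.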
3. Median and standard deviation
import org.apache.hadoop.io.Writable;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

/**
 * @Author bluesnail95
 * @Date 2019/7/16 6:33
 * @Description
 */
public class MedianStdDevTuple implements Writable {

    private float median;
    private float stdDev;

    public float getMedian() { return median; }

    public void setMedian(float median) { this.median = median; }

    public float getStdDev() { return stdDev; }

    public void setStdDev(float stdDev) { this.stdDev = stdDev; }

    public void write(DataOutput dataOutput) throws IOException {
        dataOutput.writeFloat(median);
        dataOutput.writeFloat(stdDev);
    }

    public void readFields(DataInput dataInput) throws IOException {
        median = dataInput.readFloat();
        stdDev = dataInput.readFloat();
    }

    @Override
    public String toString() {
        return "{" +
                "median=" + median +
                ", stdDev=" + stdDev +
                '}';
    }
}
import file.FileUtil;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.codehaus.jackson.map.ObjectMapper;

import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Collections;
import java.util.Date;

/**
 * @Author bluesnail95
 * @Date 2019/7/16 6:18
 * @Description
 */
public class MedianStdDevMain {

    private final static SimpleDateFormat frmt = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS");

    public static class MedianStdDevMapper extends Mapper<Object, Text, IntWritable, IntWritable> {
        private IntWritable outhour = new IntWritable();
        private IntWritable outlength = new IntWritable();

        public void map(Object key, Text value, Context context) {
            ObjectMapper objectMapper = new ObjectMapper();
            objectMapper.setDateFormat(frmt);
            try {
                CountAverageData countAverageData = objectMapper.readValue(value.toString(), CountAverageData.class);
                Date creationDate = countAverageData.getCreationDate();
                Calendar calendar = Calendar.getInstance();
                calendar.setTime(creationDate);
                int hour = calendar.get(Calendar.HOUR_OF_DAY);
                int length = countAverageData.getText().length();
                // key: hour of day, value: comment length
                outhour.set(hour);
                outlength.set(length);
                context.write(outhour, outlength);
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    }

    public static class MadianStdDevReducer extends Reducer<IntWritable, IntWritable, IntWritable, MedianStdDevTuple> {
        private ArrayList<Float> lengths = new ArrayList<Float>();
        private MedianStdDevTuple medianStdDevTuple = new MedianStdDevTuple();

        public void reduce(IntWritable key, Iterable<IntWritable> values, Context context) {
            int sum = 0;
            int count = 0;
            // the list is reused across keys, so clear it for every key
            lengths.clear();
            try {
                for (IntWritable value : values) {
                    sum += value.get();
                    count++;
                    lengths.add((float) value.get());
                }
                // sort the lengths
                Collections.sort(lengths);
                // median: middle element for an odd count, mean of the two middle elements for an even count
                if (count % 2 == 0) {
                    medianStdDevTuple.setMedian((lengths.get(count / 2 - 1) + lengths.get(count / 2)) / 2.0f);
                } else {
                    medianStdDevTuple.setMedian(lengths.get(count / 2));
                }
                // mean (use float division, otherwise the fraction is truncated)
                float mean = (float) sum / count;
                float sumOfSquare = 0.0f;
                // sample standard deviation
                for (Float value : lengths) {
                    sumOfSquare += (value - mean) * (value - mean);
                }
                if (count == 1) {
                    medianStdDevTuple.setStdDev(0);
                } else {
                    medianStdDevTuple.setStdDev((float) Math.sqrt(sumOfSquare / (count - 1)));
                }
                context.write(key, medianStdDevTuple);
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    }

    public static void main(String[] args) {
        Configuration configuration = new Configuration();
        try {
            Job job = Job.getInstance(configuration, "MedianStdDev");
            job.setJarByClass(MedianStdDevMain.class);
            job.setMapperClass(MedianStdDevMapper.class);
            job.setReducerClass(MadianStdDevReducer.class);
            job.setOutputKeyClass(IntWritable.class);
            job.setOutputValueClass(IntWritable.class);
            FileInputFormat.addInputPath(job, new Path(args[0]));
            FileUtil.deleteFile(args[1]);
            FileOutputFormat.setOutputPath(job, new Path(args[1]));
            System.exit(job.waitForCompletion(true) ? 0 : 1);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
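To make the reducer's arithmetic concrete with made-up lengths: for the values 3, 5, 8, 10 the sorted list has an even count, so the median is (5 + 8) / 2 = 6.5; the mean is also 6.5, the squared deviations sum to 12.25 + 2.25 + 2.25 + 12.25 = 29, and the sample standard deviation is sqrt(29 / 3) ≈ 3.11. Note also that this reducer keeps every comment length for a key in memory, which becomes a problem when a single hour contains a very large number of records; that limitation is what the upgraded version below addresses.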
Upgraded version. Instead of buffering every individual length, the mapper emits a SortedMapWritable that maps the comment length to a count of 1. A combiner merges these maps, and the reducer only has to keep one (length, count) entry per distinct length, which uses far less memory and shifts part of the work to the map side.
import file.FileUtil;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.codehaus.jackson.map.ObjectMapper;

import java.text.SimpleDateFormat;
import java.util.*;

/**
 * @Author bluesnail95
 * @Date 2019/7/16 21:28
 * @Description
 */
public class MedianStdDevUpgradeMain {

    private final static SimpleDateFormat frmt = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS");

    public static class MedianStdDevUpgradeMapper extends Mapper<Object, Text, IntWritable, SortedMapWritable> {
        private IntWritable outHour = new IntWritable();
        private LongWritable one = new LongWritable(1);
        private IntWritable lengths = new IntWritable();

        public void map(Object key, Text value, Context context) {
            ObjectMapper objectMapper = new ObjectMapper();
            objectMapper.setDateFormat(frmt);
            try {
                CountAverageData countAverageData = objectMapper.readValue(value.toString(), CountAverageData.class);
                Date creationDate = countAverageData.getCreationDate();
                Calendar calendar = Calendar.getInstance();
                calendar.setTime(creationDate);
                outHour.set(calendar.get(Calendar.HOUR_OF_DAY));
                lengths.set(countAverageData.getText().length());
                // emit a one-entry map: comment length -> 1
                SortedMapWritable sortedMapWritable = new SortedMapWritable();
                sortedMapWritable.put(lengths, one);
                context.write(outHour, sortedMapWritable);
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    }

    public static class MedianStdDevUpgradeCombiner extends Reducer<IntWritable, SortedMapWritable, IntWritable, SortedMapWritable> {

        protected void reduce(IntWritable key, Iterable<SortedMapWritable> values, Context context) {
            // merge the per-record maps into a single map of length -> count
            SortedMapWritable outValue = new SortedMapWritable();
            try {
                for (SortedMapWritable sortedMapWritable : values) {
                    Set<Map.Entry<WritableComparable, Writable>> set = sortedMapWritable.entrySet();
                    Iterator<Map.Entry<WritableComparable, Writable>> iterator = set.iterator();
                    while (iterator.hasNext()) {
                        Map.Entry<WritableComparable, Writable> entry = iterator.next();
                        LongWritable count = (LongWritable) outValue.get(entry.getKey());
                        if (count != null) {
                            count.set(count.get() + ((LongWritable) entry.getValue()).get());
                            outValue.put(entry.getKey(), count);
                        } else {
                            outValue.put(entry.getKey(), new LongWritable(((LongWritable) entry.getValue()).get()));
                        }
                    }
                }
                context.write(key, outValue);
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    }

    public static class MedianStdDevUpgradeReducer extends Reducer<IntWritable, SortedMapWritable, IntWritable, MedianStdDevTuple> {
        private MedianStdDevTuple medianStdDevTuple = new MedianStdDevTuple();
        private TreeMap<Integer, Long> lengthCounts = new TreeMap<Integer, Long>();

        public void reduce(IntWritable key, Iterable<SortedMapWritable> values, Context context) {
            float sum = 0;
            long total = 0;
            lengthCounts.clear();
            medianStdDevTuple.setStdDev(0);
            medianStdDevTuple.setMedian(0);
            // collapse the incoming maps into a sorted map of length -> total count
            for (SortedMapWritable sortedMapWritable : values) {
                Set<Map.Entry<WritableComparable, Writable>> set = sortedMapWritable.entrySet();
                Iterator<Map.Entry<WritableComparable, Writable>> iterator = set.iterator();
                while (iterator.hasNext()) {
                    Map.Entry<WritableComparable, Writable> writableEntry = iterator.next();
                    int length = ((IntWritable) writableEntry.getKey()).get();
                    long count = ((LongWritable) writableEntry.getValue()).get();
                    total += count;
                    sum += count * length;
                    Long sortedCount = lengthCounts.get(length);
                    if (sortedCount == null) {
                        lengthCounts.put(length, count);
                    } else {
                        lengthCounts.put(length, count + sortedCount);
                    }
                }
            }
            // walk the sorted counts until the running total passes the middle position
            long medianIndex = total / 2;
            long previousCount = 0;
            long count = 0;
            long prevKey = 0;
            for (Map.Entry<Integer, Long> entry : lengthCounts.entrySet()) {
                count = previousCount + entry.getValue();
                if (previousCount <= medianIndex && medianIndex < count) {
                    if (total % 2 == 0 && previousCount == medianIndex) {
                        medianStdDevTuple.setMedian((entry.getKey() + prevKey) / 2.0f);
                    } else {
                        medianStdDevTuple.setMedian(entry.getKey());
                    }
                    break;
                }
                previousCount = count;
                prevKey = entry.getKey();
            }
            // sample standard deviation computed from the (length, count) pairs
            float mean = sum / total;
            float sumOfSquares = 0.0f;
            for (Map.Entry<Integer, Long> entry : lengthCounts.entrySet()) {
                sumOfSquares += (entry.getKey() - mean) * (entry.getKey() - mean) * entry.getValue();
            }
            if (total == 1) {
                medianStdDevTuple.setStdDev(0);
            } else {
                medianStdDevTuple.setStdDev((float) Math.sqrt(sumOfSquares / (total - 1)));
            }
            try {
                context.write(key, medianStdDevTuple);
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    }

    public static void main(String[] args) {
        Configuration configuration = new Configuration();
        try {
            Job job = Job.getInstance(configuration, "MedianStdDevUpgrade");
            job.setJarByClass(MedianStdDevUpgradeMain.class);
            job.setMapperClass(MedianStdDevUpgradeMapper.class);
            job.setCombinerClass(MedianStdDevUpgradeCombiner.class);
            job.setReducerClass(MedianStdDevUpgradeReducer.class);
            job.setOutputKeyClass(IntWritable.class);
            job.setOutputValueClass(SortedMapWritable.class);
            FileInputFormat.addInputPath(job, new Path(args[0]));
            FileUtil.deleteFile(args[1]);
            FileOutputFormat.setOutputPath(job, new Path(args[1]));
            System.exit(job.waitForCompletion(true) ? 0 : 1);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
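Finally, a sketch of how such a job might be submitted, with the jar name and paths chosen purely as placeholders:

# package the classes first; jar name and paths below are examples only
hadoop jar summarization-patterns.jar MedianStdDevUpgradeMain /input/comments /output/median-stddev

The first argument becomes the input path (args[0]) and the second the output path (args[1]), which the code clears before FileOutputFormat writes to it.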