
I. Experiment Objectives

  • Master basic MapReduce programming through hands-on practice.
  • Use MapReduce to solve common data-processing problems, including deduplicated counting and data sorting.

II. Experiment Platform

  • Operating system: Linux
  • Hadoop version: 2.6.0

III. Experiment Procedure

(1) Count the distinct users that visit each website.

Note: in the file userurl_20150911, fields are separated by "\t"; the user's phone number is in column 3 and the website's main domain is in column 17.

The version below actually just records how many times each user visited a website...

The user's phone number and the visited website are combined into a single key; the rest is essentially WordCount with a few tweaks, so I won't go into detail.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

public class mr {

    public static class TokenizerMapper extends Mapper<Object, Text, Text, IntWritable> {
        public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
            String str = value.toString();
            if (str != null && !str.equals("")) {
                String[] fa = str.split("\t");
                // Column 3 holds the phone number, column 17 the main domain.
                String per = fa[2], web = fa[16];
                // Composite key: phone number + website.
                context.write(new Text(per + '\t' + web), new IntWritable(1));
            }
        }
    }

    public static class Reduce extends Reducer<Text, IntWritable, Text, IntWritable> {
        public void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
            // Counts the records per (user, website) pair, i.e. how many times
            // this user visited this website.
            int sum = 0;
            for (IntWritable t : values) sum++;
            context.write(key, new IntWritable(sum));
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        conf.set("fs.hdfs.impl", "org.apache.hadoop.hdfs.DistributedFileSystem");
        FileSystem fs = FileSystem.get(conf);
        Job job = Job.getInstance(conf, "merge and duplicate removal");
        job.setJarByClass(mr.class);
        job.setMapperClass(TokenizerMapper.class);
        job.setReducerClass(Reduce.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        job.setInputFormatClass(TextInputFormat.class);
        FileInputFormat.addInputPath(job, new Path("hdfs://localhost:9000/input4/userurl_20150911"));
        FileOutputFormat.setOutputPath(job, new Path("hdfs://localhost:9000/output4"));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}


The correct approach: first run a job that maps with the user and website together as the key, so the reduce emits each deduplicated (user, website) pair; then write a second program that counts how many distinct users each website has.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

public class mr {

    public static class TokenizerMapper extends Mapper<Object, Text, Text, IntWritable> {
        public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
            String str = value.toString();
            if (str != null && !str.equals("")) {
                String[] fa = str.split("\t");
                String per = fa[2], web = fa[16];
                context.write(new Text(per + '\t' + web), new IntWritable(1));
            }
        }
    }

    public static class Reduce extends Reducer<Text, IntWritable, Text, Text> {
        public void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
            String[] tmp = key.toString().split("\t");
            String per = tmp[0], web = tmp[1];
            // A user may visit the same website many times, so the deduplicated
            // (user, website) pairs still need a second MapReduce job for counting.
            context.write(new Text(per + '\t' + web), new Text(""));
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        conf.set("fs.hdfs.impl", "org.apache.hadoop.hdfs.DistributedFileSystem");
        FileSystem fs = FileSystem.get(conf);
        Job job = Job.getInstance(conf, "merge and duplicate removal");
        job.setJarByClass(mr.class);
        job.setMapperClass(TokenizerMapper.class);
        job.setReducerClass(Reduce.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        job.setInputFormatClass(TextInputFormat.class);
        FileInputFormat.addInputPath(job, new Path("hdfs://localhost:9000/input4/userurl_20150911"));
        FileOutputFormat.setOutputPath(job, new Path("hdfs://localhost:9000/output4"));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

Then comes the second program, which is essentially WordCount.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

public class mr {

    public static class TokenizerMapper extends Mapper<Object, Text, Text, IntWritable> {
        public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
            String str = value.toString();
            if (str != null && !str.equals("")) {
                // Input lines come from the first job's output: "phone\twebsite".
                String[] fa = str.split("\t");
                String web = fa[1];
                context.write(new Text(web), new IntWritable(1));
            }
        }
    }

    public static class Reduce extends Reducer<Text, IntWritable, Text, IntWritable> {
        public void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
            // Each incoming record is one distinct user of this website.
            int sum = 0;
            for (IntWritable t : values) sum += t.get();
            context.write(key, new IntWritable(sum));
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        conf.set("fs.hdfs.impl", "org.apache.hadoop.hdfs.DistributedFileSystem");
        FileSystem fs = FileSystem.get(conf);
        Job job = Job.getInstance(conf, "merge and duplicate removal");
        job.setJarByClass(mr.class);
        job.setMapperClass(TokenizerMapper.class);
        job.setReducerClass(Reduce.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        job.setInputFormatClass(TextInputFormat.class);
        FileInputFormat.addInputPath(job, new Path("hdfs://localhost:9000/output4/part-r-00000"));
        FileOutputFormat.setOutputPath(job, new Path("hdfs://localhost:9000/output4/outnext"));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

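As a side note, the same deduplicated count can also be done in a single job by keying the map output on the website and collecting each site's phone numbers into a HashSet in the reducer. The sketch below is only an illustration under the same field layout as above (phone number in fa[2], domain in fa[16]); the class name DistinctUserCount and the output path output4_single are made up for this example.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;
import java.util.HashSet;

// Illustrative single-job variant: website -> number of distinct users.
public class DistinctUserCount {

    public static class SiteUserMapper extends Mapper<Object, Text, Text, Text> {
        public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
            String str = value.toString();
            if (str == null || str.isEmpty()) return;
            String[] fa = str.split("\t");
            // Emit (website, phone number) so all users of one site meet in one reduce call.
            context.write(new Text(fa[16]), new Text(fa[2]));
        }
    }

    public static class DistinctReducer extends Reducer<Text, Text, Text, IntWritable> {
        public void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
            // Deduplicate in memory; fine as long as one site's user set fits in the heap.
            HashSet<String> users = new HashSet<>();
            for (Text t : values) users.add(t.toString());
            context.write(key, new IntWritable(users.size()));
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "distinct user count");
        job.setJarByClass(DistinctUserCount.class);
        job.setMapperClass(SiteUserMapper.class);
        job.setReducerClass(DistinctReducer.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(job, new Path("hdfs://localhost:9000/input4/userurl_20150911"));
        FileOutputFormat.setOutputPath(job, new Path("hdfs://localhost:9000/output4_single"));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

The two-job version above never has to hold a whole site's user set in memory, so it scales better when a single website has a very large number of distinct users.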

(2) For each user, sum the uplink and downlink traffic across that user's records, then output the results in sorted order.

Note: uplink traffic is in column 25 and downlink traffic is in column 26.

Here, "different records of the same user" is interpreted as the individual records of the same user visiting the same website, so traffic is aggregated per (user, website) pair.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

public class mr {

    public static class TokenizerMapper extends Mapper<Object, Text, Text, IntWritable> {
        public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
            String str = value.toString();
            if (str != null && !str.equals("")) {
                String[] fa = str.split("\t");
                // Columns 25 and 26 hold the uplink and downlink traffic.
                String per = fa[2], web = fa[16], up = fa[24], down = fa[25];
                int use = Integer.parseInt(up) + Integer.parseInt(down);
                context.write(new Text(per + '\t' + web), new IntWritable(use));
            }
        }
    }

    public static class Reduce extends Reducer<Text, IntWritable, Text, IntWritable> {
        public void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
            // Sum the traffic of all records belonging to this (user, website) pair.
            int sum = 0;
            for (IntWritable t : values) {
                sum += t.get();
            }
            context.write(key, new IntWritable(sum));
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        conf.set("fs.hdfs.impl", "org.apache.hadoop.hdfs.DistributedFileSystem");
        FileSystem fs = FileSystem.get(conf);
        Job job = Job.getInstance(conf, "merge and duplicate removal");
        job.setJarByClass(mr.class);
        job.setMapperClass(TokenizerMapper.class);
        job.setReducerClass(Reduce.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        job.setInputFormatClass(TextInputFormat.class);
        FileInputFormat.addInputPath(job, new Path("hdfs://localhost:9000/input4/userurl_20150911"));
        FileOutputFormat.setOutputPath(job, new Path("hdfs://localhost:9000/output5"));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

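Note that the job above only produces the per-(user, website) traffic sums; the sorting that the task asks for is not yet implemented. A common way to add it is a second job that swaps key and value, so the shuffle itself orders the records by their IntWritable total (ascending by default). The sketch below is only an illustration; the class name TrafficSort and the sorted output path are made up, and it assumes the summing job wrote lines of the form phone\twebsite\ttotal.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

// Illustrative second job: sort the (user, website, total traffic) records by traffic.
public class TrafficSort {

    public static class SwapMapper extends Mapper<Object, Text, IntWritable, Text> {
        public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
            // Input lines look like "phone\twebsite\ttotal"; make the total the key
            // so the shuffle sorts records by traffic.
            String[] fa = value.toString().split("\t");
            int total = Integer.parseInt(fa[2]);
            context.write(new IntWritable(total), new Text(fa[0] + '\t' + fa[1]));
        }
    }

    public static class SwapReducer extends Reducer<IntWritable, Text, Text, IntWritable> {
        public void reduce(IntWritable key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
            // Several (user, website) pairs may share the same total; emit them all.
            for (Text t : values) context.write(t, key);
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "traffic sort");
        job.setJarByClass(TrafficSort.class);
        job.setMapperClass(SwapMapper.class);
        job.setReducerClass(SwapReducer.class);
        job.setMapOutputKeyClass(IntWritable.class);
        job.setMapOutputValueClass(Text.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        job.setNumReduceTasks(1);  // a single reducer yields one globally sorted output file
        FileInputFormat.addInputPath(job, new Path("hdfs://localhost:9000/output5/part-r-00000"));
        FileOutputFormat.setOutputPath(job, new Path("hdfs://localhost:9000/output5/sorted"));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

For descending order, a custom comparator could be supplied via job.setSortComparatorClass(); with a single reducer the output file is then globally sorted by total traffic.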

IV. Summary and Problems

  1. What did I learn to use, and for what?

MapReduce pattern design; I gained a deeper grasp of MapReduce program design.

  2. What problems came up during the experiment, and how were they solved?

None so far.

  3. What problems remain unsolved, and what might be causing them?

I tried to pass data between the Map and Reduce functions through a custom data class of my own, but have not gotten it to work yet.
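
Regarding that last point: passing a structured value from Map to Reduce mainly requires a class that implements org.apache.hadoop.io.Writable (or WritableComparable when it is also used as a key), with a no-argument constructor and write/readFields methods that serialize the fields in the same order. A minimal sketch, using a hypothetical FlowWritable for the uplink/downlink traffic, might look like this:

import org.apache.hadoop.io.Writable;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

// Hypothetical custom value type carrying uplink and downlink traffic.
public class FlowWritable implements Writable {
    private long upFlow;
    private long downFlow;

    // Hadoop instantiates Writables via reflection, so a no-arg constructor is required.
    public FlowWritable() {}

    public FlowWritable(long upFlow, long downFlow) {
        this.upFlow = upFlow;
        this.downFlow = downFlow;
    }

    @Override
    public void write(DataOutput out) throws IOException {
        // Serialization order here must match readFields() exactly.
        out.writeLong(upFlow);
        out.writeLong(downFlow);
    }

    @Override
    public void readFields(DataInput in) throws IOException {
        upFlow = in.readLong();
        downFlow = in.readLong();
    }

    public long getTotal() {
        return upFlow + downFlow;
    }

    @Override
    public String toString() {
        // Controls how the value is rendered by TextOutputFormat.
        return upFlow + "\t" + downFlow + "\t" + getTotal();
    }
}

With such a class, the mapper would be declared as Mapper<Object, Text, Text, FlowWritable> and the job would call job.setMapOutputValueClass(FlowWritable.class). Forgetting the no-argument constructor or mismatching the write/readFields order are the usual reasons a custom Writable fails at run time.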