Academic Integrity: tutoring, explanations, and feedback — we don’t complete graded work or submit on a student’s behalf.

Example The poems data is a total of six poems Change the code to count how many

ID: 3735242 • Letter: E

Question

Example The poems data is a total of six poems

Change the code to count how many words of different lengths are in the poems data. Note that for this problem, the map function should output a pair with the format <length, 1>, with examples such as <Length 3, 1> or <Length 8, 1>, or similar. The reduce function should stay the same because it will only count the different lengths. To get the length of a string "theWord" in Java you need to use the function theWord.length(). For more information, visit: http://docs.oracle.com/javase/1.5.0/docs/api/java/lang/String.html. Submit your code in txt and results as a screenshot.

Explanation / Answer

Here, just provide the path where the data resides; the job will read all the files in that path.

Input : args[0]

output: args[1]

// Code - I have added comments to the code to explain how it works.

import java.io.IOException;
import java.util.StringTokenizer;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.fs.Path;

public class WordLenghtCount
{
   // Here the input to the Mapper will be key -> (ByteOffset(lineNumber by default)) value -> Single Line
   // and Output K,V pair will be Text('Length vlenght') and value will be Count IntWritable
   public static class MyMapper extends Mapper<LongWritable,Text,Text,IntWritable> {
       public void map(LongWritable key, Text value,Context context) throws IOException,InterruptedException{
               String line = value.toString();
               StringTokenizer tokenizer = new StringTokenizer(line);
               while (tokenizer.hasMoreTokens()) {
                   String str = tokenizer.nextToken();
                   String res = "Length "+str.length();
                  
                   context.write(new Text(res), new IntWritable(1));
                   // output : <Length 8,1>
               }
   }
}

public static class MyReducer extends Reducer<Text,IntWritable,Text,IntWritable> {
   // Input : <Lenght 8,(1,1,1,1,1,1,1,1)>
   public void reduce(Text key, Iterable<IntWritable> values,Context context) throws IOException,InterruptedException {
       int sum=0;
       for(IntWritable x: values)
       {
           sum+=x.get();
       }
       context.write(key, new IntWritable(sum));
   }
}

public static void main(String[] args) throws Exception {

Configuration conf= new Configuration();
Job job = new Job(conf,"My Word length Count Program");
job.setJarByClass(WordLenghtCount.class);
job.setMapperClass(MyMapper.class);
job.setReducerClass(MyReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);
job.setInputFormatClass(TextInputFormat.class);
job.setOutputFormatClass(TextOutputFormat.class);
Path outputPath = new Path(args[1]);
// here we take the input path and output path as Inline Arguments
FileInputFormat.addInputPath(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job, new Path(args[1]));

// if output dir already exists delete that
outputPath.getFileSystem(conf).delete(outputPath);

System.exit(job.waitForCompletion(true) ? 0 : 1);
}
}

Hire Me For All Your Tutoring Needs
Integrity-first tutoring: clear explanations, guidance, and feedback.
Chat Now And Get Quote