How to access value of variable in Driver class which has been declared and modified inside Mapper class?

+2 votes
583 views

I declared a variable and incremented/modified it inside a Mapper class. Now I need to use the modified value of that variable in the Driver class. I declared a static variable inside the Mapper class, and its modified value is visible in the Driver class when I run the code in the Eclipse IDE. But after exporting the code as a runnable JAR from Eclipse and running it as "$ hadoop jar filename.jar input output", the modified value is not reflected in the Driver class (it stays 0).

posted May 12, 2015 by Sudhakar Singh


1 Answer

+2 votes

Better options than using a static variable are, IMO, the following. (A static variable only appears to work in Eclipse because local mode runs everything in a single JVM; on a real cluster each map task runs in its own JVM, so the driver never sees the updates.)

One option is to use Counters; check that API. We use them for values that are numeric and that we need in the driver once the job finishes. You can create your own custom counters too.
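For instance, a minimal counter sketch (the class and counter names here are illustrative, not from any real codebase):

import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class ExampleMapper extends Mapper<Object, Text, Text, IntWritable>
{
    // a custom counter group, defined as an enum
    public static enum ExampleCounters { PROCESSED }

    @Override
    public void map(Object key, Text value, Context context) throws IOException, InterruptedException
    {
        // bump the counter once per record; the framework aggregates
        // these across all map tasks automatically
        context.getCounter(ExampleCounters.PROCESSED).increment(1);
    }
}

// In the driver, after job.waitForCompletion(true):
//     long processed = job.getCounters()
//             .findCounter(ExampleMapper.ExampleCounters.PROCESSED).getValue();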

Another option, if you need more than just one value or your info is complicated, is to have each mapper or reducer task write its data out as a file (e.g. in the cleanup method) and then read those files once the job finishes.
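For the side-file route, a minimal sketch of a cleanup() method, assuming an HDFS directory named side-output (the directory name and the written value are illustrative):

// Inside the mapper (or reducer) class; needs these imports:
// org.apache.hadoop.fs.FileSystem, org.apache.hadoop.fs.FSDataOutputStream, org.apache.hadoop.fs.Path
@Override
protected void cleanup(Context context) throws IOException, InterruptedException
{
    FileSystem fs = FileSystem.get(context.getConfiguration());
    // one file per task attempt, so concurrent tasks never clobber each other
    Path sideFile = new Path("side-output/" + context.getTaskAttemptID());
    try (FSDataOutputStream out = fs.create(sideFile))
    {
        out.writeUTF(Long.toString(myTaskLocalValue)); // placeholder for whatever per-task state you need
    }
}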

In both of these approaches you will have to collate and merge, according to your own logic, all the copies of the value/info that you end up with. The number of copies will equal the number of mappers or reducers (whichever side keeps the value) used in the job.
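The matching driver-side collation, again with the illustrative side-output path; how you merge the per-task values is up to your own logic:

// Driver side, after the job completes (conf is the driver's Configuration);
// also needs org.apache.hadoop.fs.FileStatus and org.apache.hadoop.fs.FSDataInputStream
FileSystem fs = FileSystem.get(conf);
for (FileStatus status : fs.listStatus(new Path("side-output")))
{
    try (FSDataInputStream in = fs.open(status.getPath()))
    {
        String taskValue = in.readUTF();
        // merge taskValue into your aggregate here
    }
}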

You can also set a value in the job configuration and read it back once the job is over. Note, though, that configuration values flow one way, from the driver to the tasks; changes a task makes to its own Configuration are not propagated back to the driver.
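For completeness, the configuration route looks like this (it is what the code below already does with its min_value and k parameters):

// Driver side, before submitting the job:
conf.set("min_value", "200");

// Task side, e.g. in Mapper.setup():
String min = context.getConfiguration().get("min_value");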

Here are some examples of how to use custom counters:
http://www.ashishpaliwal.com/blog/2012/05/hadoop-recipe-using-custom-java-counters/

answer May 12, 2015 by Varuna Magar
Thanks Vaishali, you really got the problem. Can you please give some links or examples, like the one for counters, for the cleanup and job configuration approaches?
Here is my code snippet.

// Driver Class
import java.util.ArrayList;
import java.util.Date;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.log4j.Logger;

public class MyDriver
{
    private static Logger log = Logger.getLogger(MyDriver.class);

    public static void main(String[] args) throws Exception
    {
        Configuration conf = new Configuration();

        //    String min = "200";
        String min = null;
        List<String> otherArgs = new ArrayList<String>();
        for (int i = 0; i < args.length; ++i)
        {
            if ("-min".equals(args[i]))
                min = args[++i];
            else
                otherArgs.add(args[i]);
        }

        // for recording start and end time of job
        Date date;
        long startTime, endTime;
        date = new Date(); startTime = date.getTime();    // starting timer

        // Path declarations (assumed; the originals were set up in the
        // elided phases below): input is the first non-option argument,
        // the output directory is the second.
        Path inputPath = new Path(otherArgs.get(0));
        Path cachePath;
        Path outputPath;

        // Phase-1
        /* */

        // Phase-2
        int iteration = 1;
        /* */

        // Phase-k >2
        iteration++;
        long counter;
        int cachePathCount = iteration;
        int passCount = 1;
        do
        {
            Configuration conf3 = new Configuration();
            conf3.set("min_value", min);
            conf3.setInt("k", iteration+1);

            log.info("Started counting from "+(iteration+1)+"- ...........");
            Job job3 = Job.getInstance(conf3, "MyDriver: Iteration"+(iteration+1));
            job3.setJarByClass(MyDriver.class);

            job3.setMapperClass(TrieMapper.class);
            job3.setCombinerClass(MyCombiner.class);
            job3.setPartitionerClass(MyPartitioner.class);
            job3.setReducerClass(MyReducer.class);

            job3.setNumReduceTasks(6);    // how to set number of reducers dynamically?

            job3.setOutputKeyClass(Text.class);
            job3.setOutputValueClass(IntWritable.class);

            //    cachePath = new Path("hdfs://hadoopmaster:9000/user/hduser/output/fis-"+cachePathCount+"/part-r-0000"+(passCount-1));
            cachePath = new Path(otherArgs.get(1)+"/fis-"+cachePathCount+"/part-r-0000"+(passCount-1));
            job3.addCacheFile(cachePath.toUri());

            /* input is same for all jobs */
            //    outputPath = new Path("hdfs://hadoopmaster:9000/user/hduser/output/fis-"+(iteration+1));
            outputPath = new Path(otherArgs.get(1)+"/fis-"+(iteration+1));

            //FileInputFormat.addInputPath(job3, inputPath);
            FileInputFormat.setInputPaths(job3, inputPath);
            FileOutputFormat.setOutputPath(job3, outputPath);

            // NOTE: TrieMapper.passCount below is a static field read in the
            // driver's JVM; on a cluster the map tasks run in separate JVMs,
            // so this value stays 0 (the problem discussed above).
            if (job3.waitForCompletion(true))
                log.info("SUCCESSFULLY- Completed "+(iteration+TrieMapper.passCount)+"-");
            else
                log.info("ERROR- Completed "+(iteration+TrieMapper.passCount)+"-");

            cachePathCount = iteration + 1;
            passCount = TrieMapper.passCount;
            iteration += passCount;

            counter = job3.getCounters().findCounter(TrieMapper.State.UPDATED).getValue();
        } while (counter > 0);

        date = new Date(); endTime = date.getTime();    // end timer
        log.info("Total Time (in milliseconds) = "+ (endTime-startTime));
        log.info("Total Time (in seconds) = "+ (endTime-startTime)*0.001F);
    }
}


// Mapper Class
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.net.URI;
import java.util.LinkedList;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.util.StringUtils;

import dataStructures.Trie;

public class TrieMapper extends Mapper<Object, Text, Text, IntWritable>
{
    // no. of passes combined
    // NOTE: a static field like this lives only inside each task's JVM;
    // when the job runs on a cluster the driver cannot see updates made here.
    public static int passCount = 0;

    public static enum State
    {
        UPDATED
    }

    private final static IntWritable one = new IntWritable(1);
    private Text items = new Text();
    private Configuration conf;
    private int k;    // iteration no.

    @Override
    public void setup(Context context) throws IOException, InterruptedException
    {
        conf = context.getConfiguration();
        URI[] previousOutputURIs = Job.getInstance(conf).getCacheFiles();
        k = conf.getInt("k", k);

        for (URI previousOutputURI : previousOutputURIs)
        {
            Path previousOutputPath = new Path(previousOutputURI.getPath());
            String previousOutputFileName = previousOutputPath.getName();
            filterItemset(previousOutputFileName);
        }
    }

    private void filterItemset(String fileName)
    {
        /* */
    }

    @Override
    public void map(Object key, Text value, Context context) throws IOException, InterruptedException
    {
        Trie trie = null;
        /* */
        long threshold = (long) (2 * trie.numberOfItems());
        long count = 0;
        passCount = 0;
        do
        {
            /* */
            count += trie.numberOfItems();
            passCount++;
        } while (count <= threshold);

        if (trie.numberOfItems() > 0)
            context.getCounter(State.UPDATED).increment(1);    // increment counter
    }
}
I have posted my driver and mapper classes here. I just need to use the value of passCount in the driver class. As passCount is declared and incremented in the mapper class, its modified value shows up in the driver class when run from the Eclipse IDE, but it is 0 when run as a JAR file.
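Building on the counters suggestion above, a hedged sketch of how passCount could be exposed without relying on the static field; PASS_COUNT is a new, illustrative enum constant, and note that counter values are summed across all map tasks, so the driver gets a total rather than any single task's value:

// In TrieMapper: extend the existing enum and publish the per-task
// value once, in cleanup(), after map() has finished incrementing it.
public static enum State
{
    UPDATED, PASS_COUNT    // PASS_COUNT is illustrative, not in the original code
}

@Override
protected void cleanup(Context context) throws IOException, InterruptedException
{
    context.getCounter(State.PASS_COUNT).increment(passCount);
}

// In MyDriver, after job3.waitForCompletion(true):
//     long totalPassCount = job3.getCounters()
//             .findCounter(TrieMapper.State.PASS_COUNT).getValue();
//     // sum over all map tasks; if you need a per-task maximum instead,
//     // you will have to collate side files as described in the answer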