I want to run the following MapReduce example:
package my.test;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.Map.Entry;
import org.apache.commons.cli.BasicParser;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.CommandLineParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.MultiTableOutputFormat;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.log4j.Logger;
/**
* This class demonstrates the use of the MultiTableOutputFormat class.
* Using this class we can write the output of a Hadoop map reduce program
* into different HBase table.
*
* @version 1.0 19 Jul 2011
* @author Wildnove
*/
public class TestMultiTable extends Configured implements Tool {

    private static final Logger LOG = Logger.getLogger(TestMultiTable.class);

    /** Usage line printed by the help formatter; lists only the options registered in {@code run}. */
    private static final String CMDLINE = "my.test.TestMultiTable <inputFile> [-n name] [-h]";

    /** Job name used when {@code -n} is not given on the command line. */
    private static final String DEFAULT_JOB_NAME = "wildnove.com - Tutorial MultiTableOutputFormat ";

    /**
     * Program entry point: runs this tool through {@link ToolRunner} and exits with its return code.
     *
     * @param args command line arguments, see {@link #CMDLINE}
     * @throws Exception if {@link ToolRunner#run} fails
     */
    public static void main(String[] args) throws Exception {
        int res = ToolRunner.run(new TestMultiTable(), args);
        System.exit(res);
    }

    /**
     * Parses the command line, builds the job and waits for it to finish.
     *
     * @param args arguments left over after ToolRunner's generic option handling
     * @return 0 when the job succeeds or only help was requested, -1 on bad usage or job failure
     */
    @Override
    public int run(String[] args) throws Exception {
        HelpFormatter help = new HelpFormatter();
        Options options = new Options();
        options.addOption("h", "help", false, "print program usage");
        options.addOption("n", "name", true, "sets job name");

        CommandLine cline;
        try {
            CommandLineParser parser = new BasicParser();
            cline = parser.parse(options, args);
        } catch (ParseException e) {
            LOG.error("Unable to parse the command line", e);
            help.printHelp(CMDLINE, options);
            return -1;
        }

        // -h is a registered option, so honour it instead of silently ignoring it.
        if (cline.hasOption('h')) {
            help.printHelp(CMDLINE, options);
            return 0;
        }

        String[] positional = cline.getArgs();
        if (positional.length < 1) {
            help.printHelp(CMDLINE, options);
            return -1;
        }

        String name = cline.hasOption('n') ? cline.getOptionValue('n') : DEFAULT_JOB_NAME;
        try {
            Configuration conf = getConf();
            FileSystem fs = FileSystem.get(conf);
            // Qualify the user-supplied path, then keep only its path component.
            Path inputFile = new Path(fs.makeQualified(new Path(positional[0])).toUri().getPath());
            if (!getMultiTableOutputJob(name, inputFile).waitForCompletion(true)) {
                return -1;
            }
        } catch (InterruptedException e) {
            // Restore the interrupt flag so callers can still observe the interruption.
            Thread.currentThread().interrupt();
            LOG.error("Interrupted while waiting for job '" + name + "'", e);
            return -1;
        } catch (Exception e) {
            // A job failure is not a usage error, so no usage text is printed here.
            LOG.error("Job '" + name + "' failed", e);
            return -1;
        }
        return 0;
    }

    /**
     * Here we configure our job to use MultiTableOutputFormat class as map reduce output.
     * Note that we use 1 reduce only for debugging purpose, but you can use more than 1 reduce.
     *
     * @param name      job name
     * @param inputFile csv file to process
     * @return the configured, not yet submitted job
     * @throws IOException if the job cannot be created or its dependency jars cannot be resolved
     */
    private Job getMultiTableOutputJob(String name, Path inputFile) throws IOException {
        if (LOG.isInfoEnabled()) {
            LOG.info(name + " starting...");
            LOG.info("computing file: " + inputFile);
        }
        Job job = new Job(getConf(), name);
        job.setJarByClass(TestMultiTable.class);
        job.setMapperClass(Mapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        FileInputFormat.addInputPath(job, inputFile);
        job.setOutputFormatClass(MultiTableOutputFormat.class);
        job.setNumReduceTasks(1);
        job.setReducerClass(Reducer.class);
        // HADOOP_CLASSPATH only affects the submitting JVM. Ship the jars that hold the
        // classes configured above (including the HBase jar providing MultiTableOutputFormat)
        // with the job, otherwise the tasks fail with ClassNotFoundException.
        // Must run after the input/output format and key/value classes are set.
        org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil.addDependencyJars(job);
        return job;
    }

    private static class Mapper extends org.apache.hadoop.mapreduce.Mapper<LongWritable, Text, Text, Text> {

        // Reused across map() calls to avoid allocating two Text objects per record.
        private final Text outKey = new Text();
        private final Text outValue = new Text();

        /**
         * The map method splits the csv file according to this structure
         * brand,model,size (e.g. Cadillac,Seville,Midsize) and output all data using
         * brand as key and the couple model,size as value.
         * Lines that do not have exactly three comma separated fields are skipped.
         */
        @Override
        public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            String[] fields = value.toString().split(",");
            if (fields.length != 3) {
                return;
            }
            outKey.set(fields[0]);
            outValue.set(fields[1] + "," + fields[2]);
            context.write(outKey, outValue);
        }
    }

    private static class Reducer extends org.apache.hadoop.mapreduce.Reducer<Text, Text, ImmutableBytesWritable, Writable> {

        // Table, family and qualifier names are constant, so encode them once.
        private static final byte[] CARS_TABLE = Bytes.toBytes("TestCars");
        private static final byte[] CAR_FAMILY = Bytes.toBytes("Car");
        private static final byte[] BRAND_QUALIFIER = Bytes.toBytes("brand");
        private static final byte[] MODEL_QUALIFIER = Bytes.toBytes("model");
        private static final byte[] SIZE_QUALIFIER = Bytes.toBytes("size");
        private static final byte[] BRANDS_SIZES_TABLE = Bytes.toBytes("TestBrandsSizes");
        private static final byte[] BRAND_SIZES_FAMILY = Bytes.toBytes("BrandSizes");

        /**
         * The reduce method fill the TestCars table with all csv data,
         * compute some counters and save those counters into the TestBrandsSizes table.
         * So we use two different HBase table as output for the reduce method.
         */
        @Override
        protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
            Map<String, Integer> statsSizeCounters = new HashMap<String, Integer>();
            String brand = key.toString();
            byte[] brandBytes = Bytes.toBytes(brand);

            // We are receiving all models,size grouped by brand.
            for (Text value : values) {
                String[] fields = value.toString().split(",");
                if (fields.length != 2) {
                    continue;
                }
                String model = fields[0];
                String size = fields[1];

                // Fill the TestCars table: one row per brand,model with three columns.
                Put carPut = new Put(Bytes.toBytes(brand + "," + model));
                carPut.add(CAR_FAMILY, BRAND_QUALIFIER, brandBytes);
                carPut.add(CAR_FAMILY, MODEL_QUALIFIER, Bytes.toBytes(model));
                carPut.add(CAR_FAMILY, SIZE_QUALIFIER, Bytes.toBytes(size));
                context.write(new ImmutableBytesWritable(CARS_TABLE), carPut);

                // Compute some counters: number of cars of each size for a brand.
                Integer seen = statsSizeCounters.get(size);
                statsSizeCounters.put(size, seen == null ? 1 : seen + 1);
            }

            for (Entry<String, Integer> entry : statsSizeCounters.entrySet()) {
                // Fill the TestBrandsSizes table: row = brand, qualifier = size, value = count.
                Put statsPut = new Put(brandBytes);
                // The count is written as an int, exactly as the boxed value resolved before.
                statsPut.add(BRAND_SIZES_FAMILY, Bytes.toBytes(entry.getKey()),
                        Bytes.toBytes(entry.getValue().intValue()));
                context.write(new ImmutableBytesWritable(BRANDS_SIZES_TABLE), statsPut);
            }
        }
    }
}
I built it into mt.jar using Eclipse's "Export → JAR file" option.
Then I ran the MapReduce job:
[zhouhh@Hadoop48 ~]$ HADOOP_CLASSPATH=`${HBASE_HOME}/bin/hbase classpath`:`${HADOOP_HOME}/bin/hadoop classpath` \
${HADOOP_HOME}/bin/hadoop jar mt.jar cars.csv 12/06/11 20:14:33 INFO test.TestMultiTable: wildnove.com - Tutorial MultiTableOutputFormat starting... 12/06/11 20:14:33 INFO test.TestMultiTable: computing file: /user/zhouhh/cars.csv 12/06/11 20:14:34 INFO input.FileInputFormat: Total input paths to process : 1 12/06/11 20:14:34 INFO util.NativeCodeLoader: Loaded the native-hadoop library 12/06/11 20:14:34 WARN snappy.LoadSnappy: Snappy native library not loaded 12/06/11 20:14:35 INFO mapred.JobClient: Running job: job_201206111811_0012 12/06/11 20:14:36 INFO mapred.JobClient: map 0% reduce 0% 12/06/11 20:14:42 INFO mapred.JobClient: Task Id : attempt_201206111811_0012_m_000002_0, Status : FAILED java.lang.RuntimeException: java.lang.ClassNotFoundException: org.apache.hadoop.hbase.mapreduce.MultiTableOutputFormat at org.apache.hadoop.conf.Configuration.getClass(Configuration.java:867) at org.apache.hadoop.mapreduce.JobContext.getOutputFormatClass(JobContext.java:235) at org.apache.hadoop.mapred.Task.initialize(Task.java:513) at org.apache.hadoop.mapred.MapTask.run(MapTask.java:353) at org.apache.hadoop.mapred.Child$4.run(Child.java:255) at java.security.AccessController.doPrivileged(Native Method) at javax.security.auth.Subject.doAs(Subject.java:415) at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1121) at org.apache.hadoop.mapred.Child.main(Child.java:249) Caused by: java.lang.ClassNotFoundException: org.apache.hadoop.hbase.mapreduce.MultiTableOutputFormat at java.net.URLClassLoader$1.run(URLClassLoader.java:366) at java.net.URLClassLoader$1.run(URLClassLoader.java:355) at java.security.AccessController.doPrivileged(Native Method) at java.net.URLClassLoader.findClass(URLClassLoader.java:354) at java.lang.ClassLoader.loadClass(ClassLoader.java:423) at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308) at java.lang.ClassLoader.loadClass(ClassLoader.java:356) at java.lang.Class.forName0(Native Method) at 
java.lang.Class.forName(Class.java:264) at org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:820) at org.apache.hadoop.conf.Configuration.getClass(Configuration.java:865)
cars.csv:
[zhouhh@Hadoop48 ~]$ cat cars.csv Acura,Integra,Small Acura,Legend,Midsize Audi,90,Compact Audi,100,Midsize BMW,535i,Midsize Buick,Century,Midsize Buick,LeSabre,Large Buick,Roadmaster,Large Buick,Riviera,Midsize Cadillac,DeVille,Large Cadillac,Seville,Midsize
MultiTableOutputFormat.class is in hbase-0.94.0.jar, which is on HADOOP_CLASSPATH:
[zhouhh@Hadoop48 ~]$ echo $HADOOP_CLASSPATH |tr ':' '\n' | grep hbase /home/zhouhh/hbase-0.94.0/conf /home/zhouhh/hbase-0.94.0 /home/zhouhh/hbase-0.94.0/hbase-0.94.0.jar /home/zhouhh/hbase-0.94.0/hbase-0.94.0-tests.jar /home/zhouhh/hbase-0.94.0/lib/activation-1.1.jar /home/zhouhh/hbase-0.94.0/lib/asm-3.1.jar /home/zhouhh/hbase-0.94.0/lib/avro-1.5.3.jar /home/zhouhh/hbase-0.94.0/lib/avro-ipc-1.5.3.jar /home/zhouhh/hbase-0.94.0/lib/commons-beanutils-1.7.0.jar /home/zhouhh/hbase-0.94.0/lib/commons-beanutils-core-1.8.0.jar /home/zhouhh/hbase-0.94.0/lib/commons-cli-1.2.jar /home/zhouhh/hbase-0.94.0/lib/commons-codec-1.4.jar /home/zhouhh/hbase-0.94.0/lib/commons-collections-3.2.1.jar /home/zhouhh/hbase-0.94.0/lib/commons-configuration-1.6.jar /home/zhouhh/hbase-0.94.0/lib/commons-digester-1.8.jar /home/zhouhh/hbase-0.94.0/lib/commons-el-1.0.jar /home/zhouhh/hbase-0.94.0/lib/commons-httpclient-3.1.jar /home/zhouhh/hbase-0.94.0/lib/commons-io-2.1.jar /home/zhouhh/hbase-0.94.0/lib/commons-lang-2.5.jar /home/zhouhh/hbase-0.94.0/lib/commons-logging-1.1.1.jar /home/zhouhh/hbase-0.94.0/lib/commons-math-2.1.jar /home/zhouhh/hbase-0.94.0/lib/commons-net-1.4.1.jar /home/zhouhh/hbase-0.94.0/lib/core-3.1.1.jar /home/zhouhh/hbase-0.94.0/lib/guava-r09.jar /home/zhouhh/hbase-0.94.0/lib/hadoop-core-1.0.2.jar /home/zhouhh/hbase-0.94.0/lib/high-scale-lib-1.1.1.jar /home/zhouhh/hbase-0.94.0/lib/httpclient-4.1.2.jar /home/zhouhh/hbase-0.94.0/lib/httpcore-4.1.3.jar /home/zhouhh/hbase-0.94.0/lib/jackson-core-asl-1.5.5.jar /home/zhouhh/hbase-0.94.0/lib/jackson-jaxrs-1.5.5.jar /home/zhouhh/hbase-0.94.0/lib/jackson-mapper-asl-1.5.5.jar /home/zhouhh/hbase-0.94.0/lib/jackson-xc-1.5.5.jar /home/zhouhh/hbase-0.94.0/lib/jamon-runtime-2.3.1.jar /home/zhouhh/hbase-0.94.0/lib/jasper-compiler-5.5.23.jar /home/zhouhh/hbase-0.94.0/lib/jasper-runtime-5.5.23.jar /home/zhouhh/hbase-0.94.0/lib/jaxb-api-2.1.jar /home/zhouhh/hbase-0.94.0/lib/jaxb-impl-2.1.12.jar /home/zhouhh/hbase-0.94.0/lib/jersey-core-1.4.jar 
/home/zhouhh/hbase-0.94.0/lib/jersey-json-1.4.jar /home/zhouhh/hbase-0.94.0/lib/jersey-server-1.4.jar /home/zhouhh/hbase-0.94.0/lib/jettison-1.1.jar /home/zhouhh/hbase-0.94.0/lib/jetty-6.1.26.jar /home/zhouhh/hbase-0.94.0/lib/jetty-util-6.1.26.jar /home/zhouhh/hbase-0.94.0/lib/jruby-complete-1.6.5.jar /home/zhouhh/hbase-0.94.0/lib/jsp-2.1-6.1.14.jar /home/zhouhh/hbase-0.94.0/lib/jsp-api-2.1-6.1.14.jar /home/zhouhh/hbase-0.94.0/lib/libthrift-0.8.0.jar /home/zhouhh/hbase-0.94.0/lib/log4j-1.2.16.jar /home/zhouhh/hbase-0.94.0/lib/netty-3.2.4.Final.jar /home/zhouhh/hbase-0.94.0/lib/protobuf-java-2.4.0a.jar /home/zhouhh/hbase-0.94.0/lib/servlet-api-2.5-6.1.14.jar /home/zhouhh/hbase-0.94.0/lib/slf4j-api-1.5.8.jar /home/zhouhh/hbase-0.94.0/lib/snappy-java-1.0.3.2.jar /home/zhouhh/hbase-0.94.0/lib/stax-api-1.0.1.jar /home/zhouhh/hbase-0.94.0/lib/velocity-1.7.jar /home/zhouhh/hbase-0.94.0/lib/xmlenc-0.52.jar /home/zhouhh/hbase-0.94.0/lib/zookeeper-3.4.3.jar /home/zhouhh/hbase-0.94.0/conf /home/zhouhh/hbase-0.94.0 /home/zhouhh/hbase-0.94.0/hbase-0.94.0.jar /home/zhouhh/hbase-0.94.0/hbase-0.94.0-tests.jar /home/zhouhh/hbase-0.94.0/lib/activation-1.1.jar /home/zhouhh/hbase-0.94.0/lib/asm-3.1.jar /home/zhouhh/hbase-0.94.0/lib/avro-1.5.3.jar /home/zhouhh/hbase-0.94.0/lib/avro-ipc-1.5.3.jar /home/zhouhh/hbase-0.94.0/lib/commons-beanutils-1.7.0.jar /home/zhouhh/hbase-0.94.0/lib/commons-beanutils-core-1.8.0.jar /home/zhouhh/hbase-0.94.0/lib/commons-cli-1.2.jar /home/zhouhh/hbase-0.94.0/lib/commons-codec-1.4.jar /home/zhouhh/hbase-0.94.0/lib/commons-collections-3.2.1.jar /home/zhouhh/hbase-0.94.0/lib/commons-configuration-1.6.jar /home/zhouhh/hbase-0.94.0/lib/commons-digester-1.8.jar /home/zhouhh/hbase-0.94.0/lib/commons-el-1.0.jar /home/zhouhh/hbase-0.94.0/lib/commons-httpclient-3.1.jar /home/zhouhh/hbase-0.94.0/lib/commons-io-2.1.jar /home/zhouhh/hbase-0.94.0/lib/commons-lang-2.5.jar /home/zhouhh/hbase-0.94.0/lib/commons-logging-1.1.1.jar 
/home/zhouhh/hbase-0.94.0/lib/commons-math-2.1.jar /home/zhouhh/hbase-0.94.0/lib/commons-net-1.4.1.jar /home/zhouhh/hbase-0.94.0/lib/core-3.1.1.jar /home/zhouhh/hbase-0.94.0/lib/guava-r09.jar /home/zhouhh/hbase-0.94.0/lib/hadoop-core-1.0.2.jar /home/zhouhh/hbase-0.94.0/lib/high-scale-lib-1.1.1.jar /home/zhouhh/hbase-0.94.0/lib/httpclient-4.1.2.jar /home/zhouhh/hbase-0.94.0/lib/httpcore-4.1.3.jar /home/zhouhh/hbase-0.94.0/lib/jackson-core-asl-1.5.5.jar /home/zhouhh/hbase-0.94.0/lib/jackson-jaxrs-1.5.5.jar /home/zhouhh/hbase-0.94.0/lib/jackson-mapper-asl-1.5.5.jar /home/zhouhh/hbase-0.94.0/lib/jackson-xc-1.5.5.jar /home/zhouhh/hbase-0.94.0/lib/jamon-runtime-2.3.1.jar /home/zhouhh/hbase-0.94.0/lib/jasper-compiler-5.5.23.jar /home/zhouhh/hbase-0.94.0/lib/jasper-runtime-5.5.23.jar /home/zhouhh/hbase-0.94.0/lib/jaxb-api-2.1.jar /home/zhouhh/hbase-0.94.0/lib/jaxb-impl-2.1.12.jar /home/zhouhh/hbase-0.94.0/lib/jersey-core-1.4.jar /home/zhouhh/hbase-0.94.0/lib/jersey-json-1.4.jar /home/zhouhh/hbase-0.94.0/lib/jersey-server-1.4.jar /home/zhouhh/hbase-0.94.0/lib/jettison-1.1.jar /home/zhouhh/hbase-0.94.0/lib/jetty-6.1.26.jar /home/zhouhh/hbase-0.94.0/lib/jetty-util-6.1.26.jar /home/zhouhh/hbase-0.94.0/lib/jruby-complete-1.6.5.jar /home/zhouhh/hbase-0.94.0/lib/jsp-2.1-6.1.14.jar /home/zhouhh/hbase-0.94.0/lib/jsp-api-2.1-6.1.14.jar /home/zhouhh/hbase-0.94.0/lib/libthrift-0.8.0.jar /home/zhouhh/hbase-0.94.0/lib/log4j-1.2.16.jar /home/zhouhh/hbase-0.94.0/lib/netty-3.2.4.Final.jar /home/zhouhh/hbase-0.94.0/lib/protobuf-java-2.4.0a.jar /home/zhouhh/hbase-0.94.0/lib/servlet-api-2.5-6.1.14.jar /home/zhouhh/hbase-0.94.0/lib/slf4j-api-1.5.8.jar /home/zhouhh/hbase-0.94.0/lib/snappy-java-1.0.3.2.jar /home/zhouhh/hbase-0.94.0/lib/stax-api-1.0.1.jar /home/zhouhh/hbase-0.94.0/lib/velocity-1.7.jar /home/zhouhh/hbase-0.94.0/lib/xmlenc-0.52.jar /home/zhouhh/hbase-0.94.0/lib/zookeeper-3.4.3.jar
I have tried many approaches, but the same error is still there.
Can anyone help me? Thanks.
I'm using the following script to add the job's dependencies from the lib folder, plus HBase's dependencies, to the job's classpath:
You have two easy options:
1) Build a fat jar, where your `mt.jar` file includes `hbase-0.94.0.jar` (this can be done with `mvn package -Dfatjar`).
2) Use the `GenericOptionsParser` (I think you are trying to, by implementing `Tool`) and then specify the `-libjars` parameter on the command line.

I struggled with the same problem. This post of mine has it working: https://my-bigdata-blog.blogspot.in/2017/08/Hbase-Programming-Java-Netbeans-Maven.html
You need the line below in your code, in addition to setting HADOOP_CLASSPATH:
`TableMapReduceUtil.addDependencyJars(job);`
`hbase classpath` and `hadoop classpath` will give you the cluster classpath; export this to HADOOP_CLASSPATH (this is the standard way to use the cluster's local environment).
Use the `-libjars` option of the MapReduce job if it still does not find the jar you are looking for.