001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.mapreduce; 019 020import java.io.IOException; 021import java.text.ParseException; 022import java.text.SimpleDateFormat; 023import java.util.Map; 024import java.util.TreeMap; 025import org.apache.hadoop.conf.Configuration; 026import org.apache.hadoop.conf.Configured; 027import org.apache.hadoop.fs.Path; 028import org.apache.hadoop.hbase.Cell; 029import org.apache.hadoop.hbase.CellUtil; 030import org.apache.hadoop.hbase.HBaseConfiguration; 031import org.apache.hadoop.hbase.KeyValue; 032import org.apache.hadoop.hbase.KeyValueUtil; 033import org.apache.hadoop.hbase.TableName; 034import org.apache.hadoop.hbase.client.Connection; 035import org.apache.hadoop.hbase.client.ConnectionFactory; 036import org.apache.hadoop.hbase.client.Delete; 037import org.apache.hadoop.hbase.client.Mutation; 038import org.apache.hadoop.hbase.client.Put; 039import org.apache.hadoop.hbase.client.RegionLocator; 040import org.apache.hadoop.hbase.client.Table; 041import org.apache.hadoop.hbase.io.ImmutableBytesWritable; 042import org.apache.hadoop.hbase.regionserver.wal.WALCellCodec; 043import org.apache.hadoop.hbase.util.Bytes; 
import org.apache.hadoop.hbase.util.MapReduceExtendedCell;
import org.apache.hadoop.hbase.wal.WALEdit;
import org.apache.hadoop.hbase.wal.WALKey;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * A tool to replay WAL files as a M/R job. The WAL can be replayed for a set of tables or all
 * tables, and a time range can be provided (in milliseconds). The WAL is filtered to the passed set
 * of tables and the output can optionally be mapped to another set of tables. WAL replay can also
 * generate HFiles for later bulk importing, in that case the WAL is replayed for a single table
 * only.
 */
@InterfaceAudience.Public
public class WALPlayer extends Configured implements Tool {
  private static final Logger LOG = LoggerFactory.getLogger(WALPlayer.class);
  final static String NAME = "WALPlayer";
  /** Config key: directory to write HFiles into (bulk mode) instead of a live cluster. */
  public final static String BULK_OUTPUT_CONF_KEY = "wal.bulk.output";
  /** Config key: comma separated list of source tables to replay; unset means all tables. */
  public final static String TABLES_KEY = "wal.input.tables";
  /** Config key: comma separated list of target tables, parallel to {@link #TABLES_KEY}. */
  public final static String TABLE_MAP_KEY = "wal.input.tablesmap";
  // NOTE(review): presumably the separator used when multiple WAL input dirs are passed as one
  // string — not referenced in this file; confirm against WALInputFormat.
  public final static String INPUT_FILES_SEPARATOR_KEY = "wal.input.separator";
  // NOTE(review): presumably makes the job skip missing WAL files instead of failing — not
  // referenced in this file; confirm against WALInputFormat.
  public final static String IGNORE_MISSING_FILES = "wal.input.ignore.missing.files";

  // This relies on Hadoop Configuration to handle warning about deprecated configs and
  // to set the correct non-deprecated configs when an old one shows up.
076 static { 077 Configuration.addDeprecation("hlog.bulk.output", BULK_OUTPUT_CONF_KEY); 078 Configuration.addDeprecation("hlog.input.tables", TABLES_KEY); 079 Configuration.addDeprecation("hlog.input.tablesmap", TABLE_MAP_KEY); 080 } 081 082 private final static String JOB_NAME_CONF_KEY = "mapreduce.job.name"; 083 084 public WALPlayer() { 085 } 086 087 protected WALPlayer(final Configuration c) { 088 super(c); 089 } 090 091 /** 092 * A mapper that just writes out KeyValues. This one can be used together with 093 * {@link KeyValueSortReducer} 094 * @deprecated Use {@link WALCellMapper}. Will be removed from 3.0 onwards 095 */ 096 @Deprecated 097 static class WALKeyValueMapper extends Mapper<WALKey, WALEdit, ImmutableBytesWritable, KeyValue> { 098 private byte[] table; 099 100 @Override 101 public void map(WALKey key, WALEdit value, Context context) throws IOException { 102 try { 103 // skip all other tables 104 if (Bytes.equals(table, key.getTableName().getName())) { 105 for (Cell cell : value.getCells()) { 106 KeyValue kv = KeyValueUtil.ensureKeyValue(cell); 107 if (WALEdit.isMetaEditFamily(kv)) { 108 continue; 109 } 110 context.write(new ImmutableBytesWritable(CellUtil.cloneRow(kv)), kv); 111 } 112 } 113 } catch (InterruptedException e) { 114 e.printStackTrace(); 115 } 116 } 117 118 @Override 119 public void setup(Context context) throws IOException { 120 // only a single table is supported when HFiles are generated with HFileOutputFormat 121 String[] tables = context.getConfiguration().getStrings(TABLES_KEY); 122 if (tables == null || tables.length != 1) { 123 // this can only happen when WALMapper is used directly by a class other than WALPlayer 124 throw new IOException("Exactly one table must be specified for bulk HFile case."); 125 } 126 table = Bytes.toBytes(tables[0]); 127 128 } 129 130 } 131 132 /** 133 * A mapper that just writes out Cells. 
This one can be used together with {@link CellSortReducer} 134 */ 135 static class WALCellMapper extends Mapper<WALKey, WALEdit, ImmutableBytesWritable, Cell> { 136 private byte[] table; 137 138 @Override 139 public void map(WALKey key, WALEdit value, Context context) throws IOException { 140 try { 141 // skip all other tables 142 if (Bytes.equals(table, key.getTableName().getName())) { 143 for (Cell cell : value.getCells()) { 144 if (WALEdit.isMetaEditFamily(cell)) { 145 continue; 146 } 147 context.write(new ImmutableBytesWritable(CellUtil.cloneRow(cell)), 148 new MapReduceExtendedCell(cell)); 149 } 150 } 151 } catch (InterruptedException e) { 152 e.printStackTrace(); 153 } 154 } 155 156 @Override 157 public void setup(Context context) throws IOException { 158 // only a single table is supported when HFiles are generated with HFileOutputFormat 159 String[] tables = context.getConfiguration().getStrings(TABLES_KEY); 160 if (tables == null || tables.length != 1) { 161 // this can only happen when WALMapper is used directly by a class other than WALPlayer 162 throw new IOException("Exactly one table must be specified for bulk HFile case."); 163 } 164 table = Bytes.toBytes(tables[0]); 165 166 } 167 168 } 169 170 /** 171 * Enum for map metrics. Keep it out here rather than inside in the Map inner-class so we can find 172 * associated properties. 173 */ 174 protected static enum Counter { 175 /** Number of aggregated writes */ 176 PUTS, 177 /** Number of aggregated deletes */ 178 DELETES, 179 CELLS_READ, 180 CELLS_WRITTEN, 181 WALEDITS 182 } 183 184 /** 185 * A mapper that writes out {@link Mutation} to be directly applied to a running HBase instance. 
186 */ 187 protected static class WALMapper 188 extends Mapper<WALKey, WALEdit, ImmutableBytesWritable, Mutation> { 189 private Map<TableName, TableName> tables = new TreeMap<>(); 190 191 @Override 192 public void map(WALKey key, WALEdit value, Context context) throws IOException { 193 context.getCounter(Counter.WALEDITS).increment(1); 194 try { 195 if (tables.isEmpty() || tables.containsKey(key.getTableName())) { 196 TableName targetTable = 197 tables.isEmpty() ? key.getTableName() : tables.get(key.getTableName()); 198 ImmutableBytesWritable tableOut = new ImmutableBytesWritable(targetTable.getName()); 199 Put put = null; 200 Delete del = null; 201 Cell lastCell = null; 202 for (Cell cell : value.getCells()) { 203 context.getCounter(Counter.CELLS_READ).increment(1); 204 // Filtering WAL meta marker entries. 205 if (WALEdit.isMetaEditFamily(cell)) { 206 continue; 207 } 208 // Allow a subclass filter out this cell. 209 if (filter(context, cell)) { 210 // A WALEdit may contain multiple operations (HBASE-3584) and/or 211 // multiple rows (HBASE-5229). 212 // Aggregate as much as possible into a single Put/Delete 213 // operation before writing to the context. 214 if ( 215 lastCell == null || lastCell.getTypeByte() != cell.getTypeByte() 216 || !CellUtil.matchingRows(lastCell, cell) 217 ) { 218 // row or type changed, write out aggregate KVs. 
219 if (put != null) { 220 context.write(tableOut, put); 221 context.getCounter(Counter.PUTS).increment(1); 222 } 223 if (del != null) { 224 context.write(tableOut, del); 225 context.getCounter(Counter.DELETES).increment(1); 226 } 227 if (CellUtil.isDelete(cell)) { 228 del = new Delete(CellUtil.cloneRow(cell)); 229 } else { 230 put = new Put(CellUtil.cloneRow(cell)); 231 } 232 } 233 if (CellUtil.isDelete(cell)) { 234 del.add(cell); 235 } else { 236 put.add(cell); 237 } 238 context.getCounter(Counter.CELLS_WRITTEN).increment(1); 239 } 240 lastCell = cell; 241 } 242 // write residual KVs 243 if (put != null) { 244 context.write(tableOut, put); 245 context.getCounter(Counter.PUTS).increment(1); 246 } 247 if (del != null) { 248 context.getCounter(Counter.DELETES).increment(1); 249 context.write(tableOut, del); 250 } 251 } 252 } catch (InterruptedException e) { 253 e.printStackTrace(); 254 } 255 } 256 257 protected boolean filter(Context context, final Cell cell) { 258 return true; 259 } 260 261 @Override 262 protected void 263 cleanup(Mapper<WALKey, WALEdit, ImmutableBytesWritable, Mutation>.Context context) 264 throws IOException, InterruptedException { 265 super.cleanup(context); 266 } 267 268 @SuppressWarnings("checkstyle:EmptyBlock") 269 @Override 270 public void setup(Context context) throws IOException { 271 String[] tableMap = context.getConfiguration().getStrings(TABLE_MAP_KEY); 272 String[] tablesToUse = context.getConfiguration().getStrings(TABLES_KEY); 273 if (tableMap == null) { 274 tableMap = tablesToUse; 275 } 276 if (tablesToUse == null) { 277 // Then user wants all tables. 
278 } else if (tablesToUse.length != tableMap.length) { 279 // this can only happen when WALMapper is used directly by a class other than WALPlayer 280 throw new IOException("Incorrect table mapping specified ."); 281 } 282 int i = 0; 283 if (tablesToUse != null) { 284 for (String table : tablesToUse) { 285 tables.put(TableName.valueOf(table), TableName.valueOf(tableMap[i++])); 286 } 287 } 288 } 289 } 290 291 void setupTime(Configuration conf, String option) throws IOException { 292 String val = conf.get(option); 293 if (null == val) { 294 return; 295 } 296 long ms; 297 try { 298 // first try to parse in user friendly form 299 ms = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SS").parse(val).getTime(); 300 } catch (ParseException pe) { 301 try { 302 // then see if just a number of ms's was specified 303 ms = Long.parseLong(val); 304 } catch (NumberFormatException nfe) { 305 throw new IOException( 306 option + " must be specified either in the form 2001-02-20T16:35:06.99 " 307 + "or as number of milliseconds"); 308 } 309 } 310 conf.setLong(option, ms); 311 } 312 313 /** 314 * Sets up the actual job. 315 * @param args The command line parameters. 316 * @return The newly created job. 317 * @throws IOException When setting up the job fails. 318 */ 319 public Job createSubmittableJob(String[] args) throws IOException { 320 Configuration conf = getConf(); 321 setupTime(conf, WALInputFormat.START_TIME_KEY); 322 setupTime(conf, WALInputFormat.END_TIME_KEY); 323 String inputDirs = args[0]; 324 String[] tables = args.length == 1 ? 
new String[] {} : args[1].split(","); 325 String[] tableMap; 326 if (args.length > 2) { 327 tableMap = args[2].split(","); 328 if (tableMap.length != tables.length) { 329 throw new IOException("The same number of tables and mapping must be provided."); 330 } 331 } else { 332 // if no mapping is specified, map each table to itself 333 tableMap = tables; 334 } 335 conf.setStrings(TABLES_KEY, tables); 336 conf.setStrings(TABLE_MAP_KEY, tableMap); 337 conf.set(FileInputFormat.INPUT_DIR, inputDirs); 338 Job job = 339 Job.getInstance(conf, conf.get(JOB_NAME_CONF_KEY, NAME + "_" + System.currentTimeMillis())); 340 job.setJarByClass(WALPlayer.class); 341 342 job.setInputFormatClass(WALInputFormat.class); 343 job.setMapOutputKeyClass(ImmutableBytesWritable.class); 344 345 String hfileOutPath = conf.get(BULK_OUTPUT_CONF_KEY); 346 if (hfileOutPath != null) { 347 LOG.debug("add incremental job :" + hfileOutPath + " from " + inputDirs); 348 349 // the bulk HFile case 350 if (tables.length != 1) { 351 throw new IOException("Exactly one table must be specified for the bulk export option"); 352 } 353 TableName tableName = TableName.valueOf(tables[0]); 354 job.setMapperClass(WALCellMapper.class); 355 job.setReducerClass(CellSortReducer.class); 356 Path outputDir = new Path(hfileOutPath); 357 FileOutputFormat.setOutputPath(job, outputDir); 358 job.setMapOutputValueClass(MapReduceExtendedCell.class); 359 try (Connection conn = ConnectionFactory.createConnection(conf); 360 Table table = conn.getTable(tableName); 361 RegionLocator regionLocator = conn.getRegionLocator(tableName)) { 362 HFileOutputFormat2.configureIncrementalLoad(job, table.getDescriptor(), regionLocator); 363 } 364 TableMapReduceUtil.addDependencyJarsForClasses(job.getConfiguration(), 365 org.apache.hbase.thirdparty.com.google.common.base.Preconditions.class); 366 } else { 367 // output to live cluster 368 job.setMapperClass(WALMapper.class); 369 job.setOutputFormatClass(MultiTableOutputFormat.class); 370 
TableMapReduceUtil.addDependencyJars(job); 371 TableMapReduceUtil.initCredentials(job); 372 // No reducers. 373 job.setNumReduceTasks(0); 374 } 375 String codecCls = WALCellCodec.getWALCellCodecClass(conf).getName(); 376 try { 377 TableMapReduceUtil.addDependencyJarsForClasses(job.getConfiguration(), 378 Class.forName(codecCls)); 379 } catch (Exception e) { 380 throw new IOException("Cannot determine wal codec class " + codecCls, e); 381 } 382 return job; 383 } 384 385 /** 386 * Print usage 387 * @param errorMsg Error message. Can be null. 388 */ 389 private void usage(final String errorMsg) { 390 if (errorMsg != null && errorMsg.length() > 0) { 391 System.err.println("ERROR: " + errorMsg); 392 } 393 System.err.println("Usage: " + NAME + " [options] <WAL inputdir> [<tables> <tableMappings>]"); 394 System.err.println(" <WAL inputdir> directory of WALs to replay."); 395 System.err.println(" <tables> comma separated list of tables. If no tables specified,"); 396 System.err.println(" all are imported (even hbase:meta if present)."); 397 System.err.println( 398 " <tableMappings> WAL entries can be mapped to a new set of tables by " + "passing"); 399 System.err 400 .println(" <tableMappings>, a comma separated list of target " + "tables."); 401 System.err 402 .println(" If specified, each table in <tables> must have a " + "mapping."); 403 System.err.println("To generate HFiles to bulk load instead of loading HBase directly, pass:"); 404 System.err.println(" -D" + BULK_OUTPUT_CONF_KEY + "=/path/for/output"); 405 System.err.println(" Only one table can be specified, and no mapping allowed!"); 406 System.err.println("To specify a time range, pass:"); 407 System.err.println(" -D" + WALInputFormat.START_TIME_KEY + "=[date|ms]"); 408 System.err.println(" -D" + WALInputFormat.END_TIME_KEY + "=[date|ms]"); 409 System.err.println(" The start and the end date of timerange (inclusive). 
The dates can be"); 410 System.err 411 .println(" expressed in milliseconds-since-epoch or yyyy-MM-dd'T'HH:mm:ss.SS " + "format."); 412 System.err.println(" E.g. 1234567890120 or 2009-02-13T23:32:30.12"); 413 System.err.println("Other options:"); 414 System.err.println(" -D" + JOB_NAME_CONF_KEY + "=jobName"); 415 System.err.println(" Use the specified mapreduce job name for the wal player"); 416 System.err.println(" -Dwal.input.separator=' '"); 417 System.err.println(" Change WAL filename separator (WAL dir names use default ','.)"); 418 System.err.println("For performance also consider the following options:\n" 419 + " -Dmapreduce.map.speculative=false\n" + " -Dmapreduce.reduce.speculative=false"); 420 } 421 422 /** 423 * Main entry point. 424 * @param args The command line parameters. 425 * @throws Exception When running the job fails. 426 */ 427 public static void main(String[] args) throws Exception { 428 int ret = ToolRunner.run(new WALPlayer(HBaseConfiguration.create()), args); 429 System.exit(ret); 430 } 431 432 @Override 433 public int run(String[] args) throws Exception { 434 if (args.length < 1) { 435 usage("Wrong number of arguments: " + args.length); 436 System.exit(-1); 437 } 438 Job job = createSubmittableJob(args); 439 return job.waitForCompletion(true) ? 0 : 1; 440 } 441}