001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.mapreduce;
019
020import java.io.IOException;
021import java.lang.reflect.Method;
022import java.util.Map;
023import org.apache.hadoop.conf.Configuration;
024import org.apache.hadoop.hbase.DoNotRetryIOException;
025import org.apache.hadoop.hbase.client.Result;
026import org.apache.hadoop.hbase.client.ResultScanner;
027import org.apache.hadoop.hbase.client.Scan;
028import org.apache.hadoop.hbase.client.Table;
029import org.apache.hadoop.hbase.client.metrics.ScanMetrics;
030import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
031import org.apache.hadoop.hbase.util.Bytes;
032import org.apache.hadoop.mapreduce.Counter;
033import org.apache.hadoop.mapreduce.InputSplit;
034import org.apache.hadoop.mapreduce.TaskAttemptContext;
035import org.apache.hadoop.util.StringUtils;
036import org.apache.yetus.audience.InterfaceAudience;
037import org.slf4j.Logger;
038import org.slf4j.LoggerFactory;
039
040/**
041 * Iterates over HBase table data, returning (ImmutableBytesWritable, Result) pairs.
042 */
043@InterfaceAudience.Public
044public class TableRecordReaderImpl {
045  public static final String LOG_PER_ROW_COUNT = "hbase.mapreduce.log.scanner.rowcount";
046
047  private static final Logger LOG = LoggerFactory.getLogger(TableRecordReaderImpl.class);
048
049  // HBASE_COUNTER_GROUP_NAME is the name of mapreduce counter group for HBase
050  @InterfaceAudience.Private
051  static final String HBASE_COUNTER_GROUP_NAME = "HBaseCounters";
052
053  private ResultScanner scanner = null;
054  private Scan scan = null;
055  private Scan currentScan = null;
056  private Table htable = null;
057  private byte[] lastSuccessfulRow = null;
058  private ImmutableBytesWritable key = null;
059  private Result value = null;
060  private TaskAttemptContext context = null;
061  private long numRestarts = 0;
062  private long numStale = 0;
063  private long timestamp;
064  private int rowcount;
065  private boolean logScannerActivity = false;
066  private int logPerRowCount = 100;
067
068  /**
069   * Restart from survivable exceptions by creating a new scanner.
070   * @param firstRow The first row to start at.
071   * @throws IOException When restarting fails.
072   */
073  public void restart(byte[] firstRow) throws IOException {
074    // Update counter metrics based on current scan before reinitializing it
075    if (currentScan != null) {
076      updateCounters();
077    }
078    currentScan = new Scan(scan);
079    currentScan.withStartRow(firstRow);
080    currentScan.setScanMetricsEnabled(true);
081    if (this.scanner != null) {
082      if (logScannerActivity) {
083        LOG.info("Closing the previously opened scanner object.");
084      }
085      this.scanner.close();
086    }
087    this.scanner = this.htable.getScanner(currentScan);
088    if (logScannerActivity) {
089      LOG.info("Current scan=" + currentScan.toString());
090      timestamp = System.currentTimeMillis();
091      rowcount = 0;
092    }
093  }
094
095  /**
096   * In new mapreduce APIs, TaskAttemptContext has two getCounter methods Check if
097   * getCounter(String, String) method is available.
098   * @return The getCounter method or null if not available.
099   * @deprecated since 2.4.0 and 2.3.2, will be removed in 4.0.0
100   */
101  @Deprecated
102  protected static Method retrieveGetCounterWithStringsParams(TaskAttemptContext context)
103    throws IOException {
104    Method m = null;
105    try {
106      m = context.getClass().getMethod("getCounter", new Class[] { String.class, String.class });
107    } catch (SecurityException e) {
108      throw new IOException("Failed test for getCounter", e);
109    } catch (NoSuchMethodException e) {
110      // Ignore
111    }
112    return m;
113  }
114
115  /**
116   * Sets the HBase table.
117   * @param htable The {@link org.apache.hadoop.hbase.HTableDescriptor} to scan.
118   */
119  public void setHTable(Table htable) {
120    Configuration conf = htable.getConfiguration();
121    logScannerActivity = conf.getBoolean(
122      "hbase.client.log.scanner.activity" /* ScannerCallable.LOG_SCANNER_ACTIVITY */, false);
123    logPerRowCount = conf.getInt(LOG_PER_ROW_COUNT, 100);
124    this.htable = htable;
125  }
126
127  /**
128   * Sets the scan defining the actual details like columns etc.
129   * @param scan The scan to set.
130   */
131  public void setScan(Scan scan) {
132    this.scan = scan;
133  }
134
135  /**
136   * Build the scanner. Not done in constructor to allow for extension.
137   */
138  public void initialize(InputSplit inputsplit, TaskAttemptContext context)
139    throws IOException, InterruptedException {
140    if (context != null) {
141      this.context = context;
142    }
143    restart(scan.getStartRow());
144  }
145
146  /**
147   * Closes the split.
148   */
149  public void close() {
150    if (this.scanner != null) {
151      this.scanner.close();
152    }
153    try {
154      this.htable.close();
155    } catch (IOException ioe) {
156      LOG.warn("Error closing table", ioe);
157    }
158  }
159
160  /**
161   * Returns the current key.
162   * @return The current key.
163   * @throws InterruptedException When the job is aborted.
164   */
165  public ImmutableBytesWritable getCurrentKey() throws IOException, InterruptedException {
166    return key;
167  }
168
169  /**
170   * Returns the current value.
171   * @return The current value.
172   * @throws IOException          When the value is faulty.
173   * @throws InterruptedException When the job is aborted.
174   */
175  public Result getCurrentValue() throws IOException, InterruptedException {
176    return value;
177  }
178
179  /**
180   * Positions the record reader to the next record.
181   * @return <code>true</code> if there was another record.
182   * @throws IOException          When reading the record failed.
183   * @throws InterruptedException When the job was aborted.
184   */
185  public boolean nextKeyValue() throws IOException, InterruptedException {
186    if (key == null) {
187      key = new ImmutableBytesWritable();
188    }
189    if (value == null) {
190      value = new Result();
191    }
192    try {
193      try {
194        value = this.scanner.next();
195        if (value != null && value.isStale()) {
196          numStale++;
197        }
198        if (logScannerActivity) {
199          rowcount++;
200          if (rowcount >= logPerRowCount) {
201            long now = System.currentTimeMillis();
202            LOG.info("Mapper took {}ms to process {} rows", (now - timestamp), rowcount);
203            timestamp = now;
204            rowcount = 0;
205          }
206        }
207      } catch (IOException e) {
208        // do not retry if the exception tells us not to do so
209        if (e instanceof DoNotRetryIOException) {
210          updateCounters();
211          throw e;
212        }
213        // try to handle all other IOExceptions by restarting
214        // the scanner, if the second call fails, it will be rethrown
215        LOG.info("recovered from " + StringUtils.stringifyException(e));
216        if (lastSuccessfulRow == null) {
217          LOG.warn("We are restarting the first next() invocation,"
218            + " if your mapper has restarted a few other times like this"
219            + " then you should consider killing this job and investigate"
220            + " why it's taking so long.");
221        }
222        if (lastSuccessfulRow == null) {
223          restart(scan.getStartRow());
224        } else {
225          restart(lastSuccessfulRow);
226          scanner.next(); // skip presumed already mapped row
227        }
228        value = scanner.next();
229        if (value != null && value.isStale()) {
230          numStale++;
231        }
232        numRestarts++;
233      }
234
235      if (value != null && value.size() > 0) {
236        key.set(value.getRow());
237        lastSuccessfulRow = key.get();
238        return true;
239      }
240
241      // Need handle cursor result
242      if (value != null && value.isCursor()) {
243        key.set(value.getCursor().getRow());
244        lastSuccessfulRow = key.get();
245        return true;
246      }
247
248      updateCounters();
249      return false;
250    } catch (IOException ioe) {
251      updateCounters();
252      if (logScannerActivity) {
253        long now = System.currentTimeMillis();
254        LOG.info("Mapper took {}ms to process {} rows", (now - timestamp), rowcount);
255        LOG.info(ioe.toString(), ioe);
256        String lastRow =
257          lastSuccessfulRow == null ? "null" : Bytes.toStringBinary(lastSuccessfulRow);
258        LOG.info("lastSuccessfulRow=" + lastRow);
259      }
260      throw ioe;
261    }
262  }
263
264  /**
265   * If hbase runs on new version of mapreduce, RecordReader has access to counters thus can update
266   * counters based on scanMetrics. If hbase runs on old version of mapreduce, it won't be able to
267   * get access to counters and TableRecorderReader can't update counter values.
268   */
269  private void updateCounters() {
270    ScanMetrics scanMetrics = scanner.getScanMetrics();
271    if (scanMetrics == null) {
272      return;
273    }
274
275    updateCounters(scanMetrics, numRestarts, context, numStale);
276  }
277
278  /**
279   * @deprecated since 2.4.0 and 2.3.2, will be removed in 4.0.0 Use
280   *             {@link #updateCounters(ScanMetrics, long, TaskAttemptContext, long)} instead.
281   */
282  @Deprecated
283  protected static void updateCounters(ScanMetrics scanMetrics, long numScannerRestarts,
284    Method getCounter, TaskAttemptContext context, long numStale) {
285    updateCounters(scanMetrics, numScannerRestarts, context, numStale);
286  }
287
288  protected static void updateCounters(ScanMetrics scanMetrics, long numScannerRestarts,
289    TaskAttemptContext context, long numStale) {
290    // we can get access to counters only if hbase uses new mapreduce APIs
291    if (context == null) {
292      return;
293    }
294
295    for (Map.Entry<String, Long> entry : scanMetrics.getMetricsMap().entrySet()) {
296      Counter counter = context.getCounter(HBASE_COUNTER_GROUP_NAME, entry.getKey());
297      if (counter != null) {
298        counter.increment(entry.getValue());
299      }
300    }
301    if (numScannerRestarts != 0L) {
302      Counter counter = context.getCounter(HBASE_COUNTER_GROUP_NAME, "NUM_SCANNER_RESTARTS");
303      if (counter != null) {
304        counter.increment(numScannerRestarts);
305      }
306    }
307    if (numStale != 0L) {
308      Counter counter = context.getCounter(HBASE_COUNTER_GROUP_NAME, "NUM_SCAN_RESULTS_STALE");
309      if (counter != null) {
310        counter.increment(numStale);
311      }
312    }
313  }
314
315  /**
316   * The current progress of the record reader through its data.
317   * @return A number between 0.0 and 1.0, the fraction of the data read.
318   */
319  public float getProgress() {
320    // Depends on the total number of tuples
321    return 0;
322  }
323
324}