diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCacheKey.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCacheKey.java index 254f0adbd5ff..0aed1721fdf0 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCacheKey.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCacheKey.java @@ -21,7 +21,6 @@ import org.apache.hadoop.hbase.io.HeapSize; import org.apache.hadoop.hbase.util.ClassSize; import org.apache.hadoop.hbase.util.FastStringPool; -import org.apache.hadoop.hbase.util.HFileArchiveUtil; import org.apache.yetus.audience.InterfaceAudience; /** @@ -38,8 +37,6 @@ public class BlockCacheKey implements HeapSize, java.io.Serializable { private static final FastStringPool CF_NAME_POOL = new FastStringPool(); - // New compressed format using integer file ID (when codec is available) - private final String hfileName; private final String regionName; @@ -90,6 +87,8 @@ public BlockCacheKey(String hfileName, long offset, boolean isPrimaryReplica, * @param offset Offset of the block into the file * @param isPrimaryReplica Whether this is from primary replica * @param blockType Type of block + * @param archived Whether the file is archived or not. This is relevant for proper cache + * metrics computation. */ public BlockCacheKey(String hfileName, String cfName, String regionName, long offset, boolean isPrimaryReplica, BlockType blockType, boolean archived) { @@ -105,8 +104,10 @@ public BlockCacheKey(String hfileName, String cfName, String regionName, long of /** * Construct a new BlockCacheKey using a file path. File, column family and region information - * will be extracted from the passed path. This should be used when inserting keys into the cache, - * so that region cache metrics are recorded properly. + * will be extracted from the passed path, so that region cache metrics are recorded properly. + * NOTE: This should be used when inserting keys into the cache from tests only, as the region and + * cf name parsing from the path can be costly and induce a 10% performance degradation on the + * read path, when this parsing is performed on each key creation time. * @param hfilePath The path to the HFile * @param offset Offset of the block into the file * @param isPrimaryReplica Whether this is from primary replica @@ -114,8 +115,7 @@ public BlockCacheKey(String hfileName, String cfName, String regionName, long of */ public BlockCacheKey(Path hfilePath, long offset, boolean isPrimaryReplica, BlockType blockType) { this(hfilePath.getName(), hfilePath.getParent().getName(), - hfilePath.getParent().getParent().getName(), offset, isPrimaryReplica, blockType, - HFileArchiveUtil.isHFileArchived(hfilePath)); + hfilePath.getParent().getParent().getName(), offset, isPrimaryReplica, blockType, false); } @Override @@ -194,5 +194,4 @@ public void setBlockType(BlockType blockType) { public boolean isArchived() { return archived; } - } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderImpl.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderImpl.java index 6e8f11711ae0..000fd4acf00b 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderImpl.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderImpl.java @@ -52,6 +52,7 @@ import org.apache.hadoop.hbase.regionserver.KeyValueScanner; import org.apache.hadoop.hbase.util.ByteBufferUtils; import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.util.HFileArchiveUtil; import org.apache.hadoop.hbase.util.IdLock; import org.apache.hadoop.hbase.util.ObjectIntPair; import org.apache.hadoop.io.WritableUtils; @@ -124,6 +125,14 @@ public abstract class HFileReaderImpl implements HFile.Reader, Configurable { /** Minor versions starting with this number have faked index key */ static final int MINOR_VERSION_WITH_FAKED_KEY = 3; + private final String fileName; + + private final String region; + + private final String family; + + private final boolean archived; + /** * Opens a HFile. * @param context Reader context info @@ -149,6 +158,10 @@ public HFileReaderImpl(ReaderContext context, HFileInfo fileInfo, CacheConfig ca fsBlockReader.setDataBlockEncoder(dataBlockEncoder, conf); dataBlockIndexReader = fileInfo.getDataBlockIndexReader(); metaBlockIndexReader = fileInfo.getMetaBlockIndexReader(); + fileName = path.getName(); + family = path.getParent().getName(); + region = path.getParent().getParent().getName(); + archived = HFileArchiveUtil.isHFileArchived(path); } @SuppressWarnings("serial") @@ -1246,8 +1259,8 @@ public HFileBlock getMetaBlock(String metaBlockName, boolean cacheBlock) throws synchronized (metaBlockIndexReader.getRootBlockKey(block)) { // Check cache for block. If found return. long metaBlockOffset = metaBlockIndexReader.getRootBlockOffset(block); - BlockCacheKey cacheKey = - new BlockCacheKey(path, metaBlockOffset, this.isPrimaryReplicaReader(), BlockType.META); + BlockCacheKey cacheKey = new BlockCacheKey(fileName, family, region, metaBlockOffset, + this.isPrimaryReplicaReader(), BlockType.META, archived); cacheBlock &= cacheConf.shouldCacheBlockOnRead(BlockType.META.getCategory(), getHFileInfo(), conf); @@ -1336,9 +1349,8 @@ public HFileBlock readBlock(long dataBlockOffset, long onDiskBlockSize, final bo // the other choice is to duplicate work (which the cache would prevent you // from doing). - BlockCacheKey cacheKey = - new BlockCacheKey(path, dataBlockOffset, this.isPrimaryReplicaReader(), expectedBlockType); - + BlockCacheKey cacheKey = new BlockCacheKey(fileName, family, region, dataBlockOffset, + this.isPrimaryReplicaReader(), expectedBlockType, archived); boolean useLock = false; IdLock.Entry lockEntry = null; final Span span = Span.current(); diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterImpl.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterImpl.java index b635c2cfec63..7435685292f2 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterImpl.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterImpl.java @@ -189,11 +189,21 @@ public void setTimeRangeTrackerForTiering(Supplier timeRangeTr private final TimeRangeTracker timeRangeTracker; private long earliestPutTs = HConstants.LATEST_TIMESTAMP; + private String regionName; + + private String familyName; + public HFileWriterImpl(final Configuration conf, CacheConfig cacheConf, Path path, FSDataOutputStream outputStream, HFileContext fileContext) { this.outputStream = outputStream; this.path = path; - this.name = path != null ? path.getName() : outputStream.toString(); + if (path != null) { + this.name = path.getName(); + this.regionName = path.getParent().getParent().getName(); + this.familyName = path.getParent().getName(); + } else { + this.name = outputStream.toString(); + } this.hFileContext = fileContext; // TODO: Move this back to upper layer this.timeRangeTracker = TimeRangeTracker.create(TimeRangeTracker.Type.NON_SYNC); @@ -593,7 +603,7 @@ private void doCacheOnWrite(long offset) { private BlockCacheKey buildCacheBlockKey(long offset, BlockType blockType) { if (path != null) { - return new BlockCacheKey(path, offset, true, blockType); + return new BlockCacheKey(name, familyName, regionName, offset, true, blockType, false); } return new BlockCacheKey(name, offset, true, blockType); }