-
Notifications
You must be signed in to change notification settings - Fork 663
Add Support for RangeIndex Diskbased Replication #1807
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from all commits
Commits
Show all changes
24 commits
Select commit
Hold shift + click to select a range
a3a99f9
Add RangeIndex file enumeration for disk-based replication
vazois 563fa5e
Add primary-side RangeIndex replication: reader, data source, and tra…
vazois de56c62
Add receiver-side RangeIndex replication and session integration
vazois c8ad447
fix formatting
vazois a247874
add tests for cluster diskbased replication
vazois 27829b3
fix formatting
vazois 36d7a6e
addressing comments round 1
vazois d07bc37
enforce consistent little endian read/write for rangeindex metadata
vazois 6d0deed
expose rangeIndexManager through the ClusterProvider
vazois b13d05a
validate stream position
vazois a1db906
ensure break out when readAsync reads zero bytes
vazois 46cda75
add validation for snapshot file names
vazois c1916bb
fix formatting
vazois d6b73e7
pass RangeIndexManager to ClusterProvider CreateFactory
vazois b2b2f06
using FlushSuffix const
vazois fb7f88e
re-use read buffer to avoid unnecessary allocations
vazois 75b4103
reduce buffer size to avoid fragmentation
vazois 3fe6daa
check for activeSink double initialization and re-use buffer across r…
vazois 3607147
fix test comment
vazois a0581a5
fix formatting
vazois bcf09e1
validate rangeIndexManager is enabled during replay
vazois 5406a38
add logging message when enumerating flush files
vazois fc4e739
throw on bytesRead == 0
vazois 17cbde7
add validation for checking flush.bftree file creation
vazois File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
158 changes: 158 additions & 0 deletions
158
libs/cluster/Server/Replication/PrimaryOps/DiskbasedReplication/RangeIndexFileDataSource.cs
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,158 @@ | ||
| // Copyright (c) Microsoft Corporation. | ||
| // Licensed under the MIT license. | ||
|
|
||
| using System; | ||
| using System.Buffers.Binary; | ||
| using System.IO; | ||
| using System.Threading; | ||
| using System.Threading.Tasks; | ||
| using Garnet.common; | ||
| using Microsoft.Extensions.Logging; | ||
|
|
||
| namespace Garnet.cluster | ||
| { | ||
/// <summary>
/// A checkpoint data source that reads a RangeIndex .bftree file using FileStream.
/// Unlike <see cref="FileDataSource"/> which uses Tsavorite's IDevice for sector-aligned I/O,
/// this source reads plain files directly since .bftree files are not managed by the device layer.
/// </summary>
internal sealed class RangeIndexFileDataSource : ISnapshotDataSource
{
    /// <summary>
    /// Default chunk size for streaming .bftree files (64 KB).
    /// </summary>
    internal const int DefaultChunkSize = 1 << 16;

    /// <summary>
    /// Length in bytes of the ASCII-encoded key hash in the metadata payload.
    /// </summary>
    internal const int KeyHashLength = 32;

    /// <summary>
    /// Length in bytes of the little-endian encoded logical address in the metadata payload.
    /// </summary>
    internal const int AddressLength = sizeof(long);

    /// <summary>
    /// Total metadata length for flush files (key hash + address).
    /// </summary>
    internal const int FlushMetadataLength = KeyHashLength + AddressLength;

    private readonly string filePath;
    private readonly int chunkSize;
    private readonly ILogger logger;

    /// <summary>
    /// Lazily opened on the first call to <see cref="ReadNextChunkAsync"/>; released by <see cref="Dispose"/>.
    /// </summary>
    private FileStream stream;

    /// <summary>
    /// Read buffer. Normally supplied through <see cref="SetBuffer"/> so one allocation can be shared;
    /// when none (or an empty one) was supplied, a private buffer of <c>chunkSize</c> bytes is allocated on first read.
    /// </summary>
    private byte[] buffer;

    /// <inheritdoc/>
    public CheckpointFileType Type { get; }

    /// <inheritdoc/>
    public Guid Token { get; }

    /// <summary>
    /// The 32-character key hash prefix identifying the RangeIndex tree.
    /// </summary>
    public string KeyHash { get; }

    /// <summary>
    /// The logical hlog address embedded in the flush filename.
    /// Only meaningful for <see cref="CheckpointFileType.STORE_RANGEINDEX_FLUSH"/>.
    /// </summary>
    public long Address { get; }

    /// <inheritdoc/>
    public long StartOffset => 0;

    /// <inheritdoc/>
    public long CurrentOffset { get; private set; }

    /// <inheritdoc/>
    public long EndOffset { get; }

    /// <inheritdoc/>
    public bool HasNextChunk => CurrentOffset < EndOffset;

    /// <summary>
    /// Builds the metadata payload sent ahead of the file content.
    /// </summary>
    /// <returns>
    /// For <see cref="CheckpointFileType.STORE_RANGEINDEX_FLUSH"/>: the ASCII key hash
    /// (<see cref="KeyHashLength"/> bytes) followed by <see cref="Address"/> as a little-endian
    /// 64-bit integer (<see cref="FlushMetadataLength"/> bytes in total).
    /// For any other type: the ASCII key hash only.
    /// </returns>
    public byte[] GetMetadata()
    {
        // The constructor guarantees KeyHash has exactly KeyHashLength characters, and ASCII
        // encoding emits one byte per character, so this array is exactly KeyHashLength bytes.
        var keyHashBytes = System.Text.Encoding.ASCII.GetBytes(KeyHash);

        // Snapshot: keyHash only
        if (Type != CheckpointFileType.STORE_RANGEINDEX_FLUSH)
            return keyHashBytes;

        var metadata = new byte[FlushMetadataLength];
        Buffer.BlockCopy(keyHashBytes, 0, metadata, 0, KeyHashLength);
        BinaryPrimitives.WriteInt64LittleEndian(metadata.AsSpan(KeyHashLength), Address);
        return metadata;
    }

    /// <summary>
    /// Creates a new RangeIndexFileDataSource. The file length at construction time is
    /// captured as <see cref="EndOffset"/>.
    /// </summary>
    /// <param name="type">The checkpoint file type (STORE_RANGEINDEX_FLUSH or STORE_RANGEINDEX_SNAPSHOT).</param>
    /// <param name="token">The checkpoint token.</param>
    /// <param name="filePath">Full path to the .bftree file on disk.</param>
    /// <param name="keyHash">The 32-character key hash prefix.</param>
    /// <param name="address">The hlog logical address (flush files only).</param>
    /// <param name="chunkSize">Maximum bytes to read per chunk. Must be positive.</param>
    /// <param name="logger">Optional logger.</param>
    /// <exception cref="ArgumentNullException"><paramref name="filePath"/> or <paramref name="keyHash"/> is null.</exception>
    /// <exception cref="ArgumentException"><paramref name="keyHash"/> is not exactly <see cref="KeyHashLength"/> characters long.</exception>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="chunkSize"/> is zero or negative.</exception>
    /// <exception cref="FileNotFoundException">No file exists at <paramref name="filePath"/>.</exception>
    public RangeIndexFileDataSource(CheckpointFileType type, Guid token, string filePath, string keyHash, long address, int chunkSize = DefaultChunkSize, ILogger logger = null)
    {
        ArgumentNullException.ThrowIfNull(filePath);
        ArgumentNullException.ThrowIfNull(keyHash);

        // Fail here rather than at transmission time: GetMetadata copies exactly KeyHashLength
        // bytes, so a shorter hash would throw there and a longer one would be silently truncated.
        if (keyHash.Length != KeyHashLength)
            throw new ArgumentException($"RangeIndex key hash must be exactly {KeyHashLength} characters, got {keyHash.Length}", nameof(keyHash));

        // A non-positive chunk size would make every read request zero bytes and surface
        // as a misleading "unexpected EOF" error.
        if (chunkSize <= 0)
            throw new ArgumentOutOfRangeException(nameof(chunkSize), chunkSize, "Chunk size must be positive");

        Type = type;
        Token = token;
        KeyHash = keyHash;
        Address = address;
        this.filePath = filePath;
        this.chunkSize = chunkSize;
        this.logger = logger;

        var fileInfo = new FileInfo(filePath);
        if (!fileInfo.Exists)
            throw new FileNotFoundException($"RangeIndex file not found: {filePath}", filePath);

        EndOffset = fileInfo.Length;
    }

    /// <summary>
    /// Sets the shared read buffer for chunk reads, allowing a single allocation to be reused
    /// across all data sources. Reads never request more bytes than the buffer can hold, so a
    /// buffer smaller than the configured chunk size only results in smaller chunks.
    /// Passing null (or an empty array) makes the next read allocate a private buffer.
    /// </summary>
    /// <param name="sharedBuffer">The buffer subsequent chunk reads will fill.</param>
    internal void SetBuffer(byte[] sharedBuffer) => buffer = sharedBuffer;

    /// <summary>
    /// Reads the next chunk of the file into the read buffer and advances <see cref="CurrentOffset"/>.
    /// Callers are expected to check <see cref="HasNextChunk"/> first: with no bytes remaining the
    /// read returns zero and is reported as an unexpected EOF.
    /// </summary>
    /// <param name="cancellationToken">Token used to cancel the underlying file read.</param>
    /// <returns>
    /// A result referencing the read buffer, the number of bytes read, and the file offset
    /// at which the chunk starts. The buffer is reused by the next read.
    /// </returns>
    public async Task<DataSourceReadResult> ReadNextChunkAsync(CancellationToken cancellationToken = default)
    {
        stream ??= new FileStream(filePath, FileMode.Open, FileAccess.Read, FileShare.Read, bufferSize: chunkSize, useAsync: true);

        // Fall back to a private buffer when no usable shared buffer was provided.
        if (buffer is null || buffer.Length == 0)
            buffer = new byte[chunkSize];

        // Pin the buffer used for this read so the returned result always refers to the
        // array that was actually filled, even if SetBuffer is called while the read is pending.
        var readBuffer = buffer;

        var remaining = EndOffset - CurrentOffset;
        var bytesToRead = (int)Math.Min(remaining, Math.Min(chunkSize, readBuffer.Length));

        var bytesRead = await stream.ReadAsync(readBuffer.AsMemory(0, bytesToRead), cancellationToken).ConfigureAwait(false);

        // Zero bytes means the file is shorter than the length captured at construction.
        if (bytesRead == 0)
            ExceptionUtils.ThrowException(new GarnetException($"RangeIndexFileDataSource: unexpected EOF at offset {CurrentOffset}, expected {EndOffset} for {filePath}"));

        var chunkStart = CurrentOffset;
        CurrentOffset += bytesRead;

        return new DataSourceReadResult(readBuffer, bytesRead, chunkStartAddress: chunkStart);
    }

    /// <summary>
    /// Closes the underlying file stream if it was opened. Safe to call more than once.
    /// </summary>
    public void Dispose()
    {
        stream?.Dispose();
        stream = null;
    }
}
| } | ||
86 changes: 86 additions & 0 deletions
86
...luster/Server/Replication/PrimaryOps/DiskbasedReplication/RangeIndexFileTransmitSource.cs
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,86 @@ | ||
| // Copyright (c) Microsoft Corporation. | ||
| // Licensed under the MIT license. | ||
|
|
||
| using System; | ||
| using System.Threading; | ||
| using System.Threading.Tasks; | ||
| using Garnet.client; | ||
| using Garnet.common; | ||
| using Microsoft.Extensions.Logging; | ||
|
|
||
| namespace Garnet.cluster | ||
| { | ||
/// <summary>
/// Transmits a RangeIndex file over the network. Sends a metadata header (with
/// <c>startAddress = -1</c>) containing the serialized key hash and address needed to
/// reconstruct the target path, followed by chunked file content, followed by an empty
/// end-of-transmission marker.
///
/// <para>Metadata payload layout:</para>
/// <list type="bullet">
/// <item><b>STORE_RANGEINDEX_FLUSH</b>: keyHash (32 bytes ASCII) + address (8 bytes little-endian) = 40 bytes</item>
/// <item><b>STORE_RANGEINDEX_SNAPSHOT</b>: keyHash (32 bytes ASCII) = 32 bytes</item>
/// </list>
/// </summary>
internal sealed class RangeIndexFileTransmitSource : ISnapshotTransmitSource
{
    /// <summary>
    /// Start address sent with the header message to mark it as a single-message control payload
    /// rather than file content.
    /// </summary>
    private const int MetadataStartAddress = -1;

    private readonly ILogger logger;

    /// <summary>
    /// The data source typed as <see cref="RangeIndexFileDataSource"/>, validated once at construction
    /// so transmission does not fail midway on a cast.
    /// </summary>
    private readonly RangeIndexFileDataSource rangeIndexDataSource;

    /// <inheritdoc/>
    public ISnapshotDataSource DataSource { get; }

    /// <summary>
    /// Creates a transmit source for a RangeIndex file.
    /// </summary>
    /// <param name="dataSource">The file data source to transmit; must be a <see cref="RangeIndexFileDataSource"/>.</param>
    /// <param name="logger">Optional logger.</param>
    /// <exception cref="ArgumentNullException"><paramref name="dataSource"/> is null.</exception>
    /// <exception cref="ArgumentException"><paramref name="dataSource"/> is not a <see cref="RangeIndexFileDataSource"/>.</exception>
    public RangeIndexFileTransmitSource(ISnapshotDataSource dataSource, ILogger logger = null)
    {
        ArgumentNullException.ThrowIfNull(dataSource);

        // TransmitAsync needs the RangeIndex-specific metadata, so reject other sources up front
        // instead of failing with an InvalidCastException in the middle of replication.
        rangeIndexDataSource = dataSource as RangeIndexFileDataSource
            ?? throw new ArgumentException(
                $"{nameof(RangeIndexFileTransmitSource)} requires a {nameof(RangeIndexFileDataSource)}, got {dataSource.GetType().Name}",
                nameof(dataSource));

        DataSource = dataSource;
        this.logger = logger;
    }

    /// <summary>
    /// Sends the file to the replica in three phases: the metadata header, the file content
    /// in chunks, and an empty message marking the end of transmission. Each message must be
    /// acknowledged with "OK"; any other response is raised as a <see cref="GarnetException"/>.
    /// </summary>
    /// <param name="gcs">Client session used to send the snapshot messages.</param>
    /// <param name="timeout">Maximum time to wait for each individual response.</param>
    /// <param name="cancellationToken">Token used to cancel reads and pending responses.</param>
    public async Task TransmitAsync(GarnetClientSession gcs, TimeSpan timeout, CancellationToken cancellationToken = default)
    {
        var fileTokenBytes = DataSource.Token.ToByteArray();
        var fileType = (int)DataSource.Type;

        // Get metadata from data source: keyHash + address (flush only)
        var metadata = rangeIndexDataSource.GetMetadata();

        // Send header with startAddress = -1 to indicate single-message control payload.
        var headerResp = await gcs.ExecuteClusterSnapshotData(
                fileTokenBytes, fileType, MetadataStartAddress, metadata)
            .WaitAsync(timeout, cancellationToken).ConfigureAwait(false);

        if (!headerResp.Equals("OK"))
            ExceptionUtils.ThrowException(new GarnetException(
                $"Replica error at RangeIndex header {DataSource.Type} {headerResp} keyHash={rangeIndexDataSource.KeyHash}"));

        // Stream file content in chunks
        while (DataSource.HasNextChunk)
        {
            var result = await DataSource.ReadNextChunkAsync(cancellationToken).ConfigureAwait(false);

            var resp = await gcs.ExecuteClusterSnapshotData(
                fileTokenBytes,
                fileType,
                startAddress: result.ChunkStartAddress,
                new Span<byte>(result.Data, 0, result.BytesRead)).WaitAsync(timeout, cancellationToken).ConfigureAwait(false);

            if (!resp.Equals("OK"))
                ExceptionUtils.ThrowException(new GarnetException(
                    $"Replica error at RangeIndex TransmitAsync {DataSource.Type} {resp} [{DataSource.StartOffset},{DataSource.CurrentOffset},{DataSource.EndOffset}]"));
        }

        // Send empty package to indicate end of transmission
        var endResp = await gcs.ExecuteClusterSnapshotData(
                fileTokenBytes, fileType, DataSource.CurrentOffset, [])
            .WaitAsync(timeout, cancellationToken).ConfigureAwait(false);

        if (!endResp.Equals("OK"))
            ExceptionUtils.ThrowException(new GarnetException(
                $"Replica error at RangeIndex TransmitAsync Completion {DataSource.Type} {endResp}"));
    }

    /// <summary>
    /// Disposes the underlying data source.
    /// </summary>
    public void Dispose()
    {
        DataSource?.Dispose();
    }
}
| } |
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.