
Commit 9f69699

HBASE-29905 BackupLogCleaner: skip tables no longer in the backup set
BackupLogCleaner.serverToPreservationBoundaryTs() computes the WAL deletion boundary by iterating over the tableSetTimestampMap of persisted BackupInfo sessions. This map can contain entries for tables that were once part of the backup set but have since had all their backups deleted. Their stale timestamps drag the minimum WAL boundary back, preventing old WALs from being cleaned up.

Fix: when computing boundaries, load the incrbackupset per backup root from BackupSystemTable and skip tables that are not in the active set. The incrbackupset is populated on every full backup and pruned when backups are deleted, so it accurately reflects which tables still need WAL retention.
1 parent e59fa86 commit 9f69699

2 files changed: 73 additions, 3 deletions
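
Before the diffs, a minimal standalone sketch of the mechanics described above. This is not code from the commit: the class name, table names, and timestamps are hypothetical, and the stream pipeline is only a toy stand-in for the BackupBoundaries logic. It shows why one stale tableSetTimestampMap entry pins the minimum boundary, and what filtering by the active set changes.

import java.util.Map;
import java.util.Set;

// Toy model of the boundary computation: the cleaner retains any WAL newer
// than the minimum table timestamp, so a single stale entry pins everything.
public class StaleBoundarySketch {
  public static void main(String[] args) {
    // table2's backups were all deleted, but its old timestamp lingers
    Map<String, Long> tableSetTimestamps = Map.of("table1", 2000L, "table2", 1000L);
    Set<String> activeSet = Set.of("table1"); // the pruned incrbackupset

    // Without the fix: min over all entries, stale table2 included
    long before = tableSetTimestamps.values().stream()
      .mapToLong(Long::longValue).min().orElse(Long.MAX_VALUE);

    // With the fix: tables outside the active set are skipped
    long after = tableSetTimestamps.entrySet().stream()
      .filter(e -> activeSet.contains(e.getKey()))
      .mapToLong(Map.Entry::getValue).min().orElse(Long.MAX_VALUE);

    // before = 1000: WALs newer than the stale timestamp are kept forever
    // after  = 2000: WALs older than the latest backup become deletable
    System.out.println("boundary before=" + before + ", after=" + after);
  }
}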

hbase-backup/src/main/java/org/apache/hadoop/hbase/backup/master/BackupLogCleaner.java (19 additions, 3 deletions)
@@ -18,14 +18,14 @@
 package org.apache.hadoop.hbase.backup.master;
 
 import static org.apache.hadoop.hbase.backup.BackupInfo.withState;
-
 import java.io.IOException;
 import java.time.Duration;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
+import java.util.Set;
 import java.util.stream.Collectors;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileStatus;
@@ -49,7 +49,6 @@
 import org.apache.yetus.audience.InterfaceAudience;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
-
 import org.apache.hbase.thirdparty.org.apache.commons.collections4.IterableUtils;
 import org.apache.hbase.thirdparty.org.apache.commons.collections4.MapUtils;
 
@@ -119,13 +118,30 @@ private BackupBoundaries serverToPreservationBoundaryTs(BackupSystemTable sysTab
         .collect(Collectors.joining(", ")));
     }
 
+    // Load the active backup table set for each backup root, so we can then skip
+    // tables that are no longer part of the backup set. The incrbackupset is populated
+    // on every full backup and pruned when backups are deleted, so it accurately reflects
+    // which tables still have backups and may need future incrementals.
+    Map<String, Set<TableName>> activeTablesPerRoot = new HashMap<>();
+    for (String backupRoot : newestBackupPerRootDir.keySet()) {
+      activeTablesPerRoot.put(backupRoot, sysTable.getIncrementalBackupTableSet(backupRoot));
+    }
+
     BackupBoundaries.BackupBoundariesBuilder builder =
       BackupBoundaries.builder(getConf().getLong(TS_BUFFER_KEY, TS_BUFFER_DEFAULT));
-    for (BackupInfo backupInfo : newestBackupPerRootDir.values()) {
+    for (Map.Entry<String, BackupInfo> rootEntry : newestBackupPerRootDir.entrySet()) {
+      String backupRoot = rootEntry.getKey();
+      BackupInfo backupInfo = rootEntry.getValue();
+      Set<TableName> activeTables = activeTablesPerRoot.get(backupRoot);
       long startCode = Long.parseLong(sysTable.readBackupStartCode(backupInfo.getBackupRootDir()));
       // Iterate over all tables in the timestamp map, which contains all tables covered in the
       // backup root, not just the tables included in that specific backup (which could be a subset)
       for (TableName table : backupInfo.getTableSetTimestampMap().keySet()) {
+        if (activeTables != null && !activeTables.contains(table)) {
+          LOG.debug("Skipping stale table {} in backup root {}: not in incremental backup set",
+            table, backupRoot);
+          continue;
+        }
         for (Map.Entry<String, Long> entry : backupInfo.getTableSetTimestampMap().get(table)
           .entrySet()) {
           builder.addBackupTimestamps(entry.getKey(), entry.getValue(), startCode);
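
For context, getIncrementalBackupTableSet() reads the incrbackupset entry for one backup root from BackupSystemTable. A hedged usage sketch follows; the helper name is invented, the Connection is assumed to come from the caller, and exception handling is reduced to a throws clause:

import java.io.IOException;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.backup.impl.BackupSystemTable;
import org.apache.hadoop.hbase.client.Connection;

// Sketch: is 'table' still part of the active set for 'backupRoot',
// i.e. still pinning WAL retention for that root?
static boolean stillInBackupSet(Connection conn, String backupRoot, TableName table)
    throws IOException {
  try (BackupSystemTable sysTable = new BackupSystemTable(conn)) {
    return sysTable.getIncrementalBackupTableSet(backupRoot).contains(table);
  }
}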

hbase-backup/src/test/java/org/apache/hadoop/hbase/backup/master/TestBackupLogCleaner.java (54 additions, 0 deletions)
@@ -25,6 +25,7 @@
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collection;
+import java.util.Collections;
 import java.util.HashMap;
 import java.util.LinkedHashSet;
 import java.util.List;
@@ -215,6 +216,59 @@ public void testBackupLogCleaner() throws Exception {
     }
   }
 
+  @Test
+  public void testRemovedTableDoesNotPinWals() throws Exception {
+    Path backupRoot = new Path(BACKUP_ROOT_DIR, "staleRoot");
+
+    try {
+      BackupLogCleaner cleaner = new BackupLogCleaner();
+      cleaner.setConf(TEST_UTIL.getConfiguration());
+      Map<String, Object> params = new HashMap<>(1);
+      params.put(HMaster.MASTER, TEST_UTIL.getHBaseCluster().getMaster());
+      cleaner.init(params);
+
+      // Create FULL backup B1 with table1 and table2
+      String backupIdB1 =
+        backupTables(BackupType.FULL, Arrays.asList(table1, table2), backupRoot.toString());
+      assertTrue(checkSucceeded(backupIdB1));
+
+      Set<FileStatus> walFilesAfterB1 =
+        new LinkedHashSet<>(getListOfWALFiles(TEST_UTIL.getConfiguration()));
+
+      // Insert data so the next backup advances WAL positions for table1
+      Connection conn = TEST_UTIL.getConnection();
+      try (Table t1 = conn.getTable(table1)) {
+        for (int i = 0; i < NB_ROWS_IN_BATCH; i++) {
+          Put p = new Put(Bytes.toBytes("stale-row-t1" + i));
+          p.addColumn(famName, qualName, Bytes.toBytes("val" + i));
+          t1.put(p);
+        }
+      }
+
+      // Create FULL backup B2 with only table1.
+      // B2's tableSetTimestampMap carries forward the old timestamp from B1 for table2,
+      // while table1 gets a fresh timestamp: { table1: ts(B2), table2: ts(B1) }
+      String backupIdB2 =
+        backupTables(BackupType.FULL, Collections.singletonList(table1), backupRoot.toString());
+      assertTrue(checkSucceeded(backupIdB2));
+
+      Set<FileStatus> walFilesAfterB2 =
+        mergeAsSet(walFilesAfterB1, getListOfWALFiles(TEST_UTIL.getConfiguration()));
+
+      // Delete B1: since it is the only backup referencing table2, finalizeDelete will
+      // remove table2 from the incremental backup set for this root.
+      getBackupAdmin().deleteBackups(new String[] { backupIdB1 });
+
+      // table2 is no longer in the backup set, so the boundary = ts(B2) instead of
+      // min(ts(B2), ts(B1)) = ts(B1). WALs between B1 and B2 are now deletable.
+      Iterable<FileStatus> deletable = cleaner.getDeletableFiles(walFilesAfterB2);
+      assertTrue("WALs after B1 should be deletable once stale tables are removed from incr set",
+        toSet(deletable).containsAll(walFilesAfterB1));
+    } finally {
+      TEST_UTIL.truncateTable(BackupSystemTable.getTableName(TEST_UTIL.getConfiguration())).close();
+    }
+  }
+
   @Test
   public void testDoesNotDeleteWALsFromNewServers() throws Exception {
     Path backupRoot1 = new Path(BACKUP_ROOT_DIR, "backup1");
