From e4888cf82618b04d9b8e8ab85b8d0c08b3b7c059 Mon Sep 17 00:00:00 2001 From: Kristofer Karlsson Date: Wed, 1 Jul 2026 14:32:01 +0200 Subject: [PATCH 1/2] t/perf: add perf test for ref tombstone scenarios Add a performance test for update-ref when many tombstones are present. This exercises the scenario where all refs are deleted (creating tombstones in the reftable) and then re-created, which currently exhibits quadratic behavior. Helped-by: Jeff King Signed-off-by: Kristofer Karlsson --- t/perf/p1401-ref-store-tombstones.sh | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) create mode 100755 t/perf/p1401-ref-store-tombstones.sh diff --git a/t/perf/p1401-ref-store-tombstones.sh b/t/perf/p1401-ref-store-tombstones.sh new file mode 100755 index 00000000000000..fd3b99e2b9d39d --- /dev/null +++ b/t/perf/p1401-ref-store-tombstones.sh @@ -0,0 +1,27 @@ +#!/bin/sh + +test_description="Tests performance of ref operations with many tombstones" + +. ./perf-lib.sh + +test_perf_fresh_repo + +test_expect_success "setup" ' + blob=$(echo foo | git hash-object -w --stdin) && + for i in $(test_seq 8000) + do + printf "create refs/tags/tag-%d %s\n" "$i" "$blob" || + return 1 + done >input && + git update-ref --stdin Date: Wed, 1 Jul 2026 14:32:07 +0200 Subject: [PATCH 2/2] reftable: fix quadratic behavior when re-creating deleted refs When many refs are deleted and then re-created, update-ref exhibits quadratic behavior. With 8000 refs deleted and re-created, the runtime is ~15s, quadrupling for each doubling of input size. The root cause is the merged iterator's suppress_deletions flag. When set, merged_iter_next_void() silently consumes tombstone records in a tight internal loop before returning to the caller. This prevents higher-level code from checking iteration bounds (such as prefix or refname comparisons) until after all tombstones have been scanned. This affects two code paths during ref creation: - refs_verify_refnames_available() seeks to "refs/tags/foo-1/" to check for D/F conflicts and must scan through all subsequent tombstones before the caller can see that they are past the prefix of interest. - reftable_backend_read_ref() seeks to a specific refname and must scan through all subsequent tombstones before returning "not found", because the merged iterator skips the matching tombstone and searches for the next live record. Fix this by removing suppress_deletions from the merged iterator and instead handling deletion records at each call site in the reftable backend, where prefix and refname bounds are available. Tombstones are now returned to callers, which skip them after their existing bounds checks. This allows iteration to terminate as soon as a tombstone past the relevant bound is encountered. This also requires adding deletion checks to the log iteration paths, since suppress_deletions applied to both ref and log iterators. Before: nr=1000 0.306s nr=2000 0.945s nr=4000 3.816s nr=8000 14.93s After: nr=1000 0.020s nr=2000 0.044s nr=4000 0.071s nr=8000 0.145s nr=16000 0.258s nr=32000 0.591s Reported-by: Jeff King Signed-off-by: Kristofer Karlsson --- refs/reftable-backend.c | 54 ++++++++++++++++++++++++++++++-------- reftable/merged.c | 12 +-------- reftable/merged.h | 4 --- reftable/stack.c | 1 - t/t0610-reftable-basics.sh | 21 +++++++++++++++ 5 files changed, 65 insertions(+), 27 deletions(-) diff --git a/refs/reftable-backend.c b/refs/reftable-backend.c index 4ae22922de558b..8c4f119ff13b55 100644 --- a/refs/reftable-backend.c +++ b/refs/reftable-backend.c @@ -86,7 +86,8 @@ static int reftable_backend_read_ref(struct reftable_backend *be, if (ret) goto done; - if (strcmp(ref.refname, refname)) { + if (strcmp(ref.refname, refname) || + reftable_ref_record_is_deletion(&ref)) { ret = 1; goto done; } @@ -112,7 +113,6 @@ static int reftable_backend_read_ref(struct reftable_backend *be, oidread(oid, reftable_ref_record_val1(&ref), &hash_algos[hash_id]); } else { - /* We got a tombstone, which should not happen. */ BUG("unhandled reference value type %d", ref.value_type); } @@ -633,6 +633,9 @@ static int reftable_ref_iterator_advance(struct ref_iterator *ref_iterator) break; } + if (iter->ref.value_type == REFTABLE_REF_DELETION) + continue; + if (iter->exclude_patterns && should_exclude_current_ref(iter)) continue; @@ -1492,6 +1495,8 @@ static int write_transaction_table(struct reftable_writer *writer, void *cb_data ret = 0; break; } + if (reftable_log_record_is_deletion(&log)) + continue; ALLOC_GROW(logs, logs_nr + 1, logs_alloc); tombstone = &logs[logs_nr++]; @@ -1889,6 +1894,8 @@ static int write_copy_table(struct reftable_writer *writer, void *cb_data) ret = 0; break; } + if (reftable_log_record_is_deletion(&old_log)) + continue; free(old_log.refname); @@ -2019,6 +2026,9 @@ static int reftable_reflog_iterator_advance(struct ref_iterator *ref_iterator) if (iter->err) break; + if (reftable_log_record_is_deletion(&iter->log)) + continue; + /* * We want the refnames that we have reflogs for, so we skip if * we've already produced this name. This could be faster by @@ -2178,6 +2188,8 @@ static int reftable_be_for_each_reflog_ent_reverse(struct ref_store *ref_store, ret = 0; break; } + if (reftable_log_record_is_deletion(&log)) + continue; ret = yield_log_record(refs, &log, fn, cb_data); if (ret) @@ -2230,6 +2242,10 @@ static int reftable_be_for_each_reflog_ent(struct ref_store *ref_store, ret = 0; break; } + if (reftable_log_record_is_deletion(&log)) { + reftable_log_record_release(&log); + continue; + } ALLOC_GROW(logs, logs_nr + 1, logs_alloc); logs[logs_nr++] = log; @@ -2276,18 +2292,26 @@ static int reftable_be_reflog_exists(struct ref_store *ref_store, goto done; /* - * Check whether we get at least one log record for the given ref name. - * If so, the reflog exists, otherwise it doesn't. + * Check whether we get at least one non-deleted log record for the + * given ref name. If so, the reflog exists, otherwise it doesn't. */ - ret = reftable_iterator_next_log(&it, &log); - if (ret < 0) - goto done; - if (ret > 0) { - ret = 0; - goto done; + while (1) { + ret = reftable_iterator_next_log(&it, &log); + if (ret < 0) + goto done; + if (ret > 0) { + ret = 0; + goto done; + } + if (strcmp(log.refname, refname)) { + ret = 0; + goto done; + } + if (!reftable_log_record_is_deletion(&log)) + break; } - ret = strcmp(log.refname, refname) == 0; + ret = 1; done: reftable_iterator_destroy(&it); @@ -2399,6 +2423,8 @@ static int write_reflog_delete_table(struct reftable_writer *writer, void *cb_da ret = 0; break; } + if (reftable_log_record_is_deletion(&log)) + continue; tombstone.refname = (char *)arg->refname; tombstone.value_type = REFTABLE_LOG_DELETION; @@ -2580,6 +2606,10 @@ static int reftable_be_reflog_expire(struct ref_store *ref_store, reftable_log_record_release(&log); break; } + if (reftable_log_record_is_deletion(&log)) { + reftable_log_record_release(&log); + continue; + } oidread(&old_oid, log.value.update.old_hash, ref_store->repo->hash_algo); @@ -2746,6 +2776,8 @@ static int reftable_be_fsck(struct ref_store *ref_store, struct fsck_options *o, report.path = refname.buf; switch (ref.value_type) { + case REFTABLE_REF_DELETION: + continue; case REFTABLE_REF_VAL1: case REFTABLE_REF_VAL2: { struct object_id oid; diff --git a/reftable/merged.c b/reftable/merged.c index 733de07454d210..2f9a3612340212 100644 --- a/reftable/merged.c +++ b/reftable/merged.c @@ -26,7 +26,6 @@ struct merged_iter { struct merged_subiter *subiters; struct merged_iter_pqueue pq; size_t subiters_len; - int suppress_deletions; ssize_t advance_index; }; @@ -166,15 +165,7 @@ static int merged_iter_seek_void(void *it, struct reftable_record *want) static int merged_iter_next_void(void *p, struct reftable_record *rec) { - struct merged_iter *mi = p; - while (1) { - int err = merged_iter_next_entry(mi, rec); - if (err) - return err; - if (mi->suppress_deletions && reftable_record_is_deletion(rec)) - continue; - return 0; - } + return merged_iter_next_entry(p, rec); } static struct reftable_iterator_vtable merged_iter_vtable = { @@ -278,7 +269,6 @@ int merged_table_init_iter(struct reftable_merged_table *mt, goto out; } mi->advance_index = -1; - mi->suppress_deletions = mt->suppress_deletions; mi->subiters = subiters; mi->subiters_len = mt->tables_len; diff --git a/reftable/merged.h b/reftable/merged.h index 4317e5f5f6746e..6fafd1d080a694 100644 --- a/reftable/merged.h +++ b/reftable/merged.h @@ -17,10 +17,6 @@ struct reftable_merged_table { size_t tables_len; enum reftable_hash hash_id; - /* If unset, produce deletions. This is useful for compaction. For the - * full stack, deletions should be produced. */ - int suppress_deletions; - uint64_t min; uint64_t max; }; diff --git a/reftable/stack.c b/reftable/stack.c index 1fba96ddb3366f..77aeac47157e36 100644 --- a/reftable/stack.c +++ b/reftable/stack.c @@ -337,7 +337,6 @@ static int reftable_stack_reload_once(struct reftable_stack *st, /* Update the stack to point to the new tables. */ if (st->merged) reftable_merged_table_free(st->merged); - new_merged->suppress_deletions = 1; st->merged = new_merged; if (st->tables) diff --git a/t/t0610-reftable-basics.sh b/t/t0610-reftable-basics.sh index e19e0368989d50..bd43c3b2b0f46f 100755 --- a/t/t0610-reftable-basics.sh +++ b/t/t0610-reftable-basics.sh @@ -1163,4 +1163,25 @@ test_expect_success 'writes do not persist peeled value for invalid tags' ' ) ' +test_expect_success 'delete and re-create refs with tombstones' ' + test_when_finished "rm -rf repo" && + git init repo && + ( + cd repo && + test_commit initial && + blob=$(echo foo | git hash-object -w --stdin) && + for i in $(test_seq 100) + do + printf "create refs/tags/tag-%d %s\n" "$i" "$blob" || + return 1 + done >input && + git update-ref --stdin actual && + test_line_count = 100 actual + ) +' + test_done