diff --git a/Documentation/RelNotes/2.54.0.adoc b/Documentation/RelNotes/2.54.0.adoc index 7de9f7e7ecf6d2..25affd6a981d61 100644 --- a/Documentation/RelNotes/2.54.0.adoc +++ b/Documentation/RelNotes/2.54.0.adoc @@ -227,6 +227,10 @@ Performance, Internal Implementation, Development Support etc. * Add a coccinelle rule to break the build when "struct strbuf" gets passed by value. + * Further work on incremental repacking using MIDX/bitmap + + * The logic to count objects has been cleaned up. + Fixes since v2.53 ----------------- diff --git a/Documentation/git-multi-pack-index.adoc b/Documentation/git-multi-pack-index.adoc index 2f642697e9e106..612568301412d6 100644 --- a/Documentation/git-multi-pack-index.adoc +++ b/Documentation/git-multi-pack-index.adoc @@ -9,7 +9,14 @@ git-multi-pack-index - Write and verify multi-pack-indexes SYNOPSIS -------- [verse] -'git multi-pack-index' [--object-dir=] [--[no-]bitmap] +'git multi-pack-index' [] write [--preferred-pack=] + [--[no-]bitmap] [--[no-]incremental] [--[no-]stdin-packs] + [--refs-snapshot=] +'git multi-pack-index' [] compact [--[no-]incremental] + [--[no-]bitmap] +'git multi-pack-index' [] verify +'git multi-pack-index' [] expire +'git multi-pack-index' [] repack [--batch-size=] DESCRIPTION ----------- @@ -18,6 +25,8 @@ Write or verify a multi-pack-index (MIDX) file. OPTIONS ------- +The following command-line options are applicable to all sub-commands: + --object-dir=:: Use given directory for the location of Git objects. We check `/packs/multi-pack-index` for the current MIDX file, and @@ -73,7 +82,21 @@ marker). Write an incremental MIDX file containing only objects and packs not present in an existing MIDX layer. Migrates non-incremental MIDXs to incremental ones when - necessary. Incompatible with `--bitmap`. + necessary. +-- + +compact:: + Write a new MIDX layer containing only objects and packs present + in the range `` to ``, where both arguments are + checksums of existing layers in the MIDX chain. ++ +-- + --incremental:: + Write the result to a MIDX chain instead of writing a + stand-alone MIDX. + + --[no-]bitmap:: + Control whether or not a multi-pack bitmap is written. -- verify:: diff --git a/Documentation/gitformat-pack.adoc b/Documentation/gitformat-pack.adoc index 1b4db4aa611e83..3416edceab82e9 100644 --- a/Documentation/gitformat-pack.adoc +++ b/Documentation/gitformat-pack.adoc @@ -374,7 +374,9 @@ HEADER: The signature is: {'M', 'I', 'D', 'X'} 1-byte version number: - Git only writes or recognizes version 1. + Git writes the version specified by the "midx.version" + configuration option, which defaults to 2. It recognizes + both versions 1 and 2. 1-byte Object Id Version We infer the length of object IDs (OIDs) from this value: @@ -413,7 +415,9 @@ CHUNK DATA: strings. There is no extra padding between the filenames, and they are listed in lexicographic order. The chunk itself is padded at the end with between 0 and 3 NUL bytes to make the - chunk size a multiple of 4 bytes. + chunk size a multiple of 4 bytes. Version 1 MIDXs are required to + list their packs in lexicographic order, but version 2 MIDXs may + list their packs in any arbitrary order. Bitmapped Packfiles (ID: {'B', 'T', 'M', 'P'}) Stores a table of two 4-byte unsigned integers in network order. diff --git a/builtin/gc.c b/builtin/gc.c index 5d8d358f7a511d..3a71e314c975af 100644 --- a/builtin/gc.c +++ b/builtin/gc.c @@ -467,37 +467,19 @@ static int rerere_gc_condition(struct gc_config *cfg UNUSED) static int too_many_loose_objects(int limit) { /* - * Quickly check if a "gc" is needed, by estimating how - * many loose objects there are. Because SHA-1 is evenly - * distributed, we can check only one and get a reasonable - * estimate. + * This is weird, but stems from legacy behaviour: the GC auto + * threshold was always essentially interpreted as if it was rounded up + * to the next multiple 256 of, so we retain this behaviour for now. */ - DIR *dir; - struct dirent *ent; - int auto_threshold; - int num_loose = 0; - int needed = 0; - const unsigned hexsz_loose = the_hash_algo->hexsz - 2; - char *path; - - path = repo_git_path(the_repository, "objects/17"); - dir = opendir(path); - free(path); - if (!dir) + int auto_threshold = DIV_ROUND_UP(limit, 256) * 256; + unsigned long loose_count; + + if (odb_source_loose_count_objects(the_repository->objects->sources, + ODB_COUNT_OBJECTS_APPROXIMATE, + &loose_count) < 0) return 0; - auto_threshold = DIV_ROUND_UP(limit, 256); - while ((ent = readdir(dir)) != NULL) { - if (strspn(ent->d_name, "0123456789abcdef") != hexsz_loose || - ent->d_name[hexsz_loose] != '\0') - continue; - if (++num_loose > auto_threshold) { - needed = 1; - break; - } - } - closedir(dir); - return needed; + return loose_count > auto_threshold; } static struct packed_git *find_base_packs(struct string_list *packs, @@ -592,9 +574,13 @@ static uint64_t total_ram(void) static uint64_t estimate_repack_memory(struct gc_config *cfg, struct packed_git *pack) { - unsigned long nr_objects = repo_approximate_object_count(the_repository); + unsigned long nr_objects; size_t os_cache, heap; + if (odb_count_objects(the_repository->objects, + ODB_COUNT_OBJECTS_APPROXIMATE, &nr_objects) < 0) + return 0; + if (!pack || !nr_objects) return 0; diff --git a/builtin/multi-pack-index.c b/builtin/multi-pack-index.c index 5f364aa816ba25..0f72d96c02da4c 100644 --- a/builtin/multi-pack-index.c +++ b/builtin/multi-pack-index.c @@ -9,12 +9,18 @@ #include "strbuf.h" #include "trace2.h" #include "odb.h" +#include "odb/source.h" #include "replace-object.h" #include "repository.h" #define BUILTIN_MIDX_WRITE_USAGE \ - N_("git multi-pack-index [] write [--preferred-pack=]" \ - "[--refs-snapshot=]") + N_("git multi-pack-index [] write [--preferred-pack=]\n" \ + " [--[no-]bitmap] [--[no-]incremental] [--[no-]stdin-packs]\n" \ + " [--refs-snapshot=]") + +#define BUILTIN_MIDX_COMPACT_USAGE \ + N_("git multi-pack-index [] compact [--[no-]incremental]\n" \ + " [--[no-]bitmap] ") #define BUILTIN_MIDX_VERIFY_USAGE \ N_("git multi-pack-index [] verify") @@ -29,6 +35,10 @@ static char const * const builtin_multi_pack_index_write_usage[] = { BUILTIN_MIDX_WRITE_USAGE, NULL }; +static char const * const builtin_multi_pack_index_compact_usage[] = { + BUILTIN_MIDX_COMPACT_USAGE, + NULL +}; static char const * const builtin_multi_pack_index_verify_usage[] = { BUILTIN_MIDX_VERIFY_USAGE, NULL @@ -43,6 +53,7 @@ static char const * const builtin_multi_pack_index_repack_usage[] = { }; static char const * const builtin_multi_pack_index_usage[] = { BUILTIN_MIDX_WRITE_USAGE, + BUILTIN_MIDX_COMPACT_USAGE, BUILTIN_MIDX_VERIFY_USAGE, BUILTIN_MIDX_EXPIRE_USAGE, BUILTIN_MIDX_REPACK_USAGE, @@ -84,6 +95,8 @@ static struct option common_opts[] = { N_("directory"), N_("object directory containing set of packfile and pack-index pairs"), parse_object_dir), + OPT_BIT(0, "progress", &opts.flags, N_("force progress reporting"), + MIDX_PROGRESS), OPT_END(), }; @@ -138,8 +151,6 @@ static int cmd_multi_pack_index_write(int argc, const char **argv, N_("pack for reuse when computing a multi-pack bitmap")), OPT_BIT(0, "bitmap", &opts.flags, N_("write multi-pack bitmap"), MIDX_WRITE_BITMAP | MIDX_WRITE_REV_INDEX), - OPT_BIT(0, "progress", &opts.flags, - N_("force progress reporting"), MIDX_PROGRESS), OPT_BIT(0, "incremental", &opts.flags, N_("write a new incremental MIDX"), MIDX_WRITE_INCREMENTAL), OPT_BOOL(0, "stdin-packs", &opts.stdin_packs, @@ -194,14 +205,78 @@ static int cmd_multi_pack_index_write(int argc, const char **argv, return ret; } +static int cmd_multi_pack_index_compact(int argc, const char **argv, + const char *prefix, + struct repository *repo) +{ + struct multi_pack_index *m, *cur; + struct multi_pack_index *from_midx = NULL; + struct multi_pack_index *to_midx = NULL; + struct odb_source *source; + int ret; + + struct option *options; + static struct option builtin_multi_pack_index_compact_options[] = { + OPT_BIT(0, "bitmap", &opts.flags, N_("write multi-pack bitmap"), + MIDX_WRITE_BITMAP | MIDX_WRITE_REV_INDEX), + OPT_BIT(0, "incremental", &opts.flags, + N_("write a new incremental MIDX"), MIDX_WRITE_INCREMENTAL), + OPT_END(), + }; + + repo_config(repo, git_multi_pack_index_write_config, NULL); + + options = add_common_options(builtin_multi_pack_index_compact_options); + + trace2_cmd_mode(argv[0]); + + if (isatty(2)) + opts.flags |= MIDX_PROGRESS; + argc = parse_options(argc, argv, prefix, + options, builtin_multi_pack_index_compact_usage, + 0); + + if (argc != 2) + usage_with_options(builtin_multi_pack_index_compact_usage, + options); + source = handle_object_dir_option(the_repository); + + FREE_AND_NULL(options); + + m = get_multi_pack_index(source); + + for (cur = m; cur && !(from_midx && to_midx); cur = cur->base_midx) { + const char *midx_csum = midx_get_checksum_hex(cur); + + if (!from_midx && !strcmp(midx_csum, argv[0])) + from_midx = cur; + if (!to_midx && !strcmp(midx_csum, argv[1])) + to_midx = cur; + } + + if (!from_midx) + die(_("could not find MIDX: %s"), argv[0]); + if (!to_midx) + die(_("could not find MIDX: %s"), argv[1]); + if (from_midx == to_midx) + die(_("MIDX compaction endpoints must be unique")); + + for (m = from_midx; m; m = m->base_midx) { + if (m == to_midx) + die(_("MIDX %s must be an ancestor of %s"), argv[0], argv[1]); + } + + ret = write_midx_file_compact(source, from_midx, to_midx, opts.flags); + + return ret; +} + static int cmd_multi_pack_index_verify(int argc, const char **argv, const char *prefix, struct repository *repo UNUSED) { struct option *options; static struct option builtin_multi_pack_index_verify_options[] = { - OPT_BIT(0, "progress", &opts.flags, - N_("force progress reporting"), MIDX_PROGRESS), OPT_END(), }; struct odb_source *source; @@ -231,8 +306,6 @@ static int cmd_multi_pack_index_expire(int argc, const char **argv, { struct option *options; static struct option builtin_multi_pack_index_expire_options[] = { - OPT_BIT(0, "progress", &opts.flags, - N_("force progress reporting"), MIDX_PROGRESS), OPT_END(), }; struct odb_source *source; @@ -264,8 +337,6 @@ static int cmd_multi_pack_index_repack(int argc, const char **argv, static struct option builtin_multi_pack_index_repack_options[] = { OPT_UNSIGNED(0, "batch-size", &opts.batch_size, N_("during repack, collect pack-files of smaller size into a batch that is larger than this size")), - OPT_BIT(0, "progress", &opts.flags, - N_("force progress reporting"), MIDX_PROGRESS), OPT_END(), }; struct odb_source *source; @@ -300,6 +371,7 @@ int cmd_multi_pack_index(int argc, struct option builtin_multi_pack_index_options[] = { OPT_SUBCOMMAND("repack", &fn, cmd_multi_pack_index_repack), OPT_SUBCOMMAND("write", &fn, cmd_multi_pack_index_write), + OPT_SUBCOMMAND("compact", &fn, cmd_multi_pack_index_compact), OPT_SUBCOMMAND("verify", &fn, cmd_multi_pack_index_verify), OPT_SUBCOMMAND("expire", &fn, cmd_multi_pack_index_expire), OPT_END(), diff --git a/builtin/submodule--helper.c b/builtin/submodule--helper.c index ff8b05d1bafbe3..2f589e3b378d3f 100644 --- a/builtin/submodule--helper.c +++ b/builtin/submodule--helper.c @@ -29,6 +29,7 @@ #include "object-file.h" #include "object-name.h" #include "odb.h" +#include "odb/source.h" #include "advice.h" #include "branch.h" #include "list-objects-filter-options.h" diff --git a/commit-graph.c b/commit-graph.c index f8e24145a513b0..c0300033304404 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -2607,7 +2607,8 @@ int write_commit_graph(struct odb_source *source, replace = ctx.opts->split_flags & COMMIT_GRAPH_SPLIT_REPLACE; } - ctx.approx_nr_objects = repo_approximate_object_count(r); + if (odb_count_objects(r->objects, ODB_COUNT_OBJECTS_APPROXIMATE, &ctx.approx_nr_objects) < 0) + ctx.approx_nr_objects = 0; if (ctx.append && g) { for (i = 0; i < g->num_commits; i++) { diff --git a/midx-write.c b/midx-write.c index 6485cb67068553..0ff2e45aa7abdd 100644 --- a/midx-write.c +++ b/midx-write.c @@ -36,10 +36,13 @@ extern int cmp_idx_or_pack_name(const char *idx_or_pack_name, static size_t write_midx_header(const struct git_hash_algo *hash_algo, struct hashfile *f, unsigned char num_chunks, - uint32_t num_packs) + uint32_t num_packs, int version) { + if (version != MIDX_VERSION_V1 && version != MIDX_VERSION_V2) + BUG("unexpected MIDX version: %d", version); + hashwrite_be32(f, MIDX_SIGNATURE); - hashwrite_u8(f, MIDX_VERSION); + hashwrite_u8(f, version); hashwrite_u8(f, oid_version(hash_algo)); hashwrite_u8(f, num_chunks); hashwrite_u8(f, 0); /* unused */ @@ -105,15 +108,29 @@ struct write_midx_context { uint32_t preferred_pack_idx; + int version; /* must be MIDX_VERSION_V1 or _V2 */ + int incremental; uint32_t num_multi_pack_indexes_before; + struct multi_pack_index *compact_from; + struct multi_pack_index *compact_to; + int compact; + struct string_list *to_include; struct repository *repo; struct odb_source *source; }; +static uint32_t midx_pack_perm(struct write_midx_context *ctx, + uint32_t orig_pack_int_id) +{ + if (ctx->compact) + orig_pack_int_id -= ctx->compact_from->num_packs_in_base; + return ctx->pack_perm[orig_pack_int_id]; +} + static int should_include_pack(const struct write_midx_context *ctx, const char *file_name) { @@ -257,18 +274,14 @@ static void midx_fanout_sort(struct midx_fanout *fanout) QSORT(fanout->entries, fanout->nr, midx_oid_compare); } -static void midx_fanout_add_midx_fanout(struct midx_fanout *fanout, - struct multi_pack_index *m, - uint32_t cur_fanout, - uint32_t preferred_pack) +static void midx_fanout_add_midx_fanout_1(struct midx_fanout *fanout, + struct multi_pack_index *m, + uint32_t cur_fanout, + uint32_t preferred_pack) { uint32_t start = m->num_objects_in_base, end; uint32_t cur_object; - if (m->base_midx) - midx_fanout_add_midx_fanout(fanout, m->base_midx, cur_fanout, - preferred_pack); - if (cur_fanout) start += ntohl(m->chunk_oid_fanout[cur_fanout - 1]); end = m->num_objects_in_base + ntohl(m->chunk_oid_fanout[cur_fanout]); @@ -292,6 +305,17 @@ static void midx_fanout_add_midx_fanout(struct midx_fanout *fanout, } } +static void midx_fanout_add_midx_fanout(struct midx_fanout *fanout, + struct multi_pack_index *m, + uint32_t cur_fanout, + uint32_t preferred_pack) +{ + if (m->base_midx) + midx_fanout_add_midx_fanout(fanout, m->base_midx, cur_fanout, + preferred_pack); + midx_fanout_add_midx_fanout_1(fanout, m, cur_fanout, preferred_pack); +} + static void midx_fanout_add_pack_fanout(struct midx_fanout *fanout, struct pack_info *info, uint32_t cur_pack, @@ -317,6 +341,45 @@ static void midx_fanout_add_pack_fanout(struct midx_fanout *fanout, } } +static void midx_fanout_add(struct midx_fanout *fanout, + struct write_midx_context *ctx, + uint32_t start_pack, + uint32_t cur_fanout) +{ + uint32_t cur_pack; + + if (ctx->m && !ctx->incremental) + midx_fanout_add_midx_fanout(fanout, ctx->m, cur_fanout, + ctx->preferred_pack_idx); + + for (cur_pack = start_pack; cur_pack < ctx->nr; cur_pack++) { + int preferred = cur_pack == ctx->preferred_pack_idx; + midx_fanout_add_pack_fanout(fanout, ctx->info, cur_pack, + preferred, cur_fanout); + } + + if (ctx->preferred_pack_idx != NO_PREFERRED_PACK && + ctx->preferred_pack_idx < start_pack) + midx_fanout_add_pack_fanout(fanout, ctx->info, + ctx->preferred_pack_idx, 1, + cur_fanout); +} + +static void midx_fanout_add_compact(struct midx_fanout *fanout, + struct write_midx_context *ctx, + uint32_t cur_fanout) +{ + struct multi_pack_index *m = ctx->compact_to; + + ASSERT(ctx->compact); + + while (m && m != ctx->compact_from->base_midx) { + midx_fanout_add_midx_fanout_1(fanout, m, cur_fanout, + NO_PREFERRED_PACK); + m = m->base_midx; + } +} + /* * It is possible to artificially get into a state where there are many * duplicate copies of objects. That can create high memory pressure if @@ -335,6 +398,9 @@ static void compute_sorted_entries(struct write_midx_context *ctx, size_t alloc_objects, total_objects = 0; struct midx_fanout fanout = { 0 }; + if (ctx->compact) + ASSERT(!start_pack); + for (cur_pack = start_pack; cur_pack < ctx->nr; cur_pack++) total_objects = st_add(total_objects, ctx->info[cur_pack].p->num_objects); @@ -353,23 +419,10 @@ static void compute_sorted_entries(struct write_midx_context *ctx, for (cur_fanout = 0; cur_fanout < 256; cur_fanout++) { fanout.nr = 0; - if (ctx->m && !ctx->incremental) - midx_fanout_add_midx_fanout(&fanout, ctx->m, cur_fanout, - ctx->preferred_pack_idx); - - for (cur_pack = start_pack; cur_pack < ctx->nr; cur_pack++) { - int preferred = cur_pack == ctx->preferred_pack_idx; - midx_fanout_add_pack_fanout(&fanout, - ctx->info, cur_pack, - preferred, cur_fanout); - } - - if (ctx->preferred_pack_idx != NO_PREFERRED_PACK && - ctx->preferred_pack_idx < start_pack) - midx_fanout_add_pack_fanout(&fanout, ctx->info, - ctx->preferred_pack_idx, 1, - cur_fanout); - + if (ctx->compact) + midx_fanout_add_compact(&fanout, ctx, cur_fanout); + else + midx_fanout_add(&fanout, ctx, start_pack, cur_fanout); midx_fanout_sort(&fanout); /* @@ -410,7 +463,9 @@ static int write_midx_pack_names(struct hashfile *f, void *data) if (ctx->info[i].expired) continue; - if (i && strcmp(ctx->info[i].pack_name, ctx->info[i - 1].pack_name) <= 0) + if (ctx->version == MIDX_VERSION_V1 && + i && strcmp(ctx->info[i].pack_name, + ctx->info[i - 1].pack_name) <= 0) BUG("incorrect pack-file order: %s before %s", ctx->info[i - 1].pack_name, ctx->info[i].pack_name); @@ -514,12 +569,12 @@ static int write_midx_object_offsets(struct hashfile *f, for (i = 0; i < ctx->entries_nr; i++) { struct pack_midx_entry *obj = list++; - if (ctx->pack_perm[obj->pack_int_id] == PACK_EXPIRED) + if (midx_pack_perm(ctx, obj->pack_int_id) == PACK_EXPIRED) BUG("object %s is in an expired pack with int-id %d", oid_to_hex(&obj->oid), obj->pack_int_id); - hashwrite_be32(f, ctx->pack_perm[obj->pack_int_id]); + hashwrite_be32(f, midx_pack_perm(ctx, obj->pack_int_id)); if (ctx->large_offsets_needed && obj->offset >> 31) hashwrite_be32(f, MIDX_LARGE_OFFSET_NEEDED | nr_large_offset++); @@ -620,8 +675,8 @@ static uint32_t *midx_pack_order(struct write_midx_context *ctx) for (i = 0; i < ctx->entries_nr; i++) { struct pack_midx_entry *e = &ctx->entries[i]; data[i].nr = i; - data[i].pack = ctx->pack_perm[e->pack_int_id]; - if (!e->preferred) + data[i].pack = midx_pack_perm(ctx, e->pack_int_id); + if (!e->preferred || ctx->compact) data[i].pack |= (1U << 31); data[i].offset = e->offset; } @@ -630,14 +685,14 @@ static uint32_t *midx_pack_order(struct write_midx_context *ctx) for (i = 0; i < ctx->entries_nr; i++) { struct pack_midx_entry *e = &ctx->entries[data[i].nr]; - struct pack_info *pack = &ctx->info[ctx->pack_perm[e->pack_int_id]]; + struct pack_info *pack = &ctx->info[midx_pack_perm(ctx, e->pack_int_id)]; if (pack->bitmap_pos == BITMAP_POS_UNKNOWN) pack->bitmap_pos = i + base_objects; pack->bitmap_nr++; pack_order[i] = data[i].nr; } for (i = 0; i < ctx->nr; i++) { - struct pack_info *pack = &ctx->info[ctx->pack_perm[i]]; + struct pack_info *pack = &ctx->info[i]; if (pack->bitmap_pos == BITMAP_POS_UNKNOWN) pack->bitmap_pos = 0; } @@ -691,7 +746,7 @@ static void prepare_midx_packing_data(struct packing_data *pdata, struct object_entry *to = packlist_alloc(pdata, &from->oid); oe_set_in_pack(pdata, to, - ctx->info[ctx->pack_perm[from->pack_int_id]].p); + ctx->info[midx_pack_perm(ctx, from->pack_int_id)].p); } trace2_region_leave("midx", "prepare_midx_packing_data", ctx->repo); @@ -900,6 +955,21 @@ static int write_midx_bitmap(struct write_midx_context *ctx, return ret; } +static int fill_pack_from_midx(struct pack_info *info, + struct multi_pack_index *m, + uint32_t pack_int_id) +{ + if (prepare_midx_pack(m, pack_int_id)) + return error(_("could not load pack %d"), pack_int_id); + + fill_pack_info(info, + m->packs[pack_int_id - m->num_packs_in_base], + m->pack_names[pack_int_id - m->num_packs_in_base], + pack_int_id); + + return 0; +} + static int fill_packs_from_midx(struct write_midx_context *ctx) { struct multi_pack_index *m; @@ -907,19 +977,88 @@ static int fill_packs_from_midx(struct write_midx_context *ctx) for (m = ctx->m; m; m = m->base_midx) { uint32_t i; - for (i = 0; i < m->num_packs; i++) { - if (prepare_midx_pack(m, m->num_packs_in_base + i)) - return error(_("could not load pack")); - + for (i = m->num_packs_in_base; + i < m->num_packs_in_base + m->num_packs; i++) { ALLOC_GROW(ctx->info, ctx->nr + 1, ctx->alloc); - fill_pack_info(&ctx->info[ctx->nr++], m->packs[i], - m->pack_names[i], - m->num_packs_in_base + i); + + if (fill_pack_from_midx(&ctx->info[ctx->nr], m, i) < 0) + return -1; + + ctx->nr++; } } return 0; } +static uint32_t compactible_packs_between(const struct multi_pack_index *from, + const struct multi_pack_index *to) +{ + uint32_t nr; + + ASSERT(from && to); + + if (unsigned_add_overflows(to->num_packs, to->num_packs_in_base)) + die(_("too many packs, unable to compact")); + + nr = to->num_packs + to->num_packs_in_base; + if (nr < from->num_packs_in_base) + BUG("unexpected number of packs in base during compaction: " + "%"PRIu32" < %"PRIu32, nr, from->num_packs_in_base); + + return nr - from->num_packs_in_base; +} + +static int fill_packs_from_midx_range(struct write_midx_context *ctx, + int bitmap_order) +{ + struct multi_pack_index *m = ctx->compact_to; + uint32_t packs_nr; + + ASSERT(ctx->compact && !ctx->nr); + ASSERT(ctx->compact_from); + ASSERT(ctx->compact_to); + + packs_nr = compactible_packs_between(ctx->compact_from, + ctx->compact_to); + + ALLOC_GROW(ctx->info, packs_nr, ctx->alloc); + + while (m != ctx->compact_from->base_midx) { + uint32_t pack_int_id, preferred_pack_id; + uint32_t i; + + if (bitmap_order) { + if (midx_preferred_pack(m, &preferred_pack_id) < 0) + die(_("could not determine preferred pack")); + } else { + preferred_pack_id = m->num_packs_in_base; + } + + pack_int_id = m->num_packs_in_base - ctx->compact_from->num_packs_in_base; + + if (fill_pack_from_midx(&ctx->info[pack_int_id++], m, + preferred_pack_id) < 0) + return -1; + + for (i = m->num_packs_in_base; + i < m->num_packs_in_base + m->num_packs; i++) { + if (preferred_pack_id == i) + continue; + + if (fill_pack_from_midx(&ctx->info[pack_int_id++], m, + i) < 0) + return -1; + } + + ctx->nr += m->num_packs; + m = m->base_midx; + } + + ASSERT(ctx->nr == packs_nr); + + return 0; +} + static struct { const char *non_split; const char *split; @@ -946,7 +1085,7 @@ static int link_midx_to_chain(struct multi_pack_index *m) } for (i = 0; i < ARRAY_SIZE(midx_exts); i++) { - const unsigned char *hash = get_midx_checksum(m); + const unsigned char *hash = midx_get_checksum_hash(m); get_midx_filename_ext(m->source, &from, hash, midx_exts[i].non_split); @@ -1025,6 +1164,12 @@ static bool midx_needs_update(struct multi_pack_index *midx, struct write_midx_c if (!midx_checksum_valid(midx)) goto out; + /* + * If the version differs, we need to update. + */ + if (midx->version != ctx->version) + goto out; + /* * Ignore incremental updates for now. The assumption is that any * incremental update would be either empty (in which case we will bail @@ -1033,6 +1178,9 @@ static bool midx_needs_update(struct multi_pack_index *midx, struct write_midx_c if (ctx->incremental) goto out; + if (ctx->compact) + goto out; /* Compaction always requires an update. */ + /* * Otherwise, we need to verify that the packs covered by the existing * MIDX match the packs that we already have. The logic to do so is way @@ -1078,14 +1226,31 @@ static bool midx_needs_update(struct multi_pack_index *midx, struct write_midx_c return needed; } -static int write_midx_internal(struct odb_source *source, - struct string_list *packs_to_include, - struct string_list *packs_to_drop, - const char *preferred_pack_name, - const char *refs_snapshot, - unsigned flags) +static int midx_hashcmp(const struct multi_pack_index *a, + const struct multi_pack_index *b, + const struct git_hash_algo *algop) { - struct repository *r = source->odb->repo; + return hashcmp(midx_get_checksum_hash(a), midx_get_checksum_hash(b), + algop); +} + +struct write_midx_opts { + struct odb_source *source; /* non-optional */ + + struct string_list *packs_to_include; + struct string_list *packs_to_drop; + + struct multi_pack_index *compact_from; + struct multi_pack_index *compact_to; + + const char *preferred_pack_name; + const char *refs_snapshot; + unsigned flags; +}; + +static int write_midx_internal(struct write_midx_opts *opts) +{ + struct repository *r = opts->source->odb->repo; struct strbuf midx_name = STRBUF_INIT; unsigned char midx_hash[GIT_MAX_RAWSZ]; uint32_t start_pack; @@ -1094,6 +1259,7 @@ static int write_midx_internal(struct odb_source *source, struct tempfile *incr; struct write_midx_context ctx = { .preferred_pack_idx = NO_PREFERRED_PACK, + .version = MIDX_VERSION_V2, }; struct multi_pack_index *midx_to_free = NULL; int bitmapped_packs_concat_len = 0; @@ -1101,27 +1267,45 @@ static int write_midx_internal(struct odb_source *source, int dropped_packs = 0; int result = -1; const char **keep_hashes = NULL; + size_t keep_hashes_nr = 0; struct chunkfile *cf; trace2_region_enter("midx", "write_midx_internal", r); ctx.repo = r; - ctx.source = source; + ctx.source = opts->source; - ctx.incremental = !!(flags & MIDX_WRITE_INCREMENTAL); + repo_config_get_int(ctx.repo, "midx.version", &ctx.version); + if (ctx.version != MIDX_VERSION_V1 && ctx.version != MIDX_VERSION_V2) + die(_("unknown MIDX version: %d"), ctx.version); + + ctx.incremental = !!(opts->flags & MIDX_WRITE_INCREMENTAL); + ctx.compact = !!(opts->flags & MIDX_WRITE_COMPACT); + + if (ctx.compact) { + if (ctx.version != MIDX_VERSION_V2) + die(_("cannot perform MIDX compaction with v1 format")); + if (!opts->compact_from) + BUG("expected non-NULL 'from' MIDX during compaction"); + if (!opts->compact_to) + BUG("expected non-NULL 'to' MIDX during compaction"); + + ctx.compact_from = opts->compact_from; + ctx.compact_to = opts->compact_to; + } if (ctx.incremental) strbuf_addf(&midx_name, "%s/pack/multi-pack-index.d/tmp_midx_XXXXXX", - source->path); + opts->source->path); else - get_midx_filename(source, &midx_name); + get_midx_filename(opts->source, &midx_name); if (safe_create_leading_directories(r, midx_name.buf)) die_errno(_("unable to create leading directories of %s"), midx_name.buf); - if (!packs_to_include || ctx.incremental) { - struct multi_pack_index *m = get_multi_pack_index(source); + if (!opts->packs_to_include || ctx.incremental) { + struct multi_pack_index *m = get_multi_pack_index(opts->source); if (m && !midx_checksum_valid(m)) { warning(_("ignoring existing multi-pack-index; checksum mismatch")); m = NULL; @@ -1136,11 +1320,18 @@ static int write_midx_internal(struct odb_source *source, */ if (ctx.incremental) ctx.base_midx = m; - else if (!packs_to_include) + if (!opts->packs_to_include) ctx.m = m; } } + /* + * If compacting MIDX layer(s) in the range [from, to], then the + * compacted MIDX will share the same base MIDX as 'from'. + */ + if (ctx.compact) + ctx.base_midx = ctx.compact_from->base_midx; + ctx.nr = 0; ctx.alloc = ctx.m ? ctx.m->num_packs + ctx.m->num_packs_in_base : 16; ctx.info = NULL; @@ -1149,34 +1340,42 @@ static int write_midx_internal(struct odb_source *source, if (ctx.incremental) { struct multi_pack_index *m = ctx.base_midx; while (m) { - if (flags & MIDX_WRITE_BITMAP && load_midx_revindex(m)) { + if (opts->flags & MIDX_WRITE_BITMAP && load_midx_revindex(m)) { error(_("could not load reverse index for MIDX %s"), - hash_to_hex_algop(get_midx_checksum(m), - m->source->odb->repo->hash_algo)); + midx_get_checksum_hex(m)); goto cleanup; } ctx.num_multi_pack_indexes_before++; m = m->base_midx; } - } else if (ctx.m && fill_packs_from_midx(&ctx)) { + } else if (ctx.m && !ctx.compact && fill_packs_from_midx(&ctx)) { goto cleanup; } start_pack = ctx.nr; ctx.pack_paths_checked = 0; - if (flags & MIDX_PROGRESS) + if (opts->flags & MIDX_PROGRESS) ctx.progress = start_delayed_progress(r, _("Adding packfiles to multi-pack-index"), 0); else ctx.progress = NULL; - ctx.to_include = packs_to_include; + if (ctx.compact) { + int bitmap_order = 0; + if (opts->preferred_pack_name) + bitmap_order |= 1; + else if (opts->flags & (MIDX_WRITE_REV_INDEX | MIDX_WRITE_BITMAP)) + bitmap_order |= 1; - for_each_file_in_pack_dir(source->path, add_pack_to_midx, &ctx); + fill_packs_from_midx_range(&ctx, bitmap_order); + } else { + ctx.to_include = opts->packs_to_include; + for_each_file_in_pack_dir(opts->source->path, add_pack_to_midx, &ctx); + } stop_progress(&ctx.progress); - if (!packs_to_drop) { + if (!opts->packs_to_drop) { /* * If there is no MIDX then either it doesn't exist, or we're * doing a geometric repack. Try to load it from the source to @@ -1189,7 +1388,7 @@ static int write_midx_internal(struct odb_source *source, if (midx && !midx_needs_update(midx, &ctx)) { struct bitmap_index *bitmap_git; int bitmap_exists; - int want_bitmap = flags & MIDX_WRITE_BITMAP; + int want_bitmap = opts->flags & MIDX_WRITE_BITMAP; bitmap_git = prepare_midx_bitmap_git(midx); bitmap_exists = bitmap_git && bitmap_is_midx(bitmap_git); @@ -1201,7 +1400,7 @@ static int write_midx_internal(struct odb_source *source, * corresponding bitmap (or one wasn't requested). */ if (!want_bitmap) - clear_midx_files_ext(source, "bitmap", NULL); + clear_midx_files_ext(ctx.source, "bitmap", NULL); result = 0; goto cleanup; } @@ -1216,11 +1415,11 @@ static int write_midx_internal(struct odb_source *source, goto cleanup; /* nothing to do */ } - if (preferred_pack_name) { + if (opts->preferred_pack_name) { ctx.preferred_pack_idx = NO_PREFERRED_PACK; for (size_t i = 0; i < ctx.nr; i++) { - if (!cmp_idx_or_pack_name(preferred_pack_name, + if (!cmp_idx_or_pack_name(opts->preferred_pack_name, ctx.info[i].pack_name)) { ctx.preferred_pack_idx = i; break; @@ -1229,9 +1428,9 @@ static int write_midx_internal(struct odb_source *source, if (ctx.preferred_pack_idx == NO_PREFERRED_PACK) warning(_("unknown preferred pack: '%s'"), - preferred_pack_name); + opts->preferred_pack_name); } else if (ctx.nr && - (flags & (MIDX_WRITE_REV_INDEX | MIDX_WRITE_BITMAP))) { + (opts->flags & (MIDX_WRITE_REV_INDEX | MIDX_WRITE_BITMAP))) { struct packed_git *oldest = ctx.info[0].p; ctx.preferred_pack_idx = 0; @@ -1242,7 +1441,7 @@ static int write_midx_internal(struct odb_source *source, */ open_pack_index(oldest); - if (packs_to_drop && packs_to_drop->nr) + if (opts->packs_to_drop && opts->packs_to_drop->nr) BUG("cannot write a MIDX bitmap during expiration"); /* @@ -1302,22 +1501,30 @@ static int write_midx_internal(struct odb_source *source, ctx.large_offsets_needed = 1; } - QSORT(ctx.info, ctx.nr, pack_info_compare); + if (ctx.compact) { + if (ctx.version != MIDX_VERSION_V2) + BUG("performing MIDX compaction with v1 MIDX"); + } else { + QSORT(ctx.info, ctx.nr, pack_info_compare); + } - if (packs_to_drop && packs_to_drop->nr) { + if (opts->packs_to_drop && opts->packs_to_drop->nr) { size_t drop_index = 0; int missing_drops = 0; - for (size_t i = 0; i < ctx.nr && drop_index < packs_to_drop->nr; i++) { + ASSERT(!ctx.compact); + + for (size_t i = 0; + i < ctx.nr && drop_index < opts->packs_to_drop->nr; i++) { int cmp = strcmp(ctx.info[i].pack_name, - packs_to_drop->items[drop_index].string); + opts->packs_to_drop->items[drop_index].string); if (!cmp) { drop_index++; ctx.info[i].expired = 1; } else if (cmp > 0) { error(_("did not see pack-file %s to drop"), - packs_to_drop->items[drop_index].string); + opts->packs_to_drop->items[drop_index].string); drop_index++; missing_drops++; i--; @@ -1338,12 +1545,20 @@ static int write_midx_internal(struct odb_source *source, */ ALLOC_ARRAY(ctx.pack_perm, ctx.nr); for (size_t i = 0; i < ctx.nr; i++) { + uint32_t from = ctx.info[i].orig_pack_int_id; + uint32_t to; + if (ctx.info[i].expired) { + to = PACK_EXPIRED; dropped_packs++; - ctx.pack_perm[ctx.info[i].orig_pack_int_id] = PACK_EXPIRED; } else { - ctx.pack_perm[ctx.info[i].orig_pack_int_id] = i - dropped_packs; + to = i - dropped_packs; } + + if (ctx.compact) + from -= ctx.compact_from->num_packs_in_base; + + ctx.pack_perm[from] = to; } for (size_t i = 0; i < ctx.nr; i++) { @@ -1354,16 +1569,16 @@ static int write_midx_internal(struct odb_source *source, } /* Check that the preferred pack wasn't expired (if given). */ - if (preferred_pack_name) { - struct pack_info *preferred = bsearch(preferred_pack_name, + if (opts->preferred_pack_name) { + struct pack_info *preferred = bsearch(opts->preferred_pack_name, ctx.info, ctx.nr, sizeof(*ctx.info), idx_or_pack_name_cmp); if (preferred) { - uint32_t perm = ctx.pack_perm[preferred->orig_pack_int_id]; + uint32_t perm = midx_pack_perm(&ctx, preferred->orig_pack_int_id); if (perm == PACK_EXPIRED) warning(_("preferred pack '%s' is expired"), - preferred_pack_name); + opts->preferred_pack_name); } } @@ -1377,15 +1592,15 @@ static int write_midx_internal(struct odb_source *source, } if (!ctx.entries_nr) { - if (flags & MIDX_WRITE_BITMAP) + if (opts->flags & MIDX_WRITE_BITMAP) warning(_("refusing to write multi-pack .bitmap without any objects")); - flags &= ~(MIDX_WRITE_REV_INDEX | MIDX_WRITE_BITMAP); + opts->flags &= ~(MIDX_WRITE_REV_INDEX | MIDX_WRITE_BITMAP); } if (ctx.incremental) { struct strbuf lock_name = STRBUF_INIT; - get_midx_chain_filename(source, &lock_name); + get_midx_chain_filename(opts->source, &lock_name); hold_lock_file_for_update(&lk, lock_name.buf, LOCK_DIE_ON_ERROR); strbuf_release(&lock_name); @@ -1428,7 +1643,7 @@ static int write_midx_internal(struct odb_source *source, MIDX_CHUNK_LARGE_OFFSET_WIDTH), write_midx_large_offsets); - if (flags & (MIDX_WRITE_REV_INDEX | MIDX_WRITE_BITMAP)) { + if (opts->flags & (MIDX_WRITE_REV_INDEX | MIDX_WRITE_BITMAP)) { ctx.pack_order = midx_pack_order(&ctx); add_chunk(cf, MIDX_CHUNKID_REVINDEX, st_mult(ctx.entries_nr, sizeof(uint32_t)), @@ -1439,18 +1654,18 @@ static int write_midx_internal(struct odb_source *source, } write_midx_header(r->hash_algo, f, get_num_chunks(cf), - ctx.nr - dropped_packs); + ctx.nr - dropped_packs, ctx.version); write_chunkfile(cf, &ctx); finalize_hashfile(f, midx_hash, FSYNC_COMPONENT_PACK_METADATA, CSUM_FSYNC | CSUM_HASH_IN_STREAM); free_chunkfile(cf); - if (flags & MIDX_WRITE_REV_INDEX && + if (opts->flags & MIDX_WRITE_REV_INDEX && git_env_bool("GIT_TEST_MIDX_WRITE_REV", 0)) write_midx_reverse_index(&ctx, midx_hash); - if (flags & MIDX_WRITE_BITMAP) { + if (opts->flags & MIDX_WRITE_BITMAP) { struct packing_data pdata; struct commit_stack commits = COMMIT_STACK_INIT; @@ -1459,7 +1674,7 @@ static int write_midx_internal(struct odb_source *source, prepare_midx_packing_data(&pdata, &ctx); - find_commits_for_midx_bitmap(&commits, refs_snapshot, &ctx); + find_commits_for_midx_bitmap(&commits, opts->refs_snapshot, &ctx); /* * The previous steps translated the information from @@ -1470,8 +1685,8 @@ static int write_midx_internal(struct odb_source *source, FREE_AND_NULL(ctx.entries); ctx.entries_nr = 0; - if (write_midx_bitmap(&ctx, midx_hash, &pdata, - commits.items, commits.nr, flags) < 0) { + if (write_midx_bitmap(&ctx, midx_hash, &pdata, commits.items, + commits.nr, opts->flags) < 0) { error(_("could not write multi-pack bitmap")); clear_packing_data(&pdata); commit_stack_clear(&commits); @@ -1489,7 +1704,24 @@ static int write_midx_internal(struct odb_source *source, if (ctx.num_multi_pack_indexes_before == UINT32_MAX) die(_("too many multi-pack-indexes")); - CALLOC_ARRAY(keep_hashes, ctx.num_multi_pack_indexes_before + 1); + if (ctx.compact) { + struct multi_pack_index *m; + + /* + * Keep all MIDX layers excluding those in the range [from, to]. + */ + for (m = ctx.base_midx; m; m = m->base_midx) + keep_hashes_nr++; + for (m = ctx.m; + m && midx_hashcmp(m, ctx.compact_to, r->hash_algo); + m = m->base_midx) + keep_hashes_nr++; + + keep_hashes_nr++; /* include the compacted layer */ + } else { + keep_hashes_nr = ctx.num_multi_pack_indexes_before + 1; + } + CALLOC_ARRAY(keep_hashes, keep_hashes_nr); if (ctx.incremental) { FILE *chainf = fdopen_lock_file(&lk, "w"); @@ -1504,7 +1736,7 @@ static int write_midx_internal(struct odb_source *source, if (link_midx_to_chain(ctx.base_midx) < 0) goto cleanup; - get_split_midx_filename_ext(source, &final_midx_name, + get_split_midx_filename_ext(opts->source, &final_midx_name, midx_hash, MIDX_EXT_MIDX); if (rename_tempfile(&incr, final_midx_name.buf) < 0) { @@ -1514,18 +1746,47 @@ static int write_midx_internal(struct odb_source *source, strbuf_release(&final_midx_name); - keep_hashes[ctx.num_multi_pack_indexes_before] = - xstrdup(hash_to_hex_algop(midx_hash, r->hash_algo)); + if (ctx.compact) { + struct multi_pack_index *m; + uint32_t num_layers_before_from = 0; + uint32_t i; + + for (m = ctx.base_midx; m; m = m->base_midx) + num_layers_before_from++; + + m = ctx.base_midx; + for (i = 0; i < num_layers_before_from; i++) { + uint32_t j = num_layers_before_from - i - 1; - for (uint32_t i = 0; i < ctx.num_multi_pack_indexes_before; i++) { - uint32_t j = ctx.num_multi_pack_indexes_before - i - 1; + keep_hashes[j] = xstrdup(midx_get_checksum_hex(m)); + m = m->base_midx; + } - keep_hashes[j] = xstrdup(hash_to_hex_algop(get_midx_checksum(m), + keep_hashes[i] = xstrdup(hash_to_hex_algop(midx_hash, r->hash_algo)); - m = m->base_midx; + + i = 0; + for (m = ctx.m; + m && midx_hashcmp(m, ctx.compact_to, r->hash_algo); + m = m->base_midx) { + keep_hashes[keep_hashes_nr - i - 1] = + xstrdup(midx_get_checksum_hex(m)); + i++; + } + } else { + keep_hashes[ctx.num_multi_pack_indexes_before] = + xstrdup(hash_to_hex_algop(midx_hash, + r->hash_algo)); + + for (uint32_t i = 0; i < ctx.num_multi_pack_indexes_before; i++) { + uint32_t j = ctx.num_multi_pack_indexes_before - i - 1; + + keep_hashes[j] = xstrdup(midx_get_checksum_hex(m)); + m = m->base_midx; + } } - for (uint32_t i = 0; i <= ctx.num_multi_pack_indexes_before; i++) + for (uint32_t i = 0; i < keep_hashes_nr; i++) fprintf(get_lock_file_fp(&lk), "%s\n", keep_hashes[i]); } else { keep_hashes[ctx.num_multi_pack_indexes_before] = @@ -1538,8 +1799,7 @@ static int write_midx_internal(struct odb_source *source, if (commit_lock_file(&lk) < 0) die_errno(_("could not write multi-pack-index")); - clear_midx_files(source, keep_hashes, - ctx.num_multi_pack_indexes_before + 1, + clear_midx_files(opts->source, keep_hashes, keep_hashes_nr, ctx.incremental); result = 0; @@ -1557,7 +1817,7 @@ static int write_midx_internal(struct odb_source *source, free(ctx.pack_perm); free(ctx.pack_order); if (keep_hashes) { - for (uint32_t i = 0; i <= ctx.num_multi_pack_indexes_before; i++) + for (uint32_t i = 0; i < keep_hashes_nr; i++) free((char *)keep_hashes[i]); free(keep_hashes); } @@ -1573,9 +1833,14 @@ int write_midx_file(struct odb_source *source, const char *preferred_pack_name, const char *refs_snapshot, unsigned flags) { - return write_midx_internal(source, NULL, NULL, - preferred_pack_name, refs_snapshot, - flags); + struct write_midx_opts opts = { + .source = source, + .preferred_pack_name = preferred_pack_name, + .refs_snapshot = refs_snapshot, + .flags = flags, + }; + + return write_midx_internal(&opts); } int write_midx_file_only(struct odb_source *source, @@ -1583,8 +1848,30 @@ int write_midx_file_only(struct odb_source *source, const char *preferred_pack_name, const char *refs_snapshot, unsigned flags) { - return write_midx_internal(source, packs_to_include, NULL, - preferred_pack_name, refs_snapshot, flags); + struct write_midx_opts opts = { + .source = source, + .packs_to_include = packs_to_include, + .preferred_pack_name = preferred_pack_name, + .refs_snapshot = refs_snapshot, + .flags = flags, + }; + + return write_midx_internal(&opts); +} + +int write_midx_file_compact(struct odb_source *source, + struct multi_pack_index *from, + struct multi_pack_index *to, + unsigned flags) +{ + struct write_midx_opts opts = { + .source = source, + .compact_from = from, + .compact_to = to, + .flags = flags | MIDX_WRITE_COMPACT, + }; + + return write_midx_internal(&opts); } int expire_midx_packs(struct odb_source *source, unsigned flags) @@ -1643,9 +1930,14 @@ int expire_midx_packs(struct odb_source *source, unsigned flags) free(count); - if (packs_to_drop.nr) - result = write_midx_internal(source, NULL, - &packs_to_drop, NULL, NULL, flags); + if (packs_to_drop.nr) { + struct write_midx_opts opts = { + .source = source, + .packs_to_drop = &packs_to_drop, + .flags = flags & MIDX_PROGRESS, + }; + result = write_midx_internal(&opts); + } string_list_clear(&packs_to_drop, 0); @@ -1778,6 +2070,10 @@ int midx_repack(struct odb_source *source, size_t batch_size, unsigned flags) struct child_process cmd = CHILD_PROCESS_INIT; FILE *cmd_in; struct multi_pack_index *m = get_multi_pack_index(source); + struct write_midx_opts opts = { + .source = source, + .flags = flags, + }; /* * When updating the default for these configuration @@ -1852,8 +2148,7 @@ int midx_repack(struct odb_source *source, size_t batch_size, unsigned flags) goto cleanup; } - result = write_midx_internal(source, NULL, NULL, NULL, NULL, - flags); + result = write_midx_internal(&opts); cleanup: free(include_pack); diff --git a/midx.c b/midx.c index ab8e2611d1fe58..81d6ab11e6eb0e 100644 --- a/midx.c +++ b/midx.c @@ -24,7 +24,13 @@ void clear_incremental_midx_files_ext(struct odb_source *source, const char *ext int cmp_idx_or_pack_name(const char *idx_or_pack_name, const char *idx_name); -const unsigned char *get_midx_checksum(struct multi_pack_index *m) +const char *midx_get_checksum_hex(const struct multi_pack_index *m) +{ + return hash_to_hex_algop(midx_get_checksum_hash(m), + m->source->odb->repo->hash_algo); +} + +const unsigned char *midx_get_checksum_hash(const struct multi_pack_index *m) { return m->data + m->data_len - m->source->odb->repo->hash_algo->rawsz; } @@ -144,7 +150,7 @@ static struct multi_pack_index *load_multi_pack_index_one(struct odb_source *sou m->signature, MIDX_SIGNATURE); m->version = m->data[MIDX_BYTE_FILE_VERSION]; - if (m->version != MIDX_VERSION) + if (m->version != MIDX_VERSION_V1 && m->version != MIDX_VERSION_V2) die(_("multi-pack-index version %d not recognized"), m->version); @@ -205,7 +211,8 @@ static struct multi_pack_index *load_multi_pack_index_one(struct odb_source *sou die(_("multi-pack-index pack-name chunk is too short")); cur_pack_name = end + 1; - if (i && strcmp(m->pack_names[i], m->pack_names[i - 1]) <= 0) + if (m->version == MIDX_VERSION_V1 && + i && strcmp(m->pack_names[i], m->pack_names[i - 1]) <= 0) die(_("multi-pack-index pack names out of order: '%s' before '%s'"), m->pack_names[i - 1], m->pack_names[i]); @@ -406,6 +413,7 @@ void close_midx(struct multi_pack_index *m) } FREE_AND_NULL(m->packs); FREE_AND_NULL(m->pack_names); + FREE_AND_NULL(m->pack_names_sorted); free(m); } @@ -651,17 +659,40 @@ int cmp_idx_or_pack_name(const char *idx_or_pack_name, return strcmp(idx_or_pack_name, idx_name); } + +static int midx_pack_names_cmp(const void *a, const void *b, void *m_) +{ + struct multi_pack_index *m = m_; + return strcmp(m->pack_names[*(const size_t *)a], + m->pack_names[*(const size_t *)b]); +} + static int midx_contains_pack_1(struct multi_pack_index *m, const char *idx_or_pack_name) { uint32_t first = 0, last = m->num_packs; + if (m->version == MIDX_VERSION_V2 && !m->pack_names_sorted) { + uint32_t i; + + ALLOC_ARRAY(m->pack_names_sorted, m->num_packs); + + for (i = 0; i < m->num_packs; i++) + m->pack_names_sorted[i] = i; + + QSORT_S(m->pack_names_sorted, m->num_packs, midx_pack_names_cmp, + m); + } + while (first < last) { uint32_t mid = first + (last - first) / 2; const char *current; int cmp; - current = m->pack_names[mid]; + if (m->pack_names_sorted) + current = m->pack_names[m->pack_names_sorted[mid]]; + else + current = m->pack_names[mid]; cmp = cmp_idx_or_pack_name(idx_or_pack_name, current); if (!cmp) return 1; diff --git a/midx.h b/midx.h index 6e54d73503d560..08f3728e5204b8 100644 --- a/midx.h +++ b/midx.h @@ -11,7 +11,8 @@ struct git_hash_algo; struct odb_source; #define MIDX_SIGNATURE 0x4d494458 /* "MIDX" */ -#define MIDX_VERSION 1 +#define MIDX_VERSION_V1 1 +#define MIDX_VERSION_V2 2 #define MIDX_BYTE_FILE_VERSION 4 #define MIDX_BYTE_HASH_VERSION 5 #define MIDX_BYTE_NUM_CHUNKS 6 @@ -71,6 +72,7 @@ struct multi_pack_index { uint32_t num_packs_in_base; const char **pack_names; + size_t *pack_names_sorted; struct packed_git **packs; }; @@ -80,12 +82,14 @@ struct multi_pack_index { #define MIDX_WRITE_BITMAP_HASH_CACHE (1 << 3) #define MIDX_WRITE_BITMAP_LOOKUP_TABLE (1 << 4) #define MIDX_WRITE_INCREMENTAL (1 << 5) +#define MIDX_WRITE_COMPACT (1 << 6) #define MIDX_EXT_REV "rev" #define MIDX_EXT_BITMAP "bitmap" #define MIDX_EXT_MIDX "midx" -const unsigned char *get_midx_checksum(struct multi_pack_index *m); +const char *midx_get_checksum_hex(const struct multi_pack_index *m) /* static buffer */; +const unsigned char *midx_get_checksum_hash(const struct multi_pack_index *m); void get_midx_filename(struct odb_source *source, struct strbuf *out); void get_midx_filename_ext(struct odb_source *source, struct strbuf *out, const unsigned char *hash, const char *ext); @@ -128,6 +132,10 @@ int write_midx_file_only(struct odb_source *source, struct string_list *packs_to_include, const char *preferred_pack_name, const char *refs_snapshot, unsigned flags); +int write_midx_file_compact(struct odb_source *source, + struct multi_pack_index *from, + struct multi_pack_index *to, + unsigned flags); void clear_midx_file(struct repository *r); int verify_midx_file(struct odb_source *source, unsigned flags); int expire_midx_packs(struct odb_source *source, unsigned flags); diff --git a/object-file.c b/object-file.c index c62e5496e0795d..f0b029ff0b2cb0 100644 --- a/object-file.c +++ b/object-file.c @@ -1868,6 +1868,64 @@ int odb_source_loose_for_each_object(struct odb_source *source, NULL, NULL, &data); } +static int count_loose_object(const struct object_id *oid UNUSED, + struct object_info *oi UNUSED, + void *payload) +{ + unsigned long *count = payload; + (*count)++; + return 0; +} + +int odb_source_loose_count_objects(struct odb_source *source, + enum odb_count_objects_flags flags, + unsigned long *out) +{ + const unsigned hexsz = source->odb->repo->hash_algo->hexsz - 2; + char *path = NULL; + DIR *dir = NULL; + int ret; + + if (flags & ODB_COUNT_OBJECTS_APPROXIMATE) { + unsigned long count = 0; + struct dirent *ent; + + path = xstrfmt("%s/17", source->path); + + dir = opendir(path); + if (!dir) { + if (errno == ENOENT) { + *out = 0; + ret = 0; + goto out; + } + + ret = error_errno("cannot open object shard '%s'", path); + goto out; + } + + while ((ent = readdir(dir)) != NULL) { + if (strspn(ent->d_name, "0123456789abcdef") != hexsz || + ent->d_name[hexsz] != '\0') + continue; + count++; + } + + *out = count * 256; + ret = 0; + } else { + *out = 0; + ret = odb_source_loose_for_each_object(source, NULL, count_loose_object, + out, 0); + } + +out: + if (dir) + closedir(dir); + free(path); + return ret; +} + static int append_loose_object(const struct object_id *oid, const char *path UNUSED, void *data) diff --git a/object-file.h b/object-file.h index ff6da6529640dc..f8d8805a18cc8c 100644 --- a/object-file.h +++ b/object-file.h @@ -139,6 +139,20 @@ int odb_source_loose_for_each_object(struct odb_source *source, void *cb_data, unsigned flags); +/* + * Count the number of loose objects in this source. + * + * The object count is approximated by opening a single sharding directory for + * loose objects and scanning its contents. The result is then extrapolated by + * 256. This should generally work as a reasonable estimate given that the + * object hash is supposed to be indistinguishable from random. + * + * Returns 0 on success, a negative error code otherwise. + */ +int odb_source_loose_count_objects(struct odb_source *source, + enum odb_count_objects_flags flags, + unsigned long *out); + /** * format_object_header() is a thin wrapper around s xsnprintf() that * writes the initial " " part of the loose object diff --git a/object-name.c b/object-name.c index 7b14c3bf9b9b48..e5adec4c9d5084 100644 --- a/object-name.c +++ b/object-name.c @@ -837,7 +837,11 @@ int repo_find_unique_abbrev_r(struct repository *r, char *hex, const unsigned hexsz = algo->hexsz; if (len < 0) { - unsigned long count = repo_approximate_object_count(r); + unsigned long count; + + if (odb_count_objects(r->objects, ODB_COUNT_OBJECTS_APPROXIMATE, &count) < 0) + count = 0; + /* * Add one because the MSB only tells us the highest bit set, * not including the value of all the _other_ bits (so "15" diff --git a/odb.c b/odb.c index 84a31084d3884b..350e23f3c0798d 100644 --- a/odb.c +++ b/odb.c @@ -917,6 +917,41 @@ int odb_for_each_object(struct object_database *odb, return 0; } +int odb_count_objects(struct object_database *odb, + enum odb_count_objects_flags flags, + unsigned long *out) +{ + struct odb_source *source; + unsigned long count = 0; + int ret; + + if (odb->object_count_valid && odb->object_count_flags == flags) { + *out = odb->object_count; + return 0; + } + + odb_prepare_alternates(odb); + for (source = odb->sources; source; source = source->next) { + unsigned long c; + + ret = odb_source_count_objects(source, flags, &c); + if (ret < 0) + goto out; + + count += c; + } + + odb->object_count = count; + odb->object_count_valid = 1; + odb->object_count_flags = flags; + + *out = count; + ret = 0; + +out: + return ret; +} + void odb_assert_oid_type(struct object_database *odb, const struct object_id *oid, enum object_type expect) { @@ -1030,7 +1065,7 @@ void odb_reprepare(struct object_database *o) for (source = o->sources; source; source = source->next) odb_source_reprepare(source); - o->approximate_object_count_valid = 0; + o->object_count_valid = 0; obj_read_unlock(); } diff --git a/odb.h b/odb.h index 86e0365c243863..9aee260105ae54 100644 --- a/odb.h +++ b/odb.h @@ -3,7 +3,6 @@ #include "hashmap.h" #include "object.h" -#include "odb/source.h" #include "oidset.h" #include "oidmap.h" #include "string-list.h" @@ -12,6 +11,7 @@ struct oidmap; struct oidtree; struct strbuf; +struct strvec; struct repository; struct multi_pack_index; @@ -110,10 +110,11 @@ struct object_database { /* * A fast, rough count of the number of objects in the repository. * These two fields are not meant for direct access. Use - * repo_approximate_object_count() instead. + * odb_count_objects() instead. */ - unsigned long approximate_object_count; - unsigned approximate_object_count_valid : 1; + unsigned long object_count; + unsigned object_count_flags; + unsigned object_count_valid : 1; /* * Submodule source paths that will be added as additional sources to @@ -339,6 +340,42 @@ struct object_info { */ #define OBJECT_INFO_INIT { 0 } +/* Flags that can be passed to `odb_read_object_info_extended()`. */ +enum object_info_flags { + /* Invoke lookup_replace_object() on the given hash. */ + OBJECT_INFO_LOOKUP_REPLACE = (1 << 0), + + /* Do not reprepare object sources when the first lookup has failed. */ + OBJECT_INFO_QUICK = (1 << 1), + + /* + * Do not attempt to fetch the object if missing (even if fetch_is_missing is + * nonzero). + */ + OBJECT_INFO_SKIP_FETCH_OBJECT = (1 << 2), + + /* Die if object corruption (not just an object being missing) was detected. */ + OBJECT_INFO_DIE_IF_CORRUPT = (1 << 3), + + /* + * We have already tried reading the object, but it couldn't be found + * via any of the attached sources, and are now doing a second read. + * This second read asks the individual sources to also evaluate + * whether any on-disk state may have changed that may have caused the + * object to appear. + * + * This flag is for internal use, only. The second read only occurs + * when `OBJECT_INFO_QUICK` was not passed. + */ + OBJECT_INFO_SECOND_READ = (1 << 4), + + /* + * This is meant for bulk prefetching of missing blobs in a partial + * clone. Implies OBJECT_INFO_SKIP_FETCH_OBJECT and OBJECT_INFO_QUICK. + */ + OBJECT_INFO_FOR_PREFETCH = (OBJECT_INFO_SKIP_FETCH_OBJECT | OBJECT_INFO_QUICK), +}; + /* * Read object info from the object database and populate the `object_info` * structure. Returns 0 on success, a negative error code otherwise. @@ -432,6 +469,18 @@ enum odb_for_each_object_flags { ODB_FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS = (1<<4), }; +/* + * A callback function that can be used to iterate through objects. If given, + * the optional `oi` parameter will be populated the same as if you would call + * `odb_read_object_info()`. + * + * Returning a non-zero error code will cause iteration to abort. The error + * code will be propagated. + */ +typedef int (*odb_for_each_object_cb)(const struct object_id *oid, + struct object_info *oi, + void *cb_data); + /* * Iterate through all objects contained in the object database. Note that * objects may be iterated over multiple times in case they are either stored @@ -452,6 +501,27 @@ int odb_for_each_object(struct object_database *odb, void *cb_data, unsigned flags); +enum odb_count_objects_flags { + /* + * Instead of providing an accurate count, allow the number of objects + * to be approximated. Details of how this approximation works are + * subject to the specific source's implementation. + */ + ODB_COUNT_OBJECTS_APPROXIMATE = (1 << 0), +}; + +/* + * Count the number of objects in the given object database. This object count + * may double-count objects that are stored in multiple backends, or which are + * stored multiple times in a single backend. + * + * Returns 0 on success, a negative error code otherwise. The number of objects + * will be assigned to the `out` pointer on success. + */ +int odb_count_objects(struct object_database *odb, + enum odb_count_objects_flags flags, + unsigned long *out); + enum { /* * By default, `odb_write_object()` does not actually write anything diff --git a/odb/source-files.c b/odb/source-files.c index 14cb9adecadd99..c08d8993e378a0 100644 --- a/odb/source-files.c +++ b/odb/source-files.c @@ -93,6 +93,35 @@ static int odb_source_files_for_each_object(struct odb_source *source, return 0; } +static int odb_source_files_count_objects(struct odb_source *source, + enum odb_count_objects_flags flags, + unsigned long *out) +{ + struct odb_source_files *files = odb_source_files_downcast(source); + unsigned long count; + int ret; + + ret = packfile_store_count_objects(files->packed, flags, &count); + if (ret < 0) + goto out; + + if (!(flags & ODB_COUNT_OBJECTS_APPROXIMATE)) { + unsigned long loose_count; + + ret = odb_source_loose_count_objects(source, flags, &loose_count); + if (ret < 0) + goto out; + + count += loose_count; + } + + *out = count; + ret = 0; + +out: + return ret; +} + static int odb_source_files_freshen_object(struct odb_source *source, const struct object_id *oid) { @@ -220,6 +249,7 @@ struct odb_source_files *odb_source_files_new(struct object_database *odb, files->base.read_object_info = odb_source_files_read_object_info; files->base.read_object_stream = odb_source_files_read_object_stream; files->base.for_each_object = odb_source_files_for_each_object; + files->base.count_objects = odb_source_files_count_objects; files->base.freshen_object = odb_source_files_freshen_object; files->base.write_object = odb_source_files_write_object; files->base.write_object_stream = odb_source_files_write_object_stream; diff --git a/odb/source.h b/odb/source.h index caac5581495ece..96c906e7a1b350 100644 --- a/odb/source.h +++ b/odb/source.h @@ -2,6 +2,7 @@ #define ODB_SOURCE_H #include "object.h" +#include "odb.h" enum odb_source_type { /* @@ -14,61 +15,10 @@ enum odb_source_type { ODB_SOURCE_FILES, }; -/* Flags that can be passed to `odb_read_object_info_extended()`. */ -enum object_info_flags { - /* Invoke lookup_replace_object() on the given hash. */ - OBJECT_INFO_LOOKUP_REPLACE = (1 << 0), - - /* Do not reprepare object sources when the first lookup has failed. */ - OBJECT_INFO_QUICK = (1 << 1), - - /* - * Do not attempt to fetch the object if missing (even if fetch_is_missing is - * nonzero). - */ - OBJECT_INFO_SKIP_FETCH_OBJECT = (1 << 2), - - /* Die if object corruption (not just an object being missing) was detected. */ - OBJECT_INFO_DIE_IF_CORRUPT = (1 << 3), - - /* - * We have already tried reading the object, but it couldn't be found - * via any of the attached sources, and are now doing a second read. - * This second read asks the individual sources to also evaluate - * whether any on-disk state may have changed that may have caused the - * object to appear. - * - * This flag is for internal use, only. The second read only occurs - * when `OBJECT_INFO_QUICK` was not passed. - */ - OBJECT_INFO_SECOND_READ = (1 << 4), - - /* - * This is meant for bulk prefetching of missing blobs in a partial - * clone. Implies OBJECT_INFO_SKIP_FETCH_OBJECT and OBJECT_INFO_QUICK. - */ - OBJECT_INFO_FOR_PREFETCH = (OBJECT_INFO_SKIP_FETCH_OBJECT | OBJECT_INFO_QUICK), -}; - struct object_id; -struct object_info; struct odb_read_stream; -struct odb_transaction; -struct odb_write_stream; struct strvec; -/* - * A callback function that can be used to iterate through objects. If given, - * the optional `oi` parameter will be populated the same as if you would call - * `odb_read_object_info()`. - * - * Returning a non-zero error code will cause iteration to abort. The error - * code will be propagated. - */ -typedef int (*odb_for_each_object_cb)(const struct object_id *oid, - struct object_info *oi, - void *cb_data); - /* * The source is the part of the object database that stores the actual * objects. It thus encapsulates the logic to read and write the specific @@ -192,6 +142,21 @@ struct odb_source { void *cb_data, unsigned flags); + /* + * This callback is expected to count objects in the given object + * database source. The callback function does not have to guarantee + * that only unique objects are counted. The result shall be assigned + * to the `out` pointer. + * + * Accepts `enum odb_count_objects_flag` flags to alter the behaviour. + * + * The callback is expected to return 0 on success, or a negative error + * code otherwise. + */ + int (*count_objects)(struct odb_source *source, + enum odb_count_objects_flags flags, + unsigned long *out); + /* * This callback is expected to freshen the given object so that its * last access time is set to the current time. This is used to ensure @@ -383,6 +348,18 @@ static inline int odb_source_for_each_object(struct odb_source *source, return source->for_each_object(source, request, cb, cb_data, flags); } +/* + * Count the number of objects in the given object database source. + * + * Returns 0 on success, a negative error code otherwise. + */ +static inline int odb_source_count_objects(struct odb_source *source, + enum odb_count_objects_flags flags, + unsigned long *out) +{ + return source->count_objects(source, flags, out); +} + /* * Freshen an object in the object database by updating its timestamp. * Returns 1 in case the object has been freshened, 0 in case the object does diff --git a/odb/streaming.c b/odb/streaming.c index a4355cd2458c38..5927a12954ba59 100644 --- a/odb/streaming.c +++ b/odb/streaming.c @@ -7,6 +7,7 @@ #include "environment.h" #include "repository.h" #include "odb.h" +#include "odb/source.h" #include "odb/streaming.h" #include "replace-object.h" diff --git a/pack-bitmap.c b/pack-bitmap.c index 22419bfb33dd4e..f6ec18d83afe21 100644 --- a/pack-bitmap.c +++ b/pack-bitmap.c @@ -441,11 +441,11 @@ char *midx_bitmap_filename(struct multi_pack_index *midx) struct strbuf buf = STRBUF_INIT; if (midx->has_chain) get_split_midx_filename_ext(midx->source, &buf, - get_midx_checksum(midx), + midx_get_checksum_hash(midx), MIDX_EXT_BITMAP); else get_midx_filename_ext(midx->source, &buf, - get_midx_checksum(midx), + midx_get_checksum_hash(midx), MIDX_EXT_BITMAP); return strbuf_detach(&buf, NULL); @@ -502,7 +502,7 @@ static int open_midx_bitmap_1(struct bitmap_index *bitmap_git, if (load_bitmap_header(bitmap_git) < 0) goto cleanup; - if (!hasheq(get_midx_checksum(bitmap_git->midx), bitmap_git->checksum, + if (!hasheq(midx_get_checksum_hash(bitmap_git->midx), bitmap_git->checksum, bitmap_repo(bitmap_git)->hash_algo)) { error(_("checksum doesn't match in MIDX and bitmap")); goto cleanup; @@ -2819,8 +2819,7 @@ void test_bitmap_walk(struct rev_info *revs) if (bitmap_is_midx(found)) fprintf_ln(stderr, "Located via MIDX '%s'.", - hash_to_hex_algop(get_midx_checksum(found->midx), - revs->repo->hash_algo)); + midx_get_checksum_hex(found->midx)); else fprintf_ln(stderr, "Located via pack '%s'.", hash_to_hex_algop(found->pack->hash, diff --git a/pack-revindex.c b/pack-revindex.c index 1fe0afe89907c5..1b67863606a75f 100644 --- a/pack-revindex.c +++ b/pack-revindex.c @@ -394,11 +394,11 @@ int load_midx_revindex(struct multi_pack_index *m) if (m->has_chain) get_split_midx_filename_ext(m->source, &revindex_name, - get_midx_checksum(m), + midx_get_checksum_hash(m), MIDX_EXT_REV); else get_midx_filename_ext(m->source, &revindex_name, - get_midx_checksum(m), + midx_get_checksum_hash(m), MIDX_EXT_REV); ret = load_revindex_from_disk(m->source->odb->repo->hash_algo, diff --git a/packfile.c b/packfile.c index 215a23e42be0fe..d4de9f3ffe831e 100644 --- a/packfile.c +++ b/packfile.c @@ -1101,37 +1101,35 @@ struct packfile_list_entry *packfile_store_get_packs(struct packfile_store *stor return store->packs.head; } -/* - * Give a fast, rough count of the number of objects in the repository. This - * ignores loose objects completely. If you have a lot of them, then either - * you should repack because your performance will be awful, or they are - * all unreachable objects about to be pruned, in which case they're not really - * interesting as a measure of repo size in the first place. - */ -unsigned long repo_approximate_object_count(struct repository *r) +int packfile_store_count_objects(struct packfile_store *store, + enum odb_count_objects_flags flags UNUSED, + unsigned long *out) { - if (!r->objects->approximate_object_count_valid) { - struct odb_source *source; - unsigned long count = 0; - struct packed_git *p; + struct packfile_list_entry *e; + struct multi_pack_index *m; + unsigned long count = 0; + int ret; - odb_prepare_alternates(r->objects); + m = get_multi_pack_index(store->source); + if (m) + count += m->num_objects + m->num_objects_in_base; - for (source = r->objects->sources; source; source = source->next) { - struct multi_pack_index *m = get_multi_pack_index(source); - if (m) - count += m->num_objects + m->num_objects_in_base; + for (e = packfile_store_get_packs(store); e; e = e->next) { + if (e->pack->multi_pack_index) + continue; + if (open_pack_index(e->pack)) { + ret = -1; + goto out; } - repo_for_each_pack(r, p) { - if (p->multi_pack_index || open_pack_index(p)) - continue; - count += p->num_objects; - } - r->objects->approximate_object_count = count; - r->objects->approximate_object_count_valid = 1; + count += e->pack->num_objects; } - return r->objects->approximate_object_count; + + *out = count; + ret = 0; + +out: + return ret; } unsigned long unpack_object_header_buffer(const unsigned char *buf, diff --git a/packfile.h b/packfile.h index 8b04a258a7b9d5..a16ec3950d2507 100644 --- a/packfile.h +++ b/packfile.h @@ -268,6 +268,16 @@ enum kept_pack_type { KEPT_PACK_IN_CORE = (1 << 1), }; +/* + * Count the number objects contained in the given packfile store. If + * successful, the number of objects will be written to the `out` pointer. + * + * Return 0 on success, a negative error code otherwise. + */ +int packfile_store_count_objects(struct packfile_store *store, + enum odb_count_objects_flags flags, + unsigned long *out); + /* * Retrieve the cache of kept packs from the given packfile store. Accepts a * combination of `kept_pack_type` flags. The cache is computed on demand and @@ -365,12 +375,6 @@ int packfile_store_for_each_object(struct packfile_store *store, #define PACKDIR_FILE_GARBAGE 4 extern void (*report_garbage)(unsigned seen_bits, const char *path); -/* - * Give a rough count of objects in the repository. This sacrifices accuracy - * for speed. - */ -unsigned long repo_approximate_object_count(struct repository *r); - void pack_report(struct repository *repo); /* diff --git a/repository.c b/repository.c index 0b8f7ec20089e0..9e5537f53961ed 100644 --- a/repository.c +++ b/repository.c @@ -3,6 +3,7 @@ #include "repository.h" #include "hook.h" #include "odb.h" +#include "odb/source.h" #include "config.h" #include "gettext.h" #include "object.h" diff --git a/submodule-config.c b/submodule-config.c index 1f19fe207741bc..72a46b7a54bc3d 100644 --- a/submodule-config.c +++ b/submodule-config.c @@ -14,6 +14,7 @@ #include "strbuf.h" #include "object-name.h" #include "odb.h" +#include "odb/source.h" #include "parse-options.h" #include "thread-utils.h" #include "tree-walk.h" diff --git a/t/helper/test-read-midx.c b/t/helper/test-read-midx.c index 6de5d1665afbfc..388d29e2b53db3 100644 --- a/t/helper/test-read-midx.c +++ b/t/helper/test-read-midx.c @@ -26,18 +26,22 @@ static int read_midx_file(const char *object_dir, const char *checksum, int show_objects) { uint32_t i; - struct multi_pack_index *m; + struct multi_pack_index *m, *tip; + int ret = 0; - m = setup_midx(object_dir); + m = tip = setup_midx(object_dir); if (!m) return 1; if (checksum) { - while (m && strcmp(hash_to_hex(get_midx_checksum(m)), checksum)) + while (m && strcmp(midx_get_checksum_hex(m), checksum)) m = m->base_midx; - if (!m) - return 1; + if (!m) { + ret = error(_("could not find MIDX with checksum %s"), + checksum); + goto out; + } } printf("header: %08x %d %d %d %d\n", @@ -82,9 +86,10 @@ static int read_midx_file(const char *object_dir, const char *checksum, } } - close_midx(m); +out: + close_midx(tip); - return 0; + return ret; } static int read_midx_checksum(const char *object_dir) @@ -94,7 +99,7 @@ static int read_midx_checksum(const char *object_dir) m = setup_midx(object_dir); if (!m) return 1; - printf("%s\n", hash_to_hex(get_midx_checksum(m))); + printf("%s\n", midx_get_checksum_hex(m)); close_midx(m); return 0; diff --git a/t/meson.build b/t/meson.build index bafb8e63a9363e..919a0ae4cbb38e 100644 --- a/t/meson.build +++ b/t/meson.build @@ -626,6 +626,7 @@ integration_tests = [ 't5332-multi-pack-reuse.sh', 't5333-pseudo-merge-bitmaps.sh', 't5334-incremental-multi-pack-index.sh', + 't5335-compact-multi-pack-index.sh', 't5351-unpack-large-objects.sh', 't5400-send-pack.sh', 't5401-update-hooks.sh', diff --git a/t/t0450/adoc-help-mismatches b/t/t0450/adoc-help-mismatches index 8ee2d3f7c81502..e8d6c13ccd0333 100644 --- a/t/t0450/adoc-help-mismatches +++ b/t/t0450/adoc-help-mismatches @@ -33,7 +33,6 @@ merge merge-file merge-index merge-one-file -multi-pack-index name-rev notes push diff --git a/t/t5319-multi-pack-index.sh b/t/t5319-multi-pack-index.sh index a7c79225f67526..58e0b685b105f5 100755 --- a/t/t5319-multi-pack-index.sh +++ b/t/t5319-multi-pack-index.sh @@ -21,7 +21,7 @@ midx_read_expect () { EXTRA_CHUNKS="$5" { cat <<-EOF && - header: 4d494458 1 $HASH_LEN $NUM_CHUNKS $NUM_PACKS + header: 4d494458 2 $HASH_LEN $NUM_CHUNKS $NUM_PACKS chunks: pack-names oid-fanout oid-lookup object-offsets$EXTRA_CHUNKS num_objects: $NUM_OBJECTS packs: @@ -512,12 +512,7 @@ test_expect_success 'verify invalid chunk offset' ' "improper chunk offset(s)" ' -test_expect_success 'verify packnames out of order' ' - corrupt_midx_and_verify $MIDX_BYTE_PACKNAME_ORDER "z" $objdir \ - "pack names out of order" -' - -test_expect_success 'verify packnames out of order' ' +test_expect_success 'verify missing pack' ' corrupt_midx_and_verify $MIDX_BYTE_PACKNAME_ORDER "a" $objdir \ "failed to load pack" ' @@ -578,6 +573,15 @@ test_expect_success 'verify incorrect checksum' ' $objdir "incorrect checksum" ' +test_expect_success 'setup for v1-specific fsck tests' ' + git -c midx.version=1 multi-pack-index write +' + +test_expect_success 'verify packnames out of order (v1)' ' + corrupt_midx_and_verify $MIDX_BYTE_PACKNAME_ORDER "z" $objdir \ + "pack names out of order" +' + test_expect_success 'repack progress off for redirected stderr' ' GIT_PROGRESS_DELAY=0 git multi-pack-index --object-dir=$objdir repack 2>err && test_line_count = 0 err diff --git a/t/t5335-compact-multi-pack-index.sh b/t/t5335-compact-multi-pack-index.sh new file mode 100755 index 00000000000000..40f3844282f04e --- /dev/null +++ b/t/t5335-compact-multi-pack-index.sh @@ -0,0 +1,293 @@ +#!/bin/sh + +test_description='multi-pack-index compaction' + +. ./test-lib.sh + +GIT_TEST_MULTI_PACK_INDEX=0 +GIT_TEST_MULTI_PACK_INDEX_WRITE_BITMAP=0 +GIT_TEST_MULTI_PACK_INDEX_WRITE_INCREMENTAL=0 + +objdir=.git/objects +packdir=$objdir/pack +midxdir=$packdir/multi-pack-index.d +midx_chain=$midxdir/multi-pack-index-chain + +nth_line() { + local n="$1" + shift + awk "NR==$n" "$@" +} + +write_packs () { + for c in "$@" + do + test_commit "$c" && + + git pack-objects --all --unpacked $packdir/pack-$c && + git prune-packed && + + git multi-pack-index write --incremental --bitmap || return 1 + done +} + +test_midx_layer_packs () { + local checksum="$1" && + shift && + + test-tool read-midx $objdir "$checksum" >out && + + printf "%s\n" "$@" >expect && + # NOTE: do *not* pipe through sort here, we want to ensure the + # order of packs is preserved during compaction. + grep "^pack-" out | cut -d"-" -f2 >actual && + + test_cmp expect actual +} + +test_midx_layer_object_uniqueness () { + : >objs.all + while read layer + do + test-tool read-midx --show-objects $objdir "$layer" >out && + grep "\.pack$" out | cut -d" " -f1 | sort >objs.layer && + test_stdout_line_count = 0 comm -12 objs.all objs.layer && + cat objs.all objs.layer | sort >objs.tmp && + mv objs.tmp objs.all || return 1 + done <$midx_chain +} + +test_expect_success 'MIDX compaction with lex-ordered pack names' ' + git init midx-compact-lex-order && + ( + cd midx-compact-lex-order && + + git config maintenance.auto false && + + write_packs A B C D E && + test_line_count = 5 $midx_chain && + + git multi-pack-index compact --incremental --bitmap \ + "$(nth_line 2 "$midx_chain")" \ + "$(nth_line 4 "$midx_chain")" && + test_line_count = 3 $midx_chain && + + test_midx_layer_packs "$(nth_line 1 "$midx_chain")" A && + test_midx_layer_packs "$(nth_line 2 "$midx_chain")" B C D && + test_midx_layer_packs "$(nth_line 3 "$midx_chain")" E && + + test_midx_layer_object_uniqueness + ) +' + +test_expect_success 'MIDX compaction with non-lex-ordered pack names' ' + git init midx-compact-non-lex-order && + ( + cd midx-compact-non-lex-order && + + git config maintenance.auto false && + + write_packs D C A B E && + test_line_count = 5 $midx_chain && + + git multi-pack-index compact --incremental --bitmap \ + "$(nth_line 2 "$midx_chain")" \ + "$(nth_line 4 "$midx_chain")" && + test_line_count = 3 $midx_chain && + + test_midx_layer_packs "$(nth_line 1 "$midx_chain")" D && + test_midx_layer_packs "$(nth_line 2 "$midx_chain")" C A B && + test_midx_layer_packs "$(nth_line 3 "$midx_chain")" E && + + test_midx_layer_object_uniqueness + ) +' + +test_expect_success 'setup for bogus MIDX compaction scenarios' ' + git init midx-compact-bogus && + ( + cd midx-compact-bogus && + + git config maintenance.auto false && + + write_packs A B C + ) +' + +test_expect_success 'MIDX compaction with missing endpoints' ' + ( + cd midx-compact-bogus && + + test_must_fail git multi-pack-index compact --incremental \ + "" "" 2>err && + test_grep "could not find MIDX: " err && + + test_must_fail git multi-pack-index compact --incremental \ + "" "$(nth_line 2 "$midx_chain")" 2>err && + test_grep "could not find MIDX: " err && + + test_must_fail git multi-pack-index compact --incremental \ + "$(nth_line 2 "$midx_chain")" "" 2>err && + test_grep "could not find MIDX: " err + ) +' + +test_expect_success 'MIDX compaction with reversed endpoints' ' + ( + cd midx-compact-bogus && + + from="$(nth_line 3 "$midx_chain")" && + to="$(nth_line 1 "$midx_chain")" && + + test_must_fail git multi-pack-index compact --incremental \ + "$from" "$to" 2>err && + + test_grep "MIDX $from must be an ancestor of $to" err + ) +' + +test_expect_success 'MIDX compaction with identical endpoints' ' + ( + cd midx-compact-bogus && + + from="$(nth_line 3 "$midx_chain")" && + to="$(nth_line 3 "$midx_chain")" && + + test_must_fail git multi-pack-index compact --incremental \ + "$from" "$to" 2>err && + + test_grep "MIDX compaction endpoints must be unique" err + ) +' + +test_expect_success 'MIDX compaction with midx.version=1' ' + ( + cd midx-compact-bogus && + + test_must_fail git -c midx.version=1 multi-pack-index compact \ + "$(nth_line 1 "$midx_chain")" \ + "$(nth_line 2 "$midx_chain")" 2>err && + + test_grep "fatal: cannot perform MIDX compaction with v1 format" err + ) +' + +midx_objs_by_pack () { + awk '/\.pack$/ { split($3, a, "-"); print a[2], $1 }' | sort +} + +tag_objs_from_pack () { + objs="$(git rev-list --objects --no-object-names "$2")" && + printf "$1 %s\n" $objs | sort +} + +test_expect_success 'MIDX compaction preserves pack object selection' ' + git init midx-compact-preserve-selection && + ( + cd midx-compact-preserve-selection && + + git config maintenance.auto false && + + test_commit A && + test_commit B && + + # Create two packs, one containing just the objects from + # A, and another containing all objects from the + # repository. + p1="$(echo A | git pack-objects --revs --delta-base-offset \ + $packdir/pack-1)" && + p0="$(echo B | git pack-objects --revs --delta-base-offset \ + $packdir/pack-0)" && + + echo "pack-1-$p1.idx" | git multi-pack-index write \ + --incremental --bitmap --stdin-packs && + echo "pack-0-$p0.idx" | git multi-pack-index write \ + --incremental --bitmap --stdin-packs && + + write_packs C && + + git multi-pack-index compact --incremental --bitmap \ + "$(nth_line 1 "$midx_chain")" \ + "$(nth_line 2 "$midx_chain")" && + + + test-tool read-midx --show-objects $objdir \ + "$(nth_line 1 "$midx_chain")" >AB.info && + test-tool read-midx --show-objects $objdir \ + "$(nth_line 2 "$midx_chain")" >C.info && + + midx_objs_by_pack AB.actual && + midx_objs_by_pack C.actual && + + { + tag_objs_from_pack 1 A && + tag_objs_from_pack 0 A..B + } | sort >AB.expect && + tag_objs_from_pack C B..C >C.expect && + + test_cmp AB.expect AB.actual && + test_cmp C.expect C.actual + ) +' + +test_expect_success 'MIDX compaction with bitmaps' ' + git init midx-compact-with-bitmaps && + ( + cd midx-compact-with-bitmaps && + + git config maintenance.auto false && + + write_packs foo bar baz quux woot && + + test-tool read-midx --bitmap $objdir >bitmap.expect && + git multi-pack-index compact --incremental --bitmap \ + "$(nth_line 2 "$midx_chain")" \ + "$(nth_line 4 "$midx_chain")" && + test-tool read-midx --bitmap $objdir >bitmap.actual && + + test_cmp bitmap.expect bitmap.actual && + + true + ) +' + +test_expect_success 'MIDX compaction with bitmaps (non-trivial)' ' + git init midx-compact-with-bitmaps-non-trivial && + ( + cd midx-compact-with-bitmaps-non-trivial && + + git config maintenance.auto false && + + git branch -m main && + + # D(4) + # / + # A(1) --- B(2) --- C(3) --- G(7) + # \ + # E(5) --- F(6) + write_packs A B C && + git checkout -b side && + write_packs D && + git checkout -b other B && + write_packs E F && + git checkout main && + write_packs G && + + # Compact layers 2-4, leaving us with: + # + # [A, [B, C, D], E, F, G] + git multi-pack-index compact --incremental --bitmap \ + "$(nth_line 2 "$midx_chain")" \ + "$(nth_line 4 "$midx_chain")" && + + # Then compact the top two layers, condensing the above + # such that the new 4th layer contains F and G. + # + # [A, [B, C, D], E, [F, G]] + git multi-pack-index compact --incremental --bitmap \ + "$(nth_line 4 "$midx_chain")" \ + "$(nth_line 5 "$midx_chain")" + ) +' + +test_done diff --git a/tmp-objdir.c b/tmp-objdir.c index e436eed07e4449..d199d39e7c9d51 100644 --- a/tmp-objdir.c +++ b/tmp-objdir.c @@ -11,6 +11,7 @@ #include "strvec.h" #include "quote.h" #include "odb.h" +#include "odb/source.h" #include "repository.h" struct tmp_objdir {