diff --git a/builtin/cat-file.c b/builtin/cat-file.c index b6f12f41d6070a..a02661653a5c06 100644 --- a/builtin/cat-file.c +++ b/builtin/cat-file.c @@ -24,6 +24,7 @@ #include "object-file.h" #include "object-name.h" #include "odb.h" +#include "odb/source.h" #include "odb/streaming.h" #include "replace-object.h" #include "promisor-remote.h" @@ -859,8 +860,17 @@ static void batch_each_object(struct batch_options *opt, */ odb_prepare_alternates(the_repository->objects); for (source = the_repository->objects->sources; source; source = source->next) { - int ret = odb_source_loose_for_each_object(source, NULL, batch_one_object_oi, - &payload, flags); + int ret; + if (!source->packed) { + /* + * Non-files source: dispatch through vtable. + */ + ret = odb_source_for_each_object(source, NULL, + batch_one_object_oi, &payload, flags); + } else { + ret = odb_source_loose_for_each_object(source, NULL, + batch_one_object_oi, &payload, flags); + } if (ret) break; } @@ -882,11 +892,14 @@ static void batch_each_object(struct batch_options *opt, struct object_info oi = { 0 }; for (source = the_repository->objects->sources; source; source = source->next) { - struct odb_source_files *files = odb_source_files_downcast(source); - int ret = packfile_store_for_each_object(files->packed, &oi, - batch_one_object_oi, &payload, flags); - if (ret) - break; + if (!source->packed) + continue; + { + int ret = packfile_store_for_each_object(source->packed, &oi, + batch_one_object_oi, &payload, flags); + if (ret) + break; + } } } diff --git a/builtin/fsck.c b/builtin/fsck.c index 9bab32effed7ec..8746a31a335b46 100644 --- a/builtin/fsck.c +++ b/builtin/fsck.c @@ -17,6 +17,7 @@ #include "object-file.h" #include "object-name.h" #include "odb.h" +#include "odb/source.h" #include "odb/streaming.h" #include "path.h" #include "read-cache-ll.h" @@ -769,24 +770,74 @@ static int fsck_subdir(unsigned int nr, const char *path UNUSED, void *data) return 0; } +static int fsck_vtable_object(const struct object_id *oid, + struct object_info *oi UNUSED, + void *data UNUSED) +{ + enum object_type type; + unsigned long size; + void *contents; + struct object *obj; + int eaten; + + contents = odb_read_object(the_repository->objects, oid, + &type, &size); + if (!contents) { + errors_found |= ERROR_OBJECT; + error(_("%s: object missing"), + describe_object(oid)); + return 0; + } + + obj = parse_object_buffer(the_repository, oid, type, + size, contents, &eaten); + if (!obj) { + errors_found |= ERROR_OBJECT; + error(_("%s: object corrupt or missing"), + describe_object(oid)); + if (!eaten) + free(contents); + return 0; + } + obj->flags |= HAS_OBJ; + if (fsck_obj(obj, contents, size)) + errors_found |= ERROR_OBJECT; + if (!eaten) + free(contents); + return 0; +} + static void fsck_source(struct odb_source *source) { struct progress *progress = NULL; - struct for_each_loose_cb cb_data = { - .progress = progress, - }; if (verbose) fprintf_ln(stderr, _("Checking object directory")); - if (show_progress) - progress = start_progress(the_repository, - _("Checking object directories"), 256); + if (!source->packed) { + /* + * Non-files source: iterate objects through the vtable + * and fsck each one. + */ + odb_source_for_each_object(source, NULL, + fsck_vtable_object, NULL, 0); + return; + } - for_each_loose_file_in_source(source, fsck_loose, - fsck_cruft, fsck_subdir, &cb_data); - display_progress(progress, 256); - stop_progress(&progress); + { + struct for_each_loose_cb cb_data = { + .progress = progress, + }; + + if (show_progress) + progress = start_progress(the_repository, + _("Checking object directories"), 256); + + for_each_loose_file_in_source(source, fsck_loose, + fsck_cruft, fsck_subdir, &cb_data); + display_progress(progress, 256); + stop_progress(&progress); + } } static int fsck_cache_tree(struct cache_tree *it, const char *index_path) diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index da1087930cbd10..c95bafcff78865 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -32,6 +32,7 @@ #include "packfile.h" #include "object-file.h" #include "odb.h" +#include "odb/source.h" #include "odb/streaming.h" #include "replace-object.h" #include "dir.h" @@ -1541,57 +1542,15 @@ static int have_duplicate_entry(const struct object_id *oid, static int want_cruft_object_mtime(struct repository *r, const struct object_id *oid, - unsigned flags, uint32_t mtime) + unsigned flags UNUSED, uint32_t mtime UNUSED) { - struct odb_source *source; - - for (source = r->objects->sources; source; source = source->next) { - struct odb_source_files *files = odb_source_files_downcast(source); - struct packed_git **cache = packfile_store_get_kept_pack_cache(files->packed, flags); - - for (; *cache; cache++) { - struct packed_git *p = *cache; - off_t ofs; - uint32_t candidate_mtime; - - ofs = find_pack_entry_one(oid, p); - if (!ofs) - continue; - - /* - * We have a copy of the object 'oid' in a non-cruft - * pack. We can avoid packing an additional copy - * regardless of what the existing copy's mtime is since - * it is outside of a cruft pack. - */ - if (!p->is_cruft) - return 0; - - /* - * If we have a copy of the object 'oid' in a cruft - * pack, then either read the cruft pack's mtime for - * that object, or, if that can't be loaded, assume the - * pack's mtime itself. - */ - if (!load_pack_mtimes(p)) { - uint32_t pos; - if (offset_to_pack_pos(p, ofs, &pos) < 0) - continue; - candidate_mtime = nth_packed_mtime(p, pos); - } else { - candidate_mtime = p->mtime; - } - - /* - * We have a surviving copy of the object in a cruft - * pack whose mtime is greater than or equal to the one - * we are considering. We can thus avoid packing an - * additional copy of that object. - */ - if (mtime <= candidate_mtime) - return 0; - } - } + /* + * Check if the object exists in a kept source. Dispatches through + * the vtable: files backends check kept packs, non-files backends + * check their own kept tracking via OBJECT_INFO_KEPT_ONLY. + */ + if (odb_has_object_kept(r->objects, oid)) + return 0; return -1; } @@ -1657,7 +1616,7 @@ static int want_found_object(const struct object_id *oid, int exclude, return 0; if (ignore_packed_keep_in_core && p->pack_keep_in_core) return 0; - if (has_object_kept_pack(p->repo, oid, flags)) + if (odb_has_object_kept(p->repo->objects, oid)) return 0; } else { /* @@ -1726,8 +1685,6 @@ static int want_object_in_pack_mtime(const struct object_id *oid, uint32_t found_mtime) { int want; - struct packfile_list_entry *e; - struct odb_source *source; if (!exclude && local) { /* @@ -1757,25 +1714,18 @@ static int want_object_in_pack_mtime(const struct object_id *oid, odb_prepare_alternates(the_repository->objects); - for (source = the_repository->objects->sources; source; source = source->next) { - struct multi_pack_index *m = get_multi_pack_index(source); - struct pack_entry e; - - if (m && fill_midx_entry(m, oid, &e)) { - want = want_object_in_pack_one(e.p, oid, exclude, found_pack, found_offset, found_mtime); - if (want != -1) - return want; - } - } - - for (source = the_repository->objects->sources; source; source = source->next) { - struct odb_source_files *files = odb_source_files_downcast(source); + { + struct object_info oi = OBJECT_INFO_INIT; - for (e = files->packed->packs.head; e; e = e->next) { - struct packed_git *p = e->pack; - want = want_object_in_pack_one(p, oid, exclude, found_pack, found_offset, found_mtime); - if (!exclude && want > 0) - packfile_list_prepend(&files->packed->packs, p); + if (!odb_read_object_info_extended(the_repository->objects, + oid, &oi, + OBJECT_INFO_QUICK) && + oi.whence == OI_PACKED) { + struct packed_git *p = oi.u.packed.pack; + want = want_object_in_pack_one(p, oid, exclude, + found_pack, + found_offset, + found_mtime); if (want != -1) return want; } @@ -4065,7 +4015,7 @@ static void show_cruft_commit(struct commit *commit, void *data) static int cruft_include_check_obj(struct object *obj, void *data UNUSED) { - return !has_object_kept_pack(to_pack.repo, &obj->oid, KEPT_PACK_IN_CORE); + return !odb_has_object_kept(to_pack.repo->objects, &obj->oid); } static int cruft_include_check(struct commit *commit, void *data) @@ -4365,17 +4315,15 @@ static void add_objects_in_unpacked_packs(void) odb_prepare_alternates(to_pack.repo->objects); for (source = to_pack.repo->objects->sources; source; source = source->next) { - struct odb_source_files *files = odb_source_files_downcast(source); - if (!source->local) continue; - if (packfile_store_for_each_object(files->packed, &oi, - add_object_in_unpacked_pack, NULL, - ODB_FOR_EACH_OBJECT_PACK_ORDER | - ODB_FOR_EACH_OBJECT_LOCAL_ONLY | - ODB_FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS | - ODB_FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS)) + if (odb_source_for_each_object(source, &oi, + add_object_in_unpacked_pack, NULL, + ODB_FOR_EACH_OBJECT_PACK_ORDER | + ODB_FOR_EACH_OBJECT_LOCAL_ONLY | + ODB_FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS | + ODB_FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS)) die(_("cannot open pack index")); } } diff --git a/loose.c b/loose.c index 07333be6969fcc..806b6eec0cd57f 100644 --- a/loose.c +++ b/loose.c @@ -128,13 +128,17 @@ int repo_read_loose_object_map(struct repository *repo) int repo_write_loose_object_map(struct repository *repo) { - struct odb_source_files *files = odb_source_files_downcast(repo->objects->sources); - kh_oid_map_t *map = files->loose->map->to_compat; + struct odb_source *source = repo->objects->sources; + kh_oid_map_t *map; struct lock_file lock; int fd; khiter_t iter; struct strbuf buf = STRBUF_INIT, path = STRBUF_INIT; + if (!source->loose || !source->loose->map) + return 0; + map = source->loose->map->to_compat; + if (!should_use_loose_object_map(repo)) return 0; @@ -235,8 +239,10 @@ int repo_loose_object_map_oid(struct repository *repo, khiter_t pos; for (source = repo->objects->sources; source; source = source->next) { - struct odb_source_files *files = odb_source_files_downcast(source); - struct loose_object_map *loose_map = files->loose->map; + struct loose_object_map *loose_map; + if (!source->loose) + continue; + loose_map = source->loose->map; if (!loose_map) continue; map = (to == repo->compat_hash_algo) ? diff --git a/midx.c b/midx.c index 81d6ab11e6eb0e..f6d63cc38b75d7 100644 --- a/midx.c +++ b/midx.c @@ -101,9 +101,10 @@ static int midx_read_object_offsets(const unsigned char *chunk_start, struct multi_pack_index *get_multi_pack_index(struct odb_source *source) { - struct odb_source_files *files = odb_source_files_downcast(source); - packfile_store_prepare(files->packed); - return files->packed->midx; + if (!source->packed) + return NULL; + packfile_store_prepare(source->packed); + return source->packed->midx; } static struct multi_pack_index *load_multi_pack_index_one(struct odb_source *source, diff --git a/object-name.c b/object-name.c index e5adec4c9d5084..8f503b985f8b9b 100644 --- a/object-name.c +++ b/object-name.c @@ -20,6 +20,7 @@ #include "packfile.h" #include "pretty.h" #include "object-file.h" +#include "odb/source.h" #include "read-cache-ll.h" #include "repo-settings.h" #include "repository.h" @@ -111,13 +112,28 @@ static enum cb_next match_prefix(const struct object_id *oid, void *arg) return ds->ambiguous ? CB_BREAK : CB_CONTINUE; } +static int disambiguate_cb(const struct object_id *oid, + struct object_info *oi UNUSED, void *data) +{ + struct disambiguate_state *ds = data; + update_candidates(ds, oid); + return ds->ambiguous ? 1 : 0; +} + static void find_short_object_filename(struct disambiguate_state *ds) { struct odb_source *source; - for (source = ds->repo->objects->sources; source && !ds->ambiguous; source = source->next) - oidtree_each(odb_source_loose_cache(source, &ds->bin_pfx), - &ds->bin_pfx, ds->len, match_prefix, ds); + for (source = ds->repo->objects->sources; source && !ds->ambiguous; source = source->next) { + if (source->for_each_unique_abbrev) { + odb_source_for_each_unique_abbrev( + source, &ds->bin_pfx, ds->len, + disambiguate_cb, ds); + } else { + oidtree_each(odb_source_loose_cache(source, &ds->bin_pfx), + &ds->bin_pfx, ds->len, match_prefix, ds); + } + } } static int match_hash(unsigned len, const unsigned char *a, const unsigned char *b) @@ -208,15 +224,23 @@ static void find_short_packed_object(struct disambiguate_state *ds) odb_prepare_alternates(ds->repo->objects); for (source = ds->repo->objects->sources; source && !ds->ambiguous; source = source->next) { - struct multi_pack_index *m = get_multi_pack_index(source); - if (m) - unique_in_midx(m, ds); + if (source->for_each_unique_abbrev) { + odb_source_for_each_unique_abbrev( + source, &ds->bin_pfx, ds->len, + disambiguate_cb, ds); + } else { + struct multi_pack_index *m = get_multi_pack_index(source); + if (m) + unique_in_midx(m, ds); + } } - repo_for_each_pack(ds->repo, p) { - if (ds->ambiguous) - break; - unique_in_pack(p, ds); + if (!ds->repo->objects->sources->for_each_unique_abbrev) { + repo_for_each_pack(ds->repo, p) { + if (ds->ambiguous) + break; + unique_in_pack(p, ds); + } } } @@ -796,19 +820,38 @@ static void find_abbrev_len_for_pack(struct packed_git *p, mad->init_len = mad->cur_len; } -static void find_abbrev_len_packed(struct min_abbrev_data *mad) +static int abbrev_len_cb(const struct object_id *oid, + struct object_info *oi UNUSED, void *data) { - struct packed_git *p; + struct min_abbrev_data *mad = data; + extend_abbrev_len(oid, mad); + return 0; +} +static void find_abbrev_len_packed(struct min_abbrev_data *mad) +{ odb_prepare_alternates(mad->repo->objects); - for (struct odb_source *source = mad->repo->objects->sources; source; source = source->next) { - struct multi_pack_index *m = get_multi_pack_index(source); - if (m) - find_abbrev_len_for_midx(m, mad); + + for (struct odb_source *source = mad->repo->objects->sources; + source; source = source->next) { + if (source->for_each_unique_abbrev) { + mad->init_len = 0; + odb_source_for_each_unique_abbrev( + source, mad->oid, mad->cur_len, + abbrev_len_cb, mad); + mad->init_len = mad->cur_len; + } else { + struct multi_pack_index *m = get_multi_pack_index(source); + if (m) + find_abbrev_len_for_midx(m, mad); + } } - repo_for_each_pack(mad->repo, p) - find_abbrev_len_for_pack(p, mad); + if (!mad->repo->objects->sources->for_each_unique_abbrev) { + struct packed_git *p; + repo_for_each_pack(mad->repo, p) + find_abbrev_len_for_pack(p, mad); + } } void strbuf_repo_add_unique_abbrev(struct strbuf *sb, struct repository *repo, diff --git a/odb.c b/odb.c index 350e23f3c0798d..5363d28d4c1687 100644 --- a/odb.c +++ b/odb.c @@ -981,6 +981,45 @@ int odb_write_object_stream(struct object_database *odb, return odb_source_write_object_stream(odb->sources, stream, len, oid); } +int odb_has_object_kept(struct object_database *odb, + const struct object_id *oid) +{ + struct odb_source *source; + + odb_prepare_alternates(odb); + for (source = odb->sources; source; source = source->next) + if (!odb_source_read_object_info(source, oid, NULL, + OBJECT_INFO_KEPT_ONLY)) + return 1; + return 0; +} + +int odb_write_packfile(struct object_database *odb, + int pack_fd, + struct odb_write_packfile_options *opts) +{ + return odb_source_write_packfile(odb->sources, pack_fd, opts); +} + +int odb_for_each_unique_abbrev(struct object_database *odb, + const struct object_id *oid_prefix, + unsigned int prefix_len, + odb_for_each_object_cb cb, + void *cb_data) +{ + int ret; + + odb_prepare_alternates(odb); + for (struct odb_source *source = odb->sources; source; source = source->next) { + ret = odb_source_for_each_unique_abbrev(source, oid_prefix, + prefix_len, cb, cb_data); + if (ret) + return ret; + } + + return 0; +} + struct object_database *odb_new(struct repository *repo, const char *primary_source, const char *secondary_sources) diff --git a/odb.h b/odb.h index 9aee260105ae54..2b38ad897322ea 100644 --- a/odb.h +++ b/odb.h @@ -374,6 +374,13 @@ enum object_info_flags { * clone. Implies OBJECT_INFO_SKIP_FETCH_OBJECT and OBJECT_INFO_QUICK. */ OBJECT_INFO_FOR_PREFETCH = (OBJECT_INFO_SKIP_FETCH_OBJECT | OBJECT_INFO_QUICK), + + /* + * Only consider objects marked as "kept" (surviving GC). Used by + * helper backends that track kept status per object. Backends that + * do not support kept tracking should return -1 (not found). + */ + OBJECT_INFO_KEPT_ONLY = (1 << 5), }; /* @@ -413,6 +420,14 @@ int odb_has_object(struct object_database *odb, int odb_freshen_object(struct object_database *odb, const struct object_id *oid); +/* + * Check if an object exists in a kept source. Returns 1 if found in + * any kept source, 0 otherwise. Uses OBJECT_INFO_KEPT_ONLY flag for + * backends that track kept status per object. + */ +int odb_has_object_kept(struct object_database *odb, + const struct object_id *oid); + void odb_assert_oid_type(struct object_database *odb, const struct object_id *oid, enum object_type expect); @@ -570,6 +585,25 @@ int odb_write_object_stream(struct object_database *odb, struct odb_write_stream *stream, size_t len, struct object_id *oid); +/* + * Ingest a pack from a file descriptor into the primary source. + * Returns 0 on success, a negative error code otherwise. + */ +struct odb_write_packfile_options; +int odb_write_packfile(struct object_database *odb, + int pack_fd, + struct odb_write_packfile_options *opts); + +/* + * Iterate over all objects across all sources whose ID starts with + * the given prefix. Used for object name disambiguation. + */ +int odb_for_each_unique_abbrev(struct object_database *odb, + const struct object_id *oid_prefix, + unsigned int prefix_len, + odb_for_each_object_cb cb, + void *cb_data); + void parse_alternates(const char *string, int sep, const char *relative_base, diff --git a/odb/source-files.c b/odb/source-files.c index c08d8993e378a0..43d878bda65059 100644 --- a/odb/source-files.c +++ b/odb/source-files.c @@ -1,14 +1,21 @@ #include "git-compat-util.h" #include "abspath.h" #include "chdir-notify.h" +#include "config.h" #include "gettext.h" #include "lockfile.h" +#include "loose.h" +#include "midx.h" #include "object-file.h" #include "odb.h" #include "odb/source.h" #include "odb/source-files.h" +#include "pack-objects.h" #include "packfile.h" +#include "run-command.h" #include "strbuf.h" +#include "strvec.h" +#include "oidtree.h" #include "write-or-die.h" static void odb_source_files_reparent(const char *name UNUSED, @@ -232,6 +239,255 @@ static int odb_source_files_write_alternate(struct odb_source *source, return ret; } +static int odb_source_files_write_packfile(struct odb_source *source, + int pack_fd, + struct odb_write_packfile_options *opts) +{ + struct odb_source_files *files = odb_source_files_downcast(source); + struct child_process cmd = CHILD_PROCESS_INIT; + int fsck_objects = 0; + int use_index_pack = 1; + int ret; + + if (opts && opts->nr_objects) { + int transfer_unpack_limit = -1; + int fetch_unpack_limit = -1; + int unpack_limit = 100; + + repo_config_get_int(source->odb->repo, "fetch.unpacklimit", + &fetch_unpack_limit); + repo_config_get_int(source->odb->repo, "transfer.unpacklimit", + &transfer_unpack_limit); + if (0 <= fetch_unpack_limit) + unpack_limit = fetch_unpack_limit; + else if (0 <= transfer_unpack_limit) + unpack_limit = transfer_unpack_limit; + + if (opts->nr_objects < (unsigned int)unpack_limit && + !opts->from_promisor && !opts->lockfile_out) + use_index_pack = 0; + } + + cmd.in = pack_fd; + cmd.git_cmd = 1; + + if (!use_index_pack) { + strvec_push(&cmd.args, "unpack-objects"); + if (opts && opts->quiet) + strvec_push(&cmd.args, "-q"); + if (opts && opts->pack_header_version) + strvec_pushf(&cmd.args, "--pack_header=%"PRIu32",%"PRIu32, + opts->pack_header_version, + opts->pack_header_entries); + repo_config_get_bool(source->odb->repo, "transfer.fsckobjects", + &fsck_objects); + repo_config_get_bool(source->odb->repo, "receive.fsckobjects", + &fsck_objects); + if (fsck_objects) + strvec_push(&cmd.args, "--strict"); + if (opts && opts->max_input_size) + strvec_pushf(&cmd.args, "--max-input-size=%lu", + opts->max_input_size); + ret = run_command(&cmd); + if (ret) + return error(_("unpack-objects failed")); + return 0; + } + + strvec_push(&cmd.args, "index-pack"); + strvec_push(&cmd.args, "--stdin"); + strvec_push(&cmd.args, "--keep=write_packfile"); + + if (opts && opts->pack_header_version) + strvec_pushf(&cmd.args, "--pack_header=%"PRIu32",%"PRIu32, + opts->pack_header_version, + opts->pack_header_entries); + + if (opts) { + if (opts->use_thin_pack) + strvec_push(&cmd.args, "--fix-thin"); + if (opts->from_promisor) + strvec_push(&cmd.args, "--promisor"); + if (opts->check_self_contained) + strvec_push(&cmd.args, "--check-self-contained-and-connected"); + if (opts->max_input_size) + strvec_pushf(&cmd.args, "--max-input-size=%lu", + opts->max_input_size); + if (opts->shallow_file) + strvec_pushf(&cmd.env, "GIT_SHALLOW_FILE=%s", + opts->shallow_file); + if (opts->report_end_of_input) + strvec_push(&cmd.args, "--report-end-of-input"); + if (opts->fsck_objects) + fsck_objects = 1; + } + + if (!fsck_objects) { + repo_config_get_bool(source->odb->repo, "transfer.fsckobjects", + &fsck_objects); + repo_config_get_bool(source->odb->repo, "fetch.fsckobjects", + &fsck_objects); + } + if (fsck_objects) + strvec_push(&cmd.args, "--strict"); + + if (opts && opts->lockfile_out) { + cmd.out = -1; + ret = start_command(&cmd); + if (ret) + return error(_("index-pack failed to start")); + *opts->lockfile_out = index_pack_lockfile(source->odb->repo, + cmd.out, NULL); + close(cmd.out); + ret = finish_command(&cmd); + } else { + ret = run_command(&cmd); + } + + if (ret) + return error(_("index-pack failed")); + + if (opts && opts->check_self_contained) + opts->self_contained_out = 1; + + packfile_store_reprepare(files->packed); + return 0; +} + +static int match_hash_prefix(unsigned len, const unsigned char *a, + const unsigned char *b) +{ + while (len > 1) { + if (*a != *b) + return 0; + a++; b++; len -= 2; + } + if (len) + if ((*a ^ *b) & 0xf0) + return 0; + return 1; +} + +struct abbrev_cb_data { + odb_for_each_object_cb cb; + void *cb_data; + int ret; +}; + +static enum cb_next abbrev_loose_cb(const struct object_id *oid, void *data) +{ + struct abbrev_cb_data *d = data; + d->ret = d->cb(oid, NULL, d->cb_data); + return d->ret ? CB_BREAK : CB_CONTINUE; +} + +static int odb_source_files_for_each_unique_abbrev(struct odb_source *source, + const struct object_id *oid_prefix, + unsigned int prefix_len, + odb_for_each_object_cb cb, + void *cb_data) +{ + struct odb_source_files *files = odb_source_files_downcast(source); + struct multi_pack_index *m; + struct packfile_list_entry *entry; + unsigned int hexsz = source->odb->repo->hash_algo->hexsz; + unsigned int len = prefix_len > hexsz ? hexsz : prefix_len; + + /* Search loose objects */ + { + struct oidtree *tree = odb_source_loose_cache(source, oid_prefix); + if (tree) { + struct abbrev_cb_data d = { cb, cb_data, 0 }; + oidtree_each(tree, oid_prefix, prefix_len, abbrev_loose_cb, &d); + if (d.ret) + return d.ret; + } + } + + /* Search multi-pack indices */ + m = get_multi_pack_index(source); + for (; m; m = m->base_midx) { + uint32_t num, i, first = 0; + + if (!m->num_objects) + continue; + + num = m->num_objects + m->num_objects_in_base; + bsearch_one_midx(oid_prefix, m, &first); + + for (i = first; i < num; i++) { + struct object_id oid; + const struct object_id *current; + int ret; + + current = nth_midxed_object_oid(&oid, m, i); + if (!match_hash_prefix(len, oid_prefix->hash, current->hash)) + break; + ret = cb(current, NULL, cb_data); + if (ret) + return ret; + } + } + + /* Search packs not covered by MIDX */ + for (entry = packfile_store_get_packs(files->packed); entry; entry = entry->next) { + struct packed_git *p = entry->pack; + uint32_t num, i, first = 0; + + if (p->multi_pack_index) + continue; + if (open_pack_index(p) || !p->num_objects) + continue; + + num = p->num_objects; + bsearch_pack(oid_prefix, p, &first); + + for (i = first; i < num; i++) { + struct object_id oid; + int ret; + + nth_packed_object_id(&oid, p, i); + if (!match_hash_prefix(len, oid_prefix->hash, oid.hash)) + break; + ret = cb(&oid, NULL, cb_data); + if (ret) + return ret; + } + } + + return 0; +} + +static int odb_source_files_convert_object_id(struct odb_source *source, + const struct object_id *src, + const struct git_hash_algo *to, + struct object_id *dest) +{ + struct odb_source_files *files = odb_source_files_downcast(source); + struct loose_object_map *map; + kh_oid_map_t *hash_map; + khiter_t pos; + + if (!files->loose || !files->loose->map) + return -1; + + map = files->loose->map; + + if (to == source->odb->repo->compat_hash_algo) + hash_map = map->to_compat; + else if (to == source->odb->repo->hash_algo) + hash_map = map->to_storage; + else + return -1; + + pos = kh_get_oid_map(hash_map, *src); + if (pos == kh_end(hash_map)) + return -1; + + oidcpy(dest, kh_value(hash_map, pos)); + return 0; +} + struct odb_source_files *odb_source_files_new(struct object_database *odb, const char *path, bool local) @@ -242,6 +498,8 @@ struct odb_source_files *odb_source_files_new(struct object_database *odb, odb_source_init(&files->base, odb, ODB_SOURCE_FILES, path, local); files->loose = odb_source_loose_new(&files->base); files->packed = packfile_store_new(&files->base); + files->base.packed = files->packed; + files->base.loose = files->loose; files->base.free = odb_source_files_free; files->base.close = odb_source_files_close; @@ -256,6 +514,9 @@ struct odb_source_files *odb_source_files_new(struct object_database *odb, files->base.begin_transaction = odb_source_files_begin_transaction; files->base.read_alternates = odb_source_files_read_alternates; files->base.write_alternate = odb_source_files_write_alternate; + files->base.write_packfile = odb_source_files_write_packfile; + files->base.for_each_unique_abbrev = odb_source_files_for_each_unique_abbrev; + files->base.convert_object_id = odb_source_files_convert_object_id; /* * Ideally, we would only ever store absolute paths in the source. This diff --git a/odb/source.h b/odb/source.h index 96c906e7a1b350..c4b8bd76dc2e64 100644 --- a/odb/source.h +++ b/odb/source.h @@ -13,12 +13,42 @@ enum odb_source_type { /* The "files" backend that uses loose objects and packfiles. */ ODB_SOURCE_FILES, + + /* An external helper process (git-local-). */ + ODB_SOURCE_HELPER, }; struct object_id; struct odb_read_stream; struct strvec; +/* + * Options for write_packfile. When NULL is passed, the backend + * uses sensible defaults. + */ +struct odb_write_packfile_options { + unsigned int nr_objects; + uint32_t pack_header_version; + uint32_t pack_header_entries; + int use_thin_pack; + int from_promisor; + int fsck_objects; + int check_self_contained; + unsigned long max_input_size; + int quiet; + int show_progress; + int report_end_of_input; + const char *shallow_file; + char **lockfile_out; + + /* + * Output: set to 1 by the backend if the ingested pack was + * verified as self-contained (all referenced objects present). + * Used by the transport layer to skip connectivity checks. + */ + int self_contained_out; +}; + /* * The source is the part of the object database that stores the actual * objects. It thus encapsulates the logic to read and write the specific @@ -48,6 +78,21 @@ struct odb_source { */ bool local; + /* + * Packfile store for this source, or NULL if this source does + * not manage pack files. Set by the files backend; left NULL + * by backends that store objects differently. Used by the + * pack iterator (repo_for_each_pack) to skip non-pack sources + * without downcasting. + */ + struct packfile_store *packed; + + /* + * Loose object store for this source, or NULL if this source + * does not manage loose objects. + */ + struct odb_source_loose *loose; + /* * This object store is ephemeral, so there is no need to fsync. */ @@ -237,6 +282,45 @@ struct odb_source { */ int (*write_alternate)(struct odb_source *source, const char *alternate); + + /* + * Ingest a pack from a file descriptor. Each backend chooses + * its own ingestion strategy: + * + * - The files backend spawns index-pack (large packs) or + * unpack-objects (small packs), then registers the result. + * + * - Non-files backends may parse the pack and write each + * object individually through write_object. + * + * Returns 0 on success, a negative error code otherwise. + */ + int (*write_packfile)(struct odb_source *source, + int pack_fd, + struct odb_write_packfile_options *opts); + + /* + * Iterate over all objects whose object ID starts with the + * given prefix. Used for object name disambiguation. + * + * Returns 0 on success, a negative error code in case + * iteration has failed, or a non-zero value from the callback. + */ + int (*for_each_unique_abbrev)(struct odb_source *source, + const struct object_id *oid_prefix, + unsigned int prefix_len, + odb_for_each_object_cb cb, + void *cb_data); + + /* + * Translate an object ID from one hash algorithm to another + * using the source's internal mapping (for SHA-1/SHA-256 + * migration). Returns 0 on success, -1 if no mapping exists. + */ + int (*convert_object_id)(struct odb_source *source, + const struct object_id *src, + const struct git_hash_algo *to, + struct object_id *dest); }; /* @@ -442,4 +526,43 @@ static inline int odb_source_begin_transaction(struct odb_source *source, return source->begin_transaction(source, out); } +/* + * Ingest a pack from a file descriptor into the given source. Returns 0 on + * success, a negative error code otherwise. + */ +static inline int odb_source_write_packfile(struct odb_source *source, + int pack_fd, + struct odb_write_packfile_options *opts) +{ + return source->write_packfile(source, pack_fd, opts); +} + +/* + * Iterate over all objects in the source whose ID starts with the given + * prefix. Used for object name disambiguation. + */ +static inline int odb_source_for_each_unique_abbrev(struct odb_source *source, + const struct object_id *oid_prefix, + unsigned int prefix_len, + odb_for_each_object_cb cb, + void *cb_data) +{ + return source->for_each_unique_abbrev(source, oid_prefix, prefix_len, + cb, cb_data); +} + +/* + * Translate an object ID between hash algorithms using the source's mapping. + * Returns 0 on success, -1 if no mapping exists. + */ +static inline int odb_source_convert_object_id(struct odb_source *source, + const struct object_id *src, + const struct git_hash_algo *to, + struct object_id *dest) +{ + if (!source->convert_object_id) + return -1; + return source->convert_object_id(source, src, to, dest); +} + #endif diff --git a/packfile.c b/packfile.c index d4de9f3ffe831e..9760a701aa6e73 100644 --- a/packfile.c +++ b/packfile.c @@ -363,8 +363,9 @@ static int unuse_one_window(struct object_database *odb) struct pack_window *lru_w = NULL, *lru_l = NULL; for (source = odb->sources; source; source = source->next) { - struct odb_source_files *files = odb_source_files_downcast(source); - for (e = files->packed->packs.head; e; e = e->next) + if (!source->packed) + continue; + for (e = source->packed->packs.head; e; e = e->next) scan_windows(e->pack, &lru_p, &lru_w, &lru_l); } @@ -539,8 +540,9 @@ static int close_one_pack(struct repository *r) int accept_windows_inuse = 1; for (source = r->objects->sources; source; source = source->next) { - struct odb_source_files *files = odb_source_files_downcast(source); - for (e = files->packed->packs.head; e; e = e->next) { + if (!source->packed) + continue; + for (e = source->packed->packs.head; e; e = e->next) { if (e->pack->pack_fd == -1) continue; find_lru_pack(e->pack, &lru_p, &mru_w, &accept_windows_inuse); @@ -1249,10 +1251,11 @@ const struct packed_git *has_packed_and_bad(struct repository *r, struct odb_source *source; for (source = r->objects->sources; source; source = source->next) { - struct odb_source_files *files = odb_source_files_downcast(source); struct packfile_list_entry *e; - for (e = files->packed->packs.head; e; e = e->next) + if (!source->packed) + continue; + for (e = source->packed->packs.head; e; e = e->next) if (oidset_contains(&e->pack->bad_objects, oid)) return e->pack; } @@ -2266,8 +2269,10 @@ int has_object_pack(struct repository *r, const struct object_id *oid) odb_prepare_alternates(r->objects); for (source = r->objects->sources; source; source = source->next) { - struct odb_source_files *files = odb_source_files_downcast(source); - int ret = find_pack_entry(files->packed, oid, &e); + int ret; + if (!source->packed) + continue; + ret = find_pack_entry(source->packed, oid, &e); if (ret) return ret; } @@ -2282,10 +2287,11 @@ int has_object_kept_pack(struct repository *r, const struct object_id *oid, struct pack_entry e; for (source = r->objects->sources; source; source = source->next) { - struct odb_source_files *files = odb_source_files_downcast(source); struct packed_git **cache; - cache = packfile_store_get_kept_pack_cache(files->packed, flags); + if (!source->packed) + continue; + cache = packfile_store_get_kept_pack_cache(source->packed, flags); for (; *cache; cache++) { struct packed_git *p = *cache; diff --git a/packfile.h b/packfile.h index a16ec3950d2507..65627188dc70e3 100644 --- a/packfile.h +++ b/packfile.h @@ -193,8 +193,10 @@ static inline struct repo_for_each_pack_data repo_for_eack_pack_data_init(struct odb_prepare_alternates(repo->objects); for (struct odb_source *source = repo->objects->sources; source; source = source->next) { - struct odb_source_files *files = odb_source_files_downcast(source); - struct packfile_list_entry *entry = packfile_store_get_packs(files->packed); + struct packfile_list_entry *entry; + if (!source->packed) + continue; + entry = packfile_store_get_packs(source->packed); if (!entry) continue; data.source = source; @@ -214,8 +216,10 @@ static inline void repo_for_each_pack_data_next(struct repo_for_each_pack_data * return; for (source = data->source->next; source; source = source->next) { - struct odb_source_files *files = odb_source_files_downcast(source); - struct packfile_list_entry *entry = packfile_store_get_packs(files->packed); + struct packfile_list_entry *entry; + if (!source->packed) + continue; + entry = packfile_store_get_packs(source->packed); if (!entry) continue; data->source = source;