Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
180 changes: 164 additions & 16 deletions src/together/lib/cli/api/beta/jig/jig.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@

# Name of the environment variable that the warm image exports (it is the key of
# the ENV line in the Dockerfile generated by _build_warm_image). Overridable via
# the WARMUP_ENV_NAME environment variable.
WARMUP_ENV_NAME = os.getenv("WARMUP_ENV_NAME", "TORCHINDUCTOR_CACHE_DIR")
# Directory name under /app that the warmup cache is copied into in the warm image.
# Overridable via the WARMUP_DEST environment variable.
WARMUP_DEST = os.getenv("WARMUP_DEST", "torch_cache")
# Exporter attributes appended to `--output type=image,name=<image>,...` whenever
# an image is pushed through `docker buildx build`.
BUILDX_OUTPUT_OPTS = "compression=zstd,compression-level=3,force-compression=true,oci-mediatypes=true"

# NOTE(review): presumably the poll interval and overall timeout (in seconds) used
# while tracking a deployment; their usage is outside this view — confirm.
_TRACK_POLL_INTERVAL = 3
_TRACK_TIMEOUT = 600
Expand Down Expand Up @@ -477,7 +478,8 @@ def _build_warm_image(base_image: str) -> None:
# generate cache dockerfile - copy cache to same location used during warmup
final_dockerfile = f"""FROM {base_image}
COPY {cache_dir.name} /app/{WARMUP_DEST}
ENV {WARMUP_ENV_NAME}=/app/{WARMUP_DEST}"""
ENV {WARMUP_ENV_NAME}=/app/{WARMUP_DEST}
LABEL jig.warmed=true"""

console.print("\N{FIRE} Building final image with cache...")
final_cmd = ["docker", "build", "--platform", "linux/amd64", "-t", base_image, "-f", "-", "."]
Expand Down Expand Up @@ -537,20 +539,31 @@ def registry(self) -> str:
def image(self, tag: str) -> str:
    """Return the full image reference: registry prefix, image name, then ``:tag``."""
    repository = f"{self.registry()}{self.name}"
    return f"{repository}:{tag}"

def _metadata_path(self, tag: str) -> Path:
    """Return the file that buildx's ``--metadata-file`` output for ``tag`` is written to.

    The file lives in the parent directory of ``self.config._path`` so that a
    later process can read it back to recover the digest of the pushed image.
    """
    metadata_dir = self.config._path.parent
    filename = f".jig-{self.name}-{tag}.metadata.json"
    return metadata_dir / filename

def image_with_digest(self, tag: str = "latest") -> str:
    """Return the image reference for ``tag``, pinned to a digest when ``tag`` is "latest".

    Any tag other than "latest" is returned unchanged. For "latest" the digest
    is looked up in two places, in order:

    1. the buildx ``--metadata-file`` stored at ``self._metadata_path(tag)``
       (key ``containerimage.digest``);
    2. the local daemon's ``RepoDigests`` for the image, keeping the first
       entry that starts with this registry's prefix.

    Raises:
        JigError: if neither source yields a digest.
    """
    image = self.image(tag)
    if tag != "latest":
        return image

    # A missing, unreadable, or malformed metadata file is not an error here:
    # fall through to `docker inspect`. ValueError covers both invalid JSON and
    # undecodable file content.
    try:
        metadata = json.loads(self._metadata_path(tag).read_text())
    except (OSError, ValueError):
        metadata = None
    # Valid JSON is not necessarily an object; only a dict can carry the digest key.
    digest = metadata.get("containerimage.digest") if isinstance(metadata, dict) else None
    if digest:
        return f"{image}@{digest}"

    inspected = subprocess.run(
        ["docker", "inspect", "--format={{json .RepoDigests}}", image],
        capture_output=True,
        text=True,
    )
    repo_digests = inspected.stdout.strip() if inspected.returncode == 0 else ""
    # `docker inspect` prints the literal "null" when the image has no repo digests.
    if repo_digests and repo_digests != "null":
        try:
            candidates = json.loads(repo_digests)
        except json.JSONDecodeError:
            candidates = None
        if isinstance(candidates, list):
            prefix = self.registry()
            for candidate in candidates:
                # Only digests recorded for our own registry are usable.
                if isinstance(candidate, str) and candidate.startswith(prefix):
                    return candidate
    raise JigError(f"Could not find digest for {image}. Build and push the image first.")

def sync_secrets_from_deployment(self) -> None:
"""Sync remote secrets into local state if secrets have never been tracked.
Expand Down Expand Up @@ -610,13 +623,31 @@ def build(self, tag: str = "latest", warmup: bool = False, docker_args: str | No
)

console.print(f"Building {image}")
cmd = ["docker", "build", "--platform", "linux/amd64", "-t", image, "."]
if warmup or os.getenv("JIG_DISABLE_BUILDX"):
cmd = ["docker", "build", "--platform", "linux/amd64", "-t", image]
else:
builder = _ensure_zstd_builder()
if not builder:
raise JigError("`docker buildx` is required to build images.")
cmd = [
"docker",
"buildx",
"build",
"--builder",
builder,
"--platform",
"linux/amd64",
"--load",
"-t",
image,
]
if self.config.image.dockerfile_path != "Dockerfile":
cmd.extend(["-f", self.config.image.dockerfile_path])

extra_args = docker_args or os.getenv("DOCKER_BUILD_EXTRA_ARGS", "")
if extra_args:
cmd.extend(shlex.split(extra_args))
cmd.append(".")
if subprocess.run(cmd).returncode != 0:
raise JigError("Build failed")

Expand All @@ -633,10 +664,88 @@ def push(self, tag: str = "latest") -> None:
raise JigError("Registry login failed")

console.print(f"Pushing {image}")
if subprocess.run(["docker", "push", image]).returncode != 0:
self._metadata_path(tag).unlink(missing_ok=True)
# Skip buildx for warmup-baked images: a buildx rebuild would drop the warmup layer.
if _image_is_warmed(image) or os.getenv("JIG_DISABLE_BUILDX"):
ok = subprocess.run(["docker", "push", image]).returncode == 0
else:
builder = _ensure_zstd_builder()
if not builder:
raise JigError("`docker buildx` is required to build images.")
cmd = [
"docker",
"buildx",
"build",
"--builder",
builder,
"--platform",
"linux/amd64",
"--push",
"--output",
Comment thread
narasul marked this conversation as resolved.
f"type=image,name={image},{BUILDX_OUTPUT_OPTS}",
"--metadata-file",
str(self._metadata_path(tag)),
]
if self.config.image.dockerfile_path != "Dockerfile":
cmd.extend(["-f", self.config.image.dockerfile_path])
cmd.append(".")
ok = subprocess.run(cmd).returncode == 0
if not ok:
raise JigError("Push failed")
console.print("\N{CHECK MARK} Pushed")

def build_and_push(self, tag: str = "latest", docker_args: str | None = None) -> None:
"""One-shot build + push via a single buildx invocation (no --load).

Used by deploy when warmup isn't requested: layers go from the buildkit cache
straight to the registry as zstd, skipping the daemon-side export entirely.
Falls back to separate build + push when JIG_DISABLE_BUILDX is set.
"""
image = self.image(tag)
if os.getenv("JIG_DISABLE_BUILDX"):
self.build(tag, False, docker_args)
self.push(tag)
return
builder = _ensure_zstd_builder()
if not builder:
raise JigError("`docker buildx` is required to build images.")

host = self.registry().split("/")[0]
login_cmd = ["docker", "login", host, "--username", "user", "--password-stdin"]
if subprocess.run(login_cmd, input=self.together.api_key, text=True).returncode != 0:
raise JigError("Registry login failed")

if not _dockerfile(self.config):
console.print(
f"\N{INFORMATION SOURCE} Using existing {self.config.image.dockerfile_path} (not managed by jig)"
)

console.print(f"Building and pushing {image}")
self._metadata_path(tag).unlink(missing_ok=True)
cmd = [
"docker",
"buildx",
"build",
"--builder",
builder,
"--platform",
"linux/amd64",
"--push",
"--output",
f"type=image,name={image},{BUILDX_OUTPUT_OPTS}",
"--metadata-file",
str(self._metadata_path(tag)),
]
if self.config.image.dockerfile_path != "Dockerfile":
cmd.extend(["-f", self.config.image.dockerfile_path])
extra_args = docker_args or os.getenv("DOCKER_BUILD_EXTRA_ARGS", "")
if extra_args:
cmd.extend(shlex.split(extra_args))
cmd.append(".")
if subprocess.run(cmd).returncode != 0:
raise JigError("Build+push failed")
console.print("\N{CHECK MARK} Built and pushed")

def deploy(
self,
tag: str = "latest",
Expand All @@ -651,8 +760,12 @@ def deploy(
elif deployment_image := self.config.deploy.image:
console.print(f"Deploying configured image {deployment_image}")
else:
self.build(tag, warmup, docker_args)
self.push(tag)
if warmup:
# warmup needs the image locally, so we can't use buildx — fall back to the plain docker build + push path.
self.build(tag, True, docker_args)
self.push(tag)
else:
self.build_and_push(tag, docker_args)
deployment_image = self.image_with_digest(tag)

if build_only:
Expand Down Expand Up @@ -1482,3 +1595,38 @@ def jig_volumes_delete_cli(
) -> None:
"""Delete a volume."""
_run_jig_cmd(config, toml_config, lambda jig: volumes_delete(jig, name))


# == Helpers ==


def _image_is_warmed(image: str) -> bool:
    """Report whether the locally stored ``image`` carries the label ``jig.warmed=true``.

    That label is written by the Dockerfile generated in _build_warm_image.
    Returns False when ``docker image inspect`` exits non-zero or when the
    label is absent or has any other value.
    """
    label_query = '{{index .Config.Labels "jig.warmed"}}'
    inspect_cmd = ["docker", "image", "inspect", image, "--format", label_query]
    result = subprocess.run(inspect_cmd, capture_output=True, text=True)
    if result.returncode != 0:
        return False
    return result.stdout.strip() == "true"


def _ensure_zstd_builder(name: str = "jig-zstd") -> str | None:
"""Return the name of a docker-container buildx builder, creating one if needed.

The default 'docker' buildx driver pushes through the docker daemon, which on
legacy (non-containerd) image stores silently downgrades zstd to gzip. A
docker-container builder has its own buildkit instance and honors zstd.
Returns None if buildx is unavailable or the builder cannot be created.
"""
if os.getenv("JIG_DISABLE_BUILDX"):
return None
if subprocess.run(["docker", "buildx", "version"], capture_output=True).returncode != 0:
return None
ls = subprocess.run(["docker", "buildx", "ls"], capture_output=True, text=True)
if name in (ls.stdout or ""):
return name
create = subprocess.run(
["docker", "buildx", "create", "--name", name, "--driver", "docker-container"],
capture_output=True,
)
return name if create.returncode == 0 else None
Loading