From f697188683cbd26f9ac8c6f0a1c353cd118c579d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CMatt?= Date: Thu, 14 May 2026 16:51:56 -0400 Subject: [PATCH 01/24] Switch from team mode to user mode Containers are now tracked per-user instead of per-team: ContainerInfoModel swaps team_id for user_id with an FK to users.id, the four player routes drop the team membership check and pass user.id, and the admin dashboard relabels the column accordingly. Co-Authored-By: Claude Opus 4.7 (1M context) --- __init__.py | 32 +++++++++++------------------- models.py | 6 +++--- templates/container_dashboard.html | 4 ++-- 3 files changed, 17 insertions(+), 25 deletions(-) diff --git a/__init__.py b/__init__.py index 3c90152..8836089 100644 --- a/__init__.py +++ b/__init__.py @@ -170,7 +170,7 @@ def kill_container(container_id): db.session.commit() return {"success": "Container killed"} - def renew_container(chal_id, team_id): + def renew_container(chal_id, user_id): # Get the requested challenge challenge = ContainerChallenge.challenge_model.query.filter_by( id=chal_id).first() @@ -180,7 +180,7 @@ def renew_container(chal_id, team_id): return {"error": "Challenge not found"}, 400 running_containers = ContainerInfoModel.query.filter_by( - challenge_id=challenge.id, team_id=team_id) + challenge_id=challenge.id, user_id=user_id) running_container = running_containers.first() if running_container is None: @@ -195,7 +195,7 @@ def renew_container(chal_id, team_id): return {"success": "Container renewed", "expires": running_container.expires} - def create_container(chal_id, team_id): + def create_container(chal_id, user_id): # Get the requested challenge challenge = ContainerChallenge.challenge_model.query.filter_by( id=chal_id).first() @@ -204,12 +204,12 @@ def create_container(chal_id, team_id): if challenge is None: return {"error": "Challenge not found"}, 400 - # Check for any existing containers for the team + # Check for any existing containers for the user running_containers = 
ContainerInfoModel.query.filter_by( - challenge_id=challenge.id, team_id=team_id) + challenge_id=challenge.id, user_id=user_id) running_container = running_containers.first() - # If a container is already running for the team, return it + # If a container is already running for the user, return it if running_container: # Check if Docker says the container is still running before returning it try: @@ -254,7 +254,7 @@ def create_container(chal_id, team_id): new_container = ContainerInfoModel( container_id=created_container.id, challenge_id=challenge.id, - team_id=team_id, + user_id=user_id, port=port, timestamp=int(time.time()), expires=expires @@ -286,11 +286,9 @@ def route_request_container(): if user is None: return {"error": "User not found"}, 400 - if user.team is None: - return {"error": "User not a member of a team"}, 400 try: - return create_container(request.json.get("chal_id"), user.team.id) + return create_container(request.json.get("chal_id"), user.id) except ContainerException as err: return {"error": str(err)}, 500 @@ -311,11 +309,9 @@ def route_renew_container(): if user is None: return {"error": "User not found"}, 400 - if user.team is None: - return {"error": "User not a member of a team"}, 400 try: - return renew_container(request.json.get("chal_id"), user.team.id) + return renew_container(request.json.get("chal_id"), user.id) except ContainerException as err: return {"error": str(err)}, 500 @@ -336,16 +332,14 @@ def route_restart_container(): if user is None: return {"error": "User not found"}, 400 - if user.team is None: - return {"error": "User not a member of a team"}, 400 running_container: ContainerInfoModel = ContainerInfoModel.query.filter_by( - challenge_id=request.json.get("chal_id"), team_id=user.team.id).first() + challenge_id=request.json.get("chal_id"), user_id=user.id).first() if running_container: kill_container(running_container.container_id) - return create_container(request.json.get("chal_id"), user.team.id) + return 
create_container(request.json.get("chal_id"), user.id) @containers_bp.route('/api/stop', methods=['POST']) @authed_only @@ -364,11 +358,9 @@ def route_stop_container(): if user is None: return {"error": "User not found"}, 400 - if user.team is None: - return {"error": "User not a member of a team"}, 400 running_container: ContainerInfoModel = ContainerInfoModel.query.filter_by( - challenge_id=request.json.get("chal_id"), team_id=user.team.id).first() + challenge_id=request.json.get("chal_id"), user_id=user.id).first() if running_container: return kill_container(running_container.container_id) diff --git a/models.py b/models.py index 1a6c6e9..410e669 100644 --- a/models.py +++ b/models.py @@ -31,13 +31,13 @@ class ContainerInfoModel(db.Model): challenge_id = db.Column( db.Integer, db.ForeignKey("challenges.id", ondelete="CASCADE") ) - team_id = db.Column( - db.Integer, db.ForeignKey("teams.id", ondelete="CASCADE") + user_id = db.Column( + db.Integer, db.ForeignKey("users.id", ondelete="CASCADE") ) port = db.Column(db.Integer) timestamp = db.Column(db.Integer) expires = db.Column(db.Integer) - team = relationship("Teams", foreign_keys=[team_id]) + user = relationship("Users", foreign_keys=[user_id]) challenge = relationship(ContainerChallengeModel, foreign_keys=[challenge_id]) diff --git a/templates/container_dashboard.html b/templates/container_dashboard.html index 2549a5e..fb670da 100644 --- a/templates/container_dashboard.html +++ b/templates/container_dashboard.html @@ -50,7 +50,7 @@

Containers

Challenge - Team + User Port @@ -70,7 +70,7 @@

Containers

{{ c.container_id[:12] }} {{ c.challenge.image }} {{ c.challenge.name }} [{{ c.challenge_id }}] - {{ c.team.name }} [{{ c.team_id }}] + {{ c.user.name }} [{{ c.user_id }}] {{ c.port }} {{ c.timestamp|format_time }} {{ c.expires|format_time }} From 6de0726acec97f41f190d81fbd0805db00514f15 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CMatt?= Date: Thu, 14 May 2026 20:18:40 -0400 Subject: [PATCH 02/24] Add Azure Container Instances backend with UAMI-based ACR pull Introduces a second container backend selectable from the admin Settings page. Choosing "Azure Container Instances" provisions a container group per challenge attempt via azure-mgmt-containerinstance, authenticates to Azure with DefaultAzureCredential, and attaches a user-assigned managed identity to each spawned group so it can pull private images from ACR without storing registry credentials. Schema gains a nullable `hostname` column on ContainerInfoModel so each ACI container's per-group DNS name can be returned to the player; the Docker backend still falls back to the global docker_hostname setting. Settings page is restructured around a backend selector with show/hide between Docker and Azure fieldsets. Also includes a handful of defensive fixes the design review flagged: the manager exposes a shutdown() method that the settings route now calls before swapping backends (was leaking BackgroundScheduler instances on every save); the ACI poller has an explicit 300s timeout and 5s polling interval; all previously silent except blocks log to stdout; the expired-container sweep commits once per pass instead of per row; and the admin kill route no longer crashes when the DB row has already been removed. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- README.md | 43 ++++ __init__.py | 147 +++++--------- assets/view.js | 7 + container_manager.py | 8 + container_manager_aci.py | 326 ++++++++++++++++++++++++++++++ models.py | 1 + requirements.txt | 5 +- templates/container_settings.html | 157 +++++++++----- 8 files changed, 551 insertions(+), 143 deletions(-) create mode 100644 container_manager_aci.py diff --git a/README.md b/README.md index fdd952f..b113977 100644 --- a/README.md +++ b/README.md @@ -17,3 +17,46 @@ When a user clicks on a container challenge, a button labeled "Get Connection In ![Challenge dialog](dialog.png) A note, we used hidden teams as non-school teams in PCTF 2022 so if you want them to count for decreasing the dynamic challenge points, you need to remove the `Model.hidden == False,` line from the `calculate_value` function in `__init__.py`. + +## Azure Container Instances backend + +This fork supports running challenge containers on **Azure Container Instances (ACI)** instead of (or alongside) a Docker daemon. Pick **Azure Container Instances** under Backend on the settings page to switch. + +### Azure prerequisites + +1. **Resource group** for challenge container groups, e.g. `ctfd-challenges`. +2. **Azure Container Registry** with your private images. +3. **User-assigned managed identity** (referred to as the *puller* UAMI) with the **AcrPull** role on the ACR. This UAMI is attached to every spawned container group so it can pull private images without storing registry credentials in CTFd. +4. **An identity for CTFd itself** that can call ARM. Two options: + - If CTFd runs on Azure (ACI, AKS, VM, App Service): assign it a managed identity with **Contributor** on the challenge resource group and **AcrPull** on the ACR. + - If CTFd runs elsewhere (e.g. Docker on a laptop): create a **service principal** and set `AZURE_TENANT_ID`, `AZURE_CLIENT_ID`, `AZURE_CLIENT_SECRET` in CTFd's environment. 
The plugin uses `DefaultAzureCredential` so any of these auth methods are picked up automatically. + +### Plugin settings + +Fill in on the Settings page (only the Azure section is required when the backend is `aci`): + +- **Subscription ID** — your Azure subscription +- **Resource Group** — where container groups are created (must already exist) +- **Region** — single region for all challenges, e.g. `eastus` +- **UAMI Resource ID** — full ARM ID of the puller identity, like `/subscriptions/.../userAssignedIdentities/ctfd-puller` +- **DNS Label Prefix** — used to name container groups and DNS labels, e.g. `ctfd` → `ctfd-abc12345.eastus.azurecontainer.io` +- **ACR Login Server** — e.g. `myregistry.azurecr.io` + +### Migrating from upstream / prior installs + +This fork changes the plugin's schema (user mode instead of team mode; per-container hostname). If you had the upstream plugin installed before, `db.create_all()` will **not** ALTER existing tables. Drop them before first start so SQLAlchemy can recreate cleanly: + +```sql +DROP TABLE IF EXISTS container_info; +DROP TABLE IF EXISTS container_settings; +DROP TABLE IF EXISTS container_challenge_model; +``` + +Connect to your CTFd database (MariaDB/MySQL or SQLite depending on your setup) and run those. You'll lose existing container-challenge records and admin Settings; reconfigure on the Settings page after restart. + +### Notes & caveats + +- Provisioning a container group takes ~30-60 seconds; the "Get Connection Info" button shows a `Provisioning…` state while it waits. +- The volumes field on challenges is **ignored** in ACI mode — host-path mounts don't translate to ACI. +- Commands are parsed via `shlex.split`. For complex commands, use `sh -c "your full command line"`. +- The image dropdown is populated by listing repos/tags from the ACR. The CTFd identity needs **AcrPull** on the ACR for this to work. 
diff --git a/__init__.py b/__init__.py index 8836089..fd91b4b 100644 --- a/__init__.py +++ b/__init__.py @@ -16,6 +16,13 @@ from .models import ContainerChallengeModel, ContainerInfoModel, ContainerSettingsModel from .container_manager import ContainerManager, ContainerException +from .container_manager_aci import ACIContainerManager + + +def make_container_manager(settings, app): + if settings.get("backend", "docker") == "aci": + return ACIContainerManager(settings, app) + return ContainerManager(settings, app) class ContainerChallenge(BaseChallenge): @@ -145,7 +152,7 @@ def load(app: Flask): ) container_settings = settings_to_dict(ContainerSettingsModel.query.all()) - container_manager = ContainerManager(container_settings, app) + container_manager = make_container_manager(container_settings, app) containers_bp = Blueprint( 'containers', __name__, template_folder='templates', static_folder='assets', url_prefix='/containers') @@ -165,9 +172,9 @@ def kill_container(container_id): except ContainerException: return {"error": "Docker is not initialized. 
Please check your settings."} - db.session.delete(container) - - db.session.commit() + if container is not None: + db.session.delete(container) + db.session.commit() return {"success": "Container killed"} def renew_container(chal_id, user_id): @@ -217,7 +224,7 @@ def create_container(chal_id, user_id): running_container.container_id): return json.dumps({ "status": "already_running", - "hostname": container_manager.settings.get("docker_hostname", ""), + "hostname": running_container.hostname or container_manager.settings.get("docker_hostname", ""), "port": running_container.port, "expires": running_container.expires }) @@ -256,6 +263,7 @@ def create_container(chal_id, user_id): challenge_id=challenge.id, user_id=user_id, port=port, + hostname=getattr(created_container, "hostname", None), timestamp=int(time.time()), expires=expires ) @@ -264,7 +272,7 @@ def create_container(chal_id, user_id): return json.dumps({ "status": "created", - "hostname": container_manager.settings.get("docker_hostname", ""), + "hostname": new_container.hostname or container_manager.settings.get("docker_hostname", ""), "port": port, "expires": expires }) @@ -402,99 +410,50 @@ def route_get_images(): @containers_bp.route('/api/settings/update', methods=['POST']) @admins_only def route_update_settings(): - if request.form.get("docker_base_url") is None: - return {"error": "Invalid request"}, 400 - - if request.form.get("docker_hostname") is None: - return {"error": "Invalid request"}, 400 - - if request.form.get("container_expiration") is None: - return {"error": "Invalid request"}, 400 - - if request.form.get("container_maxmemory") is None: - return {"error": "Invalid request"}, 400 - - if request.form.get("container_maxcpu") is None: - return {"error": "Invalid request"}, 400 + nonlocal container_manager + + settable_keys = ( + "backend", + "docker_base_url", + "docker_hostname", + "container_expiration", + "container_maxmemory", + "container_maxcpu", + "azure_subscription_id", + 
"azure_resource_group", + "azure_region", + "azure_uami_resource_id", + "azure_dns_label_prefix", + "acr_login_server", + ) - docker_base_url = ContainerSettingsModel.query.filter_by( - key="docker_base_url").first() - - docker_hostname = ContainerSettingsModel.query.filter_by( - key="docker_hostname").first() - - container_expiration = ContainerSettingsModel.query.filter_by( - key="container_expiration").first() - - container_maxmemory = ContainerSettingsModel.query.filter_by( - key="container_maxmemory").first() - - container_maxcpu = ContainerSettingsModel.query.filter_by( - key="container_maxcpu").first() - - # Create or update - if docker_base_url is None: - # Create - docker_base_url = ContainerSettingsModel( - key="docker_base_url", value=request.form.get("docker_base_url")) - db.session.add(docker_base_url) - else: - # Update - docker_base_url.value = request.form.get("docker_base_url") - - # Create or update - if docker_hostname is None: - # Create - docker_hostname = ContainerSettingsModel( - key="docker_hostname", value=request.form.get("docker_hostname")) - db.session.add(docker_hostname) - else: - # Update - docker_hostname.value = request.form.get("docker_hostname") - - # Create or update - if container_expiration is None: - # Create - container_expiration = ContainerSettingsModel( - key="container_expiration", value=request.form.get("container_expiration")) - db.session.add(container_expiration) - else: - # Update - container_expiration.value = request.form.get( - "container_expiration") - - # Create or update - if container_maxmemory is None: - # Create - container_maxmemory = ContainerSettingsModel( - key="container_maxmemory", value=request.form.get("container_maxmemory")) - db.session.add(container_maxmemory) - else: - # Update - container_maxmemory.value = request.form.get("container_maxmemory") - - # Create or update - if container_maxcpu is None: - # Create - container_maxcpu = ContainerSettingsModel( - key="container_maxcpu", 
value=request.form.get("container_maxcpu")) - db.session.add(container_maxcpu) - else: - # Update - container_maxcpu.value = request.form.get("container_maxcpu") + for key in settable_keys: + value = request.form.get(key) + if value is None: + continue + row = ContainerSettingsModel.query.filter_by(key=key).first() + if row is None: + db.session.add(ContainerSettingsModel(key=key, value=value)) + else: + row.value = value db.session.commit() - container_manager.settings = settings_to_dict( - ContainerSettingsModel.query.all()) + # Cleanly shut down the previous manager (stops its expiration scheduler) + # before replacing it, so we don't leak duplicate sweepers on every save. + try: + container_manager.shutdown() + except Exception as err: + print(f"[CTFd] previous manager shutdown failed: {err}") + + new_settings = settings_to_dict(ContainerSettingsModel.query.all()) + container_manager = make_container_manager(new_settings, app) - if container_manager.settings.get("docker_base_url") is not None: - try: - container_manager.initialize_connection( - container_manager.settings, app) - except ContainerException as err: - flash(str(err), "error") - return redirect(url_for(".route_containers_settings")) + try: + container_manager.initialize_connection(new_settings, app) + except ContainerException as err: + flash(str(err), "error") + return redirect(url_for(".route_containers_settings")) return redirect(url_for(".route_containers_dashboard")) diff --git a/assets/view.js b/assets/view.js index 9b39875..babccfa 100644 --- a/assets/view.js +++ b/assets/view.js @@ -62,7 +62,9 @@ function container_request(challenge_id) { ); var requestError = document.getElementById("container-request-error"); + var originalLabel = requestButton.innerHTML; requestButton.setAttribute("disabled", "disabled"); + requestButton.innerHTML = "Provisioning… (may take up to a minute)"; var xhr = new XMLHttpRequest(); xhr.open("POST", path, true); @@ -76,11 +78,13 @@ function 
container_request(challenge_id) { // Container error requestError.style.display = ""; requestError.firstElementChild.innerHTML = data.error; + requestButton.innerHTML = originalLabel; requestButton.removeAttribute("disabled"); } else if (data.message !== undefined) { // CTFd error requestError.style.display = ""; requestError.firstElementChild.innerHTML = data.message; + requestButton.innerHTML = originalLabel; requestButton.removeAttribute("disabled"); } else { // Success @@ -111,7 +115,9 @@ function container_reset(challenge_id) { var connectionInfo = document.getElementById("container-connection-info"); var requestError = document.getElementById("container-request-error"); + var originalLabel = resetButton.innerHTML; resetButton.setAttribute("disabled", "disabled"); + resetButton.innerHTML = "Provisioning…"; var xhr = new XMLHttpRequest(); xhr.open("POST", path, true); @@ -121,6 +127,7 @@ function container_reset(challenge_id) { xhr.send(JSON.stringify({ chal_id: challenge_id })); xhr.onload = function () { var data = JSON.parse(this.responseText); + resetButton.innerHTML = originalLabel; if (data.error !== undefined) { // Container rrror requestError.style.display = ""; diff --git a/container_manager.py b/container_manager.py index 6919b72..cc0c623 100644 --- a/container_manager.py +++ b/container_manager.py @@ -222,3 +222,11 @@ def is_connected(self) -> bool: except: return False return True + + def shutdown(self) -> None: + try: + scheduler = getattr(self, "expiration_scheduler", None) + if scheduler is not None: + scheduler.shutdown(wait=False) + except (SchedulerNotRunningError, AttributeError): + pass diff --git a/container_manager_aci.py b/container_manager_aci.py new file mode 100644 index 0000000..629d147 --- /dev/null +++ b/container_manager_aci.py @@ -0,0 +1,326 @@ +import atexit +import shlex +import time +import uuid + +from flask import Flask +from apscheduler.schedulers.background import BackgroundScheduler +from apscheduler.schedulers import 
SchedulerNotRunningError +from azure.identity import DefaultAzureCredential +from azure.core.exceptions import HttpResponseError, ResourceNotFoundError +from azure.mgmt.containerinstance import ContainerInstanceManagementClient +from azure.mgmt.containerinstance.models import ( + ContainerGroup, + Container, + ResourceRequests, + ResourceRequirements, + OperatingSystemTypes, + ContainerGroupRestartPolicy, + IpAddress, + Port, + ContainerPort, + ImageRegistryCredential, + ContainerGroupIdentity, + ResourceIdentityType, +) +from azure.containerregistry import ContainerRegistryClient + +from CTFd.models import db +from .models import ContainerInfoModel +from .container_manager import ContainerException + + +class _CreatedContainer: + """Stand-in for the docker SDK Container object so calling code stays uniform.""" + + def __init__(self, id: str, hostname: str, port: int): + self.id = id + self.hostname = hostname + self.port = port + + +REQUIRED_SETTINGS = ( + "azure_subscription_id", + "azure_resource_group", + "azure_region", + "azure_uami_resource_id", +) + + +class ACIContainerManager: + def __init__(self, settings, app): + self.settings = settings + self.app = app + self.client = None + self.acr_client = None + self.credential = None + self.expiration_seconds = 0 + self.expiration_scheduler = None + + if not self._has_required_settings(): + return + + try: + self.initialize_connection(settings, app) + except ContainerException: + print("ACI could not initialize or connect.") + + def _has_required_settings(self) -> bool: + return all(self.settings.get(k) for k in REQUIRED_SETTINGS) + + def initialize_connection(self, settings, app) -> None: + self.settings = settings + self.app = app + + try: + if self.expiration_scheduler is not None: + self.expiration_scheduler.shutdown() + except (SchedulerNotRunningError, AttributeError): + pass + + if not self._has_required_settings(): + self.client = None + return + + try: + self.credential = DefaultAzureCredential() + 
self.client = ContainerInstanceManagementClient( + self.credential, settings["azure_subscription_id"] + ) + except Exception as e: + self.client = None + raise ContainerException(f"CTFd could not connect to Azure: {e}") + + try: + self.expiration_seconds = int(settings.get("container_expiration", 0)) * 60 + except (ValueError, AttributeError): + self.expiration_seconds = 0 + + EXPIRATION_CHECK_INTERVAL = 5 + if self.expiration_seconds > 0: + self.expiration_scheduler = BackgroundScheduler() + self.expiration_scheduler.add_job( + func=self.kill_expired_containers, + args=(app,), + trigger="interval", + seconds=EXPIRATION_CHECK_INTERVAL, + ) + self.expiration_scheduler.start() + atexit.register(lambda: self.expiration_scheduler.shutdown()) + + def is_connected(self) -> bool: + if self.client is None: + return False + try: + next( + iter( + self.client.container_groups.list_by_resource_group( + self.settings["azure_resource_group"] + ) + ), + None, + ) + return True + except Exception as e: + print(f"[CTFd-ACI] is_connected failed: {e}") + return False + + def shutdown(self) -> None: + try: + if self.expiration_scheduler is not None: + self.expiration_scheduler.shutdown(wait=False) + except (SchedulerNotRunningError, AttributeError): + pass + + def is_container_running(self, container_id: str) -> bool: + if self.client is None: + return False + try: + cg = self.client.container_groups.get( + self.settings["azure_resource_group"], container_id + ) + except ResourceNotFoundError: + return False + except Exception as e: + print(f"[CTFd-ACI] is_container_running({container_id}) failed: {e}") + return False + + if cg.provisioning_state != "Succeeded": + return False + if not cg.containers: + return False + inst = cg.containers[0].instance_view + if inst is None or inst.current_state is None: + return False + return inst.current_state.state == "Running" + + def kill_expired_containers(self, app: Flask): + with app.app_context(): + now = int(time.time()) + containers = 
ContainerInfoModel.query.all() + deleted = False + for container in containers: + if container.expires - now < 0: + try: + self.kill_container(container.container_id) + except ContainerException as e: + print(f"[CTFd-ACI] kill_expired_containers: {e}") + db.session.delete(container) + deleted = True + if deleted: + db.session.commit() + + def create_container(self, image: str, port: int, command: str, volumes: str): + if self.client is None: + raise ContainerException("ACI client is not initialized") + + rg = self.settings["azure_resource_group"] + region = self.settings["azure_region"] + uami = self.settings["azure_uami_resource_id"] + login_server = self.settings.get("acr_login_server", "") + dns_prefix = self.settings.get("azure_dns_label_prefix", "ctfd") + + cpu = 1.0 + memory_gb = 1.5 + try: + mem_mb = int(self.settings.get("container_maxmemory") or 0) + if mem_mb > 0: + memory_gb = max(0.1, mem_mb / 1024) + except ValueError: + pass + try: + cpu_setting = float(self.settings.get("container_maxcpu") or 0) + if cpu_setting > 0: + cpu = cpu_setting + except ValueError: + pass + + unique = uuid.uuid4().hex[:8] + group_name = f"{dns_prefix}-{unique}"[:63].rstrip("-") + dns_label = group_name + + command_list = None + if command: + try: + command_list = shlex.split(command) + except ValueError: + command_list = [command] + + identity = ContainerGroupIdentity( + type=ResourceIdentityType.USER_ASSIGNED, + user_assigned_identities={uami: {}}, + ) + image_registry_creds = [] + if login_server: + image_registry_creds.append( + ImageRegistryCredential(server=login_server, identity=uami) + ) + + container = Container( + name="challenge", + image=image, + command=command_list, + resources=ResourceRequirements( + requests=ResourceRequests(memory_in_gb=memory_gb, cpu=cpu) + ), + ports=[ContainerPort(port=port)], + ) + + ip_address = IpAddress( + type="Public", + ports=[Port(port=port, protocol="TCP")], + dns_name_label=dns_label, + ) + + group = ContainerGroup( + 
location=region, + containers=[container], + os_type=OperatingSystemTypes.LINUX, + restart_policy=ContainerGroupRestartPolicy.NEVER, + ip_address=ip_address, + image_registry_credentials=image_registry_creds or None, + identity=identity, + ) + + try: + poller = self.client.container_groups.begin_create_or_update( + rg, group_name, group, polling_interval=5 + ) + created = poller.result(timeout=300) + except HttpResponseError as e: + raise ContainerException(f"ACI create failed: {e.message}") + except Exception as e: + raise ContainerException(f"ACI create failed: {e}") + + if created.ip_address is None or created.ip_address.fqdn is None: + raise ContainerException("ACI container group has no public FQDN") + + return _CreatedContainer( + id=group_name, hostname=created.ip_address.fqdn, port=port + ) + + def get_container_port(self, container_id: str): + if self.client is None: + return None + try: + cg = self.client.container_groups.get( + self.settings["azure_resource_group"], container_id + ) + if cg.containers and cg.containers[0].ports: + return str(cg.containers[0].ports[0].port) + except ResourceNotFoundError: + return None + except Exception as e: + print(f"[CTFd-ACI] get_container_port({container_id}) failed: {e}") + return None + return None + + def get_container_hostname(self, container_id: str): + if self.client is None: + return None + try: + cg = self.client.container_groups.get( + self.settings["azure_resource_group"], container_id + ) + if cg.ip_address and cg.ip_address.fqdn: + return cg.ip_address.fqdn + except ResourceNotFoundError: + return None + except Exception as e: + print(f"[CTFd-ACI] get_container_hostname({container_id}) failed: {e}") + return None + return None + + def get_images(self): + login_server = self.settings.get("acr_login_server", "") + if not login_server or self.credential is None: + return [] + try: + registry = ContainerRegistryClient( + f"https://{login_server}", self.credential + ) + images = [] + for repo in 
registry.list_repository_names(): + try: + for tag in registry.list_tag_properties(repo): + images.append(f"{login_server}/{repo}:{tag.name}") + except Exception as e: + print(f"[CTFd-ACI] list_tag_properties({repo}) failed: {e}") + continue + images.sort() + return images + except Exception as e: + print(f"[CTFd-ACI] get_images failed: {e}") + return [] + + def kill_container(self, container_id: str): + if self.client is None: + raise ContainerException("ACI client is not initialized") + try: + self.client.container_groups.begin_delete( + self.settings["azure_resource_group"], container_id + ) + except ResourceNotFoundError: + pass + except Exception as e: + raise ContainerException(f"ACI delete failed: {e}") diff --git a/models.py b/models.py index 410e669..cbefd0c 100644 --- a/models.py +++ b/models.py @@ -35,6 +35,7 @@ class ContainerInfoModel(db.Model): db.Integer, db.ForeignKey("users.id", ondelete="CASCADE") ) port = db.Column(db.Integer) + hostname = db.Column(db.String(512), nullable=True) timestamp = db.Column(db.Integer) expires = db.Column(db.Integer) user = relationship("Users", foreign_keys=[user_id]) diff --git a/requirements.txt b/requirements.txt index 520f9c8..cdde8af 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,6 @@ docker paramiko -apscheduler \ No newline at end of file +apscheduler +azure-identity +azure-mgmt-containerinstance +azure-containerregistry diff --git a/templates/container_settings.html b/templates/container_settings.html index 704a784..4773e05 100644 --- a/templates/container_settings.html +++ b/templates/container_settings.html @@ -2,7 +2,7 @@ {% block content %}
-

Docker Config

+

Container Backend Config

@@ -16,69 +16,130 @@

Docker Config

{% endif %} {% endwith %}
-
+
+
- - -
-
- - -
-
- - -
-
- - -
-
- - + +
+ +
+ Docker Settings +
+ + +
+
+ + +
+
+ +
+ Azure Container Instances +

+ CTFd authenticates to Azure via DefaultAzureCredential + (Managed Identity when running on Azure, or + AZURE_TENANT_ID/AZURE_CLIENT_ID/AZURE_CLIENT_SECRET + env vars for a service principal). The CTFd identity needs + Contributor on the target resource group and + AcrPull on the ACR. The UAMI below is attached + to each spawned container group and pulls private images on its + behalf — it needs AcrPull on the ACR. +

+
+ + +
+
+ + +
+
+ + +
+
+ + +
+
+ + + Containers get DNS like + <prefix>-<random>.<region>.azurecontainer.io +
+
+ + +
+
+ +
+ Resource Limits +
+ + +
+
+ + +
+
+ + +
+
+
Cancel
+ +
- -
-

Instructions

-

- The Base URL should be the local socket address of the Docker daemon, i.e. - unix://var/run/docker.sock, or it can be a remote SSH address, e.g. - ssh://root@example.com. In either case, sudo will not be executed. For a local socket, the user - CTFd is running as should have permissions for Docker; for SSH connections, the SSH user in the Base URL should - be root or have Docker permissions. -

{% endblock content %} {% block scripts %} -{% endblock scripts %} \ No newline at end of file +{% endblock scripts %} From e7bbdf93ba2df00057821e1dddd2219490b4e6a6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CMatt?= Date: Fri, 15 May 2026 07:59:56 -0400 Subject: [PATCH 03/24] Provision containers asynchronously with a polling status endpoint MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The synchronous request flow couldn't survive a 30-60 second ACI provision through CTFd's stock Gunicorn (30s default timeout) and nginx (60s default proxy_read_timeout), let alone Cloudflare's 100s edge limit. POST /containers/api/request now writes a placeholder row with status="provisioning", spawns a daemon thread to call the backend, and returns HTTP 202 with the row id. A new GET /api/status/ lets the frontend poll every 3 seconds (up to 3 minutes) until the row flips to running or failed. ContainerInfoModel switches to an auto-increment id primary key so the row exists before the backend has assigned a container_id; container_id, port, and hostname are nullable until provisioning completes; status and error_message are new columns. A unique constraint on (challenge_id, user_id) closes the TOCTOU window — concurrent requests catch IntegrityError and return the winning row. If a user stops or resets while provisioning is still in flight, the worker re-fetches the row after create succeeds; finding it gone, it kills the just-created container so we don't leak (and pay for) an orphan ACI group. Admin /api/kill, /api/purge, /api/stop, and the admin dashboard template now handle rows where container_id is still None (because the backend hasn't been called yet), and the dashboard adds a Status column showing provisioning / running / failed. The view.js request and reset handlers are rewritten in fetch with a polling loop; renew and stop stay XHR since they're fast operations. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- README.md | 2 +- __init__.py | 290 +++++++++++++++++++---------- assets/view.js | 227 +++++++++++++--------- container_manager.py | 21 ++- container_manager_aci.py | 9 +- models.py | 11 +- templates/container_dashboard.html | 25 ++- 7 files changed, 373 insertions(+), 212 deletions(-) diff --git a/README.md b/README.md index b113977..b00266f 100644 --- a/README.md +++ b/README.md @@ -56,7 +56,7 @@ Connect to your CTFd database (MariaDB/MySQL or SQLite depending on your setup) ### Notes & caveats -- Provisioning a container group takes ~30-60 seconds; the "Get Connection Info" button shows a `Provisioning…` state while it waits. +- Provisioning a container group takes ~30-60 seconds. The "Get Connection Info" button shows a `Provisioning…` state while the frontend polls `/containers/api/status/<id>` every 3 seconds. The original POST returns immediately with HTTP 202, so this works behind reverse proxies that have short response timeouts (default Gunicorn 30s, default nginx 60s, Cloudflare free tier 100s). - The volumes field on challenges is **ignored** in ACI mode — host-path mounts don't translate to ACI. - Commands are parsed via `shlex.split`. For complex commands, use `sh -c "your full command line"`. - The image dropdown is populated by listing repos/tags from the ACR. The CTFd identity needs **AcrPull** on the ACR for this to work. 
diff --git a/__init__.py b/__init__.py index fd91b4b..183835c 100644 --- a/__init__.py +++ b/__init__.py @@ -4,8 +4,10 @@ import json import datetime import math +import threading from flask import Blueprint, request, Flask, render_template, url_for, redirect, flash +from sqlalchemy.exc import IntegrityError from CTFd.models import db, Solves from CTFd.plugins import register_plugin_assets_directory @@ -202,80 +204,101 @@ def renew_container(chal_id, user_id): return {"success": "Container renewed", "expires": running_container.expires} - def create_container(chal_id, user_id): - # Get the requested challenge - challenge = ContainerChallenge.challenge_model.query.filter_by( - id=chal_id).first() - - # Make sure the challenge exists and is a container challenge - if challenge is None: - return {"error": "Challenge not found"}, 400 - - # Check for any existing containers for the user - running_containers = ContainerInfoModel.query.filter_by( - challenge_id=challenge.id, user_id=user_id) - running_container = running_containers.first() + def _running_response(row: ContainerInfoModel): + return { + "status": "running", + "id": row.id, + "hostname": row.hostname or container_manager.settings.get("docker_hostname", ""), + "port": row.port, + "expires": row.expires, + } - # If a container is already running for the user, return it - if running_container: - # Check if Docker says the container is still running before returning it + def _provision_async(manager, row_id, image, internal_port, command, volumes, expiration_seconds): + with app.app_context(): + if ContainerInfoModel.query.get(row_id) is None: + return try: - if container_manager.is_container_running( - running_container.container_id): - return json.dumps({ - "status": "already_running", - "hostname": running_container.hostname or container_manager.settings.get("docker_hostname", ""), - "port": running_container.port, - "expires": running_container.expires - }) - else: - # Container is not running, it must have 
died or been killed, - # remove it from the database and create a new one - running_containers.delete() + created = manager.create_container(image, internal_port, command, volumes) + except ContainerException as e: + row = ContainerInfoModel.query.get(row_id) + if row is not None: + row.status = "failed" + row.error_message = str(e)[:1000] db.session.commit() - except ContainerException as err: - return {"error": str(err)}, 500 - - # TODO: Should insert before creating container, then update. That would avoid a TOCTOU issue - - # Run a new Docker container - try: - created_container = container_manager.create_container( - challenge.image, challenge.port, challenge.command, challenge.volumes) - except ContainerException as err: - return {"error": str(err)} - - # Fetch the random port Docker assigned - port = container_manager.get_container_port(created_container.id) - - # Port may be blank if the container failed to start - if port is None: - return json.dumps({ - "status": "error", - "error": "Could not get port" - }) + print(f"[CTFd] provision failed for row {row_id}: {e}") + return + except Exception as e: + row = ContainerInfoModel.query.get(row_id) + if row is not None: + row.status = "failed" + row.error_message = str(e)[:1000] + db.session.commit() + print(f"[CTFd] provision exception for row {row_id}: {e}") + return - expires = int(time.time() + container_manager.expiration_seconds) + # Re-fetch in case the user stopped/reset the request while we were + # blocked on the backend. If the row is gone, the user no longer + # wants this container — kill it so we don't leak (and pay for) it. 
+ row = ContainerInfoModel.query.get(row_id) + if row is None: + try: + manager.kill_container(created.id) + except Exception as e: + print(f"[CTFd] orphan cleanup failed for {created.id}: {e}") + return + + row.container_id = created.id + row.hostname = getattr(created, "hostname", None) + host_port = None + try: + host_port = manager.get_container_port(created.id) + except Exception as e: + print(f"[CTFd] get_container_port failed for {created.id}: {e}") + if host_port is None: + row.port = internal_port + else: + try: + row.port = int(host_port) + except (TypeError, ValueError): + row.port = internal_port + if expiration_seconds > 0: + row.expires = int(time.time() + expiration_seconds) + row.status = "running" + row.error_message = None + db.session.commit() - # Insert the new container into the database - new_container = ContainerInfoModel( - container_id=created_container.id, + def _spawn_new(challenge, user_id): + now = int(time.time()) + initial_expires = now + (container_manager.expiration_seconds or 3600) + row = ContainerInfoModel( challenge_id=challenge.id, user_id=user_id, - port=port, - hostname=getattr(created_container, "hostname", None), - timestamp=int(time.time()), - expires=expires + status="provisioning", + timestamp=now, + expires=initial_expires, ) - db.session.add(new_container) - db.session.commit() - - return json.dumps({ - "status": "created", - "hostname": new_container.hostname or container_manager.settings.get("docker_hostname", ""), - "port": port, - "expires": expires - }) + db.session.add(row) + try: + db.session.commit() + except IntegrityError: + db.session.rollback() + existing = ContainerInfoModel.query.filter_by( + challenge_id=challenge.id, user_id=user_id).first() + if existing: + if existing.status == "running": + return _running_response(existing), 200 + return {"status": existing.status, "id": existing.id}, 202 + return {"error": "Concurrent request collision"}, 500 + + threading.Thread( + target=_provision_async, + 
args=(container_manager, row.id, challenge.image, challenge.port, + challenge.command, challenge.volumes, + container_manager.expiration_seconds), + daemon=True, + ).start() + + return {"status": "provisioning", "id": row.id}, 202 @containers_bp.route('/api/request', methods=['POST']) @authed_only @@ -285,20 +308,58 @@ def create_container(chal_id, user_id): def route_request_container(): user = get_current_user() - # Validate the request if request.json is None: return {"error": "Invalid request"}, 400 - - if request.json.get("chal_id", None) is None: + chal_id = request.json.get("chal_id") + if chal_id is None: return {"error": "No chal_id specified"}, 400 - if user is None: return {"error": "User not found"}, 400 - try: - return create_container(request.json.get("chal_id"), user.id) - except ContainerException as err: - return {"error": str(err)}, 500 + challenge = ContainerChallenge.challenge_model.query.filter_by(id=chal_id).first() + if challenge is None: + return {"error": "Challenge not found"}, 400 + + existing = ContainerInfoModel.query.filter_by( + challenge_id=chal_id, user_id=user.id).first() + + if existing: + if existing.status == "running": + try: + if container_manager.is_container_running(existing.container_id): + return _running_response(existing), 200 + db.session.delete(existing) + db.session.commit() + except ContainerException as err: + return {"error": str(err)}, 500 + elif existing.status == "provisioning": + return {"status": "provisioning", "id": existing.id}, 202 + elif existing.status == "failed": + # Allow retry by clearing the failed row. 
+ db.session.delete(existing) + db.session.commit() + + return _spawn_new(challenge, user.id) + + @containers_bp.route('/api/status/<int:row_id>', methods=['GET']) + @authed_only + @during_ctf_time_only + @require_verified_emails + @ratelimit(method="GET", limit=120, interval=60) + def route_container_status(row_id): + user = get_current_user() + if user is None: + return {"error": "User not found"}, 400 + row = ContainerInfoModel.query.get(row_id) + if row is None: + return {"error": "Not found"}, 404 + if row.user_id != user.id: + return {"error": "Forbidden"}, 403 + if row.status == "running": + return _running_response(row), 200 + if row.status == "failed": + return {"status": "failed", "error": row.error_message or "Provisioning failed"}, 200 + return {"status": "provisioning", "id": row.id}, 202 @containers_bp.route('/api/renew', methods=['POST']) @authed_only @@ -331,23 +392,31 @@ def route_renew_container(): def route_restart_container(): user = get_current_user() - # Validate the request if request.json is None: return {"error": "Invalid request"}, 400 - - if request.json.get("chal_id", None) is None: + chal_id = request.json.get("chal_id") + if chal_id is None: return {"error": "No chal_id specified"}, 400 - if user is None: return {"error": "User not found"}, 400 - running_container: ContainerInfoModel = ContainerInfoModel.query.filter_by( - challenge_id=request.json.get("chal_id"), user_id=user.id).first() + challenge = ContainerChallenge.challenge_model.query.filter_by(id=chal_id).first() + if challenge is None: + return {"error": "Challenge not found"}, 400 - if running_container: - kill_container(running_container.container_id) + existing = ContainerInfoModel.query.filter_by( + challenge_id=chal_id, user_id=user.id).first() - return create_container(request.json.get("chal_id"), user.id) + if existing: + if existing.container_id: + try: + container_manager.kill_container(existing.container_id) + except ContainerException as err: + print(f"[CTFd] reset: 
kill_container({existing.container_id}) failed: {err}") + db.session.delete(existing) + db.session.commit() + + return _spawn_new(challenge, user.id) @containers_bp.route('/api/stop', methods=['POST']) @authed_only @@ -367,13 +436,19 @@ def route_stop_container(): if user is None: return {"error": "User not found"}, 400 - running_container: ContainerInfoModel = ContainerInfoModel.query.filter_by( + row: ContainerInfoModel = ContainerInfoModel.query.filter_by( challenge_id=request.json.get("chal_id"), user_id=user.id).first() - if running_container: - return kill_container(running_container.container_id) - - return {"error": "No container found"}, 400 + if row is None: + return {"error": "No container found"}, 400 + if row.container_id: + try: + container_manager.kill_container(row.container_id) + except ContainerException as err: + return {"error": str(err)}, 500 + db.session.delete(row) + db.session.commit() + return {"success": "Container killed"} @containers_bp.route('/api/kill', methods=['POST']) @admins_only @@ -381,20 +456,43 @@ def route_kill_container(): if request.json is None: return {"error": "Invalid request"}, 400 - if request.json.get("container_id", None) is None: - return {"error": "No container_id specified"}, 400 - - return kill_container(request.json.get("container_id")) + # Accept either the new `id` (row id) or legacy `container_id` (backend name). 
+ row_id = request.json.get("id") + container_id = request.json.get("container_id") + row = None + if row_id is not None: + try: + row = ContainerInfoModel.query.get(int(row_id)) + except (ValueError, TypeError): + row = None + elif container_id is not None: + row = ContainerInfoModel.query.filter_by(container_id=container_id).first() + else: + return {"error": "No id or container_id specified"}, 400 + + if row is None: + return {"error": "Not found"}, 404 + if row.container_id: + try: + container_manager.kill_container(row.container_id) + except ContainerException as err: + print(f"[CTFd] admin kill failed: {err}") + db.session.delete(row) + db.session.commit() + return {"success": "Container killed"} @containers_bp.route('/api/purge', methods=['POST']) @admins_only def route_purge_containers(): containers: "list[ContainerInfoModel]" = ContainerInfoModel.query.all() for container in containers: - try: - kill_container(container.container_id) - except ContainerException: - pass + if container.container_id: + try: + container_manager.kill_container(container.container_id) + except ContainerException as err: + print(f"[CTFd] purge: kill_container({container.container_id}) failed: {err}") + db.session.delete(container) + db.session.commit() return {"success": "Purged all containers"}, 200 @containers_bp.route('/api/images', methods=['GET']) diff --git a/assets/view.js b/assets/view.js index babccfa..090874e 100644 --- a/assets/view.js +++ b/assets/view.js @@ -51,108 +51,153 @@ function mergeQueryParams(parameters, queryParameters) { return queryParameters; } -function container_request(challenge_id) { - var path = "/containers/api/request"; - var requestButton = document.getElementById("container-request-btn"); - var requestResult = document.getElementById("container-request-result"); +function _container_show_error(msg) { + var requestError = document.getElementById("container-request-error"); + requestError.style.display = ""; + 
requestError.firstElementChild.innerHTML = msg; +} + +function _container_restore_button(button, originalLabel) { + if (!button) return; + button.innerHTML = originalLabel; + button.removeAttribute("disabled"); +} + +function _container_show_running(data, opts) { + opts = opts || {}; var connectionInfo = document.getElementById("container-connection-info"); + var requestResult = document.getElementById("container-request-result"); var containerExpires = document.getElementById("container-expires"); - var containerExpiresTime = document.getElementById( - "container-expires-time" - ); + var containerExpiresTime = document.getElementById("container-expires-time"); + var requestButton = document.getElementById("container-request-btn"); var requestError = document.getElementById("container-request-error"); - var originalLabel = requestButton.innerHTML; - requestButton.setAttribute("disabled", "disabled"); - requestButton.innerHTML = "Provisioning… (may take up to a minute)"; + requestError.style.display = "none"; + requestError.firstElementChild.innerHTML = ""; + if (opts.removeRequestButton && requestButton && requestButton.parentNode) { + requestButton.parentNode.removeChild(requestButton); + } + connectionInfo.innerHTML = data.hostname + ":" + data.port; + containerExpires.innerHTML = Math.ceil( + (new Date(data.expires * 1000) - new Date()) / 1000 / 60 + ); + containerExpiresTime.innerHTML = new Date( + data.expires * 1000 + ).toLocaleTimeString(); + requestResult.style.display = ""; +} - var xhr = new XMLHttpRequest(); - xhr.open("POST", path, true); - xhr.setRequestHeader("Content-Type", "application/json"); - xhr.setRequestHeader("Accept", "application/json"); - xhr.setRequestHeader("CSRF-Token", init.csrfNonce); - xhr.send(JSON.stringify({ chal_id: challenge_id })); - xhr.onload = function () { - var data = JSON.parse(this.responseText); - if (data.error !== undefined) { - // Container error - requestError.style.display = ""; - 
requestError.firstElementChild.innerHTML = data.error; - requestButton.innerHTML = originalLabel; - requestButton.removeAttribute("disabled"); - } else if (data.message !== undefined) { - // CTFd error - requestError.style.display = ""; - requestError.firstElementChild.innerHTML = data.message; - requestButton.innerHTML = originalLabel; - requestButton.removeAttribute("disabled"); - } else { - // Success - requestError.style.display = "none"; - requestError.firstElementChild.innerHTML = ""; - requestButton.parentNode.removeChild(requestButton); - connectionInfo.innerHTML = data.hostname + ":" + data.port; - containerExpires.innerHTML = Math.ceil( - (new Date(data.expires * 1000) - new Date()) / 1000 / 60 - ); - containerExpiresTime.innerHTML = new Date( - data.expires * 1000 - ).toLocaleTimeString(); - requestResult.style.display = ""; +function _container_poll_status(rowId, button, originalLabel, opts) { + var attempts = 0; + var maxAttempts = 60; // 60 * 3s = 3 minutes + var timer = setInterval(function () { + attempts++; + if (attempts > maxAttempts) { + clearInterval(timer); + _container_show_error("Provisioning timed out after 3 minutes"); + _container_restore_button(button, originalLabel); + return; } - console.log(data); - }; + fetch("/containers/api/status/" + rowId, { + method: "GET", + headers: { + "Accept": "application/json", + "CSRF-Token": init.csrfNonce, + }, + credentials: "same-origin", + }) + .then(function (r) { + return r.json(); + }) + .then(function (data) { + if (data.status === "running") { + clearInterval(timer); + _container_show_running(data, opts); + if (opts && opts.restoreButtonOnSuccess) { + _container_restore_button(button, originalLabel); + } + } else if (data.status === "failed") { + clearInterval(timer); + _container_show_error(data.error || "Provisioning failed"); + _container_restore_button(button, originalLabel); + } + // else: still provisioning, keep polling + }) + .catch(function (err) { + clearInterval(timer); + 
_container_show_error("Status check failed: " + err); + _container_restore_button(button, originalLabel); + }); + }, 3000); +} + +function _container_async_action(path, challenge_id, button, opts) { + var originalLabel = button.innerHTML; + button.setAttribute("disabled", "disabled"); + button.innerHTML = "Provisioning…"; + + fetch(path, { + method: "POST", + headers: { + "Content-Type": "application/json", + "Accept": "application/json", + "CSRF-Token": init.csrfNonce, + }, + credentials: "same-origin", + body: JSON.stringify({ chal_id: challenge_id }), + }) + .then(function (r) { + return r.json(); + }) + .then(function (data) { + if (data.error !== undefined) { + _container_show_error(data.error); + _container_restore_button(button, originalLabel); + return; + } + if (data.message !== undefined) { + _container_show_error(data.message); + _container_restore_button(button, originalLabel); + return; + } + if (data.status === "running") { + _container_show_running(data, opts); + if (opts && opts.restoreButtonOnSuccess) { + _container_restore_button(button, originalLabel); + } + return; + } + if (data.status === "provisioning") { + _container_poll_status(data.id, button, originalLabel, opts); + return; + } + _container_show_error("Unexpected response: " + JSON.stringify(data)); + _container_restore_button(button, originalLabel); + }) + .catch(function (err) { + _container_show_error("Request failed: " + err); + _container_restore_button(button, originalLabel); + }); +} + +function container_request(challenge_id) { + var requestButton = document.getElementById("container-request-btn"); + _container_async_action( + "/containers/api/request", + challenge_id, + requestButton, + { removeRequestButton: true } + ); } function container_reset(challenge_id) { - var path = "/containers/api/reset"; var resetButton = document.getElementById("container-reset-btn"); - var requestResult = document.getElementById("container-request-result"); - var containerExpires = 
document.getElementById("container-expires"); - var containerExpiresTime = document.getElementById( - "container-expires-time" + _container_async_action( + "/containers/api/reset", + challenge_id, + resetButton, + { restoreButtonOnSuccess: true } ); - var connectionInfo = document.getElementById("container-connection-info"); - var requestError = document.getElementById("container-request-error"); - - var originalLabel = resetButton.innerHTML; - resetButton.setAttribute("disabled", "disabled"); - resetButton.innerHTML = "Provisioning…"; - - var xhr = new XMLHttpRequest(); - xhr.open("POST", path, true); - xhr.setRequestHeader("Content-Type", "application/json"); - xhr.setRequestHeader("Accept", "application/json"); - xhr.setRequestHeader("CSRF-Token", init.csrfNonce); - xhr.send(JSON.stringify({ chal_id: challenge_id })); - xhr.onload = function () { - var data = JSON.parse(this.responseText); - resetButton.innerHTML = originalLabel; - if (data.error !== undefined) { - // Container rrror - requestError.style.display = ""; - requestError.firstElementChild.innerHTML = data.error; - resetButton.removeAttribute("disabled"); - } else if (data.message !== undefined) { - // CTFd error - requestError.style.display = ""; - requestError.firstElementChild.innerHTML = data.message; - resetButton.removeAttribute("disabled"); - } else { - // Success - requestError.style.display = "none"; - connectionInfo.innerHTML = data.hostname + ":" + data.port; - containerExpires.innerHTML = Math.ceil( - (new Date(data.expires * 1000) - new Date()) / 1000 / 60 - ); - containerExpiresTime.innerHTML = new Date( - data.expires * 1000 - ).toLocaleTimeString(); - requestResult.style.display = ""; - resetButton.removeAttribute("disabled"); - } - console.log(data); - }; } function container_renew(challenge_id) { diff --git a/container_manager.py b/container_manager.py index cc0c623..d4a8214 100644 --- a/container_manager.py +++ b/container_manager.py @@ -121,19 +121,20 @@ def 
wrapper_run_command(self, *args, **kwargs): @run_command def kill_expired_containers(self, app: Flask): with app.app_context(): + now = int(time.time()) containers: "list[ContainerInfoModel]" = ContainerInfoModel.query.all() - + deleted = False for container in containers: - delta_seconds = container.expires - int(time.time()) - if delta_seconds < 0: - try: - self.kill_container(container.container_id) - except ContainerException: - print( - "[Container Expiry Job] Docker is not initialized. Please check your settings.") - + if container.expires - now < 0: + if container.container_id: + try: + self.kill_container(container.container_id) + except ContainerException as e: + print(f"[Container Expiry Job] kill failed: {e}") db.session.delete(container) - db.session.commit() + deleted = True + if deleted: + db.session.commit() @run_command def is_container_running(self, container_id: str) -> bool: diff --git a/container_manager_aci.py b/container_manager_aci.py index 629d147..0c65ae9 100644 --- a/container_manager_aci.py +++ b/container_manager_aci.py @@ -161,10 +161,11 @@ def kill_expired_containers(self, app: Flask): deleted = False for container in containers: if container.expires - now < 0: - try: - self.kill_container(container.container_id) - except ContainerException as e: - print(f"[CTFd-ACI] kill_expired_containers: {e}") + if container.container_id: + try: + self.kill_container(container.container_id) + except ContainerException as e: + print(f"[CTFd-ACI] kill_expired_containers: {e}") db.session.delete(container) deleted = True if deleted: diff --git a/models.py b/models.py index cbefd0c..5949b13 100644 --- a/models.py +++ b/models.py @@ -27,20 +27,27 @@ def __init__(self, *args, **kwargs): class ContainerInfoModel(db.Model): __mapper_args__ = {"polymorphic_identity": "container_info"} - container_id = db.Column(db.String(512), primary_key=True) + id = db.Column(db.Integer, primary_key=True, autoincrement=True) + container_id = db.Column(db.String(512), 
nullable=True, index=True) challenge_id = db.Column( db.Integer, db.ForeignKey("challenges.id", ondelete="CASCADE") ) user_id = db.Column( db.Integer, db.ForeignKey("users.id", ondelete="CASCADE") ) - port = db.Column(db.Integer) + port = db.Column(db.Integer, nullable=True) hostname = db.Column(db.String(512), nullable=True) + status = db.Column(db.String(32), default="provisioning", nullable=False) + error_message = db.Column(db.Text, nullable=True) timestamp = db.Column(db.Integer) expires = db.Column(db.Integer) user = relationship("Users", foreign_keys=[user_id]) challenge = relationship(ContainerChallengeModel, foreign_keys=[challenge_id]) + __table_args__ = ( + db.UniqueConstraint("challenge_id", "user_id", + name="uq_container_chal_user"), + ) class ContainerSettingsModel(db.Model): diff --git a/templates/container_dashboard.html b/templates/container_dashboard.html index fb670da..ab6421b 100644 --- a/templates/container_dashboard.html +++ b/templates/container_dashboard.html @@ -58,6 +58,8 @@

Containers

Expires + Status + Running Kill @@ -67,20 +69,29 @@

Containers

{% if containers %} {% for c in containers %} - {{ c.container_id[:12] }} + {{ c.container_id[:12] if c.container_id else "—" }} {{ c.challenge.image }} {{ c.challenge.name }} [{{ c.challenge_id }}] {{ c.user.name }} [{{ c.user_id }}] - {{ c.port }} + {{ c.port if c.port is not none else "—" }} {{ c.timestamp|format_time }} {{ c.expires|format_time }} + {% if c.status == "running" %} + running + {% elif c.status == "provisioning" %} + provisioning + {% elif c.status == "failed" %} + failed + {% else %} + {{ c.status }} + {% endif %} {% if c.is_running %} Yes {% else %} No {% endif %} + onclick="killContainer({{ c.id }})"> {% endfor %} {% endif %} @@ -116,7 +127,7 @@

Containers

}; } - function killContainer(container_id) { + function killContainer(row_id) { var path = "/containers/api/kill"; var xhr = new XMLHttpRequest(); @@ -124,12 +135,10 @@

Containers

xhr.setRequestHeader("Content-Type", "application/json"); xhr.setRequestHeader("Accept", "application/json"); xhr.setRequestHeader("CSRF-Token", init.csrfNonce); - xhr.send(JSON.stringify({ container_id: container_id })); + xhr.send(JSON.stringify({ id: row_id })); xhr.onload = function () { var data = JSON.parse(this.responseText); - if (data.success == undefined) { - purgeButton.removeAttribute("disabled"); - } else { + if (data.success !== undefined) { window.location.reload(); } console.log(data); From 15068e142570afb53a6c11f3c8454b4484609f6f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CMatt?= Date: Fri, 15 May 2026 08:06:12 -0400 Subject: [PATCH 04/24] Tighten ACI resource clamping, image listing, and identity shape Clamp container memory to [0.1, 16] GB and CPU to [0.1, 4.0] cores so an admin can't submit a spec ACI will reject at provision time. Cap get_images at 20 tags per repo and order by last-updated descending, so a large ACR doesn't hang the create-challenge dropdown while it enumerates every tag. Pass UserAssignedIdentities() rather than {} to ContainerGroupIdentity for SDK type-correctness; the wire payload is identical. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- container_manager_aci.py | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/container_manager_aci.py b/container_manager_aci.py index 0c65ae9..701fcb4 100644 --- a/container_manager_aci.py +++ b/container_manager_aci.py @@ -22,8 +22,15 @@ ImageRegistryCredential, ContainerGroupIdentity, ResourceIdentityType, + UserAssignedIdentities, ) -from azure.containerregistry import ContainerRegistryClient +from azure.containerregistry import ContainerRegistryClient, ArtifactTagOrder + +ACI_MIN_CPU = 0.1 +ACI_MAX_CPU = 4.0 +ACI_MIN_MEM_GB = 0.1 +ACI_MAX_MEM_GB = 16.0 +MAX_TAGS_PER_REPO = 20 from CTFd.models import db from .models import ContainerInfoModel @@ -186,13 +193,13 @@ def create_container(self, image: str, port: int, command: str, volumes: str): try: mem_mb = int(self.settings.get("container_maxmemory") or 0) if mem_mb > 0: - memory_gb = max(0.1, mem_mb / 1024) + memory_gb = max(ACI_MIN_MEM_GB, min(ACI_MAX_MEM_GB, mem_mb / 1024)) except ValueError: pass try: cpu_setting = float(self.settings.get("container_maxcpu") or 0) if cpu_setting > 0: - cpu = cpu_setting + cpu = max(ACI_MIN_CPU, min(ACI_MAX_CPU, cpu_setting)) except ValueError: pass @@ -209,7 +216,7 @@ def create_container(self, image: str, port: int, command: str, volumes: str): identity = ContainerGroupIdentity( type=ResourceIdentityType.USER_ASSIGNED, - user_assigned_identities={uami: {}}, + user_assigned_identities={uami: UserAssignedIdentities()}, ) image_registry_creds = [] if login_server: @@ -303,8 +310,15 @@ def get_images(self): images = [] for repo in registry.list_repository_names(): try: - for tag in registry.list_tag_properties(repo): + count = 0 + for tag in registry.list_tag_properties( + repo, + order_by=ArtifactTagOrder.LAST_UPDATED_ON_DESCENDING, + ): images.append(f"{login_server}/{repo}:{tag.name}") + count += 1 + if count >= MAX_TAGS_PER_REPO: + break except Exception as e: print(f"[CTFd-ACI] 
list_tag_properties({repo}) failed: {e}") continue From a832ce08323dc1a4ca30397c5199b88ba5b7822a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CMatt?= Date: Fri, 15 May 2026 09:32:08 -0400 Subject: [PATCH 05/24] Document Docker Compose deployment for the ~/ctfd-stack layout Walks through clone, custom Dockerfile that pip-installs the Azure SDK, compose.yaml wiring with service-principal env vars, Caddy as the TLS front-end, schema-drop instructions for upgraders, and an end-to-end verification step. Matches the common self-hosted layout where compose.yaml, CTFd/, and caddy/ sit side-by-side under a single parent directory. Co-Authored-By: Claude Opus 4.7 (1M context) --- README.md | 151 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 151 insertions(+) diff --git a/README.md b/README.md index b00266f..02d2a76 100644 --- a/README.md +++ b/README.md @@ -18,6 +18,157 @@ When a user clicks on a container challenge, a button labeled "Get Connection In A note, we used hidden teams as non-school teams in PCTF 2022 so if you want them to count for decreasing the dynamic challenge points, you need to remove the `Model.hidden == False,` line from the `calculate_value` function in `__init__.py`. +## Deployment on Linux with Docker Compose + +This recipe matches a common self-hosted layout: a top-level directory holding `compose.yaml`, the CTFd source tree, and a Caddy reverse-proxy directory side-by-side: + +``` +~/ctfd-stack/ +├── compose.yaml +├── CTFd/ ← cloned from github.com/CTFd/CTFd +│ ├── Dockerfile ← upstream +│ ├── Dockerfile.ctfd ← you'll add this +│ └── CTFd/ +│ └── plugins/ +│ └── containers/ ← this plugin lives here +└── caddy/ + ├── Caddyfile + └── data/, config/ +``` + +Adapt to your own paths. Docker Engine 24+ with the `docker compose` v2 plugin is assumed. + +### 1. Clone CTFd and the plugin + +The plugin directory **must** be named exactly `containers` — that's where the URL prefix `/containers` is registered. 
+ +```bash +cd ~/ctfd-stack + +git clone https://github.com/CTFd/CTFd.git +git clone https://github.com/therealcybermattlee/CTFd-Docker-Plugin.git \ + CTFd/CTFd/plugins/containers +``` + +After this you should have `~/ctfd-stack/CTFd/CTFd/plugins/containers/__init__.py`. + +### 2. Custom Dockerfile that installs the plugin's Python deps + +The stock `ctfd/ctfd` image doesn't include `azure-identity`, `azure-mgmt-containerinstance`, or `azure-containerregistry`. Create `~/ctfd-stack/CTFd/Dockerfile.ctfd`: + +```dockerfile +FROM ctfd/ctfd:latest +USER root +COPY CTFd/plugins/containers/requirements.txt /tmp/plugin-requirements.txt +RUN pip install --no-cache-dir -r /tmp/plugin-requirements.txt +USER 1001 +``` + +The build context will be the `CTFd/` directory, so the `COPY` path is relative to that. + +### 3. Wire it into `compose.yaml` + +Add a `build:` block to your existing `ctfd` service so it picks up the custom Dockerfile, and pass the Azure auth env vars through. A minimal stanza: + +```yaml +services: + ctfd: + build: + context: ./CTFd + dockerfile: Dockerfile.ctfd + image: ctfd-with-containers:latest + restart: unless-stopped + environment: + # --- Existing CTFd env vars stay (SECRET_KEY, DATABASE_URL, REDIS_URL, etc.) --- + + # --- Azure SDK auth (skip these if CTFd runs on Azure with a managed identity) --- + AZURE_TENANT_ID: "${AZURE_TENANT_ID}" + AZURE_CLIENT_ID: "${AZURE_CLIENT_ID}" + AZURE_CLIENT_SECRET: "${AZURE_CLIENT_SECRET}" + # --- For the Docker backend, mount the host's Docker socket instead --- + # volumes: + # - /var/run/docker.sock:/var/run/docker.sock +``` + +Then drop the values in `~/ctfd-stack/.env` next to `compose.yaml`: + +``` +AZURE_TENANT_ID=00000000-0000-0000-0000-000000000000 +AZURE_CLIENT_ID=00000000-0000-0000-0000-000000000000 +AZURE_CLIENT_SECRET=... +``` + +Lock it down: `chmod 600 .env` and make sure `.env` is in `.gitignore` if the directory is under version control. + +### 4. 
Caddy in front of CTFd + +If you're using the layout shown above, Caddy is your TLS terminator. A minimal `caddy/Caddyfile` looks like: + +``` +your-ctfd.example.com { + reverse_proxy ctfd:8000 +} +``` + +The CTFd service listens on port 8000 inside its container; expose only Caddy on `:443`/`:80` to the public. With the async refactor in this plugin, the `POST /containers/api/request` returns in under a second (HTTP 202) and `GET /containers/api/status/` is also fast — so the default Caddy/reverse-proxy timeouts are fine. No timeout bumps required. + +### 5. Build and start + +```bash +cd ~/ctfd-stack +docker compose build ctfd +docker compose up -d +``` + +Confirm the plugin loaded: + +```bash +docker compose logs -f ctfd | head -50 +``` + +You shouldn't see `ImportError` or `ModuleNotFoundError`. Hit `https://your-ctfd.example.com/admin` and look for **Plugins → Containers** in the navbar dropdown. + +### 6. If you previously had the upstream plugin installed, drop its tables + +This fork's schema is incompatible with upstream (user mode instead of team mode, `hostname` column, async status columns, unique constraint on `(challenge_id, user_id)`). On a brand-new database this step is a no-op. On an upgrade, drop the tables once so SQLAlchemy can recreate them on next start: + +```bash +# MariaDB / MySQL — service name and creds must match your compose.yaml +docker compose exec db mariadb -uctfd -pctfd ctfd \ + -e "DROP TABLE IF EXISTS container_info; DROP TABLE IF EXISTS container_settings;" +``` + +Restart CTFd: `docker compose restart ctfd`. + +### 7. Configure via the admin UI + +1. Browse to `https://your-ctfd.example.com/admin` and log in. +2. **Plugins → Containers → Settings**. +3. Set **Backend** to **Azure Container Instances** and fill the Azure fields (Subscription ID, Resource Group, Region, UAMI Resource ID, ACR Login Server, DNS prefix). See [Plugin settings](#plugin-settings) below for what each field means. +4. Save. 
The dashboard badge flips to **Docker Connected** (green) — the label still says "Docker" but it's the same connection check. +5. **Admin → Challenges → New Challenge → container** to create your first challenge. The Image dropdown is populated from your ACR (CTFd's identity needs `AcrPull`). + +### 8. Verify end-to-end + +As a normal player (not admin), open the challenge and click **Get Connection Info**. You should see `Provisioning…` for ~30-60s, then a `hostname:port` line. From the host shell, confirm the container group exists: + +```bash +az container list -g <resource-group> -o table +``` + +Click **Stop** in the UI and watch the container group disappear within ~30s. + +### Updating the plugin later + +```bash +cd ~/ctfd-stack/CTFd/CTFd/plugins/containers +git pull +cd ~/ctfd-stack +docker compose build ctfd && docker compose up -d ctfd +``` + +If a future update introduces new schema columns, you'll see startup errors mentioning unknown columns — drop the affected tables (step 6) and CTFd recreates them. + ## Azure Container Instances backend This fork supports running challenge containers on **Azure Container Instances (ACI)** instead of (or alongside) a Docker daemon. Pick **Azure Container Instances** under Backend on the settings page to switch. From ba97c4ff3ddcd9fc67b7b3ad304d5c61d84caf4e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CMatt?= Date: Fri, 15 May 2026 09:42:30 -0400 Subject: [PATCH 06/24] Refine README deployment section to match the reference compose layout Updates the compose.yaml example to mirror the typical ~/ctfd-stack configuration: ctfd/ctfd:3.8.4 image swap to build:, env vars carried through (REVERSE_PROXY, WORKERS, DATABASE_URL pattern), .ctfd_secret_key file mount with a pre-create step so Docker doesn't create it as a directory. Calls out the ctfd_frontend + ctfd_backend dual-attachment required for Azure egress and DB access.
Notes Caddy with Cloudflare DNS-01 (caddy-cloudflare image) needs no proxy-timeout changes thanks to the async refactor. Adds a managed-identity vs service-principal split for Azure auth, with shell commands for assigning a system-assigned MI to the CTFd VM and granting Contributor + AcrPull on the appropriate scopes. Co-Authored-By: Claude Opus 4.7 (1M context) --- README.md | 87 ++++++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 73 insertions(+), 14 deletions(-) diff --git a/README.md b/README.md index 02d2a76..4b414ae 100644 --- a/README.md +++ b/README.md @@ -68,29 +68,86 @@ The build context will be the `CTFd/` directory, so the `COPY` path is relative ### 3. Wire it into `compose.yaml` -Add a `build:` block to your existing `ctfd` service so it picks up the custom Dockerfile, and pass the Azure auth env vars through. A minimal stanza: +The typical setup has two networks — `ctfd_frontend` (Caddy + CTFd, internet-egress) and `ctfd_backend` (DB + Redis, marked `internal: true` so the DB isn't reachable from the host network). **The CTFd service must attach to both** so it can reach the DB *and* reach `management.azure.com` for the ACI calls. + +**Replace** the pinned `image:` line in your existing `ctfd` service with a `build:` block (keep everything else — env vars, volumes, depends_on, networks). Before: + +```yaml + ctfd: + image: ctfd/ctfd:3.8.4 + # ... your existing env / volumes / depends_on / networks ... +``` + +After: ```yaml -services: ctfd: build: context: ./CTFd dockerfile: Dockerfile.ctfd - image: ctfd-with-containers:latest + image: ctfd-with-containers:latest # tag for the locally-built image restart: unless-stopped + depends_on: + - db + - cache + networks: + - ctfd_frontend + - ctfd_backend environment: - # --- Existing CTFd env vars stay (SECRET_KEY, DATABASE_URL, REDIS_URL, etc.) 
--- - - # --- Azure SDK auth (skip these if CTFd runs on Azure with a managed identity) --- - AZURE_TENANT_ID: "${AZURE_TENANT_ID}" - AZURE_CLIENT_ID: "${AZURE_CLIENT_ID}" - AZURE_CLIENT_SECRET: "${AZURE_CLIENT_SECRET}" - # --- For the Docker backend, mount the host's Docker socket instead --- + # --- Existing CTFd env vars stay as-is --- + UPLOAD_FOLDER: /var/uploads + LOG_FOLDER: /var/log/CTFd + DATABASE_URL: mysql+pymysql://ctfd:${MARIADB_PASSWORD}@db/ctfd + REDIS_URL: redis://cache:6379 + WORKERS: "4" + ACCESS_LOG: "-" + ERROR_LOG: "-" + REVERSE_PROXY: "true" # trust X-Forwarded-* from Caddy + + # --- Azure SDK auth — pick ONE of the two options below --- + # + # Option A (recommended when CTFd runs on an Azure VM): leave these unset and assign + # the VM a system-assigned managed identity with Contributor on the challenge RG + + # AcrPull on the ACR. DefaultAzureCredential picks up the VM's IMDS endpoint + # (169.254.169.254) automatically — no secrets in env vars. + # + # Option B (CTFd not on Azure, or you prefer an explicit service principal): + # AZURE_TENANT_ID: ${AZURE_TENANT_ID} + # AZURE_CLIENT_ID: ${AZURE_CLIENT_ID} + # AZURE_CLIENT_SECRET: ${AZURE_CLIENT_SECRET} + volumes: + - ctfd_logs:/var/log/CTFd + - ctfd_uploads:/var/uploads + - ./CTFd/.ctfd_secret_key:/opt/CTFd/.ctfd_secret_key:ro + # --- For the Docker backend (challenges run on this host), also mount the host's Docker socket --- # volumes: # - /var/run/docker.sock:/var/run/docker.sock ``` -Then drop the values in `~/ctfd-stack/.env` next to `compose.yaml`: +**Important:** the `.ctfd_secret_key` file mount must exist as a regular file on the host before `docker compose up`. If it doesn't, Docker creates an empty *directory* there and CTFd fails to start. 
Pre-create it once and let CTFd populate it on first boot: + +```bash +touch ~/ctfd-stack/CTFd/.ctfd_secret_key +chmod 600 ~/ctfd-stack/CTFd/.ctfd_secret_key +``` + +If you go with **Option A (managed identity on the VM)** — which is the natural fit when CTFd runs on an Azure VM: + +```bash +# Enable system-assigned MI on the CTFd host VM (run on your workstation or in Cloud Shell) +az vm identity assign -g <vm-resource-group> -n <vm-name> + +# Grab the principalId it printed and grant the roles it needs +PRINCIPAL_ID=$(az vm show -g <vm-resource-group> -n <vm-name> --query identity.principalId -o tsv) +az role assignment create --assignee "$PRINCIPAL_ID" \ + --role "Contributor" \ + --scope "/subscriptions/<subscription-id>/resourceGroups/<challenge-resource-group>" +az role assignment create --assignee "$PRINCIPAL_ID" \ + --role "AcrPull" \ + --scope "/subscriptions/<subscription-id>/resourceGroups/<acr-resource-group>/providers/Microsoft.ContainerRegistry/registries/<acr-name>" +``` + +If you go with **Option B (service principal env vars)**, drop the values in `~/ctfd-stack/.env` next to `compose.yaml`: + ``` AZURE_TENANT_ID=00000000-0000-0000-0000-000000000000 @@ -98,11 +155,11 @@ AZURE_CLIENT_ID=00000000-0000-0000-0000-000000000000 AZURE_CLIENT_SECRET=... ``` -Lock it down: `chmod 600 .env` and make sure `.env` is in `.gitignore` if the directory is under version control. +Lock it down: `chmod 600 .env` and add `.env` to `.gitignore`. ### 4. Caddy in front of CTFd -If you're using the layout shown above, Caddy is your TLS terminator. A minimal `caddy/Caddyfile` looks like: +If you're using `ghcr.io/caddybuilds/caddy-cloudflare:2-alpine` (Caddy with Cloudflare DNS-01 ACME) or any other Caddy build, **no changes are required for this plugin**. A typical `caddy/Caddyfile` is just: ``` your-ctfd.example.com { @@ -110,7 +167,9 @@ your-ctfd.example.com { } ``` -The CTFd service listens on port 8000 inside its container; expose only Caddy on `:443`/`:80` to the public.
With the async refactor in this plugin, the `POST /containers/api/request` returns in under a second (HTTP 202) and `GET /containers/api/status/` is also fast — so the default Caddy/reverse-proxy timeouts are fine. No timeout bumps required. +(plus your `tls { dns cloudflare {env.CF_API_TOKEN} }` block if you're using DNS-01.) + +The CTFd service listens on port 8000 inside its container; expose only Caddy on `:443`/`:80` to the public. With the async refactor in this plugin, `POST /containers/api/request` returns in under a second (HTTP 202) and `GET /containers/api/status/` is also fast — so the default Caddy and Cloudflare timeouts are fine. **No timeout bumps required**, including behind Cloudflare's 100s edge limit. ### 5. Build and start From c8e5eae5b698a2191ff3f9c64054f4a5a624c323 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CMatt?= Date: Sun, 17 May 2026 11:06:06 -0400 Subject: [PATCH 07/24] Show active backend (Docker or ACI) on containers dashboard badge The badge always read "Docker Connected/Not Connected" regardless of backend. When ACI is the active backend, label it accordingly so admins can tell at a glance which provisioner is wired up. 
Co-Authored-By: Claude Opus 4.7 --- __init__.py | 11 ++++++++++- templates/container_dashboard.html | 4 ++-- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/__init__.py b/__init__.py index 183835c..8e38581 100644 --- a/__init__.py +++ b/__init__.py @@ -574,7 +574,16 @@ def route_containers_dashboard(): except ContainerException: running_containers[i].is_running = False - return render_template('container_dashboard.html', containers=running_containers, connected=connected) + backend = settings.get("backend", "docker") + backend_label = "Azure Container Instances" if backend == "aci" else "Docker" + + return render_template( + 'container_dashboard.html', + containers=running_containers, + connected=connected, + backend=backend, + backend_label=backend_label, + ) @containers_bp.route('/settings', methods=['GET']) @admins_only diff --git a/templates/container_dashboard.html b/templates/container_dashboard.html index ab6421b..b33f9e4 100644 --- a/templates/container_dashboard.html +++ b/templates/container_dashboard.html @@ -36,9 +36,9 @@

Containers

style="float:right;margin-right:10px">Settings {% if connected %} - Docker Connected + {{ backend_label }} Connected {% else %} - Docker Not Connected + {{ backend_label }} Not Connected {% endif %} From f11d3741495ad7aba0d84c32db1bf6541900a7b4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CMatt?= Date: Sun, 17 May 2026 11:17:35 -0400 Subject: [PATCH 08/24] Avoid ZeroDivisionError when challenge.decay is 0 State-only PATCHes (e.g. Finish -> set visible) re-run calculate_value on the existing challenge. If decay is 0, the curve math hits a divide by zero and returns 500, leaving the Options modal stuck open. Treat decay=0 as a static challenge worth `initial` instead of throwing. Co-Authored-By: Claude Opus 4.7 --- __init__.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/__init__.py b/__init__.py index 8e38581..4cb257e 100644 --- a/__init__.py +++ b/__init__.py @@ -80,6 +80,13 @@ def read(cls, challenge): @classmethod def calculate_value(cls, challenge): + # No decay curve configured — treat as a static challenge worth `initial`. + if not challenge.decay: + if challenge.initial is not None: + challenge.value = challenge.initial + db.session.commit() + return challenge + Model = get_model() solve_count = ( From d8b02c51c1b3ae51a814583123802676228fe7f8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CMatt?= Date: Sun, 17 May 2026 11:26:17 -0400 Subject: [PATCH 09/24] Defer markdown renderer init in view.js to avoid load-time crash `CTFd.lib.markdown()` is undefined at script-parse time on CTFd 3.8.x, so reading it at module top-level threw a TypeError that aborted the rest of view.js. Without preRender/render/postRender registered, the challenge modal failed to open. Initialize the renderer lazily inside preRender (and on first render() as a fallback). 
Co-Authored-By: Claude Opus 4.7 --- assets/view.js | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/assets/view.js b/assets/view.js index 090874e..b7dc364 100644 --- a/assets/view.js +++ b/assets/view.js @@ -1,10 +1,13 @@ CTFd._internal.challenge.data = undefined; -CTFd._internal.challenge.renderer = CTFd.lib.markdown(); - -CTFd._internal.challenge.preRender = function () {}; +CTFd._internal.challenge.preRender = function () { + CTFd._internal.challenge.renderer = CTFd.lib.markdown(); +}; CTFd._internal.challenge.render = function (markdown) { + if (!CTFd._internal.challenge.renderer) { + CTFd._internal.challenge.renderer = CTFd.lib.markdown(); + } return CTFd._internal.challenge.renderer.render(markdown); }; From f89530fb53064e7bd8d5df0b02dc64b838c7ca86 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CMatt?= Date: Sun, 17 May 2026 11:38:16 -0400 Subject: [PATCH 10/24] Cache-bust plugin JS via content-hash query string Append ?v= to create.js/update.js/view.js URLs so browsers fetch a fresh copy whenever the file content changes. The hash is computed once at module load, so a container restart picks up new asset content automatically. 
Co-Authored-By: Claude Opus 4.7 --- __init__.py | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/__init__.py b/__init__.py index 4cb257e..2a38859 100644 --- a/__init__.py +++ b/__init__.py @@ -4,8 +4,27 @@ import json import datetime import math +import os +import hashlib import threading + +_PLUGIN_DIR = os.path.dirname(os.path.abspath(__file__)) + + +def _asset_version(relpath): + full = os.path.join(_PLUGIN_DIR, relpath) + try: + with open(full, "rb") as f: + return hashlib.sha256(f.read()).hexdigest()[:8] + except OSError: + return str(int(time.time())) + + +def _versioned(path): + rel = path.split("/plugins/containers/assets/", 1)[-1] + return f"{path}?v={_asset_version(os.path.join('assets', rel))}" + from flask import Blueprint, request, Flask, render_template, url_for, redirect, flash from sqlalchemy.exc import IntegrityError @@ -36,9 +55,9 @@ class ContainerChallenge(BaseChallenge): "view": "/plugins/containers/assets/view.html", } scripts = { # Scripts that are loaded when a template is loaded - "create": "/plugins/containers/assets/create.js", - "update": "/plugins/containers/assets/update.js", - "view": "/plugins/containers/assets/view.js", + "create": _versioned("/plugins/containers/assets/create.js"), + "update": _versioned("/plugins/containers/assets/update.js"), + "view": _versioned("/plugins/containers/assets/view.js"), } # Route at which files are accessible. This must be registered using register_plugin_assets_directory() route = "/plugins/containers/assets/" From 7f588593eb8841f2d29e13d5e152f2e9de016f74 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CMatt?= Date: Sun, 17 May 2026 13:13:14 -0400 Subject: [PATCH 11/24] Make view.js renderer detection resilient to CTFd version MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CTFd 3.8.x ships with CTFd.lib only exposing {\$, dayjs} — the markdown helper isn't there. 
Detect both legacy (CTFd.lib.markdown() returns a renderer) and newer (CTFd.lib.markdown is the renderer) shapes, and fall back to escaped plain text if neither exists so the modal still opens. Co-Authored-By: Claude Opus 4.7 --- assets/view.js | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/assets/view.js b/assets/view.js index b7dc364..a285615 100644 --- a/assets/view.js +++ b/assets/view.js @@ -1,12 +1,32 @@ CTFd._internal.challenge.data = undefined; +function _container_get_renderer() { + var md = CTFd && CTFd.lib && CTFd.lib.markdown; + if (typeof md === "function") { + // Older API: CTFd.lib.markdown() returns a renderer + try { return md(); } catch (e) { /* fall through */ } + } + if (md && typeof md.render === "function") { + // Newer API: CTFd.lib.markdown IS the renderer + return md; + } + // Fallback: escape text and wrap in

+ return { + render: function (text) { + var div = document.createElement("div"); + div.textContent = text == null ? "" : String(text); + return "

" + div.innerHTML + "

"; + }, + }; +} + CTFd._internal.challenge.preRender = function () { - CTFd._internal.challenge.renderer = CTFd.lib.markdown(); + CTFd._internal.challenge.renderer = _container_get_renderer(); }; CTFd._internal.challenge.render = function (markdown) { if (!CTFd._internal.challenge.renderer) { - CTFd._internal.challenge.renderer = CTFd.lib.markdown(); + CTFd._internal.challenge.renderer = _container_get_renderer(); } return CTFd._internal.challenge.renderer.render(markdown); }; From 3418bb711fd15c13f17c0b115bd9e9e57de03842 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CMatt?= Date: Sun, 17 May 2026 13:22:44 -0400 Subject: [PATCH 12/24] Inject container request UI from view.js in postRender CTFd 3.8.x renders the challenge modal server-side using Alpine.js, and the {% block connection_info %} override in view.html no longer applies (block name/structure changed in newer CTFd). Inject the Request Connection Info button (and reset/stop/renew controls) via JS in postRender by anchoring off the stable .challenge-desc element. This removes our dependency on CTFd's internal template structure. 
Co-Authored-By: Claude Opus 4.7 --- assets/view.js | 51 +++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 50 insertions(+), 1 deletion(-) diff --git a/assets/view.js b/assets/view.js index a285615..da09c38 100644 --- a/assets/view.js +++ b/assets/view.js @@ -31,7 +31,56 @@ CTFd._internal.challenge.render = function (markdown) { return CTFd._internal.challenge.renderer.render(markdown); }; -CTFd._internal.challenge.postRender = function () {}; +function _container_inject_controls() { + var modal = document.getElementById("challenge-window") || document.querySelector('[role="dialog"]'); + if (!modal) return; + if (modal.querySelector("#container-request-btn") || modal.querySelector("#container-request-result")) { + return; // already injected + } + var descSpan = modal.querySelector(".challenge-desc"); + if (!descSpan) return; + + var idInput = modal.querySelector("#challenge-id"); + var challengeId = idInput ? parseInt(idInput.value, 10) : NaN; + if (!challengeId) return; + + var wrap = document.createElement("div"); + wrap.className = "container-challenge-controls text-center my-3"; + wrap.innerHTML = [ + '', + '', + '', + ].join("\n"); + descSpan.parentNode.insertBefore(wrap, descSpan.nextSibling); + + wrap.querySelector("#container-request-btn").addEventListener("click", function () { + container_request(challengeId); + }); + wrap.querySelector("#container-reset-btn").addEventListener("click", function () { + container_reset(challengeId); + }); + wrap.querySelector("#container-stop-btn").addEventListener("click", function () { + container_stop(challengeId); + }); + wrap.querySelector("#container-renew-btn").addEventListener("click", function () { + container_renew(challengeId); + }); +} + +CTFd._internal.challenge.postRender = function () { + // Defer one tick so Alpine has finished swapping in $store.challenge.data.view + setTimeout(_container_inject_controls, 0); +}; CTFd._internal.challenge.submit = function (preview) { var challenge_id = 
parseInt(CTFd.lib.$("#challenge-id").val()); From 9bb00b417baaf9c10acc52aacf57c80f9d42208a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CMatt?= Date: Sun, 17 May 2026 13:25:25 -0400 Subject: [PATCH 13/24] Round ACI resource requests to Azure's required precision ACI rejects memory_in_gb that isn't a multiple of 0.1 (e.g. 1500 MB / 1024 = 1.46484375 GB returned MemoryRequirementNotTimesOfOneTenthGB). Round memory to the nearest 0.1 GB and cpu to the nearest 0.01 cores so user-provided MB/CPU settings always satisfy Azure's precision. Co-Authored-By: Claude Opus 4.7 --- container_manager_aci.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/container_manager_aci.py b/container_manager_aci.py index 701fcb4..35168a3 100644 --- a/container_manager_aci.py +++ b/container_manager_aci.py @@ -196,12 +196,15 @@ def create_container(self, image: str, port: int, command: str, volumes: str): memory_gb = max(ACI_MIN_MEM_GB, min(ACI_MAX_MEM_GB, mem_mb / 1024)) except ValueError: pass + # ACI requires memory to be in 0.1 GB increments and CPU in 0.01 increments. + memory_gb = round(memory_gb * 10) / 10 try: cpu_setting = float(self.settings.get("container_maxcpu") or 0) if cpu_setting > 0: cpu = max(ACI_MIN_CPU, min(ACI_MAX_CPU, cpu_setting)) except ValueError: pass + cpu = round(cpu * 100) / 100 unique = uuid.uuid4().hex[:8] group_name = f"{dns_prefix}-{unique}"[:63].rstrip("-") From 152d8a2c7045ccc1eb8eb37a67f6520200670704 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CMatt?= Date: Sun, 17 May 2026 13:47:11 -0400 Subject: [PATCH 14/24] Restore container state on modal reopen Previously, closing and reopening the challenge modal would always show the Request Connection Info button, and clicking it would spawn a NEW container even if the user already had one running for that challenge. 
Add a GET /api/running/ endpoint that returns the user's existing container state (running, provisioning, or none) without spawning anything, and have view.js call it on modal open so the UI reflects the actual backend state. Co-Authored-By: Claude Opus 4.7 --- __init__.py | 28 ++++++++++++++++++++++++++++ assets/view.js | 25 +++++++++++++++++++++++++ 2 files changed, 53 insertions(+) diff --git a/__init__.py b/__init__.py index 2a38859..9cc73e5 100644 --- a/__init__.py +++ b/__init__.py @@ -387,6 +387,34 @@ def route_container_status(row_id): return {"status": "failed", "error": row.error_message or "Provisioning failed"}, 200 return {"status": "provisioning", "id": row.id}, 202 + @containers_bp.route('/api/running/', methods=['GET']) + @authed_only + @ratelimit(method="GET", limit=120, interval=60) + def route_running_container(chal_id): + user = get_current_user() + if user is None: + return {"error": "User not found"}, 400 + row = ContainerInfoModel.query.filter_by( + challenge_id=chal_id, user_id=user.id).first() + if row is None: + return {"status": "none"}, 200 + if row.status == "running": + try: + if container_manager.is_container_running(row.container_id): + return _running_response(row), 200 + except ContainerException: + pass + # Stale row — backend reports container is gone. Clean it up + # so the next request can spawn a fresh one. 
+ db.session.delete(row) + db.session.commit() + return {"status": "none"}, 200 + if row.status == "provisioning": + return {"status": "provisioning", "id": row.id}, 202 + if row.status == "failed": + return {"status": "failed", "error": row.error_message or "Provisioning failed"}, 200 + return {"status": "none"}, 200 + @containers_bp.route('/api/renew', methods=['POST']) @authed_only @during_ctf_time_only diff --git a/assets/view.js b/assets/view.js index da09c38..6d71efe 100644 --- a/assets/view.js +++ b/assets/view.js @@ -75,6 +75,31 @@ function _container_inject_controls() { wrap.querySelector("#container-renew-btn").addEventListener("click", function () { container_renew(challengeId); }); + + // Check if this user already has a container for this challenge. + fetch("/containers/api/running/" + challengeId, { + method: "GET", + headers: { "Accept": "application/json" }, + credentials: "same-origin", + }) + .then(function (r) { return r.json().then(function (d) { return { status: r.status, data: d }; }); }) + .then(function (res) { + var data = res.data || {}; + if (data.status === "running") { + var btn = wrap.querySelector("#container-request-btn"); + if (btn && btn.parentNode) btn.parentNode.removeChild(btn); + _container_show_running(data, { removeRequestButton: true }); + } else if (data.status === "provisioning") { + var btn2 = wrap.querySelector("#container-request-btn"); + if (btn2) { + var originalLabel = btn2.innerHTML; + btn2.setAttribute("disabled", "disabled"); + btn2.innerHTML = "Provisioning…"; + _container_poll_status(data.id, btn2, originalLabel, { removeRequestButton: true }); + } + } + }) + .catch(function () { /* fall back to fresh-request UI */ }); } CTFd._internal.challenge.postRender = function () { From aff08ef68d1a22b614e4df70f6053bf4d4e345af Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CMatt?= Date: Sun, 17 May 2026 14:25:13 -0400 Subject: [PATCH 15/24] Tear down container on solve; put username in ACI hostname MIME-Version: 1.0 
Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two related improvements: 1) When a player submits a correct flag, kill their ACI container and delete the ContainerInfoModel row. The container has served its purpose; keeping it running costs money and leaks compute. 2) When provisioning, derive the ACI container-group name from the user's name instead of pure randomness. URLs go from beyondctf-41769317..azurecontainer.io to beyondctf-claude-a3b8..azurecontainer.io — easier to debug in Azure portal, and players see their handle. A 4-char random suffix is retained so per-user-per-challenge resets don't collide on the brief window where the old group is being deleted. Co-Authored-By: Claude Opus 4.7 --- __init__.py | 28 +++++++++++++++++++++++----- container_manager.py | 2 +- container_manager_aci.py | 13 ++++++++++--- 3 files changed, 34 insertions(+), 9 deletions(-) diff --git a/__init__.py b/__init__.py index 9cc73e5..619bfa7 100644 --- a/__init__.py +++ b/__init__.py @@ -165,6 +165,21 @@ def solve(cls, user, team, challenge, request): ContainerChallenge.calculate_value(challenge) + # Tear down the player's container for this challenge — it's served + # its purpose and we don't want to keep paying ACI for it. 
+ manager = getattr(cls, "container_manager", None) + if manager is not None and user is not None: + info = ContainerInfoModel.query.filter_by( + challenge_id=challenge.id, user_id=user.id).first() + if info is not None: + if info.container_id: + try: + manager.kill_container(info.container_id) + except ContainerException as e: + print(f"[CTFd] solve cleanup kill failed for {info.container_id}: {e}") + db.session.delete(info) + db.session.commit() + def settings_to_dict(settings): return { @@ -181,6 +196,8 @@ def load(app: Flask): container_settings = settings_to_dict(ContainerSettingsModel.query.all()) container_manager = make_container_manager(container_settings, app) + # Make the manager available to ContainerChallenge classmethods (e.g. solve()). + ContainerChallenge.container_manager = container_manager containers_bp = Blueprint( 'containers', __name__, template_folder='templates', static_folder='assets', url_prefix='/containers') @@ -239,12 +256,12 @@ def _running_response(row: ContainerInfoModel): "expires": row.expires, } - def _provision_async(manager, row_id, image, internal_port, command, volumes, expiration_seconds): + def _provision_async(manager, row_id, image, internal_port, command, volumes, expiration_seconds, owner=None): with app.app_context(): if ContainerInfoModel.query.get(row_id) is None: return try: - created = manager.create_container(image, internal_port, command, volumes) + created = manager.create_container(image, internal_port, command, volumes, owner=owner) except ContainerException as e: row = ContainerInfoModel.query.get(row_id) if row is not None: @@ -293,7 +310,7 @@ def _provision_async(manager, row_id, image, internal_port, command, volumes, ex row.error_message = None db.session.commit() - def _spawn_new(challenge, user_id): + def _spawn_new(challenge, user_id, user_name=None): now = int(time.time()) initial_expires = now + (container_manager.expiration_seconds or 3600) row = ContainerInfoModel( @@ -321,6 +338,7 @@ def 
_spawn_new(challenge, user_id): args=(container_manager, row.id, challenge.image, challenge.port, challenge.command, challenge.volumes, container_manager.expiration_seconds), + kwargs={"owner": user_name}, daemon=True, ).start() @@ -365,7 +383,7 @@ def route_request_container(): db.session.delete(existing) db.session.commit() - return _spawn_new(challenge, user.id) + return _spawn_new(challenge, user.id, user_name=user.name) @containers_bp.route('/api/status/', methods=['GET']) @authed_only @@ -470,7 +488,7 @@ def route_restart_container(): db.session.delete(existing) db.session.commit() - return _spawn_new(challenge, user.id) + return _spawn_new(challenge, user.id, user_name=user.name) @containers_bp.route('/api/stop', methods=['POST']) @authed_only diff --git a/container_manager.py b/container_manager.py index d4a8214..899d81b 100644 --- a/container_manager.py +++ b/container_manager.py @@ -144,7 +144,7 @@ def is_container_running(self, container_id: str) -> bool: return container[0].status == "running" @run_command - def create_container(self, image: str, port: int, command: str, volumes: str): + def create_container(self, image: str, port: int, command: str, volumes: str, owner: str = None): kwargs = {} # Set the memory and CPU limits for the container diff --git a/container_manager_aci.py b/container_manager_aci.py index 35168a3..a6a7c59 100644 --- a/container_manager_aci.py +++ b/container_manager_aci.py @@ -1,4 +1,5 @@ import atexit +import re import shlex import time import uuid @@ -178,7 +179,7 @@ def kill_expired_containers(self, app: Flask): if deleted: db.session.commit() - def create_container(self, image: str, port: int, command: str, volumes: str): + def create_container(self, image: str, port: int, command: str, volumes: str, owner: str = None): if self.client is None: raise ContainerException("ACI client is not initialized") @@ -206,8 +207,14 @@ def create_container(self, image: str, port: int, command: str, volumes: str): pass cpu = round(cpu * 
100) / 100 - unique = uuid.uuid4().hex[:8] - group_name = f"{dns_prefix}-{unique}"[:63].rstrip("-") + unique = uuid.uuid4().hex[:4] + if owner: + slug = re.sub(r"[^a-z0-9-]+", "-", owner.lower()).strip("-") + slug = re.sub(r"-{2,}", "-", slug) or "user" + group_name = f"{dns_prefix}-{slug}-{unique}" + else: + group_name = f"{dns_prefix}-{uuid.uuid4().hex[:8]}" + group_name = group_name[:63].rstrip("-") dns_label = group_name command_list = None From 9ff50514978d07cb7619223faa6761109a2fb2ef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CMatt?= Date: Sun, 17 May 2026 14:29:17 -0400 Subject: [PATCH 16/24] Fix NameError on /containers/dashboard route_containers_dashboard referenced a bare `settings` name that doesn't exist in its scope, causing every visit to the admin containers dashboard to 500. Use container_manager.settings, which is the live dict already available in the closure. Co-Authored-By: Claude Opus 4.7 --- __init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/__init__.py b/__init__.py index 619bfa7..b434b65 100644 --- a/__init__.py +++ b/__init__.py @@ -646,7 +646,7 @@ def route_containers_dashboard(): except ContainerException: running_containers[i].is_running = False - backend = settings.get("backend", "docker") + backend = container_manager.settings.get("backend", "docker") backend_label = "Azure Container Instances" if backend == "aci" else "Docker" return render_template( From ab22bd0a89256004df3ddf025834ce959ea38f54 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CMatt?= Date: Sun, 17 May 2026 14:45:29 -0400 Subject: [PATCH 17/24] Use unicode-aware slugify for ACI hostnames MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous regex sanitizer left odd output for usernames with accents (héllo -> h-llo), pure-symbol names ('!!!' -> empty), or non-ASCII scripts (日本語 -> empty -> invalid Azure name). 
Replace it with a small NFKD-based slugifier that strips diacritics, collapses non-alphanumerics into single hyphens, caps length, and falls back to 'user' for anything that slugs to empty. Validated against 14 edge cases (accents, mixed case, oversized, unicode-only, all-symbol, leading/trailing punctuation, etc.). Co-Authored-By: Claude Opus 4.7 --- container_manager_aci.py | 27 ++++++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) diff --git a/container_manager_aci.py b/container_manager_aci.py index a6a7c59..2ff0f6e 100644 --- a/container_manager_aci.py +++ b/container_manager_aci.py @@ -2,8 +2,31 @@ import re import shlex import time +import unicodedata import uuid + +def _slugify(value, max_len=20, fallback="user"): + """Return a DNS-safe slug for ACI container-group and DNS labels. + + Azure requires names to be lowercase alphanumeric or hyphens, start + and end with alphanumeric, and at most 63 chars (full group name). + """ + if not value: + return fallback + # Strip accents and decompose unicode to ASCII where possible. + normalized = unicodedata.normalize("NFKD", str(value)) + ascii_only = normalized.encode("ascii", "ignore").decode("ascii") + ascii_only = ascii_only.lower() + # Replace any run of non-alphanumeric chars with a single hyphen. 
+ slug = re.sub(r"[^a-z0-9]+", "-", ascii_only) + slug = slug.strip("-") + if not slug: + return fallback + if len(slug) > max_len: + slug = slug[:max_len].rstrip("-") + return slug or fallback + from flask import Flask from apscheduler.schedulers.background import BackgroundScheduler from apscheduler.schedulers import SchedulerNotRunningError @@ -209,9 +232,7 @@ def create_container(self, image: str, port: int, command: str, volumes: str, ow unique = uuid.uuid4().hex[:4] if owner: - slug = re.sub(r"[^a-z0-9-]+", "-", owner.lower()).strip("-") - slug = re.sub(r"-{2,}", "-", slug) or "user" - group_name = f"{dns_prefix}-{slug}-{unique}" + group_name = f"{dns_prefix}-{_slugify(owner)}-{unique}" else: group_name = f"{dns_prefix}-{uuid.uuid4().hex[:8]}" group_name = group_name[:63].rstrip("-") From 9e19bd6b112b3cd23478003a46a829101cb3637e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9CMatt?= Date: Fri, 22 May 2026 09:34:17 -0400 Subject: [PATCH 18/24] Add per-challenge size tiers for container resources Container challenges previously used a single global CPU/RAM setting (default 1 vCPU / 1.5 GB), so intensive challenges like an LLM couldn't get more without bumping every container. Add a per-challenge Size tier (small/medium/large) selectable in the admin create/edit form. Each tier maps to fixed CPU/memory values that flow through the spawn path into both the ACI and Docker backends; the global setting remains the fallback for the unset path. "small" equals the historical default, so existing challenges are unchanged. A guarded ALTER TABLE in load() adds the new `size` column on upgrade (create_all never alters existing tables) and backfills rows to 'small'. 
Co-Authored-By: Claude Opus 4.7 --- __init__.py | 39 +++++++++++++++++++++++++++++---- assets/create.html | 15 +++++++++++++ assets/update.html | 15 +++++++++++++ container_manager.py | 23 +++++++++++--------- container_manager_aci.py | 47 ++++++++++++++++++++++++++-------------- models.py | 20 +++++++++++++++++ 6 files changed, 129 insertions(+), 30 deletions(-) diff --git a/__init__.py b/__init__.py index b434b65..37cc974 100644 --- a/__init__.py +++ b/__init__.py @@ -35,7 +35,7 @@ def _versioned(path): from CTFd.utils.user import get_current_user from CTFd.utils.modes import get_model -from .models import ContainerChallengeModel, ContainerInfoModel, ContainerSettingsModel +from .models import ContainerChallengeModel, ContainerInfoModel, ContainerSettingsModel, resolve_size from .container_manager import ContainerManager, ContainerException from .container_manager_aci import ACIContainerManager @@ -79,6 +79,7 @@ def read(cls, challenge): "image": challenge.image, "port": challenge.port, "command": challenge.command, + "size": challenge.size, "initial": challenge.initial, "decay": challenge.decay, "minimum": challenge.minimum, @@ -187,8 +188,37 @@ def settings_to_dict(settings): } +def _ensure_size_column(): + """Add the per-challenge `size` column on installs that predate it. + + `db.create_all()` creates missing tables but never ALTERs existing ones, so + upgrading an instance that already has challenges needs this. The ADD COLUMN + ... DEFAULT 'small' also backfills existing rows on MySQL/MariaDB and SQLite, + so legacy challenges keep their current 1 vCPU / 1.5 GB behavior. Idempotent. 
+ """ + from sqlalchemy import inspect as sa_inspect, text + + table = ContainerChallengeModel.__table__.name + try: + columns = [c["name"] for c in sa_inspect(db.engine).get_columns(table)] + except Exception as e: + print(f"[CTFd] could not inspect {table} for `size` column: {e}") + return + if "size" in columns: + return + try: + with db.engine.begin() as conn: + conn.execute( + text(f"ALTER TABLE {table} ADD COLUMN size VARCHAR(16) DEFAULT 'small'") + ) + print(f"[CTFd] added `size` column to {table}") + except Exception as e: + print(f"[CTFd] failed to add `size` column to {table}: {e}") + + def load(app: Flask): app.db.create_all() + _ensure_size_column() CHALLENGE_CLASSES["container"] = ContainerChallenge register_plugin_assets_directory( app, base_path="/plugins/containers/assets/" @@ -256,12 +286,12 @@ def _running_response(row: ContainerInfoModel): "expires": row.expires, } - def _provision_async(manager, row_id, image, internal_port, command, volumes, expiration_seconds, owner=None): + def _provision_async(manager, row_id, image, internal_port, command, volumes, expiration_seconds, owner=None, cpu=None, memory=None): with app.app_context(): if ContainerInfoModel.query.get(row_id) is None: return try: - created = manager.create_container(image, internal_port, command, volumes, owner=owner) + created = manager.create_container(image, internal_port, command, volumes, owner=owner, cpu=cpu, memory=memory) except ContainerException as e: row = ContainerInfoModel.query.get(row_id) if row is not None: @@ -333,12 +363,13 @@ def _spawn_new(challenge, user_id, user_name=None): return {"status": existing.status, "id": existing.id}, 202 return {"error": "Concurrent request collision"}, 500 + cpu, memory_mb = resolve_size(getattr(challenge, "size", None)) threading.Thread( target=_provision_async, args=(container_manager, row.id, challenge.image, challenge.port, challenge.command, challenge.volumes, container_manager.expiration_seconds), - kwargs={"owner": user_name}, 
+ kwargs={"owner": user_name, "cpu": cpu, "memory": memory_mb}, daemon=True, ).start() diff --git a/assets/create.html b/assets/create.html index 8893172..d971a33 100644 --- a/assets/create.html +++ b/assets/create.html @@ -68,6 +68,21 @@ +
+ + +
+
+
+ + +
+
diff --git a/assets/update.js b/assets/update.js index 0e33b01..f7aeb73 100644 --- a/assets/update.js +++ b/assets/update.js @@ -1,28 +1,58 @@ -var containerImage = document.getElementById("container-image"); -var containerImageDefault = document.getElementById("container-image-default"); -var path = "/containers/api/images"; +(function () { + var containerImage = document.getElementById("container-image"); + var containerImageDefault = document.getElementById("container-image-default"); -var xhr = new XMLHttpRequest(); -xhr.open("GET", path, true); -xhr.setRequestHeader("Accept", "application/json"); -xhr.setRequestHeader("CSRF-Token", init.csrfNonce); -xhr.send(); -xhr.onload = function () { - var data = JSON.parse(this.responseText); - if (data.error != undefined) { - // Error - containerImageDefault.innerHTML = data.error; - } else { - // Success - for (var i = 0; i < data.images.length; i++) { + function setStatus(msg) { + if (containerImageDefault) containerImageDefault.innerHTML = msg; + } + function existingValues() { + var vals = {}; + for (var i = 0; i < containerImage.options.length; i++) { + vals[containerImage.options[i].value] = true; + } + return vals; + } + + // The saved image is already rendered as a