diff --git a/.dockerignore b/.dockerignore index 609a1b746..384ba4b99 100644 --- a/.dockerignore +++ b/.dockerignore @@ -1,4 +1,16 @@ +.git +node_modules +.venv env +__pycache__ +*.pyc +*.pyo enferno/media enferno/imports logs +backups +.env +.env.* +.DS_Store +*.md +docs/node_modules diff --git a/.gitignore b/.gitignore index fea7c8a77..f63a36744 100644 --- a/.gitignore +++ b/.gitignore @@ -30,3 +30,9 @@ backups/* cookies.txt *.egg-info/ + +# Release signing: NEVER commit secret keys. The pinned public key is baked +# into the installer/updater, not stored as a loose file in the repo. +*.key +bayanat-release.key +bayanat-release.pub diff --git a/CHANGELOG.md b/CHANGELOG.md index bcd75c710..e478eafd0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,11 @@ # Changelog +## v4.0.1 + +### Fixed + +- Bulk OCR: celery worker now consumes the `ocr` queue. The systemd unit written by the installer was only subscribing to the default `celery` queue, so tasks dispatched by bulk OCR (UI and `flask ocr process`) silently piled up in Redis. Single-media OCR was not affected. Existing installs can fix in place by adding `-Q celery,ocr` to `ExecStart` in `/etc/systemd/system/bayanat-celery.service`, then `systemctl daemon-reload && systemctl restart bayanat-celery`. + ## v4.0.0 ### Database Migrations (Alembic) diff --git a/SAFE_EXPUNGING.md b/SAFE_EXPUNGING.md new file mode 100644 index 000000000..cdc2eb497 --- /dev/null +++ b/SAFE_EXPUNGING.md @@ -0,0 +1,43 @@ +# Safe Expunging Process + +This document describes when and how history-altering operations are permitted on the Bayanat source repository, satisfying the SLSA v1.2 Source track "Safe Expunging Process" requirement. + +## Scope + +Applies to the public repository `sjacorg/bayanat` and the private release repository `sjacorg/bayanat.prod`, specifically to operations that remove or rewrite committed history on protected references (`main`, release tags matching `v*`). 
+ +## Default + +History on protected references is append-only. Force-push, branch deletion, tag deletion, and retagging are blocked by repository rulesets. + +## Permitted Reasons to Expunge + +Expunging may be approved only for one of the following reasons: + +1. **Secret leak.** An unredacted credential, private key, or access token was committed. +2. **Personal data leak.** Non-public personal data of an identifiable individual was committed. +3. **Legal or safety order.** A verified order from counsel or a credible safety concern requires removal of specific content. +4. **Malicious injection.** Attacker-introduced code or data must be removed as part of incident response. + +Bug fixes, style corrections, and cleanup are never valid reasons. + +## Approval + +Both maintainers must approve in writing, recorded in the security advisory created for the incident. + +## Procedure + +1. File a private security advisory at https://github.com/sjacorg/bayanat/security/advisories with the reason, affected commits, and proposed action. +2. Record both maintainer approvals in the advisory. +3. If the reason involves a secret, rotate it before rewriting. +4. Rewrite with `git filter-repo` (not `filter-branch`), preserving commit signatures where possible. +5. Temporarily bypass branch protection, force-update the protected reference, then re-enable protection. +6. Invalidate and regenerate any affected release tags. Old tags are not reused. + +## Consumer Notification + +After any expunging action, publish a public security advisory that includes: + +- What was removed and why (redacted as needed). +- New commit hashes and release tags that replace the expunged revisions. +- Operator guidance (re-clone, re-verify signatures, check deployed commit against the new history). 
diff --git a/bayanat b/bayanat index c5d350958..a607a08fc 100755 --- a/bayanat +++ b/bayanat @@ -608,19 +608,31 @@ _setup_database() { systemctl enable --now postgresql redis-server log "Setting up database..." - sudo -u postgres createuser -s "$APP_USER" 2>/dev/null || true + # Role is a plain owner, not a superuser. Extensions are created by the + # postgres superuser below so the app role doesn't need that privilege. + sudo -u postgres createuser "$APP_USER" 2>/dev/null || true + # Idempotent for upgrades: drop superuser if a previous install granted it. + sudo -u postgres psql -c "ALTER USER \"$APP_USER\" NOSUPERUSER;" 2>/dev/null || true sudo -u postgres createdb bayanat -O "$APP_USER" 2>/dev/null || true - - # Configure pg_hba trust auth for app user + sudo -u postgres psql -d bayanat \ + -c "CREATE EXTENSION IF NOT EXISTS pg_trgm;" \ + -c "CREATE EXTENSION IF NOT EXISTS postgis;" >/dev/null + + # Configure pg_hba peer auth for app user. Peer auth maps the OS user to + # the PG role over the local socket, so only processes running as + # $APP_USER can connect as $APP_USER. Replaces the previous 'trust' rule + # which let any local OS user connect as the app role. local pg_hba pg_hba=$(find /etc/postgresql -name pg_hba.conf 2>/dev/null | head -1) [[ -n "$pg_hba" ]] || die "Cannot find pg_hba.conf" - local rule="local all $APP_USER trust" + local rule="local all $APP_USER peer" if grep -qF "$rule" "$pg_hba"; then log "pg_hba.conf already configured" else log "Configuring pg_hba.conf for $APP_USER" + # Remove any prior trust rule for this user from a previous install + sed -i "/^local[[:space:]]\+all[[:space:]]\+$APP_USER[[:space:]]\+.*trust/d" "$pg_hba" # Insert before the "local all all" catch-all, or append if grep -q "^local.*all.*all" "$pg_hba"; then sed -i "/^local.*all.*all/i $rule" "$pg_hba" @@ -724,6 +736,36 @@ _init_database() { fi } +ADMIN_USERNAME="" +ADMIN_PASSWORD="" + +_bootstrap_admin() { + # Provision the initial admin out-of-band. 
Replaces the deleted + # /api/create-admin wizard endpoint, which was unauthenticated and + # claimable by the first network client during the install window. + # The password is fed to flask install over stdin (--password-stdin) + # so it is never visible in /proc/<pid>/cmdline or `ps`. + local tag="$1" + local pw + pw=$(python3 -c "import secrets; print(secrets.token_urlsafe(20))" 2>/dev/null) || \ + pw=$(openssl rand -base64 24 | tr -d '/+=' | head -c 24) + + log "Bootstrapping initial admin user..." + local out + out=$(printf '%s\n' "$pw" \ + | flask_run "$tag" install --username admin --password-stdin 2>&1) || true + + if echo "$out" | grep -q "already installed"; then + log "Admin user already exists, skipping bootstrap" + elif echo "$out" | grep -q "installed successfully"; then + ADMIN_USERNAME="admin" + ADMIN_PASSWORD="$pw" + else + warn "Admin bootstrap unexpected output:" + warn "$out" + fi +} + _install_uwsgi_config() { cat > "$SHARED_DIR/uwsgi-prod.ini" << EOF [uwsgi] @@ -776,7 +818,7 @@ User=$APP_USER Group=$APP_USER WorkingDirectory=$CURRENT_LINK EnvironmentFile=$SHARED_DIR/.env -ExecStart=$CURRENT_LINK/.venv/bin/celery -A enferno.tasks worker --autoscale 2,5 -B +ExecStart=$CURRENT_LINK/.venv/bin/celery -A enferno.tasks worker --autoscale 2,5 -B -Q celery,ocr Restart=always RestartSec=3 @@ -819,7 +861,6 @@ EOF _install_sudoers() { cat > /etc/sudoers.d/bayanat << 'EOF' -bayanat ALL=(root) NOPASSWD: /usr/local/sbin/bayanat-start-update bayanat ALL=(root) NOPASSWD: /usr/local/bin/bayanat status bayanat ALL=(root) NOPASSWD: /usr/local/bin/bayanat snapshots bayanat ALL=(root) NOPASSWD: /usr/bin/systemctl restart bayanat-celery @@ -828,22 +869,6 @@ EOF visudo -cf /etc/sudoers.d/bayanat >/dev/null || die "sudoers syntax invalid" } -_install_update_wrapper() { - # Root-owned wrapper. Launches `bayanat update` as a transient systemd - # unit so the update outlives Flask restart, SSH disconnect, and - # browser close. Must be in sudoers at this exact path. 
- install -m 0755 -o root -g root /dev/stdin /usr/local/sbin/bayanat-start-update <<'EOF' -#!/bin/bash -# Installed root:root 0755 by `bayanat install`. Do not edit. -set -euo pipefail -exec /usr/bin/systemd-run \ - --unit=bayanat-update \ - --collect \ - --property=Restart=no \ - /usr/local/bin/bayanat update -EOF -} - _install_self() { # Copy the bayanat CLI from the given release directory to # /usr/local/bin/bayanat. Source is $RELEASES_DIR/$tag/bayanat, NOT $0 — @@ -895,6 +920,7 @@ cmd_install() { _install_deps "$tag" _init_database "$tag" + _bootstrap_admin "$tag" # Activate release swap_symlink "$RELEASES_DIR/$tag" @@ -905,7 +931,6 @@ cmd_install() { _install_systemd _configure_caddy "$domain" _install_sudoers - _install_update_wrapper _install_self "$tag" chown -R "$APP_USER:$APP_USER" "$BAYANAT_ROOT" @@ -915,11 +940,31 @@ cmd_install() { _verify_service_health - log "Installation complete" + local access_url if [[ "$domain" == "localhost" ]]; then - log "Access: http://$(hostname -I | awk '{print $1}')" + access_url="http://$(hostname -I | awk '{print $1}')" else - log "Access: https://$domain" + access_url="https://$domain" + fi + + log "Installation complete" + log "Access: $access_url" + + if [[ -n "$ADMIN_PASSWORD" ]]; then + log "" + log "============================================================" + log " Bayanat is ready. Sign in to finish setup:" + log "" + log " URL : $access_url/login" + log " Username : $ADMIN_USERNAME" + log " Password : $ADMIN_PASSWORD" + log "" + log " Save these credentials now - the password is not stored" + log " in plaintext anywhere. After signing in, the setup wizard" + log " will walk you through language, default data, and other" + log " configuration. Change the password from your account" + log " settings." 
+ log "============================================================" fi } diff --git a/docker-compose.yml b/docker-compose.yml index 3d7cf788e..35eaf8a24 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -44,14 +44,14 @@ services: dockerfile: ./flask/Dockerfile args: - ROLE=flask - - ENV_FILE=${ENV_FILE:-.env} + - ENV_FILE=${ENV_FILE:-.env.docker} volumes: - '${PWD}/backups:/app/backups/:rw' - '${MEDIA_PATH:-./enferno/media}:/app/enferno/media/:rw' - '${PWD}/enferno/imports:/app/enferno/imports/:rw' - '${PWD}/logs/:/app/logs/:rw' - '${PWD}/config.json:/app/config.json:rw' - - '${PWD}/${ENV_FILE:-.env}:/app/.env:ro' + - '${PWD}/${ENV_FILE:-.env.docker}:/app/.env:ro' depends_on: postgres: condition: service_healthy @@ -72,7 +72,7 @@ services: dockerfile: ./flask/Dockerfile args: - ROLE=celery - - ENV_FILE=${ENV_FILE:-.env} + - ENV_FILE=${ENV_FILE:-.env.docker} volumes_from: - bayanat read_only: true @@ -93,7 +93,7 @@ services: dockerfile: ./flask/Dockerfile args: - ROLE=celery-ocr - - ENV_FILE=${ENV_FILE:-.env} + - ENV_FILE=${ENV_FILE:-.env.docker} volumes_from: - bayanat read_only: true @@ -127,9 +127,9 @@ services: - /opt/bitnami/nginx/logs/ - /opt/bitnami/nginx/conf/bitnami/certs/ healthcheck: - test: [ "CMD", "service", "nginx", "status" ] - interval: 3s - retries: 10 + test: [ "CMD", "bash", "-c", "exec 3<>/dev/tcp/localhost/80" ] + interval: 10s + retries: 5 volumes: redis_data: diff --git a/docs/deployment/auto-update-runbook.md b/docs/deployment/auto-update-runbook.md index f6a598588..9d1edcb79 100644 --- a/docs/deployment/auto-update-runbook.md +++ b/docs/deployment/auto-update-runbook.md @@ -97,7 +97,6 @@ sudo bayanat update --recover | Path | Purpose | |---|---| | `/usr/local/bin/bayanat` | The CLI script | -| `/usr/local/sbin/bayanat-start-update` | Root wrapper the UI invokes via sudo | | `/etc/sudoers.d/bayanat` | Granted commands for the `bayanat` user | | `/opt/bayanat/state/update.json` | Current update state (sanitized JSON) | | 
`/opt/bayanat/state/update.lock` | PID lock file | @@ -106,10 +105,12 @@ sudo bayanat update --recover ## Admin UI surface -- Nav-bar banner chip: shows when `latest != current` -- Progress dialog: polls `/admin/api/updates/status` every 2 s during an - active update -- Settings toggle: System Administration -> "Auto-apply patch releases" +The UI is read-only for updates: it surfaces availability but never applies +an update. Updates run from the CLI as root (`sudo bayanat update`). + +- Nav-bar banner chip: shows when `latest != current`, with the CLI command + to run on the server +- Status: `/admin/api/updates/status` reflects a CLI-initiated update's state - Snapshots page: `/admin/snapshots/` (read-only list; restore stays on the CLI) diff --git a/docs/deployment/docker.md b/docs/deployment/docker.md index 7652ec481..1a118b8a5 100644 --- a/docs/deployment/docker.md +++ b/docs/deployment/docker.md @@ -6,31 +6,53 @@ Docker Compose deployment is still in beta. For production environments, [native ## Prerequisites -- Docker and Docker Compose installed -- `.env` file configured (see [Configuration](/deployment/configuration)) +- Docker Engine with the Compose v2 plugin (`docker compose`, not the legacy `docker-compose` binary) +- `.env.docker` file configured (see [Configuration](/deployment/configuration)) ## Quick Start ```bash -docker-compose up -d +docker compose --env-file .env.docker up -d ``` This starts PostgreSQL, Redis, the Flask app, NGINX, and Celery. -## Create Admin User +::: tip +The `--env-file .env.docker` flag is required so Compose can substitute `${POSTGRES_USER}`, `${POSTGRES_PASSWORD}`, and `${REDIS_PASSWORD}` placeholders in `docker-compose.yml`. Without it, those services boot with empty credentials and the Flask container fails to connect. 
+::: + +## First Admin User + +The entrypoint creates an `admin` user automatically on the first startup +(when the database has no schema yet) and prints a one-time random +password to the container logs. Retrieve it with: ```bash -docker-compose exec bayanat uv run flask install +docker compose --env-file .env.docker logs bayanat | grep -A4 "Generated password" ``` +Sign in at the Bayanat URL with `admin` and the printed password. The +setup wizard runs after first login. Change the admin password from your +account settings afterwards. + +If the auto-bootstrap was missed or the admin account was deleted, run +the CLI directly: + +```bash +docker compose --env-file .env.docker exec bayanat uv run flask install -u admin +``` + +It generates a fresh password and prints it. If an admin already exists +the command exits without changing anything. + ## Development ```bash -docker-compose -f docker-compose-dev.yml up +docker compose -f docker-compose-dev.yml up ``` ## Testing ```bash -docker-compose -f docker-compose-test.yml up +docker compose -f docker-compose-test.yml up ``` diff --git a/docs/deployment/installation.md b/docs/deployment/installation.md index 49d20529d..ca918fce0 100644 --- a/docs/deployment/installation.md +++ b/docs/deployment/installation.md @@ -34,7 +34,7 @@ This will: - Set up systemd services for Bayanat and Celery - Start everything -Once complete, open your domain in a browser. The setup wizard will guide you through creating an admin account and configuring the application. +Once complete, the installer prints the initial `admin` username and a one-time generated password to the terminal. Open your domain in a browser, sign in with those credentials, then the setup wizard will guide you through configuring the application. Change the admin password from your account settings after first login. **Check status:** @@ -150,7 +150,7 @@ uv run flask install uv run flask run ``` -Access at [http://127.0.0.1:5000](http://127.0.0.1:5000). 
The setup wizard will guide further configuration. +Access at [http://127.0.0.1:5000](http://127.0.0.1:5000). Sign in with the credentials printed by `flask install`, then the setup wizard will guide further configuration. ::: warning `flask run` is development mode only. Continue with the steps below for production. @@ -243,14 +243,19 @@ sudo systemctl enable --now bayanat-celery Docker deployment is still in beta. For production, native deployment is recommended. ::: -After [configuring](/deployment/configuration) and generating a `.env` file: +After [configuring](/deployment/configuration) and generating a `.env.docker` file: ```bash -docker-compose up -d +docker compose --env-file .env.docker up -d ``` -Install the admin user: +The first startup creates an `admin` user and prints a generated +password to the container logs. Retrieve it with: ```bash -docker-compose exec bayanat uv run flask install +docker compose --env-file .env.docker logs bayanat | grep -A4 "Generated password" ``` + +If the auto-bootstrap was missed or the admin was deleted, run +`docker compose --env-file .env.docker exec bayanat uv run flask install -u admin` +to mint a fresh credential. diff --git a/enferno/admin/models/Actor.py b/enferno/admin/models/Actor.py index 968848586..b2a403122 100644 --- a/enferno/admin/models/Actor.py +++ b/enferno/admin/models/Actor.py @@ -456,75 +456,36 @@ def from_json(self, json: dict[str, Any]) -> "Actor": # Related Actors (actor_relations) if "actor_relations" in json: - # collect related actors ids (helps with finding removed ones) - rel_ids = [] - for relation in json["actor_relations"]: - actor = db.session.get(Actor, relation["actor"]["id"]) - - # Extra (check those actors exit) - - if actor: - rel_ids.append(actor.id) - # this will update/create the relationship (will flush to db!) 
- self.relate_actor(actor, relation=relation) - - # Find out removed relations and remove them - # just loop existing relations and remove if the destination actor not in the related ids - - for r in self.actor_relations: - # get related actor (in or out) - rid = r.get_other_id(self.id) - if not (rid in rel_ids): - r.delete() - - # -revision related - db.session.get(Actor, rid).create_revision() + self.sync_relations( + json["actor_relations"], + Actor, + "actor", + self.relate_actor, + self.actor_relations, + lambda r: r.get_other_id(self.id), + ) # Related Bulletins (bulletin_relations) if "bulletin_relations" in json: - # collect related bulletin ids (helps with finding removed ones) - rel_ids = [] - for relation in json["bulletin_relations"]: - bulletin = db.session.get(Bulletin, relation["bulletin"]["id"]) - - # Extra (check those bulletins exit) - if bulletin: - rel_ids.append(bulletin.id) - # this will update/create the relationship (will flush to db!) - self.relate_bulletin(bulletin, relation=relation) - - # Find out removed relations and remove them - # just loop existing relations and remove if the destination bulletin not in the related ids - for r in self.bulletin_relations: - if not (r.bulletin_id in rel_ids): - rel_bulletin = r.bulletin - r.delete() - - # -revision related - rel_bulletin.create_revision() - - # Related Incidents (incidents_relations) + self.sync_relations( + json["bulletin_relations"], + Bulletin, + "bulletin", + self.relate_bulletin, + self.bulletin_relations, + lambda r: r.bulletin_id, + ) + + # Related Incidents (incident_relations) if "incident_relations" in json: - # collect related incident ids (helps with finding removed ones) - rel_ids = [] - for relation in json["incident_relations"]: - incident = db.session.get(Incident, relation["incident"]["id"]) - if incident: - rel_ids.append(incident.id) - # helper method to update/create the relationship (will flush to db) - self.relate_incident(incident, relation=relation) - - # Find 
out removed relations and remove them - # just loop existing relations and remove if the destination incident no in the related ids - - for r in self.incident_relations: - # get related bulletin (in or out) - if not (r.incident_id in rel_ids): - rel_incident = r.incident - r.delete() - - # -revision related incident - rel_incident.create_revision() + self.sync_relations( + json["incident_relations"], + Incident, + "incident", + self.relate_incident, + self.incident_relations, + lambda r: r.incident_id, + ) if "comments" in json: self.comments = json["comments"] diff --git a/enferno/admin/models/Bulletin.py b/enferno/admin/models/Bulletin.py index 333d58746..919eb6ac2 100644 --- a/enferno/admin/models/Bulletin.py +++ b/enferno/admin/models/Bulletin.py @@ -411,74 +411,36 @@ def from_json(self, json: dict[str, Any]) -> "Bulletin": # Related Bulletins (bulletin_relations) if "bulletin_relations" in json: - # collect related bulletin ids (helps with finding removed ones) - rel_ids = [] - for relation in json["bulletin_relations"]: - bulletin = db.session.get(Bulletin, relation["bulletin"]["id"]) - # Extra (check those bulletins exit) - - if bulletin: - rel_ids.append(bulletin.id) - # this will update/create the relationship (will flush to db) - self.relate_bulletin(bulletin, relation=relation) - - # Find out removed relations and remove them - # just loop existing relations and remove if the destination bulletin no in the related ids - - for r in self.bulletin_relations: - # get related bulletin (in or out) - rid = r.get_other_id(self.id) - if not (rid in rel_ids): - r.delete() - - # ------- create revision on the other side of the relationship - db.session.get(Bulletin, rid).create_revision() - - # Related Actors (actors_relations) + self.sync_relations( + json["bulletin_relations"], + Bulletin, + "bulletin", + self.relate_bulletin, + self.bulletin_relations, + lambda r: r.get_other_id(self.id), + ) + + # Related Actors (actor_relations) if "actor_relations" in json: - 
# collect related bulletin ids (helps with finding removed ones) - rel_ids = [] - for relation in json["actor_relations"]: - actor = db.session.get(Actor, relation["actor"]["id"]) - if actor: - rel_ids.append(actor.id) - # helper method to update/create the relationship (will flush to db) - self.relate_actor(actor, relation=relation) - - # Find out removed relations and remove them - # just loop existing relations and remove if the destination actor no in the related ids - - for r in self.actor_relations: - # get related bulletin (in or out) - if not (r.actor_id in rel_ids): - rel_actor = r.actor - r.delete() - - # --revision relation - rel_actor.create_revision() - - # Related Incidents (incidents_relations) + self.sync_relations( + json["actor_relations"], + Actor, + "actor", + self.relate_actor, + self.actor_relations, + lambda r: r.actor_id, + ) + + # Related Incidents (incident_relations) if "incident_relations" in json: - # collect related incident ids (helps with finding removed ones) - rel_ids = [] - for relation in json["incident_relations"]: - incident = db.session.get(Incident, relation["incident"]["id"]) - if incident: - rel_ids.append(incident.id) - # helper method to update/create the relationship (will flush to db) - self.relate_incident(incident, relation=relation) - - # Find out removed relations and remove them - # just loop existing relations and remove if the destination incident no in the related ids - - for r in self.incident_relations: - # get related bulletin (in or out) - if not (r.incident_id in rel_ids): - rel_incident = r.incident - r.delete() - - # --revision relation - rel_incident.create_revision() + self.sync_relations( + json["incident_relations"], + Incident, + "incident", + self.relate_incident, + self.incident_relations, + lambda r: r.incident_id, + ) self.publish_date = json.get("publish_date", None) if self.publish_date == "": diff --git a/enferno/admin/models/Incident.py b/enferno/admin/models/Incident.py index 
09bb06fd6..aeba2732e 100644 --- a/enferno/admin/models/Incident.py +++ b/enferno/admin/models/Incident.py @@ -283,76 +283,36 @@ def from_json(self, json: dict[str, Any]) -> "Incident": # Related Actors (actor_relations) if "actor_relations" in json and "check_ar" in json: - # collect related actors ids (helps with finding removed ones) - rel_ids = [] - for relation in json["actor_relations"]: - actor = db.session.get(Actor, relation["actor"]["id"]) - - # Extra (check those actors exit) - - if actor: - rel_ids.append(actor.id) - # this will update/create the relationship (will flush to db!) - self.relate_actor(actor, relation=relation) - - # Find out removed relations and remove them - # just loop existing relations and remove if the destination actor not in the related ids - - for r in self.actor_relations: - if not (r.actor_id in rel_ids): - rel_actor = r.actor - r.delete() - - # -revision related actor - rel_actor.create_revision() + self.sync_relations( + json["actor_relations"], + Actor, + "actor", + self.relate_actor, + self.actor_relations, + lambda r: r.actor_id, + ) # Related Bulletins (bulletin_relations) if "bulletin_relations" in json and "check_br" in json: - # collect related bulletin ids (helps with finding removed ones) - rel_ids = [] - for relation in json["bulletin_relations"]: - bulletin = db.session.get(Bulletin, relation["bulletin"]["id"]) - - # Extra (check those bulletins exit) - if bulletin: - rel_ids.append(bulletin.id) - # this will update/create the relationship (will flush to db!) 
- self.relate_bulletin(bulletin, relation=relation) - - # Find out removed relations and remove them - # just loop existing relations and remove if the destination bulletin not in the related ids - for r in self.bulletin_relations: - if not (r.bulletin_id in rel_ids): - rel_bulletin = r.bulletin - r.delete() - - # -revision related bulletin - rel_bulletin.create_revision() - - # Related Incidnets (incident_relations) + self.sync_relations( + json["bulletin_relations"], + Bulletin, + "bulletin", + self.relate_bulletin, + self.bulletin_relations, + lambda r: r.bulletin_id, + ) + + # Related Incidents (incident_relations) if "incident_relations" in json and "check_ir" in json: - # collect related incident ids (helps with finding removed ones) - rel_ids = [] - for relation in json["incident_relations"]: - incident = db.session.get(Incident, relation["incident"]["id"]) - # Extra (check those incidents exit) - - if incident: - rel_ids.append(incident.id) - # this will update/create the relationship (will flush to db) - self.relate_incident(incident, relation=relation) - - # Find out removed relations and remove them - # just loop existing relations and remove if the destination incident no in the related ids - - for r in self.incident_relations: - # get related incident (in or out) - rid = r.get_other_id(self.id) - if not (rid in rel_ids): - r.delete() - - # - revision related incident - db.session.get(Incident, rid).create_revision() + self.sync_relations( + json["incident_relations"], + Incident, + "incident", + self.relate_incident, + self.incident_relations, + lambda r: r.get_other_id(self.id), + ) if "comments" in json: self.comments = json["comments"] diff --git a/enferno/admin/models/Media.py b/enferno/admin/models/Media.py index e0d4c7fc9..6dcea6cf7 100644 --- a/enferno/admin/models/Media.py +++ b/enferno/admin/models/Media.py @@ -1,5 +1,6 @@ import json import pathlib +import secrets from pathlib import Path from typing import Any from unidecode import unidecode 
@@ -145,6 +146,20 @@ def generate_file_name(filename: str) -> str: decoded = secure_filename(unidecode(filename)).lower() return f"{DateHelper.utcnow().strftime('%Y%m%d-%H%M%S')}-{decoded}" + @staticmethod + def generate_inline_file_name(filename: str) -> str: + """Opaque, unguessable name for inline rich-text uploads (BAY-01-020). + + Inline media is served on a session-only route with no per-item access + check, so the old timestamp+basename name let any authenticated user + reconstruct a filename and fetch media for items they can't access. A + random token makes the URL a capability only held by viewers of the + (access-controlled) description that embeds it. + """ + decoded = secure_filename(unidecode(filename)).lower().rsplit(".", 1) + suffix = f".{decoded[1]}" if len(decoded) == 2 and decoded[1] else "" + return f"{secrets.token_urlsafe(24)}{suffix}" + @staticmethod def validate_file_extension(filepath: str, allowed_extensions: list[str]) -> bool: """ diff --git a/enferno/admin/validation/models.py b/enferno/admin/validation/models.py index 65057f6e2..f6b38ba1c 100644 --- a/enferno/admin/validation/models.py +++ b/enferno/admin/validation/models.py @@ -1902,7 +1902,6 @@ class FullConfigValidationModel(ConfigValidationModel): SECURITY_FRESHNESS: int = Field(gt=0) SECURITY_FRESHNESS_GRACE_PERIOD: int = Field(ge=0) DISABLE_MULTIPLE_SESSIONS: bool - AUTO_APPLY_PATCH_UPDATES: bool = False RECAPTCHA_ENABLED: bool RECAPTCHA_PUBLIC_KEY: Optional[str] = None RECAPTCHA_PRIVATE_KEY: Optional[str] = None @@ -2086,7 +2085,11 @@ def validate_url(cls, v: HttpUrl) -> str: if domain.startswith("www."): domain = domain[4:] allowed_domains = Config.get("YTDLP_ALLOWED_DOMAINS") - if not any(domain.endswith(allowed) for allowed in allowed_domains): + # Match the registered domain or a real subdomain, not any suffix + # (BAY-01-014): plain endswith let "evilyoutube.com" pass "youtube.com". + if not any( + domain == allowed or domain.endswith("." 
+ allowed) for allowed in allowed_domains + ): raise ValueError(f"Imports not allowed from {domain}") return str(v) diff --git a/enferno/admin/views/__init__.py b/enferno/admin/views/__init__.py index a50c38de7..7b87ea473 100644 --- a/enferno/admin/views/__init__.py +++ b/enferno/admin/views/__init__.py @@ -3,7 +3,7 @@ import os from functools import wraps -from flask import Blueprint, g, request +from flask import Blueprint, current_app, g, request from flask_security.decorators import auth_required, current_user from enferno.admin.models import Activity @@ -100,6 +100,20 @@ def has_role_assignment_permission(roles: list) -> bool: return True +def fresh_auth(func): + """Require a freshly-authenticated session for privileged mutations. + + The freshness window is taken from the operator-configured + SECURITY_FRESHNESS / SECURITY_FRESHNESS_GRACE_PERIOD settings rather than a + hardcoded value (BAY-01-016), so admins re-authenticate before sensitive + state changes even on an otherwise-valid but stale session. 
+ """ + return auth_required( + within=lambda: current_app.config["SECURITY_FRESHNESS"], + grace=lambda: current_app.config["SECURITY_FRESHNESS_GRACE_PERIOD"], + )(func) + + @admin.before_request @auth_required("session") def before_request() -> None: @@ -119,7 +133,7 @@ def ctx() -> dict: Returns: - dict of users """ - users = User.query.order_by(User.username).all() + users = User.query.order_by(User.username).all() # noqa: F811 if current_user and current_user.is_authenticated: users = [u.to_compact() for u in users] return {"users": users} diff --git a/enferno/admin/views/actors.py b/enferno/admin/views/actors.py index d1040072e..2aa9f0ffe 100644 --- a/enferno/admin/views/actors.py +++ b/enferno/admin/views/actors.py @@ -154,15 +154,9 @@ def api_actors(validated_data: dict) -> Response: "name": item.name, "name_ar": item.name_ar, "status": item.status, - "assigned_to": ( - {"id": item.assigned_to.id, "name": item.assigned_to.name} - if item.assigned_to - else None - ), + "assigned_to": (item.assigned_to.to_compact() if item.assigned_to else None), "first_peer_reviewer": ( - {"id": item.first_peer_reviewer.id, "name": item.first_peer_reviewer.name} - if item.first_peer_reviewer - else None + item.first_peer_reviewer.to_compact() if item.first_peer_reviewer else None ), "roles": ( [ @@ -404,6 +398,7 @@ def api_actor_bulk_update( if not current_user.has_role("Admin"): # silently discard access roles bulk.pop("roles", None) + bulk.pop("rolesReplace", None) if ids and len(bulk): job = bulk_update_actors.delay(ids, bulk, current_user.id) diff --git a/enferno/admin/views/bulletins.py b/enferno/admin/views/bulletins.py index f6b9a62a3..04fb7bc0b 100644 --- a/enferno/admin/views/bulletins.py +++ b/enferno/admin/views/bulletins.py @@ -139,15 +139,9 @@ def api_bulletins(validated_data: dict) -> Response: "sjac_title": item.sjac_title, "sjac_title_ar": item.sjac_title_ar, "status": item.status, - "assigned_to": ( - {"id": item.assigned_to.id, "name": item.assigned_to.name} 
- if item.assigned_to - else None - ), + "assigned_to": (item.assigned_to.to_compact() if item.assigned_to else None), "first_peer_reviewer": ( - {"id": item.first_peer_reviewer.id, "name": item.first_peer_reviewer.name} - if item.first_peer_reviewer - else None + item.first_peer_reviewer.to_compact() if item.first_peer_reviewer else None ), "roles": ( [ @@ -397,6 +391,7 @@ def api_bulletin_bulk_update( if not current_user.has_role("Admin"): # silently discard access roles bulk.pop("roles", None) + bulk.pop("rolesReplace", None) if ids and len(bulk): job = bulk_update_bulletins.delay(ids, bulk, current_user.id) diff --git a/enferno/admin/views/history.py b/enferno/admin/views/history.py index 99c19322b..9d95cd4c1 100644 --- a/enferno/admin/views/history.py +++ b/enferno/admin/views/history.py @@ -1,13 +1,38 @@ from __future__ import annotations from flask import Response +from flask_security.decorators import current_user from sqlalchemy import desc -from enferno.admin.models import BulletinHistory, ActorHistory, IncidentHistory, LocationHistory +from enferno.admin.models import ( + Activity, + Actor, + ActorHistory, + Bulletin, + BulletinHistory, + Incident, + IncidentHistory, + LocationHistory, +) +from enferno.extensions import db from enferno.utils.http_response import HTTPResponse import enferno.utils.typing as t from . import admin, require_view_history + +def _deny_history(parent_label: str, parent_id: int) -> Response: + """Log a denied history view and return a forbidden response.""" + Activity.create( + current_user, + Activity.ACTION_VIEW, + Activity.STATUS_DENIED, + {"id": parent_id}, + parent_label, + details=f"Unauthorized attempt to view history of restricted {parent_label} {parent_id}.", + ) + return HTTPResponse.forbidden("Restricted Access") + + # Bulletin History Helpers @@ -23,6 +48,12 @@ def api_bulletinhistory(bulletinid: t.id) -> Response: Returns: - json feed of item's history / error. 
""" + bulletin = db.session.get(Bulletin, bulletinid) + if not bulletin: + return HTTPResponse.not_found("Bulletin not found") + if not current_user.can_access(bulletin): + return _deny_history("bulletin", bulletinid) + result = ( BulletinHistory.query.filter_by(bulletin_id=bulletinid) .order_by(desc(BulletinHistory.created_at)) @@ -49,6 +80,12 @@ def api_actorhistory(actorid: t.id) -> Response: Returns: - json feed of item's history / error. """ + actor = db.session.get(Actor, actorid) + if not actor: + return HTTPResponse.not_found("Actor not found") + if not current_user.can_access(actor): + return _deny_history("actor", actorid) + result = ( ActorHistory.query.filter_by(actor_id=actorid).order_by(desc(ActorHistory.created_at)).all() ) @@ -72,6 +109,12 @@ def api_incidenthistory(incidentid: t.id) -> Response: Returns: - json feed of item's history / error. """ + incident = db.session.get(Incident, incidentid) + if not incident: + return HTTPResponse.not_found("Incident not found") + if not current_user.can_access(incident): + return _deny_history("incident", incidentid) + result = ( IncidentHistory.query.filter_by(incident_id=incidentid) .order_by(desc(IncidentHistory.created_at)) diff --git a/enferno/admin/views/incidents.py b/enferno/admin/views/incidents.py index 9bff00f73..75fc37c4c 100644 --- a/enferno/admin/views/incidents.py +++ b/enferno/admin/views/incidents.py @@ -162,15 +162,9 @@ def api_incidents(validated_data: dict) -> Response: "title": item.title, "title_ar": item.title_ar, "status": item.status, - "assigned_to": ( - {"id": item.assigned_to.id, "name": item.assigned_to.name} - if item.assigned_to - else None - ), + "assigned_to": (item.assigned_to.to_compact() if item.assigned_to else None), "first_peer_reviewer": ( - {"id": item.first_peer_reviewer.id, "name": item.first_peer_reviewer.name} - if item.first_peer_reviewer - else None + item.first_peer_reviewer.to_compact() if item.first_peer_reviewer else None ), "roles": ( [ diff --git 
a/enferno/admin/views/media.py b/enferno/admin/views/media.py index 22c5554ba..58c3621f5 100644 --- a/enferno/admin/views/media.py +++ b/enferno/admin/views/media.py @@ -324,6 +324,7 @@ def api_medias_upload() -> Response: # return signed url from s3 valid for some time @admin.route("/api/media/") +@_require_media_access def serve_media( filename: str, ) -> Response: @@ -411,6 +412,7 @@ def serve_media( @admin.route("/api/serve/media/") +@_require_media_access def api_local_serve_media( filename: str, ) -> Response: @@ -458,6 +460,7 @@ def api_local_serve_media( @admin.route("/api/media//proxy") @auth_required() +@_require_media_access def api_media_proxy(id: int) -> Response: """Proxy media file through Flask -- ensures same-origin inline display for PDFs.""" media = Media.query.get(id) @@ -509,8 +512,9 @@ def api_inline_medias_upload() -> Response: f"File exceeds maximum allowed size of {max_size_mb} MB", status=413 ) - # final file - filename = Media.generate_file_name(f.filename) + # final file: opaque, unguessable name so inline media can't be + # enumerated/reconstructed by other users (BAY-01-020) + filename = Media.generate_inline_file_name(f.filename) filepath = (Media.inline_dir / filename).as_posix() f.save(filepath) response = {"location": filename} @@ -540,6 +544,7 @@ def api_local_serve_inline_media(filename: str) -> Response: @admin.get("/api/media/") @auth_required("session") +@_require_media_access def api_media_get(id: int): """Get a single media item by ID with extraction and bulletin info.""" media = Media.query.get(id) @@ -586,14 +591,14 @@ def api_media_update(id: t.id, validated_data: dict) -> Response: if media is None: return HTTPResponse.not_found("Media not found") - if not current_user.can_access(media): + if not current_user.can_edit(media): Activity.create( current_user, Activity.ACTION_VIEW, Activity.STATUS_DENIED, validated_data, "media", - details="Unauthorized attempt to update restricted media.", + details="Unauthorized attempt to 
update media outside edit boundary.", ) return HTTPResponse.forbidden("Restricted Access") @@ -742,6 +747,7 @@ def api_ocr_stats(): @admin.get("/api/extraction/") @auth_required("session") +@_require_media_access def api_extraction_get(extraction_id: int): """Return full extraction data including text.""" extraction = Extraction.query.get(extraction_id) @@ -772,6 +778,12 @@ def api_extraction_update(extraction_id: int): if not extraction: return HTTPResponse.not_found("Extraction not found") + media = Media.query.get(extraction.media_id) + if not media: + return HTTPResponse.not_found("Parent media not found") + if not current_user.can_access(media): + return HTTPResponse.forbidden("Restricted Access") + data = request.json or {} action = data.get("action") @@ -820,7 +832,7 @@ def api_extraction_update(extraction_id: int): details=detail_map.get(action), ) - return jsonify(extraction.to_dict()) + return jsonify(extraction.to_compact_dict()) @admin.put("/api/media//orientation") diff --git a/enferno/admin/views/system.py b/enferno/admin/views/system.py index e6c146052..fb638375c 100644 --- a/enferno/admin/views/system.py +++ b/enferno/admin/views/system.py @@ -5,7 +5,7 @@ from flask import Response, request from flask.templating import render_template from flask_babel import gettext -from flask_security.decorators import auth_required, current_user, roles_required +from flask_security.decorators import current_user, roles_required from enferno.admin.constants import Constants from enferno.admin.models import ( @@ -23,11 +23,11 @@ from enferno.utils.config_utils import ConfigManager from enferno.utils.http_response import HTTPResponse from enferno.utils.validation_utils import validate_with -from . import admin, PER_PAGE +from . 
import admin, PER_PAGE, fresh_auth @admin.get("/system-administration/") -@auth_required(within=15, grace=0) +@fresh_auth @roles_required("Admin") def system_admin() -> str: """Endpoint for system administration.""" @@ -76,6 +76,7 @@ def api_config() -> str: @admin.put("/api/configuration/") +@fresh_auth @roles_required("Admin") @validate_with(ConfigRequestModel) def api_config_write( @@ -107,6 +108,7 @@ def api_config_write( @admin.post("/api/reload/") +@fresh_auth @roles_required("Admin") def api_app_reload() -> Response: """ @@ -233,31 +235,8 @@ def api_updates_available() -> Response: return HTTPResponse.success(data=payload) -@admin.post("/api/updates/start") -@auth_required(within=15, grace=0) -@roles_required("Admin") -def api_updates_start() -> Response: - """Launch `bayanat update` out-of-process via the sudoers-granted wrapper. - - Fresh-auth required (within 15 min) to limit stale-cookie exposure: a - compromised admin session cannot trigger a privileged update without a - recent password prompt. 
- """ - try: - subprocess.run( - ["sudo", "-n", "/usr/local/sbin/bayanat-start-update"], - check=True, - timeout=10, - ) - except subprocess.TimeoutExpired: - return HTTPResponse.error("Update start timed out", status=504) - except subprocess.CalledProcessError as e: - return HTTPResponse.error(f"Failed to start update: {e}", status=500) - return HTTPResponse.success(data={"status": "started"}) - - @admin.get("/snapshots/") -@auth_required(within=15, grace=0) +@fresh_auth @roles_required("Admin") def snapshots_page() -> str: """Render the snapshots list page.""" diff --git a/enferno/admin/views/users.py b/enferno/admin/views/users.py index 3745e0164..3dc6b686d 100644 --- a/enferno/admin/views/users.py +++ b/enferno/admin/views/users.py @@ -6,7 +6,7 @@ from flask import Response, request, current_app, session from flask.templating import render_template from flask_security import logout_user -from flask_security.decorators import auth_required, current_user, roles_accepted, roles_required +from flask_security.decorators import current_user, roles_accepted, roles_required from flask_security.twofactor import tf_disable from sqlalchemy import or_ @@ -26,7 +26,7 @@ from enferno.utils.logging_utils import get_logger from enferno.utils.validation_utils import validate_with import enferno.utils.typing as t -from . import admin, PER_PAGE +from . 
import admin, PER_PAGE, fresh_auth logger = get_logger() @@ -68,7 +68,7 @@ def api_users() -> Response: @admin.get("/users/", defaults={"id": None}) @admin.get("/users/") -@auth_required(within=15, grace=0) +@fresh_auth @roles_required("Admin") def users(id) -> str: """ @@ -158,6 +158,7 @@ def api_user_sessions(id: int) -> Any: @admin.delete("/api/session/logout") +@fresh_auth @roles_required("Admin") def logout_session() -> Response: """ @@ -201,6 +202,7 @@ def logout_session() -> Response: @admin.delete("/api/user//sessions/logout") +@fresh_auth @roles_required("Admin") def logout_all_sessions(user_id: int) -> Any: """ @@ -246,6 +248,7 @@ def logout_all_sessions(user_id: int) -> Any: @admin.delete("/api/user/revoke_2fa") +@fresh_auth @roles_required("Admin") def revoke_2fa() -> Response: """ @@ -273,6 +276,7 @@ def revoke_2fa() -> Response: @admin.post("/api/user/") +@fresh_auth @roles_required("Admin") @validate_with(UserRequestModel) def api_user_create( @@ -353,6 +357,7 @@ def api_user_check( @admin.put("/api/user/") +@fresh_auth @roles_required("Admin") @validate_with(UserRequestModel) def api_user_update( @@ -432,6 +437,7 @@ def api_check_password( @admin.post("/api/user/force-reset") +@fresh_auth @roles_required("Admin") @validate_with(UserForceResetRequestModel) def api_user_force_reset(validated_data: dict) -> Response: @@ -459,6 +465,7 @@ def api_user_force_reset(validated_data: dict) -> Response: @admin.post("/api/user/force-reset-all") +@fresh_auth @roles_required("Admin") def api_user_force_reset_all() -> Response: """ @@ -475,6 +482,7 @@ def api_user_force_reset_all() -> Response: @admin.delete("/api/user/") +@fresh_auth @roles_required("Admin") def api_user_delete( id: t.id, @@ -513,7 +521,7 @@ def api_user_delete( # Roles routes @admin.route("/roles/") -@auth_required(within=15, grace=0) +@fresh_auth @roles_required("Admin") def roles() -> str: """ @@ -554,6 +562,7 @@ def api_roles() -> Response: @admin.post("/api/role/") +@fresh_auth 
@roles_required("Admin") @validate_with(RoleRequestModel) def api_role_create( @@ -588,6 +597,7 @@ def api_role_create( @admin.put("/api/role/") +@fresh_auth @roles_required("Admin") @validate_with(RoleRequestModel) def api_role_update(id: t.id, validated_data: dict) -> Response: @@ -618,6 +628,7 @@ def api_role_update(id: t.id, validated_data: dict) -> Response: @admin.delete("/api/role/") +@fresh_auth @roles_required("Admin") def api_role_delete( id: t.id, @@ -659,6 +670,7 @@ def api_role_delete( @admin.post("/api/role/import/") +@fresh_auth @roles_required("Admin") def api_role_import() -> Response: """ diff --git a/enferno/app.py b/enferno/app.py index 46741a40a..5666aeedb 100755 --- a/enferno/app.py +++ b/enferno/app.py @@ -2,7 +2,7 @@ import pandas as pd from urllib.parse import urlparse -from flask import Flask, render_template, current_app +from flask import Flask, render_template, current_app, request from flask_login import user_logged_in, user_logged_out from flask_security import Security, SQLAlchemyUserDatastore from flask_security import current_user @@ -57,7 +57,7 @@ from enferno.user.models import WebAuthn from enferno.user.views import bp_user from enferno.utils.logging_utils import get_logger -from enferno.utils.rate_limit_utils import ratelimit_handler +from enferno.utils.rate_limit_utils import get_real_ip, ratelimit_handler logger = get_logger() @@ -147,11 +147,38 @@ def register_extensions(app): mail.init_app(app) limiter.init_app(app) + _apply_login_rate_limit(app) # Initialize Talisman with security headers register_talisman(app) +def _apply_login_rate_limit(app): + """Stack per-username and per-IP Flask-Limiter limits on POST /login. + + The /login view is owned by Flask-Security; we wrap it post-registration + so the same limiter / Redis storage / 429 handler used elsewhere applies. 
+ """ + login_view = app.view_functions.get("security.login") + if login_view is None: + return + + def _username_key(): + return f"login:user:{(request.form.get('username') or '').lower().strip()}" + + wrapped = limiter.limit( + app.config["LOGIN_RATE_LIMIT_PER_USERNAME"], + key_func=_username_key, + methods=["POST"], + )(login_view) + wrapped = limiter.limit( + app.config["LOGIN_RATE_LIMIT_PER_IP"], + key_func=get_real_ip, + methods=["POST"], + )(wrapped) + app.view_functions["security.login"] = wrapped + + def register_talisman(app): """ Register Flask-Talisman for security headers including CSP. diff --git a/enferno/commands.py b/enferno/commands.py index 5674640ba..02b371a49 100644 --- a/enferno/commands.py +++ b/enferno/commands.py @@ -3,6 +3,7 @@ import os from datetime import datetime, timezone +from typing import Optional import click from flask import current_app @@ -22,10 +23,7 @@ from enferno.utils.db_alignment_helpers import DBAlignmentChecker from enferno.utils.logging_utils import get_logger from sqlalchemy import text -from enferno.admin.models import Bulletin -from enferno.admin.models.DynamicField import DynamicField from enferno.admin.models.DynamicFormHistory import DynamicFormHistory -from enferno.utils.date_helper import DateHelper from enferno.utils.form_history_utils import record_form_history from enferno.utils.validation_utils import validate_password_policy @@ -114,39 +112,100 @@ def import_data() -> None: @click.command() +@click.option("-u", "--username", default=None, help="Admin username (prompted if not provided)") +@click.option("-p", "--password", default=None, help="Admin password (generated if not provided)") +@click.option( + "--password-stdin", + "password_stdin", + is_flag=True, + default=False, + help="Read admin password from stdin (avoids argv exposure)", +) @with_appcontext -def install() -> None: - """Install a default Admin user and add an Admin role to it.""" +def install(username: Optional[str], password: 
Optional[str], password_stdin: bool = False) -> None: + """Install a default Admin user and add an Admin role to it. + + Non-interactive use: + flask install -u admin # generate a password + flask install -u admin -p '' # supply via flag + echo '' | flask install -u admin --password-stdin + """ + import secrets + import sys + + if password_stdin: + if password: + click.echo("Cannot combine --password and --password-stdin.") + return + password = sys.stdin.readline().rstrip("\n") + if not password: + click.echo("Empty password on stdin.") + return + logger.info("Installing admin user.") admin_role = Role.query.filter(Role.name == "Admin").first() - # check if there's an existing admin if admin_role.users.all(): click.echo("An admin user is already installed.") logger.error("An admin user is already installed.") return - # to make sure username doesn't already exist - while True: - u = click.prompt("Admin username?", default="admin") - check = User.query.filter(User.username == u.lower()).first() - if check is not None: + # Resolve username + if username: + u = username.strip() + if User.query.filter(User.username == u.lower()).first() is not None: + click.echo(f"Username '{u}' already exists.") + logger.error("Install aborted: username already exists.") + return + else: + while True: + u = click.prompt("Admin username?", default="admin") + if User.query.filter(User.username == u.lower()).first() is None: + break click.echo("Username already exists.") - else: - break - while True: - p = click.prompt("Admin Password?", hide_input=True) + + # Resolve password (generate if not supplied; show it once) + generated = False + if password: try: - p = validate_password_policy(p) - break + p = validate_password_policy(password) except ValueError as e: click.echo(str(e)) + logger.error("Install aborted: password failed policy check.") + return + elif username: + # Non-interactive (username supplied, password not) → generate. 
+ while True: + candidate = secrets.token_urlsafe(20) + try: + p = validate_password_policy(candidate) + generated = True + break + except ValueError: + # token_urlsafe is high-entropy; loop guard for the rare zxcvbn miss + continue + else: + while True: + p = click.prompt("Admin Password?", hide_input=True) + try: + p = validate_password_policy(p) + break + except ValueError as e: + click.echo(str(e)) + user = User(username=u, password=hash_password(p), active=1) user.name = "Admin" user.roles.append(admin_role) check = user.save() if check: - click.echo("Admin user installed successfully.") + if generated: + click.echo("=" * 60) + click.echo(f"Admin user installed: {u}") + click.echo(f"Generated password : {p}") + click.echo("Save this now — it is not stored in plaintext anywhere.") + click.echo("=" * 60) + else: + click.echo("Admin user installed successfully.") logger.info("Admin user installed successfully.") else: click.echo("Error installing admin user.") @@ -528,7 +587,6 @@ def fail(msg): fail("Redis not reachable") try: - from celery import current_app as celery_app from enferno.tasks import celery inspector = celery.control.inspect(timeout=2) @@ -771,7 +829,7 @@ def status() -> None: total_extracted = sum(s["count"] for s in status_map.values()) pending = total_media - total_extracted - click.echo(f"\nOCR Status Summary") + click.echo("\nOCR Status Summary") click.echo(f"{'─' * 40}") click.echo(f"Total media: {total_media:,}") click.echo(f"Pending (no OCR): {pending:,}") diff --git a/enferno/data_import/utils/media_import.py b/enferno/data_import/utils/media_import.py index 4a1754817..2ccb68c9c 100644 --- a/enferno/data_import/utils/media_import.py +++ b/enferno/data_import/utils/media_import.py @@ -9,6 +9,7 @@ from enferno.admin.models import Media, Bulletin, Source, Label, Location, Activity from enferno.data_import.models import DataImport from enferno.user.models import User, Role +from enferno.utils.validation_utils import sanitize_string from 
enferno.utils.data_helpers import get_file_hash, media_check_duplicates from enferno.utils.date_helper import DateHelper import arrow, shutil @@ -20,7 +21,6 @@ import enferno.utils.typing as t from enferno.extensions import db from sqlalchemy import any_ -from urllib.parse import urlparse logger = get_logger() @@ -568,6 +568,7 @@ def create_bulletin(self, info: dict) -> None: db.session.add(bulletin) def update_description(description): + description = sanitize_string(description or "") if bulletin.description: bulletin.description += f"
{description}" else: @@ -593,20 +594,19 @@ def update_description(description): channel_url = info.get("channel_url") channel = info.get("channel") - domain = info.get("extractor_key") + domain = info.get("extractor_key") or info.get("extractor") if not domain: - url = urlparse(info.get("source_url")).netloc.lower() - url = domain[4:] if domain.startswith("www.") else domain - domain = url.split(".")[0].first() - - main_source = Source.query.filter(Source.title == domain).first() - - if not main_source: - main_source = Source() - main_source.title = domain - main_source.etl_id = info.get("webpage_url_domain") or url - main_source.save() - bulletin.sources.append(main_source) + self.data_import.add_to_log( + "yt-dlp metadata missing extractor_key; skipping Source linkage." + ) + else: + main_source = Source.query.filter(Source.title == domain).first() + if not main_source: + main_source = Source() + main_source.title = domain + main_source.etl_id = info.get("webpage_url_domain") + main_source.save() + bulletin.sources.append(main_source) source = None @@ -665,12 +665,12 @@ def update_description(description): bulletin.publish_date = upload_date if description := info.get("description"): - bulletin.description = description + bulletin.description = sanitize_string(description) else: bulletin.source_link = info.get("old_path") if info.get("text_content"): - bulletin.description = info.get("text_content") + bulletin.description = sanitize_string(info.get("text_content")) if info.get("transcription"): update_description(info.get("transcription")) diff --git a/enferno/data_import/utils/sheet_import.py b/enferno/data_import/utils/sheet_import.py index 10e994af3..1f2a70b63 100644 --- a/enferno/data_import/utils/sheet_import.py +++ b/enferno/data_import/utils/sheet_import.py @@ -27,6 +27,7 @@ from enferno.utils.base import DatabaseException from enferno.utils.date_helper import DateHelper +from enferno.utils.validation_utils import sanitize_string from enferno.user.models 
import Role, User import enferno.utils.typing as t @@ -144,7 +145,7 @@ def parse_csv(filepath: str) -> dict: - A dictionary containing the columns and the head of the file. """ # read the file partially only for parsing purposes - df = pd.read_csv(filepath, keep_default_na=False) + df = pd.read_csv(filepath, keep_default_na=False, on_bad_lines="skip", index_col=False) df.dropna(how="all", axis=1, inplace=True) df = df.astype(str) @@ -165,11 +166,13 @@ def parse_excel(filepath: str, sheet: Any) -> dict: Returns: - A dictionary containing the columns and the head of the file. """ - df = pd.read_excel(filepath, sheet_name=sheet) + df = pd.read_excel(filepath, sheet_name=sheet, engine="openpyxl") df.dropna(how="all", axis=1, inplace=True) df = df.astype(str) - columns = df.columns.to_list() + # XLSX preserves numeric header cells as numeric column labels; coerce so the + # API contract (list[str]) holds regardless of header cell types. + columns = [str(c) for c in df.columns] # drop nan values before generating head rows df.fillna("", inplace=True) head = df.head().to_dict() @@ -187,7 +190,7 @@ def get_sheets(filepath: str) -> list: Returns: - A list of the sheet names in the Excel file. """ - xls = pd.ExcelFile(filepath) + xls = pd.ExcelFile(filepath, engine="openpyxl") return xls.sheet_names @staticmethod @@ -202,8 +205,8 @@ def sheet_to_df(filepath: str, sheet: Optional[list] = None) -> pd.DataFrame: Returns: - A DataFrame containing the parsed data. 
""" - if sheet: - df = pd.read_excel(filepath, sheet_name=sheet, keep_default_na=False) + if isinstance(sheet, (str, int)): + df = pd.read_excel(filepath, sheet_name=sheet, keep_default_na=False, engine="openpyxl") else: df = pd.read_csv(filepath, keep_default_na=False) @@ -554,7 +557,7 @@ def set_description(self, map_item: Any) -> None: description += "\n" if description: - self.actor_profile.description = description + self.actor_profile.description = sanitize_string(description) if old_description: self.actor_profile.description += old_description self.data_import.add_to_log("Processed description") @@ -707,7 +710,10 @@ def handle_mismatch(self, field: str, value: Any) -> None: None """ self.data_import.add_to_log(f"Field value mismatch {field}.\n Appending to description.") - self.actor_profile.description += f"

\n

{field}: {str(value)}" + # Sanitize untrusted field/value before the v-html sink (BAY-01-039). + self.actor_profile.description += ( + f"

\n

{sanitize_string(str(field))}: {sanitize_string(str(value))}" + ) def gen_value(self, field: str) -> None: """ diff --git a/enferno/data_import/views.py b/enferno/data_import/views.py index d1f3eaf78..b28d7c35c 100644 --- a/enferno/data_import/views.py +++ b/enferno/data_import/views.py @@ -91,7 +91,7 @@ def api_imports() -> Response: per_page = request.args.get("per_page", PER_PAGE, int) q = request.json.get("q", None) - if q and (batch_id := q.get("batch_id")): + if isinstance(q, dict) and (batch_id := q.get("batch_id")): result = ( DataImport.query.filter(DataImport.batch_id == batch_id) .order_by(-DataImport.id) @@ -179,8 +179,11 @@ def etl_process() -> Response: - response contains the processing result """ - files = request.json.pop("files") - meta = request.json + body = request.json or {} + files = body.pop("files", None) + if not isinstance(files, list) or not files: + return HTTPResponse.error("Missing `files` array", status=417) + meta = body batch_id = shortuuid.uuid()[:9] process_files.delay(files=files, meta=meta, user_id=current_user.id, batch_id=batch_id) @@ -249,15 +252,43 @@ def api_local_csv_delete() -> str: return "" +def _resolve_import_path(filename: Optional[str]) -> Optional[str]: + """ + Resolve a user-supplied filename to a path inside IMPORT_DIR. + + Returns the resolved POSIX path string, or None if the filename is + missing or escapes the import directory (traversal attempt). 
+ """ + if not filename: + return None + import_dir = Path(current_app.config.get("IMPORT_DIR")).resolve() + joined = safe_join(str(import_dir), filename) + if joined is None: + return None + candidate = Path(joined).resolve() + try: + candidate.relative_to(import_dir) + except ValueError: + return None + return candidate.as_posix() + + @imports.post("/api/csv/analyze") @roles_required("Admin") def api_csv_analyze() -> Response: """API endpoint to analyze a csv file.""" # locate file - filename = request.json.get("file").get("filename") - import_dir = Path(current_app.config.get("IMPORT_DIR")) + file_obj = request.json.get("file") + if not isinstance(file_obj, dict): + return HTTPResponse.error("Missing or malformed `file` field", status=417) + filename = file_obj.get("filename") + if not isinstance(filename, str) or not filename: + return HTTPResponse.error("Missing `file.filename`", status=417) + filepath = _resolve_import_path(filename) + if filepath is None: + logger.warning("Rejected CSV analyze for invalid path: %r", filename) + return HTTPResponse.error("Invalid file path", status=400) - filepath = (import_dir / filename).as_posix() result = SheetImport.parse_csv(filepath) if result: @@ -271,10 +302,17 @@ def api_csv_analyze() -> Response: @roles_required("Admin") def api_xls_sheet() -> Response: """API endpoint to get sheets from an excel file.""" - filename = request.json.get("file").get("filename") - import_dir = Path(current_app.config.get("IMPORT_DIR")) + file_obj = request.json.get("file") + if not isinstance(file_obj, dict): + return HTTPResponse.error("Missing or malformed `file` field", status=417) + filename = file_obj.get("filename") + if not isinstance(filename, str) or not filename: + return HTTPResponse.error("Missing `file.filename`", status=417) + filepath = _resolve_import_path(filename) + if filepath is None: + logger.warning("Rejected XLS sheets for invalid path: %r", filename) + return HTTPResponse.error("Invalid file path", status=400) - 
filepath = (import_dir / filename).as_posix() sheets = SheetImport.get_sheets(filepath) return HTTPResponse.success(data=sheets) @@ -285,11 +323,20 @@ def api_xls_sheet() -> Response: def api_xls_analyze() -> Response: """API endpoint to analyze an excel file.""" # locate file - filename = request.json.get("file").get("filename") - import_dir = Path(current_app.config.get("IMPORT_DIR")) + file_obj = request.json.get("file") + if not isinstance(file_obj, dict): + return HTTPResponse.error("Missing or malformed `file` field", status=417) + filename = file_obj.get("filename") + if not isinstance(filename, str) or not filename: + return HTTPResponse.error("Missing `file.filename`", status=417) + filepath = _resolve_import_path(filename) + if filepath is None: + logger.warning("Rejected XLS analyze for invalid path: %r", filename) + return HTTPResponse.error("Invalid file path", status=400) - filepath = (import_dir / filename).as_posix() sheet = request.json.get("sheet") + if not isinstance(sheet, (str, int)): + return HTTPResponse.error("Missing or invalid `sheet` value", status=417) result = SheetImport.parse_excel(filepath, sheet) @@ -357,6 +404,8 @@ def api_mapping_update(id: t.id) -> Response: map = db.session.get(Mapping, id) if map: data = request.json.get("data") + if not isinstance(data, dict): + return HTTPResponse.error("Update request missing parameters data", status=417) m = data.get("map", None) name = request.json.get("name", None) if m and name: diff --git a/enferno/export/models.py b/enferno/export/models.py index b1154b349..d795c01ca 100644 --- a/enferno/export/models.py +++ b/enferno/export/models.py @@ -1,11 +1,8 @@ -import os - from datetime import datetime as dt from pathlib import Path from typing import Any, Union import arrow -from flask import current_app from sqlalchemy import ARRAY from flask_security.decorators import current_user @@ -67,6 +64,20 @@ def expired(self): else: return True + @staticmethod + def _accessible_item_ids(table: str, 
items: Any) -> list: + """Filter requested export item IDs to those the current user may access + (BAY-01-026). Admins keep everything; others keep only in-scope items. + """ + from enferno.admin.models import Actor, Bulletin, Incident + + model = {"bulletin": Bulletin, "actor": Actor, "incident": Incident}.get(table) + if not model or not isinstance(items, list): + return [] + rows = model.query.filter(model.id.in_(items)).all() + allowed = {r.id for r in rows if current_user and current_user.can_access(r)} + return [i for i in items if i in allowed] + def from_json(self, table: str, json: dict) -> "Export": """ Export Deserializer. @@ -78,13 +89,19 @@ def from_json(self, table: str, json: dict) -> "Export": Returns: - Export object """ + if not isinstance(json, dict): + json = {} cfg = json.get("config") - items = json.get("items") + if not isinstance(cfg, dict): + cfg = {} self.requester = current_user self.table = table - self.items = items - self.tags = cfg.get("tags") if "tags" in cfg else [] + # Store only items the requester can access (BAY-01-026): keep crafted / + # out-of-scope IDs from ever being persisted, approved, or exported. The + # worker re-validates access at generation time too (BAY-01-003). + self.items = self._accessible_item_ids(table, json.get("items")) + self.tags = cfg.get("tags", []) self.comment = cfg.get("comment") self.file_format = cfg.get("format") self.include_media = cfg.get("includeMedia") diff --git a/enferno/export/views.py b/enferno/export/views.py index 499bef737..97cecbb8e 100644 --- a/enferno/export/views.py +++ b/enferno/export/views.py @@ -168,8 +168,10 @@ def api_export_get(id: t.id) -> Response: if export is None: return HTTPResponse.not_found("Export not found") - else: - return HTTPResponse.success(data=export.to_dict(), message="Export retrieved successfully") + # Same ownership guard as the list/download routes (BAY-01-015). 
+ if not current_user.has_role("Admin") and current_user.id != export.requester_id: + return HTTPResponse.forbidden("Forbidden") + return HTTPResponse.success(data=export.to_dict(), message="Export retrieved successfully") @export.post("/api/exports/") diff --git a/enferno/settings.py b/enferno/settings.py index f9db8f882..6978259b7 100644 --- a/enferno/settings.py +++ b/enferno/settings.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- import os from datetime import timedelta +from urllib.parse import quote import bleach import redis @@ -46,7 +47,8 @@ class Config(object): if (POSTGRES_USER and POSTGRES_PASSWORD) or POSTGRES_HOST != "localhost": SQLALCHEMY_DATABASE_URI = ( - f"postgresql://{POSTGRES_USER}:{POSTGRES_PASSWORD}@{POSTGRES_HOST}/{POSTGRES_DB}" + f"postgresql://{quote(POSTGRES_USER, safe='')}:{quote(POSTGRES_PASSWORD, safe='')}" + f"@{POSTGRES_HOST}/{POSTGRES_DB}" ) else: SQLALCHEMY_DATABASE_URI = f"postgresql:///{POSTGRES_DB}" @@ -57,12 +59,13 @@ class Config(object): REDIS_HOST = os.environ.get("REDIS_HOST", "localhost") REDIS_PORT = int(os.environ.get("REDIS_PORT", 6379)) REDIS_PASSWORD = os.environ.get("REDIS_PASSWORD", "") - REDIS_URL = f"redis://:{REDIS_PASSWORD}@{REDIS_HOST}:{REDIS_PORT}/0" + _redis_pw_quoted = quote(REDIS_PASSWORD, safe="") + REDIS_URL = f"redis://:{_redis_pw_quoted}@{REDIS_HOST}:{REDIS_PORT}/0" # Celery # Has to be in small case - celery_broker_url = f"redis://:{REDIS_PASSWORD}@{REDIS_HOST}:{REDIS_PORT}/2" - result_backend = f"redis://:{REDIS_PASSWORD}@{REDIS_HOST}:{REDIS_PORT}/3" + celery_broker_url = f"redis://:{_redis_pw_quoted}@{REDIS_HOST}:{REDIS_PORT}/2" + result_backend = f"redis://:{_redis_pw_quoted}@{REDIS_HOST}:{REDIS_PORT}/3" # Security SECURITY_REGISTERABLE = manager.get_config("SECURITY_REGISTERABLE") @@ -102,6 +105,12 @@ class Config(object): security_freshness_grace_period = manager.get_config("SECURITY_FRESHNESS_GRACE_PERIOD") SECURITY_FRESHNESS_GRACE_PERIOD = timedelta(minutes=security_freshness_grace_period) + # 
Login brute-force throttle (Flask-Limiter, applied per-method=POST on /login). + LOGIN_RATE_LIMIT_PER_USERNAME = os.environ.get( + "LOGIN_RATE_LIMIT_PER_USERNAME", "10 per 15 minutes" + ) + LOGIN_RATE_LIMIT_PER_IP = os.environ.get("LOGIN_RATE_LIMIT_PER_IP", "30 per 15 minutes") + SECURITY_TWO_FACTOR_REQUIRED = manager.get_config("SECURITY_TWO_FACTOR_REQUIRED") SECURITY_PASSWORD_LENGTH_MIN = manager.get_config("SECURITY_PASSWORD_LENGTH_MIN") @@ -126,10 +135,6 @@ class Config(object): DISABLE_MULTIPLE_SESSIONS = manager.get_config("DISABLE_MULTIPLE_SESSIONS") SESSION_RETENTION_PERIOD = manager.get_config("SESSION_RETENTION_PERIOD") - # Auto-apply patch releases (e.g. 4.1.0 -> 4.1.1) in the background. - # Minor and major bumps always require manual approval regardless. - AUTO_APPLY_PATCH_UPDATES = manager.get_config("AUTO_APPLY_PATCH_UPDATES") - # Recaptcha RECAPTCHA_ENABLED = manager.get_config("RECAPTCHA_ENABLED") RECAPTCHA_PUBLIC_KEY = manager.get_config("RECAPTCHA_PUBLIC_KEY") @@ -137,7 +142,7 @@ class Config(object): # Session SESSION_TYPE = "redis" - SESSION_REDIS = redis.from_url(f"redis://:{REDIS_PASSWORD}@{REDIS_HOST}:{REDIS_PORT}/1") + SESSION_REDIS = redis.from_url(f"redis://:{_redis_pw_quoted}@{REDIS_HOST}:{REDIS_PORT}/1") PERMANENT_SESSION_LIFETIME = 3600 # Google 0Auth @@ -384,7 +389,7 @@ class TestConfig: REDIS_HOST = os.environ.get("REDIS_HOST", "localhost") REDIS_PORT = int(os.environ.get("REDIS_PORT", 6379)) REDIS_PASSWORD = os.environ.get("REDIS_PASSWORD", "") - REDIS_URL = f"redis://:{REDIS_PASSWORD}@{REDIS_HOST}:{REDIS_PORT}/0" + REDIS_URL = f"redis://:{quote(REDIS_PASSWORD, safe='')}@{REDIS_HOST}:{REDIS_PORT}/0" # Celery - use in-memory for tests to avoid Redis dependency celery_broker_url = "memory://" @@ -416,6 +421,10 @@ class TestConfig: SECURITY_MULTI_FACTOR_RECOVERY_CODES_N = 3 SECURITY_MULTI_FACTOR_RECOVERY_CODES_KEYS = None SECURITY_MULTI_FACTOR_RECOVERY_CODE_TTL = None + + # Login throttle (Flask-Limiter, applied to security.login). 
Tighter in tests. + LOGIN_RATE_LIMIT_PER_USERNAME = "5 per 15 minutes" + LOGIN_RATE_LIMIT_PER_IP = "10 per 15 minutes" SECURITY_TWO_FACTOR_ENABLED_METHODS = ["authenticator"] SECURITY_TWO_FACTOR = True SECURITY_TWO_FACTOR_RESCUE_MAIL = "test@example.com" @@ -611,9 +620,6 @@ class TestConfig: # Notifications NOTIFICATIONS = NOTIFICATIONS_DEFAULT_CONFIG - # Auto-update - AUTO_APPLY_PATCH_UPDATES = False - # Dependencies (from dep_utils) HAS_WHISPER = dep_utils.has_whisper # Use actual dependency detection HAS_TESSERACT = dep_utils.has_tesseract # Use actual dependency detection diff --git a/enferno/setup/views.py b/enferno/setup/views.py index 1e1f8cbc5..1374a6715 100644 --- a/enferno/setup/views.py +++ b/enferno/setup/views.py @@ -8,11 +8,10 @@ Response, current_app, ) -from flask_security import hash_password, login_user, roles_required, current_user +from flask_security import auth_required, roles_required, current_user from enferno.admin.models import Eventtype, PotentialViolation, ClaimedViolation -from enferno.extensions import db -from enferno.user.models import User, Role +from enferno.user.models import User from enferno.utils.config_utils import ConfigManager from enferno.utils.data_helpers import import_default_data from enferno.utils.http_response import HTTPResponse @@ -30,15 +29,19 @@ def check_installation() -> bool: @bp_setup.before_app_request def handle_installation_check() -> Optional[Response]: - """Redirect to setup wizard if the app is not installed.""" + """Redirect to setup wizard if the app is not installed. + + The admin user is created out-of-band by the installer's CLI + bootstrap (`flask install`), not by this blueprint, so the wizard + requires authentication. Pre-auth flow paths are exempted so the + operator can sign in. 
+ """ excluded_paths = [ "/setup_wizard", "/static", "/assets", "/_debug_toolbar", "/favicon.ico", - "/api/create-admin", - "/api/check-admin", "/api/default-config", "/api/import-data", "/api/check-data-imported", @@ -49,73 +52,28 @@ def handle_installation_check() -> Optional[Response]: "/admin/api/reload", "/fs-static", "/health", - ] - login_flow_paths = [ "/login", "/wan-signin", "/tf-validate", "/tf-select", + "/auth", + "/logout", ] - # Add /login to excluded paths if users exist - if User.query.first() is not None: - excluded_paths.extend(login_flow_paths) - if not any(request.path.startswith(path) for path in excluded_paths): if check_installation(): return redirect("/setup_wizard") @bp_setup.route("/setup_wizard") +@auth_required("session") def setup_wizard() -> str: - """Render the setup wizard template.""" + """Render the setup wizard template (admin-only post-install).""" + if not current_user.has_role("Admin"): + return redirect("/") return render_template("setup_wizard.html") -@bp_setup.post("/api/create-admin") -def create_admin() -> Any: - """Create an admin user if one doesn't exist.""" - admin_role = Role.query.filter(Role.name == "Admin").first() - - if admin_role.users.all(): - return HTTPResponse.error("Admin user already exists") - - data = request.json - username = data.get("username") - password = data.get("password") - - if not username or not password: - return HTTPResponse.error("Username and password are required") - - if User.query.filter(User.username == username.lower()).first(): - return HTTPResponse.error("Username already exists") - - new_admin = User(username=username, password=hash_password(password), active=1, name="Admin") - new_admin.roles.append(admin_role) - - db.session.add(new_admin) - try: - db.session.commit() - login_user(new_admin) - return HTTPResponse.created( - message="Admin user installed successfully", - data={"item": new_admin.to_dict()}, - ) - except Exception: - db.session.rollback() - return 
HTTPResponse.error("Failed to create admin user", status=500) - - -@bp_setup.get("/api/check-admin") -def check_admin() -> Dict[str, str]: - """Check if an admin user exists.""" - admin_role = Role.query.filter(Role.name == "Admin").first() - if admin_role and admin_role.users.first(): - return HTTPResponse.success(data={"status": "exists"}, message="Admin user already exists") - else: - return HTTPResponse.success(data={"status": "not_found"}, message="No admin user found") - - @bp_setup.post("/api/import-data") @roles_required("Admin") def import_data() -> Response: diff --git a/enferno/static/js/components/UpdateBanner.js b/enferno/static/js/components/UpdateBanner.js index 983979de0..12baa15a0 100644 --- a/enferno/static/js/components/UpdateBanner.js +++ b/enferno/static/js/components/UpdateBanner.js @@ -5,7 +5,6 @@ const UpdateBanner = Vue.defineComponent({ latest: null, releaseNotesUrl: null, dialog: false, - starting: false, }; }, computed: { @@ -32,23 +31,6 @@ const UpdateBanner = Vue.defineComponent({ // silent: background poll should never spam the UI } }, - async startUpdate() { - this.starting = true; - try { - await axios.post('/admin/api/updates/start'); - this.dialog = false; - this.$emit('update-started'); - } catch (e) { - const msg = (e?.response?.data?.message) || 'Failed to start update'; - if (this.$root && typeof this.$root.showSnack === 'function') { - this.$root.showSnack(msg, 'error'); - } else { - console.error(msg); - } - } finally { - this.starting = false; - } - }, }, template: ` Release notes

- The update will take about 60 seconds. The app will be briefly - unavailable. A pre-update database snapshot will be taken - automatically. + To update, run this on the server as an administrator: +
sudo bayanat update {{ latest }}
- Cancel - - Update now - + Close diff --git a/enferno/tasks/bulk_ops.py b/enferno/tasks/bulk_ops.py index e42e803d3..8d5162cbf 100644 --- a/enferno/tasks/bulk_ops.py +++ b/enferno/tasks/bulk_ops.py @@ -50,13 +50,16 @@ def bulk_update_bulletins(ids: list, bulk: dict, cur_user_id: t.id) -> None: first_peer_reviewer_id = bulk.get("first_peer_reviewer_id") clear_reviewer = bulk.get("reviewerClear") + total_updated = 0 for group in chunks: # Fetch bulletins bulletins = Bulletin.query.filter(Bulletin.id.in_(group)) + # Only items the caller may actually modify, so downstream revisions, + # activity, notifications and counts never cover skipped items (BAY-01-018). + mutated = [] for bulletin in bulletins: # check user can access each bulletin if not user.can_access(bulletin): - # Log? continue # get Status initially @@ -115,27 +118,27 @@ def bulk_update_bulletins(ids: list, bulk: dict, cur_user_id: t.id) -> None: # add only to session db.session.add(bulletin) + mutated.append(bulletin) - revmaps = [] - bulletins = Bulletin.query.filter(Bulletin.id.in_(group)).all() - for bulletin in bulletins: - # this commits automatically - tmp = {"bulletin_id": bulletin.id, "user_id": cur_user.id, "data": bulletin.to_dict()} - revmaps.append(tmp) + revmaps = [ + {"bulletin_id": b.id, "user_id": cur_user.id, "data": b.to_dict()} for b in mutated + ] db.session.bulk_insert_mappings(BulletinHistory, revmaps) # commit session when a batch of items and revisions are added db.session.commit() - - # Record Activity - updated = [b.to_mini() for b in bulletins] - Activity.create( - cur_user, Activity.ACTION_BULK_UPDATE, Activity.STATUS_SUCCESS, updated, "bulletin" - ) + total_updated += len(mutated) + + # Record Activity only for items actually updated + if mutated: + updated = [b.to_mini() for b in mutated] + Activity.create( + cur_user, Activity.ACTION_BULK_UPDATE, Activity.STATUS_SUCCESS, updated, "bulletin" + ) # perhaps allow a little time out time.sleep(0.1) - logger.info(f"Bulletin 
bulk-update successful. User ID: {cur_user_id} Total: {len(ids)}") + logger.info(f"Bulletin bulk-update successful. User ID: {cur_user_id} Total: {total_updated}") assigner = db.session.get(User, cur_user_id) # Notify admin @@ -143,7 +146,7 @@ def bulk_update_bulletins(ids: list, bulk: dict, cur_user_id: t.id) -> None: Constants.NotificationEvent.BULK_OPERATION_STATUS, assigner, "Bulk Operation Status", - f"Bulk update of {len(ids)} Bulletins has been completed successfully.", + f"Bulk update of {total_updated} Bulletins has been completed successfully.", ) # send notifications for assignments and reviews @@ -152,7 +155,7 @@ def bulk_update_bulletins(ids: list, bulk: dict, cur_user_id: t.id) -> None: Constants.NotificationEvent.NEW_ASSIGNMENT, db.session.get(User, assigned_to_id), "New Assignment", - f"{len(ids)} Bulletins have been assigned to you by {assigner.username} for analysis.", + f"{total_updated} Bulletins have been assigned to you by {assigner.username} for analysis.", ) if first_peer_reviewer_id: @@ -160,7 +163,7 @@ def bulk_update_bulletins(ids: list, bulk: dict, cur_user_id: t.id) -> None: Constants.NotificationEvent.REVIEW_NEEDED, db.session.get(User, first_peer_reviewer_id), "Review Needed", - f"{len(ids)} Bulletins have been assigned to you by {assigner.username} for review.", + f"{total_updated} Bulletins have been assigned to you by {assigner.username} for review.", ) @@ -192,13 +195,15 @@ def bulk_update_actors(ids: list, bulk: dict, cur_user_id: t.id) -> None: first_peer_reviewer_id = bulk.get("first_peer_reviewer_id") clear_reviewer = bulk.get("reviewerClear") + total_updated = 0 for group in chunks: # Fetch bulletins actors = Actor.query.filter(Actor.id.in_(group)) + # Only items the caller may actually modify (BAY-01-018). + mutated = [] for actor in actors: # check user can access each actor if not user.can_access(actor): - # Log? 
continue # get Status initially @@ -257,27 +262,25 @@ def bulk_update_actors(ids: list, bulk: dict, cur_user_id: t.id) -> None: # add only to session db.session.add(actor) + mutated.append(actor) - revmaps = [] - actors = Actor.query.filter(Actor.id.in_(group)).all() - for actor in actors: - # this commits automatically - tmp = {"actor_id": actor.id, "user_id": cur_user.id, "data": actor.to_dict()} - revmaps.append(tmp) + revmaps = [{"actor_id": a.id, "user_id": cur_user.id, "data": a.to_dict()} for a in mutated] db.session.bulk_insert_mappings(ActorHistory, revmaps) # commit session when a batch of items and revisions are added db.session.commit() - - # Record Activity - updated = [b.to_mini() for b in actors] - Activity.create( - cur_user, Activity.ACTION_BULK_UPDATE, Activity.STATUS_SUCCESS, updated, "actor" - ) + total_updated += len(mutated) + + # Record Activity only for items actually updated + if mutated: + updated = [b.to_mini() for b in mutated] + Activity.create( + cur_user, Activity.ACTION_BULK_UPDATE, Activity.STATUS_SUCCESS, updated, "actor" + ) # perhaps allow a little time out time.sleep(0.25) - logger.info(f"Actors bulk-update successful. User ID: {cur_user_id} Total: {len(ids)}") + logger.info(f"Actors bulk-update successful. 
User ID: {cur_user_id} Total: {total_updated}") assigner = db.session.get(User, cur_user_id) # Notify admin @@ -285,7 +288,7 @@ def bulk_update_actors(ids: list, bulk: dict, cur_user_id: t.id) -> None: Constants.NotificationEvent.BULK_OPERATION_STATUS, assigner, "Bulk Operation Status", - f"Bulk update of {len(ids)} Actors has been completed successfully.", + f"Bulk update of {total_updated} Actors has been completed successfully.", ) # send notifications for assignments and reviews @@ -294,7 +297,7 @@ def bulk_update_actors(ids: list, bulk: dict, cur_user_id: t.id) -> None: Constants.NotificationEvent.NEW_ASSIGNMENT, db.session.get(User, assigned_to_id), "New Assignment", - f"{len(ids)} Actors have been assigned to you by {assigner.username} for analysis.", + f"{total_updated} Actors have been assigned to you by {assigner.username} for analysis.", ) if first_peer_reviewer_id: @@ -302,7 +305,7 @@ def bulk_update_actors(ids: list, bulk: dict, cur_user_id: t.id) -> None: Constants.NotificationEvent.REVIEW_NEEDED, db.session.get(User, first_peer_reviewer_id), "Review Needed", - f"{len(ids)} Actors have been assigned to you by {assigner.username} for review.", + f"{total_updated} Actors have been assigned to you by {assigner.username} for review.", ) @@ -340,13 +343,15 @@ def bulk_update_incidents(ids: list, bulk: dict, cur_user_id: t.id) -> None: first_peer_reviewer_id = bulk.get("first_peer_reviewer_id") clear_reviewer = bulk.get("reviewerClear") + total_updated = 0 for group in chunks: # Fetch bulletins incidents = Incident.query.filter(Incident.id.in_(group)) + # Only items the caller may actually modify (BAY-01-018). + mutated = [] for incident in incidents: # check if user can access incident if not user.can_access(incident): - # Log? 
continue # get Status initially @@ -403,23 +408,23 @@ def bulk_update_incidents(ids: list, bulk: dict, cur_user_id: t.id) -> None: # add only to session db.session.add(incident) + mutated.append(incident) - revmaps = [] - incidents = Incident.query.filter(Incident.id.in_(group)).all() - for incident in incidents: - # this commits automatically - tmp = {"incident_id": incident.id, "user_id": cur_user.id, "data": incident.to_dict()} - revmaps.append(tmp) + revmaps = [ + {"incident_id": i.id, "user_id": cur_user.id, "data": i.to_dict()} for i in mutated + ] db.session.bulk_insert_mappings(IncidentHistory, revmaps) # commit session when a batch of items and revisions are added db.session.commit() + total_updated += len(mutated) - # Record Activity - updated = [b.to_mini() for b in incidents] - Activity.create( - cur_user, Activity.ACTION_BULK_UPDATE, Activity.STATUS_SUCCESS, updated, "incident" - ) + # Record Activity only for items actually updated + if mutated: + updated = [b.to_mini() for b in mutated] + Activity.create( + cur_user, Activity.ACTION_BULK_UPDATE, Activity.STATUS_SUCCESS, updated, "incident" + ) # restrict or assign related items if assign_related or restrict_related: @@ -445,7 +450,7 @@ def bulk_update_incidents(ids: list, bulk: dict, cur_user_id: t.id) -> None: # perhaps allow a little time out time.sleep(0.25) - logger.info(f"Incidents bulk-update successful. User ID: {cur_user_id} Total: {len(ids)}") + logger.info(f"Incidents bulk-update successful. 
User ID: {cur_user_id} Total: {total_updated}") assigner = db.session.get(User, cur_user_id) # Notify admin @@ -453,7 +458,7 @@ def bulk_update_incidents(ids: list, bulk: dict, cur_user_id: t.id) -> None: Constants.NotificationEvent.BULK_OPERATION_STATUS, assigner, "Bulk Operation Status", - f"Bulk update of {len(ids)} Incidents has been completed successfully.", + f"Bulk update of {total_updated} Incidents has been completed successfully.", ) # send notifications for assignments and reviews @@ -462,7 +467,7 @@ def bulk_update_incidents(ids: list, bulk: dict, cur_user_id: t.id) -> None: Constants.NotificationEvent.NEW_ASSIGNMENT, db.session.get(User, assigned_to_id), "New Assignment", - f"{len(ids)} Incidents have been assigned to you by {assigner.username} for analysis.", + f"{total_updated} Incidents have been assigned to you by {assigner.username} for analysis.", ) if first_peer_reviewer_id: @@ -470,5 +475,5 @@ def bulk_update_incidents(ids: list, bulk: dict, cur_user_id: t.id) -> None: Constants.NotificationEvent.REVIEW_NEEDED, db.session.get(User, first_peer_reviewer_id), "Review Needed", - f"{len(ids)} Incidents have been assigned to you by {assigner.username} for review.", + f"{total_updated} Incidents have been assigned to you by {assigner.username} for review.", ) diff --git a/enferno/tasks/data_import.py b/enferno/tasks/data_import.py index 50a00a5e4..b03dc28f9 100644 --- a/enferno/tasks/data_import.py +++ b/enferno/tasks/data_import.py @@ -3,6 +3,7 @@ from typing import Any, Literal, Optional from celery import chord, group +from celery.exceptions import SoftTimeLimitExceeded from werkzeug.utils import safe_join import enferno.utils.typing as t @@ -20,7 +21,7 @@ logger = get_logger("celery.tasks.data_import") -@celery.task(rate_limit=10) +@celery.task(rate_limit=10, soft_time_limit=600, time_limit=660) def etl_process_file( batch_id: t.id, file: str, meta: Any, user_id: t.id, data_import_id: t.id ) -> Optional[Literal["done"]]: @@ -29,6 +30,10 @@ def 
etl_process_file( di = MediaImport(batch_id, meta, user_id=user_id, data_import_id=data_import_id) di.process(file) return "done" + except SoftTimeLimitExceeded: + log = db.session.get(DataImport, data_import_id) + log.fail(TimeoutError(f"etl_process_file exceeded soft_time_limit on {file}")) + raise except Exception as e: log = db.session.get(DataImport, data_import_id) log.fail(e) diff --git a/enferno/tasks/exports.py b/enferno/tasks/exports.py index 449b26ea7..1a671263f 100644 --- a/enferno/tasks/exports.py +++ b/enferno/tasks/exports.py @@ -14,7 +14,7 @@ from enferno.admin.models import Actor, Bulletin, Incident from enferno.export.models import Export from enferno.tasks import BULK_CHUNK_SIZE, celery, cfg, chunk_list -from enferno.utils.csv_utils import convert_list_attributes +from enferno.utils.csv_utils import convert_list_attributes, escape_csv_formula_cell from enferno.utils.date_helper import DateHelper from enferno.utils.logging_utils import get_logger from enferno.utils.pdf_utils import PDFUtil @@ -54,6 +54,29 @@ def generate_export(export_id: t.id) -> Any: raise NotImplementedError(f"Unsupported export file format: {export_request.file_format!r}") +def _accessible_items(requester, query_iter, export_id: t.id): + """Yield only the items the export requester is authorised to access. + + Filters here mirror the per-record group check enforced on direct + GET endpoints. Without it the Celery export pipeline would happily + serialise restricted items the requester cannot open in the UI. + """ + if not requester: + logger.warning("Export #%s has no requester; skipping all items", export_id) + return + for item in query_iter: + if requester.can_access(item): + yield item + else: + logger.warning( + "Export #%s skipped restricted %s id=%s for requester %s", + export_id, + item.__tablename__, + item.id, + requester.id, + ) + + def clear_failed_export(export_request: Export) -> None: """ Clear failed export task. 
@@ -85,23 +108,27 @@ def generate_pdf_files(export_id: t.id) -> t.id | Literal[False]: - export_id if successful, False otherwise. """ export_request = db.session.get(Export, export_id) + requester = export_request.requester chunks = chunk_list(export_request.items, BULK_CHUNK_SIZE) dir_id = Export.generate_export_dir() try: for group in chunks: if export_request.table == "bulletin": - for bulletin in Bulletin.query.filter(Bulletin.id.in_(group)): + rows = Bulletin.query.filter(Bulletin.id.in_(group)) + for bulletin in _accessible_items(requester, rows, export_id): pdf = PDFUtil(bulletin) pdf.generate_pdf(f"{Export.export_dir}/{dir_id}/{pdf.filename}") elif export_request.table == "actor": - for actor in Actor.query.filter(Actor.id.in_(group)): + rows = Actor.query.filter(Actor.id.in_(group)) + for actor in _accessible_items(requester, rows, export_id): pdf = PDFUtil(actor) pdf.generate_pdf(f"{Export.export_dir}/{dir_id}/{pdf.filename}") elif export_request.table == "incident": - for incident in Incident.query.filter(Incident.id.in_(group)): + rows = Incident.query.filter(Incident.id.in_(group)) + for incident in _accessible_items(requester, rows, export_id): pdf = PDFUtil(incident) pdf.generate_pdf(f"{Export.export_dir}/{dir_id}/{pdf.filename}") @@ -130,6 +157,7 @@ def generate_json_file(export_id: t.id) -> t.id | Literal[False]: - export_id if successful, False otherwise. 
""" export_request = db.session.get(Export, export_id) + requester = export_request.requester chunks = chunk_list(export_request.items, BULK_CHUNK_SIZE) file_path, dir_id = Export.generate_export_file() export_type = export_request.table @@ -139,21 +167,17 @@ def generate_json_file(export_id: t.id) -> t.id | Literal[False]: file.write(f'"{export_type}s": [ \n') for group in chunks: if export_type == "bulletin": - batch = ",".join( - bulletin.to_json() - for bulletin in Bulletin.query.filter(Bulletin.id.in_(group)) - ) - file.write(f"{batch}\n") + rows = Bulletin.query.filter(Bulletin.id.in_(group)) elif export_type == "actor": - batch = ",".join( - actor.to_json() for actor in Actor.query.filter(Actor.id.in_(group)) - ) - file.write(f"{batch}\n") + rows = Actor.query.filter(Actor.id.in_(group)) elif export_type == "incident": - batch = ",".join( - incident.to_json() - for incident in Incident.query.filter(Incident.id.in_(group)) - ) + rows = Incident.query.filter(Incident.id.in_(group)) + else: + rows = [] + batch = ",".join( + item.to_json() for item in _accessible_items(requester, rows, export_id) + ) + if batch: file.write(f"{batch}\n") # less db overhead time.sleep(0.2) @@ -181,6 +205,7 @@ def generate_csv_file(export_id: t.id) -> t.id | Literal[False]: - export_id if successful, False otherwise. 
""" export_request = db.session.get(Export, export_id) + requester = export_request.requester file_path, dir_id = Export.generate_export_file() export_type = export_request.table @@ -189,6 +214,14 @@ def generate_csv_file(export_id: t.id) -> t.id | Literal[False]: for id in export_request.items: if export_type == "bulletin": bulletin = db.session.get(Bulletin, id) + if not bulletin or not requester or not requester.can_access(bulletin): + if bulletin: + logger.warning( + "Export #%s skipped restricted bulletin id=%s", + export_id, + bulletin.id, + ) + continue # adjust list attributes to normal dicts adjusted = convert_list_attributes(bulletin.to_csv_dict()) # normalize @@ -200,6 +233,14 @@ def generate_csv_file(export_id: t.id) -> t.id | Literal[False]: elif export_type == "actor": actor = db.session.get(Actor, id) + if not actor or not requester or not requester.can_access(actor): + if actor: + logger.warning( + "Export #%s skipped restricted actor id=%s", + export_id, + actor.id, + ) + continue # adjust list attributes to normal dicts actor_dict = convert_list_attributes(actor.to_csv_dict()) @@ -216,6 +257,8 @@ def generate_csv_file(export_id: t.id) -> t.id | Literal[False]: else: csv_df = pd.concat([csv_df, df], ignore_index=True) + # Neutralize spreadsheet formula injection before writing (BAY-01-024). + csv_df = csv_df.map(escape_csv_formula_cell) csv_df.to_csv(f"{file_path}.csv") export_request.file_id = dir_id @@ -264,7 +307,7 @@ def generate_export_media(previous_result: int) -> t.id | Literal[False]: # UI switch disabled, but just in case... 
return False - for item in items: + for item in _accessible_items(export_request.requester, items, export_request.id): if item.medias: media = item.medias[0] target_file = f"{Export.export_dir}/{export_request.file_id}/{media.media_file}" diff --git a/enferno/tasks/extraction.py b/enferno/tasks/extraction.py index c99fd9fa2..27f094f65 100644 --- a/enferno/tasks/extraction.py +++ b/enferno/tasks/extraction.py @@ -64,17 +64,14 @@ def process_media_extraction_task( _save_failed_extraction(media_id, "DOCX extraction failed") return {"success": False, "media_id": media_id, "error": "DOCX extraction failed"} elif ext == "pdf": - page_images = pdf_to_images(file_bytes) + # Cap rasterization at the OCR page limit so a crafted high-page-count + # PDF can't exhaust resources before the limit applies (BAY-01-023). + max_pages = current_app.config.get("PDF_OCR_MAX_PAGES", 20) + page_images = pdf_to_images(file_bytes, max_pages=max_pages) if not page_images: _save_failed_extraction(media_id, "PDF conversion failed") return {"success": False, "media_id": media_id, "error": "PDF conversion failed"} - max_pages = current_app.config.get("PDF_OCR_MAX_PAGES", 20) - total_pages = len(page_images) - if total_pages > max_pages: - logger.warning(f"PDF {media_id} has {total_pages} pages, truncating to {max_pages}") - page_images = page_images[:max_pages] - page_results = [extract_text(img, hints) for img in page_images] page_results = [r for r in page_results if r is not None] diff --git a/enferno/tasks/maintenance.py b/enferno/tasks/maintenance.py index 26da251e8..e78a8be5b 100644 --- a/enferno/tasks/maintenance.py +++ b/enferno/tasks/maintenance.py @@ -1,11 +1,9 @@ # -*- coding: utf-8 -*- import json import os -import subprocess from datetime import date, datetime, timedelta, timezone import requests -from packaging.version import Version from enferno.admin.constants import Constants from enferno.admin.models import Activity, Location @@ -45,16 +43,6 @@ def _current_version() -> str: 
return "0.0.0" -def _is_patch_bump(current: str, target: str) -> bool: - try: - c, t = Version(current), Version(target) - except Exception: - return False - if t <= c: - return False - return c.major == t.major and c.minor == t.minor - - @celery.task def check_for_updates(): """Poll GitHub releases. Cache latest. Notify admins on new tag. Optionally auto-apply patch.""" @@ -87,21 +75,6 @@ def check_for_updates(): if _redis_get_str(UPDATE_NOTIFIED_KEY) == latest_tag: return - auto_apply = bool(getattr(cfg, "AUTO_APPLY_PATCH_UPDATES", False)) - - if auto_apply and _is_patch_bump(current, latest_tag): - logger.info(f"auto-applying patch update {current} -> {latest_tag}") - try: - subprocess.run( - ["sudo", "-n", "/usr/local/sbin/bayanat-start-update"], - check=True, - timeout=10, - ) - rds.set(UPDATE_NOTIFIED_KEY, latest_tag) - return - except Exception as e: - logger.warning(f"auto-apply failed, falling back to notification: {e}") - Notification.create_for_admins( title=f"Update available: {latest_tag}", message=f"A new Bayanat release is available. {release.get('html_url', '')}", diff --git a/enferno/templates/setup_wizard.html b/enferno/templates/setup_wizard.html index de1baa000..cbeef7e1e 100644 --- a/enferno/templates/setup_wizard.html +++ b/enferno/templates/setup_wizard.html @@ -65,42 +65,6 @@ - - - - -
- {{ _('Create your first admin account. This account will have unrestricted - access to all features and settings in Bayanat.') }} -
- - - - - {{ _('Create Admin Account') }} - -
-
- - - -
- {{ _('Two Factor Authentication Policies') }}{{ _('Two Factor Authentication Policies') }}{{ _('Two Factor Authentication Policies') }} { - this.adminCreated = adminResponse.data.status === 'exists'; - this.dataImported = dataResponse.data.status === 'imported'; - }).catch(error => { - console.error('Error checking progress:', error); - if (error.response && error.response.status === 403) { - window.location.href = '/login'; // Redirect to login on 403 error - } else { - this.showSnack('Error checking progress. Please try again.'); - } - }).finally(() => { - this.loading = false; - }); + // Admin always exists at this point (created by the installer + // CLI bootstrap; the wizard requires session auth). + this.adminCreated = true; + api.get('/api/check-data-imported') + .then(dataResponse => { + this.dataImported = dataResponse.data.status === 'imported'; + }) + .catch(error => { + console.error('Error checking progress:', error); + if (error.response && error.response.status === 403) { + window.location.href = '/login'; + } else { + this.showSnack('Error checking progress. Please try again.'); + } + }) + .finally(() => { + this.loading = false; + }); }, fetchDefaultSettings() { @@ -593,32 +557,6 @@

{{ _('Two Factor Authentication Policies') }} { - console.log(response.data); - this.adminCreated = true; - this.step = 3; - }) - .catch(error => { - console.error('Error creating admin:', error); - if (error.response && error.response.data && error.response.data.description) { - this.error = error.response.data.description; - } else { - this.error = 'Error creating admin. Please try again.'; - } - }) - .finally(() => { - this.loading = false; - }); - }, - waitForReload() { setTimeout(() => { api.get('/').then(res => { diff --git a/enferno/user/models.py b/enferno/user/models.py index 326fa0fac..19450bc4e 100644 --- a/enferno/user/models.py +++ b/enferno/user/models.py @@ -19,6 +19,19 @@ # Redis key namespace to set flag for forcing password reset SECURITY_KEY_NAMESPACE = "security:user" +# Workflow statuses in which the assigned DA may mutate the item. +# Mirrors the frontend editAllowed() rule. Changes here must match +# the editAllowed() helper in admin/templates/admin/{bulletins,actors,incidents}.html. +EDITABLE_STATUSES = frozenset( + { + "Human Created", + "Assigned", + "Updated", + "Peer Reviewed", + "Revisited", + } +) + logger = get_logger() @@ -338,6 +351,34 @@ def can_access(self, obj: Any) -> bool: return False + def can_edit(self, obj: Any) -> bool: + """ + Check if the user can mutate (create/update) an entity. + + Mirrors the frontend editAllowed() rule: + - Admin can always edit. + - Otherwise must hold DA role. + - For Bulletin/Actor/Incident: must be the assigned analyst AND + the item must be in an editable workflow status. + - For Media: inherit from the parent Bulletin/Actor. + - In all cases the user must first pass can_access() (visibility). 
+ """ + if not self.can_access(obj): + return False + if self.has_role("Admin"): + return True + if not self.has_role("DA"): + return False + + if obj.__tablename__ == "media": + parent = obj.bulletin or obj.actor + return parent is not None and self.can_edit(parent) + + if obj.__tablename__ in ("bulletin", "actor", "incident"): + return obj.assigned_to_id == self.id and obj.status in EDITABLE_STATUSES + + return False + def from_json(self, item: dict) -> "User": """ Populate the User object from a JSON dictionary. diff --git a/enferno/user/views.py b/enferno/user/views.py index 428c41128..84954a2e3 100644 --- a/enferno/user/views.py +++ b/enferno/user/views.py @@ -165,10 +165,14 @@ def auth_callback() -> Response: "User not found. Ask an administrator to create an account for you." ) - # Update the user's Google ID if it doesn't exist + # Enforce a durable binding to the Google subject (BAY-01-019). Bind on + # first login; on later logins for the same email, refuse if the Google + # subject differs instead of silently inheriting the existing identity. if u.google_id is None: u.google_id = unique_id u.save() + elif u.google_id != unique_id: + return HTTPResponse.forbidden("Google account does not match the linked identity.") # Check if 2FA is required before completing login tf_plugin = current_app.extensions["security"]._tf_plugin diff --git a/enferno/utils/base.py b/enferno/utils/base.py index 8ea873b8a..7949e6129 100644 --- a/enferno/utils/base.py +++ b/enferno/utils/base.py @@ -134,6 +134,43 @@ def to_dict(self, mode=None): data.update(self.get_dynamic_fields()) return data + @staticmethod + def sync_relations(submitted, model, key, relate, existing, other_id): + """ + Reconcile a set of nested relations against a submitted payload, enforcing + per-target access control. Only links to targets the current user can access + are created, updated, or deleted; relations to inaccessible items are left + untouched in both directions. 
Single authorization boundary for all + Actor/Bulletin/Incident relation paths. + + Args: + - submitted: list of relation dicts from the request payload. + - model: the target ORM model class (Actor/Bulletin/Incident). + - key: nested dict key holding the target item ("actor"/"bulletin"/"incident"). + - relate: bound relate_* helper used to create/update each link. + - existing: iterable of the entity's current relation rows. + - other_id: callable mapping a relation row to its target id. + """ + from flask_login import current_user + + seen = [] + for relation in submitted: + target = db.session.get(model, relation[key]["id"]) + if not target or not current_user.can_access(target): + continue + seen.append(target.id) + relate(target, relation=relation) + + for r in existing: + rid = other_id(r) + if rid in seen: + continue + target = db.session.get(model, rid) + if not target or not current_user.can_access(target): + continue + r.delete() + target.create_revision() + class ComponentDataMixin(BaseMixin): __abstract__ = True diff --git a/enferno/utils/config_utils.py b/enferno/utils/config_utils.py index 4b999dce6..7e40a505d 100644 --- a/enferno/utils/config_utils.py +++ b/enferno/utils/config_utils.py @@ -166,7 +166,6 @@ class ConfigManager: "twitter.com", ], "YTDLP_COOKIES": "", - "AUTO_APPLY_PATCH_UPDATES": False, "NOTIFICATIONS": NOTIFICATIONS_DEFAULT_CONFIG, # Import from notification_config.py } ) @@ -241,7 +240,6 @@ class ConfigManager: "YTDLP_PROXY": "Proxy URL to use with Web Import", "YTDLP_ALLOWED_DOMAINS": "Allowed Domains for Web Import", "YTDLP_COOKIES": "Cookies to use with Web Import", - "AUTO_APPLY_PATCH_UPDATES": "Auto-apply patch releases", "NOTIFICATIONS": "Notifications", } ) @@ -289,7 +287,6 @@ def serialize(): "SECURITY_ZXCVBN_MINIMUM_SCORE": cfg.SECURITY_ZXCVBN_MINIMUM_SCORE, "DISABLE_MULTIPLE_SESSIONS": cfg.DISABLE_MULTIPLE_SESSIONS, "SESSION_RETENTION_PERIOD": cfg.SESSION_RETENTION_PERIOD, - "AUTO_APPLY_PATCH_UPDATES": 
cfg.AUTO_APPLY_PATCH_UPDATES, "RECAPTCHA_ENABLED": cfg.RECAPTCHA_ENABLED, "RECAPTCHA_PUBLIC_KEY": cfg.RECAPTCHA_PUBLIC_KEY, "RECAPTCHA_PRIVATE_KEY": ConfigManager.MASK_STRING if cfg.RECAPTCHA_PRIVATE_KEY else "", diff --git a/enferno/utils/csv_utils.py b/enferno/utils/csv_utils.py index c8a4e8274..1a3a156b9 100644 --- a/enferno/utils/csv_utils.py +++ b/enferno/utils/csv_utils.py @@ -1,6 +1,18 @@ from typing import Iterable, Optional +def escape_csv_formula_cell(value): + """Neutralize spreadsheet formula injection (BAY-01-024). + + Prefix any string cell starting with =, +, -, or @ with a single quote so + Excel/LibreOffice treat it as inert text rather than an executable formula. + Non-string values pass through unchanged. + """ + if isinstance(value, str) and value[:1] in ("=", "+", "-", "@"): + return "'" + value + return value + + def convert_list_attributes(dictionary: dict) -> dict: """ convert dictionary list attributes into named attributes based on their index. diff --git a/enferno/utils/ocr/pdf.py b/enferno/utils/ocr/pdf.py index 852307aa1..e7a362785 100644 --- a/enferno/utils/ocr/pdf.py +++ b/enferno/utils/ocr/pdf.py @@ -9,15 +9,21 @@ MAX_DIMENSION = 4096 # Cap longest side to keep Vision API happy -def pdf_to_images(file_bytes: bytes) -> list[bytes]: +def pdf_to_images(file_bytes: bytes, max_pages: int | None = None) -> list[bytes]: """Render each PDF page as a JPEG image. - Returns a list of JPEG bytes, one per page. Empty list on failure. + Returns a list of JPEG bytes, one per page. Empty list on failure. When + max_pages is set, rasterization stops after that many pages so a crafted + high-page-count PDF cannot exhaust CPU/memory before the OCR cap is applied + (BAY-01-023). 
""" try: doc = fitz.open(stream=file_bytes, filetype="pdf") result = [] - for page in doc: + for i, page in enumerate(doc): + if max_pages is not None and i >= max_pages: + logger.warning(f"PDF exceeds max_pages={max_pages}; stopped rasterizing at {i}") + break pix = page.get_pixmap(dpi=DPI) longest = max(pix.width, pix.height) if longest > MAX_DIMENSION: diff --git a/enferno/utils/pdf_utils.py b/enferno/utils/pdf_utils.py index 8a0992247..6ce7e9ed6 100644 --- a/enferno/utils/pdf_utils.py +++ b/enferno/utils/pdf_utils.py @@ -1,7 +1,38 @@ +import os from typing import Optional +from urllib.parse import urlparse, unquote + from flask import render_template, current_app +def _safe_url_fetcher(url: str): + """Block external/arbitrary-file resource fetching during PDF rendering + (BAY-01-025). Untrusted rich-text img[src] would otherwise let WeasyPrint + make outbound requests (SSRF) or read local files (file:// disclosure). + + Allow only: data: URIs, the app's own static assets (BASE_URL), and local + files under the app root (the logo and rewritten inline media). Everything + else is refused, the resource is skipped and PDF generation continues. 
+ """ + from weasyprint import default_url_fetcher + + parsed = urlparse(url) + if parsed.scheme == "data": + return default_url_fetcher(url) + if parsed.scheme == "file": + root = os.path.realpath(current_app.root_path) + path = os.path.realpath(unquote(parsed.path)) + if path == root or path.startswith(root + os.sep): + return default_url_fetcher(url) + raise ValueError(f"PDF export: blocked file URL outside app root: {url}") + if parsed.scheme in ("http", "https"): + base = current_app.config.get("BASE_URL") or "" + if base and url.startswith(base): + return default_url_fetcher(url) + raise ValueError(f"PDF export: blocked external URL: {url}") + raise ValueError(f"PDF export: blocked URL scheme: {url}") + + class PDFUtil: """PDF generation utility class.""" @@ -33,7 +64,7 @@ def generate_pdf(self, output: Optional[str] = None) -> None: if output: from weasyprint import HTML - HTML(string=html).write_pdf(output) + HTML(string=html, url_fetcher=_safe_url_fetcher).write_pdf(output) @property def filename(self): diff --git a/flask/Dockerfile b/flask/Dockerfile index 9c11a7995..a0e0857a9 100644 --- a/flask/Dockerfile +++ b/flask/Dockerfile @@ -1,68 +1,79 @@ -# ---- use a base image to compile requirements / save image size ----- -FROM ubuntu:24.04 as base -ENV DEBIAN_FRONTEND=noninteractive +# ---- builder stage: compile python deps with uv ----------------- +FROM ghcr.io/astral-sh/uv:0.5.11-python3.12-bookworm-slim AS builder -RUN apt-get update -y && \ - apt-get install -yq python3.12 python3.12-dev python3.12-venv python3-pip curl \ - libjpeg8-dev libzip-dev libxml2-dev libssl-dev libffi-dev libxslt1-dev \ - libmysqlclient-dev libncurses5-dev libpq-dev \ - libimage-exiftool-perl +ENV UV_COMPILE_BYTECODE=1 \ + UV_LINK_MODE=copy \ + UV_PYTHON_DOWNLOADS=0 \ + UV_NO_DEV=1 \ + DEBIAN_FRONTEND=noninteractive WORKDIR /app -# Sets utf-8 encoding for Python -ENV LANG=C.UTF-8 -# Turns off writing .pyc files -ENV PYTHONDONTWRITEBYTECODE=1 -# Seems to speed things up -ENV 
PYTHONUNBUFFERED=1 -# Install UV -RUN curl -LsSf https://astral.sh/uv/install.sh | sh -ENV PATH="/root/.local/bin:$PATH" +# Build-time headers only; runtime libs installed in the final stage. +# libimage-exiftool-perl is needed at build time because pyexifinfo's +# setup.py probes for the exiftool binary during wheel install. +RUN apt-get update && apt-get install -y --no-install-recommends \ + build-essential \ + libpq-dev \ + libffi-dev \ + libxml2-dev \ + libxslt1-dev \ + libjpeg-dev \ + zlib1g-dev \ + libopenjp2-7-dev \ + libimage-exiftool-perl \ + && rm -rf /var/lib/apt/lists/* COPY pyproject.toml uv.lock /app/ - RUN uv sync --frozen --no-install-project -# ----------------- main container ------------------------- +# ---- runtime stage ----------------------------------------------- +FROM python:3.12-slim-bookworm AS runtime -FROM ubuntu:24.04 +ENV DEBIAN_FRONTEND=noninteractive \ + LANG=C.UTF-8 \ + PYTHONDONTWRITEBYTECODE=1 \ + PYTHONUNBUFFERED=1 \ + PATH="/app/.venv/bin:$PATH" \ + XDG_CACHE_HOME=/tmp/.cache -ENV DEBIAN_FRONTEND=noninteractive ARG ROLE ENV ROLE=${ROLE} -RUN echo "Building ${ROLE} container." -RUN if [ "$ROLE" = "flask" ]; then \ - apt-get update -y && apt-get install -yq python3.12 python3.12-dev python3.12-venv \ - postgis libpango-1.0-0 libharfbuzz0b libpangoft2-1.0-0 libffi-dev \ - libjpeg-dev libopenjp2-7-dev; \ - elif [ "$ROLE" = "celery" ]; then \ - apt-get update -y && apt-get install -yq python3.12 python3.12-dev python3.12-venv \ - postgis libimage-exiftool-perl ffmpeg libpango-1.0-0 libharfbuzz0b \ - libpangoft2-1.0-0 libffi-dev libjpeg-dev libopenjp2-7-dev; \ - fi -RUN apt clean -RUN apt autoremove + +# Shared runtime libs: psycopg2 (libpq5), weasyprint (pango/cairo/harfbuzz), +# pillow (libjpeg, libopenjp2), lxml (libxml2, libxslt). +# Celery roles also need exiftool + ffmpeg for media processing. 
+RUN apt-get update && apt-get install -y --no-install-recommends \ + libpq5 \ + libpango-1.0-0 \ + libpangoft2-1.0-0 \ + libharfbuzz0b \ + libcairo2 \ + libxml2 \ + libxslt1.1 \ + libjpeg62-turbo \ + libopenjp2-7 \ + zlib1g \ + libffi8 \ + fonts-dejavu-core \ + && if [ "$ROLE" = "celery" ] || [ "$ROLE" = "celery-ocr" ]; then \ + apt-get install -y --no-install-recommends \ + libimage-exiftool-perl \ + ffmpeg; \ + fi \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* WORKDIR /app -# Sets utf-8 encoding for Python -ENV LANG=C.UTF-8 -# Turns off writing .pyc files -ENV PYTHONDONTWRITEBYTECODE=1 -# Seems to speed things up -ENV PYTHONUNBUFFERED=1 +RUN useradd --system --create-home --uid 1000 ubuntu COPY --chown=ubuntu:ubuntu . /app -# copy UV-built virtualenv -COPY --from=base /app/.venv /app/.venv +COPY --from=builder --chown=ubuntu:ubuntu /app/.venv /app/.venv COPY --chown=ubuntu:ubuntu ./flask/bin/entrypoint.sh /usr/local/bin/entrypoint.sh - RUN chmod 550 /usr/local/bin/entrypoint.sh -ENV PATH="/app/.venv/bin:$PATH" - USER ubuntu CMD ["/usr/local/bin/entrypoint.sh"] diff --git a/flask/bin/entrypoint.sh b/flask/bin/entrypoint.sh index ce36533a0..090aec8ab 100644 --- a/flask/bin/entrypoint.sh +++ b/flask/bin/entrypoint.sh @@ -2,10 +2,21 @@ set -e if [ "$ROLE" = "flask" ]; then - echo ":: Creating Bayanat Database ::" - flask create-db --create-exts - echo ":: Running migrations ::" - flask db upgrade + if [ -z "$(flask db current 2>/dev/null | grep -oE '[0-9a-f]{12}')" ]; then + echo ":: Fresh DB, creating schema ::" + flask create-db --create-exts + flask db stamp head + # BAY-01-005: bootstrap admin out-of-band (no network-reachable + # /api/create-admin route exists). On a fresh DB the wizard would + # otherwise be unreachable. flask install with --username and no + # --password generates a random password and prints it to stdout; + # the operator retrieves it via `docker-compose logs bayanat`. 
+ echo ":: Bootstrapping initial admin user ::" + flask install --username admin + else + echo ":: Existing DB, running migrations ::" + flask db upgrade + fi echo ":: Starting Bayanat ::" exec uwsgi --http 0.0.0.0:5000 --protocol uwsgi --master --processes 1 --wsgi run:app diff --git a/nginx/Dockerfile b/nginx/Dockerfile index 7b602665e..afaea6254 100644 --- a/nginx/Dockerfile +++ b/nginx/Dockerfile @@ -1,4 +1,4 @@ -FROM bitnami/nginx:1.24 as base +FROM bitnamilegacy/nginx:1.24 as base VOLUME /opt/bitnami/nginx/conf COPY --chown=1001 nginx.conf /opt/bitnami/nginx/conf/ diff --git a/pyproject.toml b/pyproject.toml index c04fca4bc..688f7d3cb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "bayanat" -version = "4.0.0" +version = "4.0.1" description = "Open source data management solution for processing human rights violations and war crimes data" readme = "README.md" license = "AGPL-3.0-or-later" diff --git a/tests/conftest.py b/tests/conftest.py index 97cbfc2dc..ae350a675 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,4 +1,3 @@ -import os from unittest.mock import patch from uuid import uuid4 @@ -53,6 +52,19 @@ def isolated_session_store(app, monkeypatch): monkeypatch.setattr(app.session_interface, "client", fakeredis.FakeStrictRedis()) +@pytest.fixture(autouse=True) +def _bypass_session_freshness(monkeypatch): + """Treat every test session as 'fresh'. FlaskLoginClient can't populate the + Flask-Security primary-auth timestamp the freshness check reads, so the + @fresh_auth gates (BAY-01-016) would otherwise block all privileged-endpoint + tests. Tests that assert freshness behaviour re-patch this to return False. 
+ """ + monkeypatch.setattr( + "flask_security.decorators.check_and_update_authn_fresh", + lambda *a, **k: True, + ) + + @pytest.fixture(scope="session") def app(): """Create a Flask app context for testing.""" @@ -172,7 +184,7 @@ def setup_db_uninitialized(uninitialized_app): try: _db.session.remove() _db.drop_all() - except Exception as e: + except Exception: pass diff --git a/tests/test_pentest_fixes.py b/tests/test_pentest_fixes.py new file mode 100644 index 000000000..a8501ab10 --- /dev/null +++ b/tests/test_pentest_fixes.py @@ -0,0 +1,580 @@ +""" +Regression tests for 7ASecurity BAY-01 pentest findings. + +Each test mirrors the auditor's PoC and asserts the patched behaviour. +Run all of these with: + uv run pytest tests/test_pentest_fixes.py -v +""" + +from uuid import uuid4 + +import pytest +from flask_security.utils import hash_password + +from tests.factories import BulletinFactory + +# --------------------------------------------------------------------------- +# BAY-01-001 Revision history bypasses object-level access +# --------------------------------------------------------------------------- + + +@pytest.fixture +def history_viewer_outside_group(app, session, isolated_session_store): + """User with view_simple_history but no role intersection on any item.""" + from enferno.admin.models import Activity + from enferno.user.models import User + + u = User(username=f"hv-{uuid4().hex[:8]}", password=hash_password("password"), active=1) + u.view_simple_history = True + u.fs_uniquifier = uuid4().hex + session.add(u) + session.commit() + user_id = u.id + with app.app_context(): + with app.test_client(user=u) as client: + yield client + session.query(Activity).filter(Activity.user_id == user_id).delete(synchronize_session=False) + session.delete(u) + session.commit() + + +def _make_restricted_bulletin(session, role_name="TestRole"): + from enferno.user.models import Role + + bulletin = BulletinFactory() + session.add(bulletin) + session.commit() + role = 
session.query(Role).filter(Role.name == role_name).first() + assert role, f"Role {role_name} not found" + bulletin.roles.append(role) + session.commit() + return bulletin + + +def test_bay_01_001_history_blocked_when_outside_group( + session, create_test_role, history_viewer_outside_group +): + bulletin = _make_restricted_bulletin(session) + resp = history_viewer_outside_group.get(f"/admin/api/bulletinhistory/{bulletin.id}") + assert resp.status_code == 403 + + +def test_bay_01_001_history_404_for_missing_bulletin(history_viewer_outside_group): + resp = history_viewer_outside_group.get("/admin/api/bulletinhistory/99999999") + assert resp.status_code == 404 + + +# --------------------------------------------------------------------------- +# BAY-01-002 OCR extraction PUT IDOR +# --------------------------------------------------------------------------- + + +def _make_media_with_extraction(session, bulletin_role_name=None): + from enferno.admin.models import Extraction, Media + from enferno.user.models import Role + + bulletin = BulletinFactory() + session.add(bulletin) + session.commit() + if bulletin_role_name: + role = session.query(Role).filter(Role.name == bulletin_role_name).first() + bulletin.roles.append(role) + session.commit() + + media = Media( + media_file=f"test-{uuid4().hex}.png", + media_file_type="image/png", + etag=uuid4().hex, + bulletin_id=bulletin.id, + ) + session.add(media) + session.commit() + + extraction = Extraction( + media_id=media.id, + text="original text", + status="processed", + history=[], + ) + session.add(extraction) + session.commit() + return media, extraction + + +def test_bay_01_002_extraction_put_blocked_outside_group(session, create_test_role, da_client): + """DA without TestRole cannot mutate extraction on a bulletin restricted to TestRole.""" + _, extraction = _make_media_with_extraction(session, bulletin_role_name="TestRole") + resp = da_client.put( + f"/admin/api/extraction/{extraction.id}", + json={"action": 
"transcribe", "text": "tampered"}, + ) + assert resp.status_code == 403 + + +def test_bay_01_002_extraction_put_404_for_missing(da_client): + resp = da_client.put( + "/admin/api/extraction/99999999", + json={"action": "transcribe", "text": "x"}, + ) + assert resp.status_code == 404 + + +# --------------------------------------------------------------------------- +# BAY-01-004 Path traversal in CSV/XLS analyze +# --------------------------------------------------------------------------- + + +@pytest.mark.parametrize( + "endpoint, payload", + [ + ("/import/api/csv/analyze", {"file": {"filename": "../../../../app/.env"}}), + ("/import/api/xls/sheets", {"file": {"filename": "../../../etc/passwd"}}), + ( + "/import/api/xls/analyze", + {"file": {"filename": "../../../../app/.env"}, "sheet": "Sheet1"}, + ), + ], +) +def test_bay_01_004_path_traversal_rejected(admin_client, endpoint, payload): + resp = admin_client.post(endpoint, json=payload) + assert resp.status_code == 400 + + +# --------------------------------------------------------------------------- +# BAY-01-003 Export pipeline filters items by requester.can_access +# --------------------------------------------------------------------------- + + +def test_bay_01_003_accessible_items_skips_restricted(session, create_test_role, users): + """Helper drops restricted items the requester cannot access.""" + from enferno.tasks.exports import _accessible_items + + _, _, _, sa_dict = users + da_user = sa_dict["da"] # DA without TestRole + + accessible = _make_restricted_bulletin(session, role_name="TestRole") + accessible.roles = [] # explicitly unrestricted + session.commit() + + restricted = _make_restricted_bulletin(session, role_name="TestRole") + + result = list(_accessible_items(da_user, [accessible, restricted], export_id=1)) + assert accessible in result + assert restricted not in result + + +def test_bay_01_003_accessible_items_no_requester_yields_nothing(session): + from enferno.tasks.exports import 
_accessible_items + + bulletin = BulletinFactory() + session.add(bulletin) + session.commit() + assert list(_accessible_items(None, [bulletin], export_id=1)) == [] + + +# --------------------------------------------------------------------------- +# BAY-01-005 /api/create-admin route is gone; admin bootstrap is CLI-only +# --------------------------------------------------------------------------- + + +def test_bay_01_005_create_admin_endpoint_removed(anonymous_client): + resp = anonymous_client.post( + "/api/create-admin", + json={"username": "rogue", "password": "x" * 12}, + ) + assert resp.status_code == 404 + + +def test_bay_01_005_check_admin_endpoint_removed(anonymous_client): + resp = anonymous_client.get("/api/check-admin") + assert resp.status_code == 404 + + +# --------------------------------------------------------------------------- +# BAY-01-007 Login rate limit (Flask-Limiter applied to security.login) +# --------------------------------------------------------------------------- + + +def test_bay_01_007_login_brute_force_returns_429(app, anonymous_client): + """Repeated bad logins from one client trip Flask-Limiter and return 429.""" + from enferno.extensions import limiter + + limiter.reset() + last_status = None + for _ in range(15): + resp = anonymous_client.post( + "/login", + data={"username": "noone", "password": "wrong"}, + ) + last_status = resp.status_code + if last_status == 429: + break + assert last_status == 429 + + +# --------------------------------------------------------------------------- +# BAY-01-008 Stored XSS via import sanitization gap +# --------------------------------------------------------------------------- + + +@pytest.mark.parametrize( + "payload", + [ + "", + "safe", + "", + 'click', + ], +) +def test_bay_01_008_sanitize_strips_xss(payload): + from enferno.utils.validation_utils import sanitize_string + + cleaned = sanitize_string(payload) + assert "onerror" not in cleaned + assert "hello world

") + assert "

" in cleaned + assert "" in cleaned + + +# --------------------------------------------------------------------------- +# BAY-01-009 Media update must enforce editAllowed (assigned analyst only) +# --------------------------------------------------------------------------- + + +def test_bay_01_009_media_put_blocked_when_not_assigned(session, da_client): + """DA cannot mutate media on a bulletin that isn't assigned to them, even + when the bulletin is otherwise accessible. Mirrors the frontend + editAllowed() rule.""" + from enferno.admin.models import Media + + bulletin = BulletinFactory() + session.add(bulletin) + session.commit() + # Intentionally leave assigned_to_id unset and roles empty: the bulletin is + # visible to any analyst, but no analyst is the assigned editor. + + media = Media( + media_file=f"test-{uuid4().hex}.png", + media_file_type="image/png", + etag=uuid4().hex, + bulletin_id=bulletin.id, + ) + session.add(media) + session.commit() + + resp = da_client.put( + f"/admin/api/media/{media.id}", + json={"item": {"title": "tampered", "fileType": "image/png"}}, + ) + assert resp.status_code == 403 + + +def test_bay_01_009_media_put_404_for_missing(da_client): + resp = da_client.put( + "/admin/api/media/99999999", + json={"item": {"title": "x"}}, + ) + assert resp.status_code == 404 + + +# --------------------------------------------------------------------------- +# BAY-01-039 Sheet import handle_mismatch must not store raw HTML +# (set_description was already sanitized under BAY-01-008; this covers the +# second stored-XSS sink the report flagged) +# --------------------------------------------------------------------------- + + +@pytest.mark.parametrize( + "payload", + ["", "", ""], +) +def test_bay_01_039_handle_mismatch_sanitizes_description(payload): + from enferno.data_import.utils.sheet_import import SheetImport + from enferno.admin.models import ActorProfile + + si = SheetImport.__new__(SheetImport) + si.data_import = type("D", (), 
{"add_to_log": lambda self, msg: None})() + si.actor_profile = ActorProfile() + si.actor_profile.description = "" + + si.handle_mismatch("type", payload) + + desc = si.actor_profile.description + assert " restricts the bulletin from them + restrict_role = Role(name=f"Restrict-{uuid4().hex[:6]}") + session.add(restrict_role) + session.commit() + + open_b = BulletinFactory() # no roles -> accessible + restricted_b = BulletinFactory() + session.add_all([open_b, restricted_b]) + session.commit() + restricted_b.roles = [restrict_role] + session.add(restricted_b) + session.commit() + + bulk_update_bulletins.run( + [open_b.id, restricted_b.id], + {"comments": "bulk-test", "status": "Assigned"}, + mod_user.id, + ) + + open_revs = BulletinHistory.query.filter(BulletinHistory.bulletin_id == open_b.id).count() + restricted_revs = BulletinHistory.query.filter( + BulletinHistory.bulletin_id == restricted_b.id + ).count() + assert open_revs >= 1 # accessible item got its revision + assert restricted_revs == 0 # restricted item must NOT get a fabricated revision + + +# --------------------------------------------------------------------------- +# BAY-01-021 Username masking: the list APIs now serialize assignee/reviewer +# via User.to_compact(), which masks names for users without view_usernames. +# (End-to-end list-API masking is verified live; the harness can't mutate the +# request's current_user. This covers the masking primitive the views use.) 
+# --------------------------------------------------------------------------- + + +def test_bay_01_021_to_compact_masks_without_view_usernames(users): + from unittest.mock import patch + import enferno.user.models as um + + admin_user, _da, _mod, _ = users + + class _NoView: + view_usernames = False + + def has_role(self, r): + return False + + class _CanView: + view_usernames = True + + def has_role(self, r): + return False + + with patch.object(um, "has_request_context", lambda: True): + with patch.object(um, "current_user", _NoView()): + c = admin_user.to_compact() + assert c["name"] == f"user-{admin_user.id}" + assert c["username"] == f"user-{admin_user.id}" + assert admin_user.name not in (c["name"], c["username"]) + + with patch.object(um, "current_user", _CanView()): + c = admin_user.to_compact() + assert c["name"] == admin_user.name + + +# --------------------------------------------------------------------------- +# BAY-01-016 Privileged admin mutation APIs must enforce session freshness +# (config-driven), not just role membership. conftest bypasses freshness for +# functional tests; here we restore the REAL freshness check so the positive +# config window engages. The harness session has no primary-auth timestamp, so +# real freshness fails on @fresh_auth routes but within=-1 routes still pass. +# --------------------------------------------------------------------------- + + +def test_bay_01_016_privileged_api_requires_fresh_session(admin_client, monkeypatch): + from flask_security.utils import check_and_update_authn_fresh as real_fresh + + monkeypatch.setattr("flask_security.decorators.check_and_update_authn_fresh", real_fresh) + + # Non-gated read still works (its auth uses within<0 -> always fresh). + assert admin_client.get("/admin/api/users/").status_code == 200 + + # Privileged mutations carry @fresh_auth (positive config window): a session + # with no primary-auth timestamp is rejected by the freshness gate. 
+ for path, method in [ + ("/admin/api/user/revoke_2fa", "delete"), + ("/admin/api/user/force-reset-all", "post"), + ("/admin/api/reload/", "post"), + ("/admin/api/configuration/", "put"), + ]: + resp = getattr(admin_client, method)(path, json={}) + assert resp.status_code in (302, 401, 403), f"{method} {path} not freshness-gated" + + +# --------------------------------------------------------------------------- +# BAY-01-026 Export creation must store only items the requester can access, +# not arbitrary requester-supplied IDs +# --------------------------------------------------------------------------- + + +def test_bay_01_026_export_filters_inaccessible_items(session, users, monkeypatch): + import enferno.export.models as em + from enferno.export.models import Export + from enferno.user.models import Role + + _admin, da_user, _mod, _ = users + + restrict_role = Role(name=f"Restrict-{uuid4().hex[:6]}") + session.add(restrict_role) + session.commit() + + open_b = BulletinFactory() # no roles -> DA can access + restricted_b = BulletinFactory() + session.add_all([open_b, restricted_b]) + session.commit() + restricted_b.roles = [restrict_role] # DA lacks this role + session.add(restricted_b) + session.commit() + + monkeypatch.setattr(em, "current_user", da_user) + ids = Export._accessible_item_ids("bulletin", [open_b.id, restricted_b.id]) + assert open_b.id in ids + assert restricted_b.id not in ids + + +# --------------------------------------------------------------------------- +# BAY-01-023 PDF OCR must cap rasterization at the page limit, not rasterize +# the whole document and then truncate +# --------------------------------------------------------------------------- + + +def test_bay_01_023_pdf_to_images_caps_pages(): + import fitz + from enferno.utils.ocr.pdf import pdf_to_images + + doc = fitz.open() + for _ in range(10): + doc.new_page() + pdf_bytes = doc.tobytes() + doc.close() + + # without a cap, all pages render + assert len(pdf_to_images(pdf_bytes)) 
== 10 + # with a cap, rasterization stops early (does not render all 10 then slice) + assert len(pdf_to_images(pdf_bytes, max_pages=3)) == 3 + + +# --------------------------------------------------------------------------- +# BAY-01-025 PDF export must not fetch external/arbitrary-file resources +# (SSRF + file:// disclosure via untrusted rich-text img[src]) +# --------------------------------------------------------------------------- + + +def test_bay_01_025_pdf_url_fetcher_blocks_external(app): + import os + from enferno.utils.pdf_utils import _safe_url_fetcher + + # External requests (SSRF) and file:// outside the app root are refused. + for bad in ( + "http://169.254.169.254/latest/meta-data/", + "https://evil.example/x.png", + "file:///etc/passwd", + ): + with pytest.raises(ValueError): + _safe_url_fetcher(bad) + + # data: URIs are allowed (inline, no fetch). + _safe_url_fetcher("data:text/plain;base64,SGVsbG8=") + + # Local files under the app root (logo / rewritten inline media) are allowed. 
+ logo = os.path.join(os.path.realpath(app.root_path), "static/img/bayanat-h-v2.png") + _safe_url_fetcher(f"file://{logo}") + + +# --------------------------------------------------------------------------- +# BAY-01-020 Inline media filenames must be opaque/unguessable, not a +# reconstructable timestamp+basename +# --------------------------------------------------------------------------- + + +def test_bay_01_020_inline_filename_is_opaque(): + from enferno.admin.models import Media + + a = Media.generate_inline_file_name("evidence.png") + b = Media.generate_inline_file_name("evidence.png") + assert a != b # random per upload, not deterministic + assert a.endswith(".png") # extension preserved + assert "evidence" not in a # original basename not leaked + # no YYYYMMDD-HHMMSS timestamp prefix + import re + + assert not re.match(r"^\d{8}-\d{6}-", a) diff --git a/tests/test_security.py b/tests/test_security.py index 458e7ee16..03c5b03b9 100644 --- a/tests/test_security.py +++ b/tests/test_security.py @@ -68,8 +68,10 @@ def test_setup_routes_exist_when_uninitialized(self): uninit_app = create_app(cfg) rules = [r.rule for r in uninit_app.url_map.iter_rules()] assert "/setup_wizard" in rules - assert "/api/check-admin" in rules - assert "/api/create-admin" in rules + # /api/create-admin and /api/check-admin removed (BAY-01-005); + # admin bootstrap is now CLI-only via `flask install`. 
+ assert "/api/create-admin" not in rules + assert "/api/check-admin" not in rules class TestInputValidation: @@ -124,27 +126,6 @@ def test_redirect_to_setup(self, uninitialized_app, setup_db_uninitialized): assert resp.status_code == 302 assert "setup_wizard" in resp.location - def test_check_admin_not_found(self, uninitialized_app, setup_db_uninitialized): - client = uninitialized_app.test_client() - resp = client.get("/api/check-admin") - assert resp.status_code == 200 - assert resp.json["data"] == {"status": "not_found"} - assert resp.json["message"] == "No admin user found" - - def test_create_admin_user(self, uninitialized_app, session_uninitialized): - from enferno.user.models import User - - client = uninitialized_app.test_client() - resp = client.post( - "/api/create-admin", - json={"username": "testAdmin", "password": "password"}, - ) - assert resp.status_code == 201 - assert resp.json["message"] == "Admin user installed successfully" - assert resp.json["data"]["item"]["username"] == "testAdmin" - admin = User.query.filter(User.username == "testAdmin").first() - assert admin is not None - @pytest.mark.parametrize( "client_fixture, expected", [ diff --git a/tests/test_update_check.py b/tests/test_update_check.py index f0b12a9aa..ba53db471 100644 --- a/tests/test_update_check.py +++ b/tests/test_update_check.py @@ -1,6 +1,6 @@ from unittest.mock import MagicMock, patch -from enferno.tasks.maintenance import _strip_v, _is_patch_bump +from enferno.tasks.maintenance import _strip_v def test_strip_v_prefix(): @@ -9,27 +9,6 @@ def test_strip_v_prefix(): assert _strip_v("") == "" -def test_is_patch_bump_true(): - assert _is_patch_bump("4.1.0", "4.1.1") is True - assert _is_patch_bump("4.1.2", "4.1.10") is True - - -def test_is_patch_bump_false_for_minor(): - assert _is_patch_bump("4.1.0", "4.2.0") is False - - -def test_is_patch_bump_false_for_major(): - assert _is_patch_bump("4.1.0", "5.0.0") is False - - -def test_is_patch_bump_false_for_same(): - assert 
_is_patch_bump("4.1.0", "4.1.0") is False - - -def test_is_patch_bump_false_for_downgrade(): - assert _is_patch_bump("4.1.5", "4.1.3") is False - - def _github_response(tag): return MagicMock( raise_for_status=lambda: None, @@ -37,60 +16,35 @@ def _github_response(tag): ) -def test_auto_apply_patch_triggers_wrapper_when_flag_on(): +def test_new_release_notifies_admins(): + """Update check is notify-only: a new tag caches the latest and notifies + admins. It never triggers a privileged update (BAY-01-013).""" from enferno.tasks import maintenance fake_redis = MagicMock() fake_redis.get.return_value = None # nothing notified yet with ( - patch.object(maintenance, "cfg", MagicMock(AUTO_APPLY_PATCH_UPDATES=True)), patch.object(maintenance, "requests") as req, patch.object(maintenance, "rds", fake_redis), - patch.object(maintenance, "subprocess") as sp, patch.object(maintenance, "_current_version", return_value="4.1.0"), patch.object(maintenance, "Notification") as notif, ): req.get.return_value = _github_response("v4.1.1") maintenance.check_for_updates.run() - sp.run.assert_called_once() - args = sp.run.call_args.args[0] - assert args == ["sudo", "-n", "/usr/local/sbin/bayanat-start-update"] - notif.create_for_admins.assert_not_called() - - -def test_auto_apply_off_falls_back_to_notification(): - from enferno.tasks import maintenance - - fake_redis = MagicMock() - fake_redis.get.return_value = None - with ( - patch.object(maintenance, "cfg", MagicMock(AUTO_APPLY_PATCH_UPDATES=False)), - patch.object(maintenance, "requests") as req, - patch.object(maintenance, "rds", fake_redis), - patch.object(maintenance, "subprocess") as sp, - patch.object(maintenance, "_current_version", return_value="4.1.0"), - patch.object(maintenance, "Notification") as notif, - ): - req.get.return_value = _github_response("v4.1.1") - maintenance.check_for_updates.run() - sp.run.assert_not_called() notif.create_for_admins.assert_called_once() -def 
test_auto_apply_on_but_minor_bump_still_notifies(): +def test_same_version_does_not_notify(): from enferno.tasks import maintenance fake_redis = MagicMock() fake_redis.get.return_value = None with ( - patch.object(maintenance, "cfg", MagicMock(AUTO_APPLY_PATCH_UPDATES=True)), patch.object(maintenance, "requests") as req, patch.object(maintenance, "rds", fake_redis), - patch.object(maintenance, "subprocess") as sp, - patch.object(maintenance, "_current_version", return_value="4.1.0"), + patch.object(maintenance, "_current_version", return_value="4.1.1"), patch.object(maintenance, "Notification") as notif, ): - req.get.return_value = _github_response("v4.2.0") + req.get.return_value = _github_response("v4.1.1") maintenance.check_for_updates.run() - sp.run.assert_not_called() - notif.create_for_admins.assert_called_once() + notif.create_for_admins.assert_not_called() diff --git a/tests/test_update_endpoints.py b/tests/test_update_endpoints.py index 5f71d849a..8dc74aa35 100644 --- a/tests/test_update_endpoints.py +++ b/tests/test_update_endpoints.py @@ -1,15 +1,4 @@ import json -import time -from unittest.mock import patch - - -def _fresh_session(client): - """Mark the test-client session as freshly authenticated so - `@auth_required(within=15)` passes. Flask-Security stores the primary - auth timestamp in session key 'fs_paa'. 
- """ - with client.session_transaction() as sess: - sess["fs_paa"] = time.time() def test_available_returns_cached(admin_client): @@ -73,27 +62,8 @@ def test_status_terminal_when_success(admin_client, tmp_path, monkeypatch): assert data["running"] is False -def test_start_calls_wrapper(admin_client): - _fresh_session(admin_client) - with patch("enferno.admin.views.system.subprocess.run") as run: - resp = admin_client.post("/admin/api/updates/start") - assert resp.status_code == 200 - run.assert_called_once() - args = run.call_args.args[0] - assert args == ["sudo", "-n", "/usr/local/sbin/bayanat-start-update"] - - -def test_start_requires_fresh_auth(admin_client): - # No _fresh_session call -> session is stale -> auth_required(within=15) - # should reject with redirect/401/403. - with patch("enferno.admin.views.system.subprocess.run") as run: - resp = admin_client.post("/admin/api/updates/start") - assert resp.status_code in (302, 401, 403) - run.assert_not_called() - - -def test_non_admin_cannot_start(da_client): - _fresh_session(da_client) - resp = da_client.post("/admin/api/updates/start") - # roles_required returns 403 (Forbidden) for wrong-role users - assert resp.status_code in (401, 403) +def test_start_endpoint_removed(admin_client): + """The privileged web-triggered update endpoint is gone (BAY-01-013). + Updates are applied via the root CLI only.""" + resp = admin_client.post("/admin/api/updates/start") + assert resp.status_code == 404 diff --git a/uv.lock b/uv.lock index 9407d3dbd..9287b7adb 100644 --- a/uv.lock +++ b/uv.lock @@ -167,7 +167,7 @@ wheels = [ [[package]] name = "bayanat" -version = "4.0.0" +version = "4.0.1" source = { editable = "." } dependencies = [ { name = "amqp" },