From 8392bc8875a0eee02ac57018bef5fcbcb6ec2c49 Mon Sep 17 00:00:00 2001 From: Robert Hodges Date: Sun, 10 May 2026 13:56:39 -0700 Subject: [PATCH] Add new skills for deploying ClickHouse servers This commit adds an overview skill for deployment, knowledge about different server builds, and dev deployments using containers on docker compose or desktop Kubernetes. --- .gitignore | 2 +- altinity-deploy-clickhouse/Makefile | 91 ++++++ .../SKILL.md | 290 ++++++++++++++++++ .../assets/find-latest-versions.sh | 175 +++++++++++ .../references/INDEX.md | 102 ++++++ .../SKILL.md | 197 ++++++++++++ .../assets/README.md | 161 ++++++++++ .../assets/config.d/keeper.xml | 9 + .../assets/config.d/listen.xml | 12 + .../assets/config.d/macros.xml | 8 + .../assets/docker-compose.yml | 60 ++++ .../assets/keeper-config.xml | 34 ++ .../assets/users.d/default-password.xml | 16 + .../SKILL.md | 257 ++++++++++++++++ .../assets/README.md.template | 152 +++++++++ .../assets/helm-values.yaml | 39 +++ .../assets/installation.yaml | 93 ++++++ .../SKILL.md | 110 +++++++ .../SKILL.md | 158 ++++++++++ .../checks.sql | 119 +++++++ altinity-expert-kubernetes/Makefile | 91 ++++++ .../SKILL.md | 279 +++++++++++++++++ .../assets/kind-multinode.yaml | 13 + 23 files changed, 2467 insertions(+), 1 deletion(-) create mode 100644 altinity-deploy-clickhouse/Makefile create mode 100644 altinity-deploy-clickhouse/skills/altinity-deploy-clickhouse-builds/SKILL.md create mode 100755 altinity-deploy-clickhouse/skills/altinity-deploy-clickhouse-builds/assets/find-latest-versions.sh create mode 100644 altinity-deploy-clickhouse/skills/altinity-deploy-clickhouse-builds/references/INDEX.md create mode 100644 altinity-deploy-clickhouse/skills/altinity-deploy-clickhouse-docker/SKILL.md create mode 100644 altinity-deploy-clickhouse/skills/altinity-deploy-clickhouse-docker/assets/README.md create mode 100644 altinity-deploy-clickhouse/skills/altinity-deploy-clickhouse-docker/assets/config.d/keeper.xml create mode 100644 
altinity-deploy-clickhouse/skills/altinity-deploy-clickhouse-docker/assets/config.d/listen.xml create mode 100644 altinity-deploy-clickhouse/skills/altinity-deploy-clickhouse-docker/assets/config.d/macros.xml create mode 100644 altinity-deploy-clickhouse/skills/altinity-deploy-clickhouse-docker/assets/docker-compose.yml create mode 100644 altinity-deploy-clickhouse/skills/altinity-deploy-clickhouse-docker/assets/keeper-config.xml create mode 100644 altinity-deploy-clickhouse/skills/altinity-deploy-clickhouse-docker/assets/users.d/default-password.xml create mode 100644 altinity-deploy-clickhouse/skills/altinity-deploy-clickhouse-kubernetes/SKILL.md create mode 100644 altinity-deploy-clickhouse/skills/altinity-deploy-clickhouse-kubernetes/assets/README.md.template create mode 100644 altinity-deploy-clickhouse/skills/altinity-deploy-clickhouse-kubernetes/assets/helm-values.yaml create mode 100644 altinity-deploy-clickhouse/skills/altinity-deploy-clickhouse-kubernetes/assets/installation.yaml create mode 100644 altinity-deploy-clickhouse/skills/altinity-deploy-clickhouse-overview/SKILL.md create mode 100644 altinity-deploy-clickhouse/skills/altinity-deploy-clickhouse-smoke-test/SKILL.md create mode 100644 altinity-deploy-clickhouse/skills/altinity-deploy-clickhouse-smoke-test/checks.sql create mode 100644 altinity-expert-kubernetes/Makefile create mode 100644 altinity-expert-kubernetes/skills/altinity-expert-kubernetes-desktop/SKILL.md create mode 100644 altinity-expert-kubernetes/skills/altinity-expert-kubernetes-desktop/assets/kind-multinode.yaml diff --git a/.gitignore b/.gitignore index ca81a67..982a4f8 100644 --- a/.gitignore +++ b/.gitignore @@ -3,6 +3,6 @@ **/.claude/ output/ altinity-expert-clickhouse/skills/.system/ -releases/*.zip +**/releases/*.zip __pycache__/ *.pyc diff --git a/altinity-deploy-clickhouse/Makefile b/altinity-deploy-clickhouse/Makefile new file mode 100644 index 0000000..a71f65d --- /dev/null +++ b/altinity-deploy-clickhouse/Makefile @@ 
-0,0 +1,116 @@
+# Altinity Deploy ClickHouse - Skills Build System
+# Builds zips, links into ~/.claude/skills, and lists skills in this domain.
+# Scope: altinity-deploy-clickhouse/skills/*/SKILL.md only.
+# Run from this directory: skill discovery and packaging use relative paths.
+
+# Delete a half-written zip when its recipe fails so it never looks up to date.
+.DELETE_ON_ERROR:
+
+# CURDIR is make's own absolute working directory; no shell fork is needed.
+DOMAIN_DIR := $(CURDIR)
+BUILD_DIR := $(DOMAIN_DIR)/releases
+# One entry per skills/<name>/SKILL.md. A glob '*' never matches dot-directories,
+# so skills/.system is skipped without an explicit exclusion.
+SKILL_DIRS := $(sort $(patsubst %/SKILL.md,%,$(wildcard skills/*/SKILL.md)))
+SKILL_ZIPS := $(foreach dir,$(SKILL_DIRS),$(BUILD_DIR)/$(notdir $(dir)).zip)
+
+# Symlink destination; override on the command line or from the environment.
+CLAUDE_SKILLS_DIR ?= $(HOME)/.claude/skills
+
+.PHONY: all clean list help link unlink relink links
+
+# Default goal: package every discovered skill.
+all: $(SKILL_ZIPS) | $(BUILD_DIR)
+	@echo "Built $(words $(SKILL_ZIPS)) skill packages in $(BUILD_DIR)/"
+
+$(BUILD_DIR):
+	@mkdir -p $(BUILD_DIR)
+
+# ZIP_template(skill-dir) expands to the rule that packages one skill.
+# - Every packaged file is a prerequisite, so editing an asset or reference
+#   (not only SKILL.md) triggers a repackage.
+# - The build directory is an order-only prerequisite: it must exist first,
+#   which keeps `make -j` and direct `make releases/<skill>.zip` working.
+# - The old archive is removed first because `zip -r` updates an existing
+#   archive in place and would keep entries for files deleted from the skill.
+define ZIP_template
+$(BUILD_DIR)/$(notdir $(1)).zip: $(1)/SKILL.md $(shell find $(1) -type f ! -name ".DS_Store" ! -name "*.pyc" ! -path "*__pycache__*" ! -path "*.git*" 2>/dev/null) | $(BUILD_DIR)
+	@echo "Packaging $(notdir $(1))..."
+	@rm -f $$@
+	@cd $(1) && zip -r $$@ . -x "*.DS_Store" -x "*__MACOSX*" -x "*.git*" -x "*__pycache__*" -x "*.pyc"
+endef
+
+$(foreach dir,$(SKILL_DIRS),$(eval $(call ZIP_template,$(dir))))
+
+clean:
+	@echo "Cleaning build directory..."
+	@rm -rf $(BUILD_DIR)
+
+list:
+	@echo "Deploy skills found:"
+	@$(foreach dir,$(SKILL_DIRS),echo " - $(notdir $(dir)) ($(dir))";)
+
+# Symlink each skill into CLAUDE_SKILLS_DIR; never replaces an existing link,
+# file, or directory.
+link: | $(CLAUDE_SKILLS_DIR)
+	@$(foreach dir,$(SKILL_DIRS), \
+		target="$(CLAUDE_SKILLS_DIR)/$(notdir $(dir))"; \
+		src="$(DOMAIN_DIR)/$(dir)"; \
+		if [ -L "$$target" ]; then \
+			cur=$$(readlink "$$target"); \
+			if [ "$$cur" = "$$src" ]; then \
+				echo "ok $(notdir $(dir))"; \
+			else \
+				echo "CONFLICT $(notdir $(dir)) -> $$cur (run 'make relink' to replace)"; \
+			fi; \
+		elif [ -e "$$target" ]; then \
+			echo "SKIP $(notdir $(dir)) (real dir/file exists, not touching)"; \
+		else \
+			ln -s "$$src" "$$target" && echo "linked $(notdir $(dir))"; \
+		fi;)
+
+# Remove any symlink named after one of our skills (whatever it points to);
+# real files and directories are left alone.
+unlink:
+	@$(foreach dir,$(SKILL_DIRS), \
+		target="$(CLAUDE_SKILLS_DIR)/$(notdir $(dir))"; \
+		if [ -L "$$target" ]; then \
+			rm "$$target" && echo "unlinked $(notdir $(dir))"; \
+		fi;)
+
+# Sequential sub-makes: as plain prerequisites, unlink and link could run
+# concurrently under `make -j`.
+relink:
+	@$(MAKE) --no-print-directory unlink
+	@$(MAKE) --no-print-directory link
+
+links:
+	@echo "Deploy skills in $(CLAUDE_SKILLS_DIR):"
+	@$(foreach dir,$(SKILL_DIRS), \
+		target="$(CLAUDE_SKILLS_DIR)/$(notdir $(dir))"; \
+		if [ -L "$$target" ]; then \
+			echo " $(notdir $(dir)) -> $$(readlink "$$target")"; \
+		else \
+			echo " $(notdir $(dir)) (not linked)"; \
+		fi;)
+
+$(CLAUDE_SKILLS_DIR):
+	@mkdir -p $(CLAUDE_SKILLS_DIR)
+
+help:
+	@echo "Altinity Deploy ClickHouse - Skills Build System"
+	@echo ""
+	@echo "Usage:"
+	@echo "  make          Build all skill zip files"
+	@echo "  make all      Same as 'make'"
+	@echo "  make clean    Remove all built zip files"
+	@echo "  make list     List all detected deploy skills"
+	@echo "  make link     Symlink all deploy skills into ~/.claude/skills"
+	@echo "  make unlink   Remove those symlinks (leaves real dirs alone)"
+	@echo "  make relink   unlink + link"
+	@echo "  make links    Show current link state"
+	@echo "  make help     Show this help"
+	@echo ""
+	@echo "Output: releases/<skill-name>.zip"
+	@echo ""
+	@echo "Override link destination: make link CLAUDE_SKILLS_DIR=./.claude/skills"
diff --git
a/altinity-deploy-clickhouse/skills/altinity-deploy-clickhouse-builds/SKILL.md b/altinity-deploy-clickhouse/skills/altinity-deploy-clickhouse-builds/SKILL.md new file mode 100644 index 0000000..9581327 --- /dev/null +++ b/altinity-deploy-clickhouse/skills/altinity-deploy-clickhouse-builds/SKILL.md @@ -0,0 +1,290 @@ +--- +name: altinity-deploy-clickhouse-builds +description: Finds the right ClickHouse build and educates users on the tradeoffs between the four supported flavors (ClickHouse Official, Altinity Antalya, Altinity Stable, Altinity FIPS), the three distribution forms (container, package, binary tarball), and the supported machine architectures (x86_64, aarch64). Use when picking a build, comparing build flavors, configuring apt/yum repos, choosing a Docker image tag, or downloading a tarball for ClickHouse. +author: Altinity Inc +version: 0.0.1 +license: Apache-2.0 +--- + +# ClickHouse Build Selection + +Pick the right ClickHouse build along three independent axes — **flavor**, **distribution form**, and **architecture** — and produce concrete coordinates (image tag, repo URL, or tarball URL) that downstream installer skills consume. Educate the user on tradeoffs when they're unsure. Do not install anything from this skill. + +--- + +## Action Mode + +Hybrid: + +- Read-only checks (registry / repo reachability, image tag existence, latest-version queries) run automatically. +- No mutating steps in this skill — selection only. Installer skills are responsible for fetching and installing. + +--- + +## Documentation References + +When the user asks a question that requires authoritative documentation (build flavor scope, FIPS support model, supported OS matrix, current install command for a specific distro), consult `references/INDEX.md`. It groups canonical URLs by topic — build flavors, artifact locations, architecture/OS, compliance — and tells you when each source is the right one to fetch. 
+ +Use `WebFetch` to read the URL when current data matters; do not paraphrase from memory if a definitive answer is available at a known canonical source. + +--- + +## Step 1 — Choose Build Flavor + +There are four supported ClickHouse builds. Each has a distinct purpose; the choice is rarely about "newer = better." + +### 1. ClickHouse Official Builds + +- **Source:** ClickHouse, Inc. (the upstream project). +- **Audience:** Users who want to track upstream directly, run the latest features as soon as they ship, or match what the upstream community is testing. Long Term Support builds appear in March and August (by convention, check docs). All other builds are monthly with short community support tails. +- **Cadence:** Frequent releases on the upstream cadence; LTS tags exist but support windows differ from Altinity's. +- **Support:** Community / commercial support from ClickHouse, Inc. +- **Recommended for:** Development, evaluation of new features, environments aligned with the upstream community. + +### 2. Altinity Antalya Builds + +- **Source:** Altinity. +- **Audience:** Teams **developing new applications** that need cutting-edge capabilities — Iceberg-backed data lakes, Hybrid Tables (MergeTree + Iceberg in one query), swarm clusters, fast Parquet reads, OAuth/OIDC, tiered storage from MergeTree to shared Iceberg. Antalya builds are 100% compatible with matching upstream ClickHouse versions, so applications built on Antalya remain portable. +- **Cadence:** Faster than Altinity Stable; carries Altinity's feature-forward changes. +- **Support:** Altinity, on a feature-forward track rather than a long-support track. +- **Recommended for:** New application development that genuinely needs Antalya-only features. If your workload runs fine on stock ClickHouse, prefer Altinity Stable. 
+ +> ⚠️ **Use only if you need the features.** Altinity's own documentation states Project Antalya is *not* for production use ([docs.altinity.com/altinityantalya](https://docs.altinity.com/altinityantalya/)) — read this as "don't pick Antalya by default; pick it because you specifically need the cutting-edge capabilities and accept a feature-forward support track." For workloads that don't depend on those capabilities, **Altinity Stable** is the production choice. +> +> **Verify additional scope.** Antalya features are improving rapidly; specifics (which features ride on it, which versions are current, exact distribution coverage) change over time. Confirm the current scope against Altinity's documentation before recommending. + +### 3. Altinity Stable Builds + +- **Source:** Altinity. +- **Audience:** Users who explicitly want the **conservative track** — long-term support, slower cadence, qualified releases only. Based on upstream Long Term Support (LTS) releases with selected backports and bugfixes needed by Altinity users. +- **Cadence:** Slower than upstream; selected upstream releases promoted to Altinity Stable lines after qualification. +- **Support:** Altinity, with long-term support windows and security backports — security fixes land on supported lines without forcing a major-version jump. +- **Recommended for:** Production deployments and any case where the workload doesn't need Antalya-only features. **This is the default when no preference is stated.** + +### 4. Altinity FIPS Builds + +- **Source:** Altinity. +- **Audience:** Users in **regulated environments** that require FIPS 140-3 compatible cryptography (US federal, healthcare, finance, defense, certain enterprise compliance regimes). +- **Cadence:** Tracks Altinity Stable; the differentiator is the cryptographic module, not the feature set. +- **Support:** Altinity, with the validated crypto module and the Stable-track support model. 
+- **Recommended for:** Deployments with an explicit FIPS 140-3 requirement. Do not choose this flavor casually — it constrains distribution coverage (see matrix below) and adds compliance obligations. + +### Default recommendations + +| User profile / signal | Recommended flavor | +|--------------------------------------------------------------------------------------------------------|-----------------------------------------------------| +| **No preference stated** | **Altinity Stable** (default) | +| Building a new application that needs Iceberg / Hybrid Tables / swarm / OAuth / cutting-edge features | Altinity Antalya (accept feature-forward track) | +| Explicit FIPS 140-3 / regulated-compliance requirement | Altinity FIPS | +| Explicit ask to track upstream / "vanilla ClickHouse" | ClickHouse Official | +| Development / demo with no preference | Altinity Stable (Antalya if exploring its features) | + +When no preference is stated, default to **Altinity Stable** and tell the user why. If the user describes a workload that needs Iceberg, Hybrid Tables, swarm clusters, or other Antalya-track features, surface Antalya as the right fit and explain the feature-forward support tradeoff so they can choose deliberately. + +--- + +## Step 2 — Choose Distribution Form + +Three forms. The target environment usually decides this, but call it out so the user can override. + +### Container image (OCI) + +- **Use for:** Docker, Docker Compose, Kubernetes, Podman. +- **Pros:** Self-contained, reproducible, easy to roll back by changing a tag. +- **Cons:** No host-level customization without rebuild. +- **Pinning rule:** Always use a fully-qualified version tag in production. Never `latest`, `stable`, or major-only (`24`). + +### Linux package (DEB / RPM) + +- **Use for:** Bare-metal Linux installs managed by `apt` (Debian, Ubuntu) or `yum` / `dnf` (RHEL, Rocky, Alma, CentOS Stream, Fedora). 
+- **Pros:** Integrates with systemd, package-manager-native upgrades, signed packages. +- **Cons:** Distribution-specific; must match the host's package manager and base OS major version. +- **Pinning rule:** Pin the package version explicitly (`apt install clickhouse-server=<version>` / `dnf install clickhouse-server-<version>`). + +### Binary tarball + +- **Use for:** Hosts without a supported package manager, custom install layouts, dev environments, air-gapped installs. +- **Pros:** No package-manager assumptions; portable across distros. +- **Cons:** No automatic systemd integration, no package-manager upgrades, more manual. +- **Pinning rule:** Record the SHA256 of the tarball and the URL it came from. + +> Source builds and other forms exist but are out of scope for this skill. + +--- + +## Step 3 — Choose Architecture + +Two common architectures. Confirm the target environment's architecture before recommending. + +| Architecture | Aliases | Common platforms | +|--------------|------------------------|----------------------------------------------------------| +| **x86_64** | `amd64` | Intel and AMD servers; most cloud VMs; most laptops | +| **aarch64** | `arm64` | Apple Silicon Macs, AWS Graviton, Ampere, Raspberry Pi 4/5, ARM-based servers | + +How to detect: + +```bash +uname -m # x86_64 or aarch64 +docker info --format '{{.Architecture}}' # for the Docker daemon +kubectl get nodes -o wide # ARCHITECTURE column +``` + +> Other architectures (ppc64le, riscv64, s390x) may have community or experimental coverage; treat as not supported in this skill unless the user explicitly confirms availability and accepts the risk. + +--- + +## Step 4 — Resolve Coverage Matrix + +Not every flavor × form × architecture combination exists. Use the matrix below as a starting point, **but always verify availability against the canonical source before recommending** — coverage changes over time. 
+ +| Flavor | Container (x86_64) | Container (aarch64) | DEB (x86_64) | DEB (aarch64) | RPM (x86_64) | RPM (aarch64) | Tarball (x86_64) | Tarball (aarch64) | +|-------------------------|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:| +| ClickHouse Official | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | +| Altinity Antalya | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | +| Altinity Stable | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | +| Altinity FIPS | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | + +Legend: ✅ = generally available; ⚠️ = verify against current Altinity / ClickHouse documentation before committing. + +If the requested combination is `⚠️` or absent, surface that to the user and offer the closest available alternative (typically: same flavor, different distribution form; or same form, different flavor). + +--- + +## Step 5 — Find Latest Available Versions + +When the user has not pinned a specific version — or asks "what's the latest?" — discover available versions by querying GitHub releases. The script `assets/find-latest-versions.sh` automates this. It is also useful as a standalone capability when the user is just exploring. + +> **"Latest" means highest version number, not most recently published.** The script sorts results by version (descending), not by publication date. This matters for Altinity Stable in particular, where older lines (e.g. 24.8 LTS) receive ongoing patch backports concurrently with newer-line work — so the most recently published tag may not be the highest version. If the user explicitly asks for "the most recent patch on line X," look it up directly on GitHub rather than from this script's top result. + +### Use cases + +- **Standalone:** "What's the latest ClickHouse version?" or "What Altinity Antalya releases are out?" Run the script directly and report results. 
+- **In a deploy flow:** When `altinity-deploy-clickhouse-overview` hands off without a pinned version, run the script for the chosen flavor and propose the highest-version matching tag, then confirm with the user before locking it into the output contract. + +### Run + +```bash +# All flavors, top tag each by version (default). +./assets/find-latest-versions.sh + +# Single flavor. +./assets/find-latest-versions.sh antalya + +# Top N by version per flavor. +COUNT=5 ./assets/find-latest-versions.sh stable +``` + +The script uses `gh` CLI when available (higher rate limits when authenticated) and falls back to unauthenticated `curl`. Required dependencies: `bash` 4+, `jq`, `sort` with `-V` (GNU coreutils; macOS BSD sort supports `-V` since ~10.12), and either `gh` or `curl`. If a dependency is missing, the script prints a clear error and exits with code 2. + +### Output + +One row per match, column-aligned: + +``` +<flavor> <tag> <published-date> <url> +official v26.3.10.60-lts 2026-05-08 https://github.com/ClickHouse/ClickHouse/releases/tag/v26.3.10.60-lts +antalya v25.8.22.20001.altinityantalya 2026-05-05 https://github.com/Altinity/ClickHouse/releases/tag/v25.8.22.20001.altinityantalya +stable v24.8.14.10546.altinitystable 2026-05-08 https://github.com/Altinity/ClickHouse/releases/tag/v24.8.14.10546.altinitystable +fips v25.3.8.30001.altinityfips 2026-04-15 https://github.com/Altinity/ClickHouse/releases/tag/v25.3.8.30001.altinityfips +``` + +### Interpreting the result + +The tag name is the source of truth for the version. Strip the leading `v` and any flavor suffix to get the bare version (e.g. `v25.8.22.20001.altinityantalya` → version `25.8.22.20001`). + +**FIPS access caveat.** Tags are published on public GitHub, but the actual FIPS-validated artifacts (containers, RPMs, etc.) are *not* supported unless you have a subscription from Altinity that includes FIPS support. The tag tells you the latest version exists; confirm with Altinity before using in any system that requires vendor support. 
+ +### Exit codes + +| Code | Meaning | +|------|------------------------------------------------------| +| 0 | Success; at least one match printed. | +| 1 | Usage error (unknown flavor argument). | +| 2 | Required dependency missing (`jq`, `gh`/`curl`). | +| 3 | GitHub query failed for at least one flavor (network or rate limit). | +| 4 | No matching tags found for the requested flavor. | + +Treat exit codes 3 and 4 as soft failures during a deploy flow — fall back to asking the user for an explicit version rather than guessing. + +### Maintenance + +The repos and tag-pattern regexes live near the top of the script. If a query starts returning zero matches, the most likely cause is that the upstream tag convention changed; update the relevant pattern. + +--- + +## Step 6 — Resolve Concrete Coordinates + +Output the concrete reference the installer needs. + +### Container images + +| Flavor | Image repository | Tag pattern | +|---------------------|-------------------------------------------|-------------------------------------------------------| +| ClickHouse Official | `clickhouse/clickhouse-server` | `<version>` (e.g. 
`24.8.14.10459`) | +| ClickHouse Official Keeper | `clickhouse/clickhouse-keeper` | `<version>` | +| Altinity Stable | `altinity/clickhouse-server` | `<version>.altinitystable` | +| Altinity Stable Keeper | `altinity/clickhouse-keeper` | `<version>.altinitystable` | +| Altinity Antalya | `altinity/clickhouse-server` | `<version>.altinityantalya` | +| Altinity FIPS | `altinity/clickhouse-server` | `<version>.altinityfips` | + +### Linux package repositories + +| Flavor | Repo (DEB / RPM) | +|---------------------|-----------------------------------| +| ClickHouse Official | `https://packages.clickhouse.com` | +| Altinity Stable | `https://builds.altinity.cloud` | +| Altinity Antalya | `https://builds.altinity.cloud` | +| Altinity FIPS | `https://builds.altinity.cloud` | + +### Binary tarballs + +Tarballs live alongside packages on the same hosts: + +- ClickHouse Official: `https://packages.clickhouse.com/tgz/` +- Altinity Stable: `https://builds.altinity.cloud/stable-tgz-repo` +- Altinity Antalya: `https://builds.altinity.cloud/antalya-tgz-repo` +- Altinity FIPS: `https://builds.altinity.cloud/fips-tgz-repo` + +> All paths above can move between releases. Verify URLs at install time and pin to a specific version. If any path is unreachable from the install environment, surface that to the user before continuing. + +--- + +## Step 7 — Sanity Checks + +Run the following automatically and report results before handing off: + +1. **Registry / repo reachability** — DNS resolves, HTTPS responds. +2. **Image tag exists** — for container forms, confirm the resolved tag is pullable. Example: + ```bash + docker manifest inspect <image>:<tag> + ``` +3. **Architecture match** — confirm the resolved tag publishes a manifest for the target architecture (multi-arch manifests should list both `amd64` and `arm64` where applicable). +4. **Package version exists** — for package forms, confirm via `apt-cache madison` / `dnf list available`. +5. **Tarball SHA256** — for tarball form, fetch the published checksum and record it. +6. 
**Version recency** — flag if the chosen version is more than 6 months behind the current Altinity Stable line (or upstream, for Official).
+7. **FIPS compliance gate** — if FIPS is selected, confirm with the user that they have read the FIPS support matrix and understand the constrained feature surface.
+
+If any check fails, stop and report — do not pass an unverified reference downstream.
+
+---
+
+## Output Contract
+
+Hand the installer skill a structured summary:
+
+```
+build_flavor: official | antalya | altinity-stable | altinity-fips
+version:
+distribution_form: container | deb | rpm | tarball
+architecture: x86_64 | aarch64
+server_image: (container only)
+keeper_image: (container only)
+package_repo: (deb/rpm only)
+package_version: (deb/rpm only)
+tarball_url: (tarball only)
+tarball_sha256: (tarball only)
+intent: production | development
+notes:
+```
+
+Downstream skills must use these exact values — no substitution.
diff --git a/altinity-deploy-clickhouse/skills/altinity-deploy-clickhouse-builds/assets/find-latest-versions.sh b/altinity-deploy-clickhouse/skills/altinity-deploy-clickhouse-builds/assets/find-latest-versions.sh
new file mode 100755
index 0000000..7a4794b
--- /dev/null
+++ b/altinity-deploy-clickhouse/skills/altinity-deploy-clickhouse-builds/assets/find-latest-versions.sh
@@ -0,0 +1,221 @@
+#!/usr/bin/env bash
+#
+# find-latest-versions.sh
+#
+# Discover the latest ClickHouse build versions by querying GitHub releases.
+# Prints one line per matching tag, sorted by version number (highest first).
+#
+# Note: "highest version" is determined by version-sort on the tag, NOT by
+# publication date. For Altinity Stable in particular, older lines (e.g. 24.8
+# LTS) receive ongoing patch backports, so the most-recently-published tag may
+# not be the highest version number. This script returns the highest version.
+#
+# Usage:
+#   ./find-latest-versions.sh                 # all flavors, top 1 each
+#   ./find-latest-versions.sh official        # only ClickHouse Official
+#   ./find-latest-versions.sh stable          # only Altinity Stable
+#   ./find-latest-versions.sh antalya         # only Altinity Antalya
+#   ./find-latest-versions.sh fips            # only Altinity FIPS
+#   COUNT=5 ./find-latest-versions.sh stable  # show top 5 by version number
+#   ARCH=aarch64 ./find-latest-versions.sh    # append an architecture reminder (no filtering)
+#
+# Auth:
+#   Uses `gh` CLI if available (higher rate limits when authenticated).
+#   Falls back to unauthenticated `curl` (60 req/hr; sufficient for single
+#   lookups but will rate-limit on repeated `all` calls).
+#
+# Dependencies:
+#   - bash 4+ (associative arrays)
+#   - curl OR gh
+#   - jq
+#   - sort with -V (version sort)
+#
+# Limitations:
+#   Only the first page of releases (100 entries) is inspected per repository.
+#
+# Output contract:
+#   <flavor> <tag> <published-date> <url>
+#
+# Exit codes:
+#   0  success (at least one match printed)
+#   1  usage error (unknown flavor or invalid COUNT)
+#   2  dependency missing
+#   3  GitHub query failed for at least one flavor
+#   4  no matching tags found
+
+set -euo pipefail
+
+# Associative arrays (declare -A) need bash 4+; fail with a clear message
+# instead of the cryptic "declare: -A: invalid option" printed by bash 3.2.
+if (( BASH_VERSINFO[0] < 4 )); then
+  echo "error: bash 4+ is required (found ${BASH_VERSION})." >&2
+  exit 2
+fi
+
+#-----------------------------------------------------------------------------
+# Configuration — VERIFY before relying on these in production.
+# Repos and tag patterns can change over time. If a query returns nothing,
+# the most likely cause is that the pattern below no longer matches the
+# current release tags.
+#-----------------------------------------------------------------------------
+
+declare -A REPOS=(
+  [official]="ClickHouse/ClickHouse"
+  [stable]="Altinity/ClickHouse"
+  [antalya]="Altinity/ClickHouse"
+  [fips]="Altinity/ClickHouse"
+)
+
+declare -A TAG_PATTERNS=(
+  # Patterns use awk extended regex; [.] matches a literal dot.
+  # Upstream tags look like v24.8.14.10459-stable or v25.1.1.123-lts.
+  [official]='^v[0-9]+[.][0-9]+[.][0-9]+[.][0-9]+(-(stable|lts|prestable|testing))?$'
+  # Altinity Stable tags carry an .altinitystable suffix.
+  [stable]='[.]altinitystable$'
+  # Altinity Antalya tags carry an .altinityantalya suffix.
+  [antalya]='[.]altinityantalya$'
+  # Altinity FIPS tags carry an .altinityfips suffix.
+  [fips]='[.]altinityfips$'
+)
+
+#-----------------------------------------------------------------------------
+
+COUNT="${COUNT:-1}"
+ARCH="${ARCH:-}"
+FLAVOR="${1:-all}"
+
+# Print the header comment of this file as help text.
+usage() {
+  # Take line 2 through the `set -` line, strip the comment markers, then
+  # drop the trailing `set -` line. `sed '$d'` is used because `head -n -1`
+  # is a GNU extension that BSD/macOS head rejects.
+  sed -n '2,/^set -/p' "$0" | sed 's/^# \{0,1\}//' | sed '$d'
+}
+
+# Help must work even when jq/gh/curl are not installed, so handle it first.
+case "$FLAVOR" in
+  -h|--help)
+    usage
+    exit 0
+    ;;
+esac
+
+# COUNT feeds `head -n`; reject anything that is not a positive integer
+# instead of silently reporting "no tags matched".
+if ! [[ "$COUNT" =~ ^[1-9][0-9]*$ ]]; then
+  echo "error: COUNT must be a positive integer (got '$COUNT')." >&2
+  exit 1
+fi
+
+# Exit with code 2 unless the command named by $1 is on PATH.
+require() {
+  command -v "$1" >/dev/null 2>&1 || {
+    echo "error: '$1' is required but not found in PATH." >&2
+    exit 2
+  }
+}
+
+require jq
+if ! command -v gh >/dev/null 2>&1; then
+  require curl
+fi
+# Probe version sort up front; inside the pipeline below a failure is masked.
+if ! sort -V </dev/null >/dev/null 2>&1; then
+  echo "error: 'sort' does not support -V (version sort)." >&2
+  exit 2
+fi
+
+# Per-repo cache of release JSON. Stable, Antalya and FIPS share one repo, so
+# an `all` run needs two API calls instead of four.
+declare -A RELEASE_CACHE=()
+
+# Print the first page (100 entries) of releases for repo $1 as JSON on stdout.
+# Returns non-zero when the request fails.
+fetch_releases() {
+  local repo="$1"
+  if command -v gh >/dev/null 2>&1; then
+    gh api -H "Accept: application/vnd.github+json" \
+      "/repos/${repo}/releases?per_page=100"
+  else
+    curl -fsS -H "Accept: application/vnd.github+json" \
+      "https://api.github.com/repos/${repo}/releases?per_page=100"
+  fi
+}
+
+# Print up to COUNT rows for flavor $1; updates the FOUND and FAILED globals.
+print_flavor() {
+  local flavor="$1"
+  local repo="${REPOS[$flavor]:-}"
+  local pattern="${TAG_PATTERNS[$flavor]:-}"
+
+  if [[ -z "$repo" ]]; then
+    printf '%-9s (no public GitHub repo configured — verify with Altinity)\n' "$flavor"
+    return
+  fi
+
+  # Reuse a cached response for this repo when one exists.
+  local json
+  if [[ -n "${RELEASE_CACHE[$repo]:-}" ]]; then
+    json="${RELEASE_CACHE[$repo]}"
+  elif json=$(fetch_releases "$repo" 2>/dev/null); then
+    RELEASE_CACHE[$repo]="$json"
+  else
+    printf '%-9s ERROR querying %s (rate-limited or network failure)\n' "$flavor" "$repo" >&2
+    FAILED=1
+    return
+  fi
+
+  # Emit tab-separated rows: tag\tpublished_at\thtml_url, then filter on
+  # the tag (first column) only so URL/date contents can't accidentally
+  # match the pattern. Sort version-descending on the tag column so the
+  # highest version number is first (NOT the most recently published).
+  local rows
+  rows=$(printf '%s\n' "$json" \
+    | jq -r '.[] | [.tag_name, .published_at, .html_url] | @tsv' \
+    | awk -F'\t' -v pat="$pattern" '$1 ~ pat' \
+    | sort -t$'\t' -k1,1 -V -r \
+    | head -n "$COUNT" || true)
+
+  if [[ -z "$rows" ]]; then
+    printf '%-9s no tags matched pattern: %s\n' "$flavor" "$pattern"
+    return
+  fi
+
+  while IFS=$'\t' read -r tag published_at url; do
+    printf '%-9s %-40s %s %s\n' "$flavor" "$tag" "${published_at%T*}" "$url"
+    FOUND=$((FOUND + 1))
+  done <<< "$rows"
+}
+
+FOUND=0
+FAILED=0
+
+case "$FLAVOR" in
+  all)
+    for f in official antalya stable fips; do
+      print_flavor "$f"
+    done
+    ;;
+  official|antalya|stable|fips)
+    print_flavor "$FLAVOR"
+    ;;
+  *)
+    echo "error: unknown flavor: $FLAVOR" >&2
+    echo "usage: $0 [official|antalya|stable|fips|all]" >&2
+    exit 1
+    ;;
+esac
+
+if [[ -n "$ARCH" ]]; then
+  echo
+  echo "note: ARCH=$ARCH set; this script reports tags only — confirm architecture coverage"
+  echo "      via the registry / repo (e.g. 'docker manifest inspect <image>:<tag>')."
+fi
+
+if [[ "$FAILED" -eq 1 ]]; then
+  exit 3
+fi
+if [[ "$FOUND" -eq 0 ]]; then
+  exit 4
+fi
+exit 0
diff --git a/altinity-deploy-clickhouse/skills/altinity-deploy-clickhouse-builds/references/INDEX.md b/altinity-deploy-clickhouse/skills/altinity-deploy-clickhouse-builds/references/INDEX.md
new file mode 100644
index 0000000..d4a0bfd
--- /dev/null
+++ b/altinity-deploy-clickhouse/skills/altinity-deploy-clickhouse-builds/references/INDEX.md
@@ -0,0 +1,102 @@
+# Documentation References — ClickHouse Build Selection
+
+Canonical sources Claude should consult to verify facts that change over time
+(repo paths, supported architectures, distribution coverage, FIPS support
+matrix, current versions). Each entry is a pointer; fetch the URL with
+`WebFetch` when current data is needed.
+ +--- + +## Build Flavors + +### Altinity Antalya Builds — feature-forward Altinity track +- **Landing / overview:** https://altinity.com/blog/getting-started-with-altinitys-project-antalya +- **Documentation:** https://docs.altinity.com/altinityantalya/ +- **GitHub releases (Antalya tags):** https://github.com/Altinity/ClickHouse/releases?q=altinityantalya +- **Use when:** user asks about Antalya feature scope (OAuth/OIDC, swarm clusters, Iceberg/Parquet reads, tiered storage to Iceberg), version cadence, or upstream-compatibility statement. + +### Altinity Stable Builds — long-term-support Altinity track +- **Landing:** https://altinity.com/altinity-stable/ +- **Documentation:** https://docs.altinity.com/altinitystablebuilds/ +- **GitHub releases (Stable tags):** https://github.com/Altinity/ClickHouse/releases?q=altinitystable +- **Use when:** user asks about supported Stable lines, support window, qualification process, security backports, EOL dates. + +### Altinity FIPS Builds — FIPS 140-3 compatible Altinity track +- **Landing page and documentation:** https://docs.altinity.com/altinitystablebuilds/fips-compatible-altinity-builds/ +- **GitHub releases (FIPS tags):** https://github.com/Altinity/ClickHouse/releases?q=altinityfips +- **Use when:** user asks about FIPS 140-3 scope, validated module identity, supported feature surface, subscription requirements, compliance evidence. + +### ClickHouse Official Builds — upstream from ClickHouse, Inc. +- **Documentation root:** https://clickhouse.com/docs +- **Release notes / changelog:** https://clickhouse.com/docs/whats-new/changelog +- **GitHub releases:** https://github.com/ClickHouse/ClickHouse/releases +- **Use when:** user asks about upstream version policy, LTS tags, feature availability per upstream version, EOL. 
+ +--- + +## Artifact Locations + +### Container registries +- **Altinity server image:** https://hub.docker.com/r/altinity/clickhouse-server +- **Altinity Keeper image:** https://hub.docker.com/r/altinity/clickhouse-keeper +- **ClickHouse Inc. server image:** https://hub.docker.com/r/clickhouse/clickhouse-server +- **ClickHouse Inc. Keeper image:** https://hub.docker.com/r/clickhouse/clickhouse-keeper +- **Use when:** verifying that a tag exists, listing available tags by flavor suffix, or confirming multi-arch coverage. + +### Package & tarball repositories +- **ClickHouse Inc. packages:** https://packages.clickhouse.com +- **ClickHouse Inc. install docs:** https://clickhouse.com/docs/install +- **Altinity build host (DEB / RPM / tarball):** https://builds.altinity.cloud +- **Altinity install / packaging docs:** https://docs.altinity.com/altinitystablebuilds/stablequickstartguide/ +- **Use when:** configuring apt/yum/dnf repos, downloading tarballs, or finding the canonical install command for a given OS. + +### GitHub source / release tracking +- **ClickHouse, Inc. repo:** https://github.com/ClickHouse/ClickHouse +- **Altinity ClickHouse repo (Stable, Antalya, FIPS releases):** https://github.com/Altinity/ClickHouse +- **Altinity clickhouse-operator (K8s):** https://github.com/Altinity/clickhouse-operator +- **Use when:** discovering latest tags, reading release notes, or locating a specific commit / SHA. + +--- + +## Architecture & Build Support + +- **ClickHouse system requirements:** https://clickhouse.com/docs/install#system-requirements +- **ClickHouse supported platforms / OS:** https://clickhouse.com/docs/operations/tips +- **Altinity build support matrix:** https://docs.altinity.com/altinitystablebuilds/ +- **Use when:** user asks whether a specific OS, kernel version, or architecture is supported; or whether multi-arch images cover their target. 
+ +--- + +## Compliance & Support + +### FIPS 140-3 background +- **NIST CMVP program (validations search):** https://csrc.nist.gov/projects/cryptographic-module-validation-program +- **NIST FIPS 140-3 standard:** https://csrc.nist.gov/publications/detail/fips/140/3/final +- **Use when:** explaining what FIPS 140-3 means, validating a vendor's certificate number, or confirming that a specific cryptographic module is in-scope. + +### Altinity support +- **Altinity support / subscriptions:** https://altinity.com/support/ +- **Altinity contact for FIPS-specific questions:** https://altinity.com/contact +- **Use when:** user asks about commercial support coverage, subscription tiers, or FIPS-validated artifact access. + +### Altinity Cloud +- **Altinity BYOC cloud subscriptions:** (Runs in user account) https://altinity.com/managed-clickhouse/bring-your-own-cloud/ +- **Altinity SaaS cloud subscriptions:** (Runs in Altinity account) https://altinity.com/managed-clickhouse/ +- **Altinity contact for cloud-specific questions:** https://altinity.com/contact +- **Use when:** User asks for an Altinity-managed service for ClickHouse, BYOC vs. SaaS choice, cloud subscription tiers. + +--- + +## Latest-Version Discovery + +The `assets/find-latest-versions.sh` script in this skill is the primary +mechanism for discovering current versions across all four flavors. It +queries GitHub releases directly. See `SKILL.md` Step 5 for usage. + +--- + +## Maintenance + +When a `(verify)` link is confirmed (or corrected), remove the marker. +When a link returns 404 or redirects unexpectedly, replace it with the +current canonical URL and note the change in the commit message. 
diff --git a/altinity-deploy-clickhouse/skills/altinity-deploy-clickhouse-docker/SKILL.md b/altinity-deploy-clickhouse/skills/altinity-deploy-clickhouse-docker/SKILL.md new file mode 100644 index 0000000..018e5c4 --- /dev/null +++ b/altinity-deploy-clickhouse/skills/altinity-deploy-clickhouse-docker/SKILL.md @@ -0,0 +1,197 @@ +--- +name: altinity-deploy-clickhouse-docker +description: Deploys ClickHouse using Docker Compose with one ClickHouse server and one Keeper. Designed to be extended to multi-shard / multi-replica clusters. Use for development, demo, and small production Docker stacks. +author: Altinity Inc +version: 0.0.1 +license: Apache-2.0 +--- + +# Deploy ClickHouse — Docker Compose + +Stand up a working ClickHouse instance with a standalone single-node Keeper using `docker compose`. The compose file ships one `clickhouse-server` container and one separate `clickhouse-keeper` container (Keeper is not embedded in the server), so the cluster is real (Keeper-backed) but minimal. Add shards/replicas by extending the compose file later. + +--- + +## Action Mode + +Hybrid: + +- Read-only checks (`docker version`, `docker info`, port probes, `docker ps`) run automatically. +- Mutating steps (`docker compose up -d`, creation of `./data/` subdirectories, `chown` of the data tree on Linux, large image pulls) require explicit user confirmation. Print the exact command first. The `chown -R 101:101 data/` step requires `sudo` on Linux — call this out before running. + +--- + +## Step 1 — Verify Inputs + +Confirm with the user (or take from `altinity-deploy-clickhouse-overview` / `-builds` output): + +- **Deployment intent** — production or development/demo? +- **ClickHouse server image** — fully-qualified tag from `altinity-deploy-clickhouse-builds`. +- **Keeper image** — fully-qualified tag from `altinity-deploy-clickhouse-builds`. +- **Install directory** — where the compose project will live (default: current working directory). +- **Default user password** — required for production; can be empty for development.
+ +If any input is missing, ask. Do not proceed with placeholder values. + +--- + +## Step 2 — Verify the Host + +Run automatically: + +```bash +docker version +docker info --format '{{.ServerVersion}}' +docker compose version +``` + +Confirm: + +- Docker daemon is running. +- Compose v2 is available (`docker compose`, not the legacy `docker-compose`). +- Ports 8123 (HTTP), 9000 (native), and 9181 (Keeper client) are free on the host. + +If Docker is missing or the daemon is not running, stop and ask the user to install or start Docker. + +--- + +## Step 3 — Materialize the Compose Project + +The `assets/` directory in this skill contains: + +- `docker-compose.yml` — Compose stack with placeholders for the server and Keeper image tags. +- `config.d/keeper.xml` — server-side `<zookeeper>` config pointing at the Keeper service. +- `config.d/macros.xml` — `<macros>` for shard / replica naming (single-node defaults). +- `config.d/listen.xml` — binds the server to all interfaces inside the container so Docker NAT can reach it. External exposure is still controlled by the published-port binding in `docker-compose.yml`. +- `users.d/default-password.xml` — sets the default user password from the `${CH_DEFAULT_PASSWORD}` placeholder (production only). +- `keeper-config.xml` — Keeper standalone config. +- `README.md` — user-facing operations doc explaining what the install is and how to start / stop / connect to ClickHouse and Keeper. Copy as-is; no placeholder substitution. + +The compose file uses **bind mounts under `./data/`** for all persistent state — not Docker named volumes.
Layout under the install directory after Step 3 completes: + +``` +<install-dir>/ +├── README.md ← operations doc for the installed stack +├── docker-compose.yml +├── keeper-config.xml +├── config.d/ +│ ├── keeper.xml +│ ├── listen.xml +│ └── macros.xml +├── users.d/ +│ └── default-password.xml (production only) +└── data/ ← all persistent state lives here + ├── clickhouse/ → /var/lib/clickhouse + ├── clickhouse-logs/ → /var/log/clickhouse-server + ├── keeper/ → /var/lib/clickhouse-keeper + └── keeper-logs/ → /var/log/clickhouse-keeper +``` + +Procedure: + +1. Copy `assets/` contents into the install directory. +2. Substitute placeholders: + - `${CH_SERVER_IMAGE}` — server image tag from build skill. + - `${CH_KEEPER_IMAGE}` — Keeper image tag from build skill. + - `${CH_DEFAULT_PASSWORD}` — production only; for development, delete `users.d/default-password.xml` but keep the (now empty) `users.d/` directory, because `docker-compose.yml` still bind-mounts it. +3. Create the `data/` subdirectories and set ownership so the in-container `clickhouse` user (UID/GID 101) can write to them: + ```bash + mkdir -p data/clickhouse data/clickhouse-logs data/keeper data/keeper-logs + + # Linux: bind-mount targets created by Docker default to root:root. + # Align them with the clickhouse user inside the image so the server + # and Keeper can read/write their data and log directories. + if [ "$(uname)" = "Linux" ]; then + sudo chown -R 101:101 data/ + fi + # On macOS / Windows Docker Desktop, the file-sharing layer maps + # ownership for you — the chown step is skipped there. + ``` +4. If the install directory is under version control, add `data/` to `.gitignore` — it will accumulate large amounts of state and should never be committed. +5. Show the user the materialized compose file before starting anything.
+ +--- + +## Step 4 — Production vs Development Defaults + +| Setting | Development | Production | +|-----------------------|--------------------------------------|----------------------------------------------| +| Image tag | Pinned stable acceptable | Pinned Altinity Stable Build version | +| Default user password | Empty (omit `users.d/default-password.xml`) | Strong password set via `users.d/default-password.xml` | +| Persistent storage | Bind mounts under `./data/` | Bind mounts under `./data/`; back up `./data/` regularly and document the schedule | +| Restart policy | `unless-stopped` | `unless-stopped` | +| Resource limits | None | `deploy.resources.limits.{cpus,memory}` set | +| Bind address | `127.0.0.1` | Restrict via host firewall; document exposure | +| TLS | Off | Out of scope for MVP — flag as follow-up | + +When in production mode, set resource limits in the compose file before bringing the stack up. Even rough values (e.g. 4 CPU / 8 GiB) beat unlimited. + +--- + +## Step 5 — Bring the Stack Up + +After confirmation: + +```bash +docker compose pull +docker compose up -d +``` + +Then poll readiness: + +```bash +docker compose ps +docker compose logs --tail=50 clickhouse +docker compose logs --tail=50 keeper +``` + +Wait until the `clickhouse` service logs `Ready for connections` and the `keeper` service logs that it has joined or formed quorum. + +--- + +## Step 6 — Validate + +Hand off to `altinity-deploy-clickhouse-smoke-test` against the new endpoint: + +- Host: `localhost` +- HTTP port: `8123` +- Native port: `9000` +- User: `default` +- Password: as set above (empty for dev) + +Do not declare success until smoke tests pass. + +Once smoke tests pass, point the user at `<install-dir>/README.md` (the README copied into the install directory in Step 3) for day-to-day operations — it documents starting, stopping, wiping, and connecting to ClickHouse and Keeper. The README is the durable doc the user keeps; this SKILL.md is the one-time install procedure.
+ +--- + +## Extending Later + +This compose file is a single-node cluster on purpose — Keeper is real, macros are set, replication tables will work. To extend: + +- Add additional `clickhouse-N` services with distinct macros and a shared Keeper. +- For HA Keeper, scale Keeper to 3 nodes (separate skill — out of MVP scope). +- For multi-host, switch to Swarm or move to the Kubernetes skill. + +## Persistence and Cleanup + +State lives under `./data/` (bind mounts), not in Docker named volumes. This means: + +- `docker compose down` stops the containers but **leaves `./data/` intact** — restart with `docker compose up -d` and your tables come back. +- `docker compose down -v` is a no-op for state since there are no named volumes; data still persists. +- To **wipe state and start fresh**: + ```bash + docker compose down + # On Linux the data tree is owned by 101:101 (clickhouse), so removal needs sudo. + if [ "$(uname)" = "Linux" ]; then sudo rm -rf data/; else rm -rf data/; fi + # Re-create empty subdirs and ownership exactly as in Step 3 before bringing the stack back up. + ``` +- For backup, archive `./data/` while the stack is stopped (or use `clickhouse-backup` for hot backups — out of MVP scope). + +--- + +## Cross-Module Triggers + +| After this skill runs | Next skill | +|-----------------------|-----------------------------------------| +| Stack is up | `altinity-deploy-clickhouse-smoke-test` | +| Production intent | Flag TLS, RBAC, backup as follow-ups (skills planned) | diff --git a/altinity-deploy-clickhouse/skills/altinity-deploy-clickhouse-docker/assets/README.md b/altinity-deploy-clickhouse/skills/altinity-deploy-clickhouse-docker/assets/README.md new file mode 100644 index 0000000..8e143eb --- /dev/null +++ b/altinity-deploy-clickhouse/skills/altinity-deploy-clickhouse-docker/assets/README.md @@ -0,0 +1,161 @@ +# ClickHouse on Docker (Altinity Single-Node Stack) + +This directory hosts a single-node ClickHouse cluster running under Docker Compose. 
The stack runs two containers — `clickhouse-server` and `clickhouse-keeper` — backed by bind mounts for persistent data and logs. The Keeper is real (not embedded), so replicated tables work and the stack can be extended to multiple shards / replicas later without restructuring. + +This installation was generated by the `altinity-deploy-clickhouse-docker` skill. + +--- + +## Requirements + +- Docker daemon running, Compose v2 (`docker compose`, not the legacy `docker-compose`). +- Ports `8123` (HTTP), `9000` (native TCP), and `9181` (Keeper client) free on `127.0.0.1`. + +--- + +## Starting the stack + +```bash +docker compose pull # first time, or to refresh images +docker compose up -d +``` + +Wait for both containers to report ready: + +```bash +docker compose ps +docker compose logs --tail=50 clickhouse +docker compose logs --tail=50 keeper +``` + +The server is ready when its log shows `Ready for connections`. Keeper is ready when it logs that it has formed quorum (single-node quorum, but it still goes through the motions). + +--- + +## Stopping the stack + +```bash +docker compose down +``` + +This stops and removes the containers but **leaves `./data/` intact** — a subsequent `docker compose up -d` resumes your tables. There are no Docker named volumes to clean up, so `docker compose down -v` is also safe and won't delete data. + +To wipe state and start over, see *Wiping state* below. 
+ +--- + +## Connecting to ClickHouse + +### From the host — HTTP + +```bash +curl 'http://localhost:8123/?query=SELECT+version()' +``` + +Or open the web playground in a browser: <http://localhost:8123/play> + +### From the host — native TCP (if `clickhouse-client` is installed locally) + +```bash +clickhouse-client --host localhost --port 9000 --user default +# If a password is set in users.d/default-password.xml, add: +# --password '<password>' +``` + +### From inside the container (no local client needed) + +```bash +docker exec -it clickhouse-server clickhouse-client +``` + +--- + +## Connecting to Keeper + +### Keeper client (recommended) + +The Keeper client is bundled with both the server and Keeper images. From the host, exec into the Keeper container: + +```bash +docker exec -it clickhouse-keeper clickhouse-keeper-client -h localhost +``` + +Inside the client: + +``` +> ls / # browse the znode tree +> stat / # node stats +> get /clickhouse # read a znode +> help # all commands +``` + +### Four-letter ZooKeeper-protocol commands + +If `nc` (netcat) is available on the host (the `-q` flag is not supported by every `nc` build, e.g. the one shipped with macOS; drop it or use `-w 1` there): + +```bash +echo mntr | nc -q 1 localhost 9181 # operational metrics +echo ruok | nc -q 1 localhost 9181 # health check → imok +echo stat | nc -q 1 localhost 9181 # connection / version info +``` + +If `nc` is not on the host, run from a throwaway container on the stack's network: + +```bash +PROJECT=$(basename "$PWD") # Compose default project name +docker run --rm --network "${PROJECT}_clickhouse-net" busybox \ + sh -c 'echo mntr | nc keeper 9181' +``` + +--- + +## Where data lives + +All persistent state lives under `./data/` (bind mounts): + +``` +./data/ +├── clickhouse/ → /var/lib/clickhouse (server data) +├── clickhouse-logs/ → /var/log/clickhouse-server (server logs) +├── keeper/ → /var/lib/clickhouse-keeper (Keeper coordination state) +└── keeper-logs/ → /var/log/clickhouse-keeper (Keeper logs) +``` + +Back up `./data/` while the stack is stopped to back up the whole installation.
For hot backups, see [`clickhouse-backup`](https://github.com/Altinity/clickhouse-backup). + +--- + +## Status, logs, and troubleshooting + +```bash +docker compose ps # running containers +docker compose logs -f clickhouse # tail server logs +docker compose logs -f keeper # tail Keeper logs +docker exec -it clickhouse-server \ + clickhouse-client --query 'SELECT 1' # quick liveness check +``` + +--- + +## Wiping state and starting fresh + +```bash +docker compose down + +# On Linux the data tree is owned by 101:101 (clickhouse), so removal needs sudo. +if [ "$(uname)" = "Linux" ]; then sudo rm -rf data/; else rm -rf data/; fi + +# Recreate the data subdirectories with correct ownership. +mkdir -p data/clickhouse data/clickhouse-logs data/keeper data/keeper-logs +[ "$(uname)" = "Linux" ] && sudo chown -R 101:101 data/ + +docker compose up -d +``` + +--- + +## Further reading + +- Altinity docs: +- ClickHouse docs: +- Source skill: `altinity-deploy-clickhouse-docker` diff --git a/altinity-deploy-clickhouse/skills/altinity-deploy-clickhouse-docker/assets/config.d/keeper.xml b/altinity-deploy-clickhouse/skills/altinity-deploy-clickhouse-docker/assets/config.d/keeper.xml new file mode 100644 index 0000000..b67b767 --- /dev/null +++ b/altinity-deploy-clickhouse/skills/altinity-deploy-clickhouse-docker/assets/config.d/keeper.xml @@ -0,0 +1,9 @@ + + + + + keeper + 9181 + + + diff --git a/altinity-deploy-clickhouse/skills/altinity-deploy-clickhouse-docker/assets/config.d/listen.xml b/altinity-deploy-clickhouse/skills/altinity-deploy-clickhouse-docker/assets/config.d/listen.xml new file mode 100644 index 0000000..905b824 --- /dev/null +++ b/altinity-deploy-clickhouse/skills/altinity-deploy-clickhouse-docker/assets/config.d/listen.xml @@ -0,0 +1,12 @@ + + + + :: + 0.0.0.0 + 1 + diff --git a/altinity-deploy-clickhouse/skills/altinity-deploy-clickhouse-docker/assets/config.d/macros.xml 
b/altinity-deploy-clickhouse/skills/altinity-deploy-clickhouse-docker/assets/config.d/macros.xml new file mode 100644 index 0000000..48ac3ca --- /dev/null +++ b/altinity-deploy-clickhouse/skills/altinity-deploy-clickhouse-docker/assets/config.d/macros.xml @@ -0,0 +1,8 @@ + + + + local + 01 + clickhouse-01 + + diff --git a/altinity-deploy-clickhouse/skills/altinity-deploy-clickhouse-docker/assets/docker-compose.yml b/altinity-deploy-clickhouse/skills/altinity-deploy-clickhouse-docker/assets/docker-compose.yml new file mode 100644 index 0000000..f71ae86 --- /dev/null +++ b/altinity-deploy-clickhouse/skills/altinity-deploy-clickhouse-docker/assets/docker-compose.yml @@ -0,0 +1,60 @@ +# Altinity Deploy ClickHouse — single-node Compose stack +# 1 ClickHouse server + 1 Keeper. Designed to be extended. +# Substitute ${CH_SERVER_IMAGE} and ${CH_KEEPER_IMAGE} from the build skill. +# +# Persistent state lives in ./data/ (bind mounts) — see SKILL.md Step 3 for +# directory creation and ownership setup. The ./data/ tree contains all data +# and logs and is what you back up. + +services: + keeper: + image: ${CH_KEEPER_IMAGE} + container_name: clickhouse-keeper + hostname: keeper + restart: unless-stopped + volumes: + - ./data/keeper:/var/lib/clickhouse-keeper + - ./data/keeper-logs:/var/log/clickhouse-keeper + - ./keeper-config.xml:/etc/clickhouse-keeper/keeper_config.xml:ro + networks: + - clickhouse-net + ports: + - "127.0.0.1:9181:9181" # Keeper client port (bound to localhost by default) + + clickhouse: + image: ${CH_SERVER_IMAGE} + container_name: clickhouse-server + hostname: clickhouse + restart: unless-stopped + depends_on: + - keeper + environment: + # Skip the upstream entrypoint's auto-management of the 'default' user. + # We control users via mounted users.d/ files (which are read-only). + # Without this, the entrypoint tries to write users.d/default-user.xml + # and crash-loops the container. 
+ CLICKHOUSE_SKIP_USER_SETUP: "1" + ulimits: + nofile: + soft: 262144 + hard: 262144 + volumes: + - ./data/clickhouse:/var/lib/clickhouse + - ./data/clickhouse-logs:/var/log/clickhouse-server + - ./config.d:/etc/clickhouse-server/config.d:ro + - ./users.d:/etc/clickhouse-server/users.d:ro + networks: + - clickhouse-net + ports: + - "127.0.0.1:8123:8123" # HTTP + - "127.0.0.1:9000:9000" # Native TCP + # Production: uncomment and tune. Even rough limits beat unlimited. + # deploy: + # resources: + # limits: + # cpus: "4" + # memory: "8G" + +networks: + clickhouse-net: + driver: bridge diff --git a/altinity-deploy-clickhouse/skills/altinity-deploy-clickhouse-docker/assets/keeper-config.xml b/altinity-deploy-clickhouse/skills/altinity-deploy-clickhouse-docker/assets/keeper-config.xml new file mode 100644 index 0000000..147179d --- /dev/null +++ b/altinity-deploy-clickhouse/skills/altinity-deploy-clickhouse-docker/assets/keeper-config.xml @@ -0,0 +1,34 @@ + + + + information + /var/log/clickhouse-keeper/clickhouse-keeper.log + /var/log/clickhouse-keeper/clickhouse-keeper.err.log + 500M + 5 + + + 0.0.0.0 + 4096 + + + 9181 + 1 + /var/lib/clickhouse-keeper/coordination/log + /var/lib/clickhouse-keeper/coordination/snapshots + + + 10000 + 30000 + information + + + + + 1 + keeper + 9234 + + + + diff --git a/altinity-deploy-clickhouse/skills/altinity-deploy-clickhouse-docker/assets/users.d/default-password.xml b/altinity-deploy-clickhouse/skills/altinity-deploy-clickhouse-docker/assets/users.d/default-password.xml new file mode 100644 index 0000000..48f07d8 --- /dev/null +++ b/altinity-deploy-clickhouse/skills/altinity-deploy-clickhouse-docker/assets/users.d/default-password.xml @@ -0,0 +1,16 @@ + + + + + + ${CH_DEFAULT_PASSWORD} + + + diff --git a/altinity-deploy-clickhouse/skills/altinity-deploy-clickhouse-kubernetes/SKILL.md b/altinity-deploy-clickhouse/skills/altinity-deploy-clickhouse-kubernetes/SKILL.md new file mode 100644 index 0000000..0490bd7 --- /dev/null +++ 
b/altinity-deploy-clickhouse/skills/altinity-deploy-clickhouse-kubernetes/SKILL.md @@ -0,0 +1,257 @@ +--- +name: altinity-deploy-clickhouse-kubernetes +description: Deploys ClickHouse on Kubernetes using the Altinity clickhouse-operator. Covers both Helm-based installation (recommended) and raw manifests, plus a ClickHouseInstallation (CHI) custom resource. Use for Kubernetes-based ClickHouse deployments — production or development. +author: Altinity Inc +version: 0.0.1 +license: Apache-2.0 +--- + +# Deploy ClickHouse — Kubernetes (clickhouse-operator) + +Install the Altinity clickhouse-operator into a Kubernetes cluster, then create a ClickHouseInstallation (CHI) custom resource that the operator reconciles into a working ClickHouse cluster. + +Two install paths: + +- **Helm** (recommended) — managed lifecycle, easy upgrades. +- **Raw manifests** — no Helm dependency; useful for inspectable, GitOps-style workflows. + +Both paths converge on the same CHI workflow. + +--- + +## Action Mode + +Hybrid: + +- Read-only checks (`kubectl version`, `kubectl get nodes`, `kubectl get crd`, `helm version`) run automatically. +- Mutating commands (`helm install`, `helm upgrade`, `kubectl apply`, `kubectl delete`) require explicit user confirmation. Print the exact command first. + +--- + +## Step 1 — Verify Inputs + +Confirm with the user (or take from `altinity-deploy-clickhouse-overview` / `-builds`): + +- **Deployment intent** — production or development/demo? +- **Install path** — Helm or raw manifests? +- **Operator version** — pinned operator release. +- **Server image / Keeper image** — fully-qualified tags from `altinity-deploy-clickhouse-builds`. +- **Namespace** — for the operator (commonly `clickhouse-operator`) and for the ClickHouse cluster (separate namespace recommended). +- **Storage class** — for persistent volumes; must exist in the cluster. +- **Topology** — shards × replicas, Keeper count. +- **Default user password** — required for production. 
+ +If anything is missing, ask. Do not proceed with placeholders. + +--- + +## Step 1.5 — Create Working Directory & README + +Each install gets its own working directory under the user's CWD so manifests +and notes don't collide with other ClickHouse installs (Docker Compose, other +clusters, prior kind clusters, etc.). + +Suggested layout (rename the top-level dir to something descriptive — e.g. +`clickhouse-k8s-dev/`, `clickhouse-k8s-/`): + +``` +// +├── kind/ # kind cluster config (kind installs only) +├── operator/ # operator install bundle, saved here (not /tmp) +├── chi/ # ClickHouseInstallation manifest +├── notes/ # scratch space for the user +└── README.md # what this install is + how to start/stop/connect +``` + +Render `assets/README.md.template` into `/README.md`: + +1. Substitute the placeholders (`${KIND_CLUSTER_NAME}`, `${OPERATOR_VERSION}`, + `${CH_NAMESPACE}`, `${CHI_NAME}`, `${CH_CLUSTER_NAME}`, `${CH_SERVER_IMAGE}`, + `${CH_TOPOLOGY}`, `${KEEPER_*}`, `${KUBE_CONTEXT}`, `${INSTALL_LABEL}`). +2. Strip blocks that don't apply, including the surrounding HTML markers: + - Remove `` … `` if not using kind. + - Remove `` … `` if using kind. + - Remove `` … `` if ClickHouse hasn't + been applied yet (write the README early and re-render after Step 4). + - Remove `` … `` if no Keeper + is deployed. + +The README is intended to be re-rendered as the install progresses — it's fine +to write a partial version after Step 1.5 and update it after Steps 3 and 4 as +more components come online. 
+ +--- + +## Step 2 — Verify the Cluster + +Run automatically: + +```bash +kubectl version --output=yaml +kubectl get nodes +kubectl get storageclass +kubectl auth can-i create customresourcedefinitions +kubectl auth can-i create clusterroles +``` + +**If `kubectl` cannot reach a cluster** (the version command shows only the client, or `get nodes` errors with a connection refused / no current context): + +- **Deployment intent = development/demo** → chain to `altinity-expert-kubernetes-desktop` to provision a local Kubernetes cluster (kind / k3d / minikube). When that skill returns with a working `kubeconfig_context` and `storage_class`, resume this skill from Step 2. +- **Deployment intent = production** → stop and recommend the user point `kubectl` at a managed Kubernetes service (EKS / GKE / AKS / Altinity.Cloud) or a `kubeadm`-installed cluster. Do not provision a local dev cluster for production. + +Stop and report (without routing elsewhere) if: + +- The user lacks cluster-admin (operator install needs CRDs and ClusterRoles). +- No `StorageClass` exists or the requested one is missing. +- Node count or capacity looks insufficient for the requested topology. + +--- + +## Step 3 — Install the Operator + +### Path A — Helm (recommended) + +```bash +helm repo add altinity-clickhouse-operator \ + https://docs.altinity.com/clickhouse-operator/ +helm repo update + +# Pin the chart version to match the operator version chosen in Step 1: +# replace <chart-version> below before running. +helm install clickhouse-operator \ + altinity-clickhouse-operator/altinity-clickhouse-operator \ + --version <chart-version> \ + --namespace clickhouse-operator \ + --create-namespace \ + -f assets/helm-values.yaml +``` + +> **Verify the Helm repo URL and chart name at install time.** They have changed historically. Confirm against current Altinity documentation before running. If the URL above fails, ask the user for the canonical repo URL rather than guessing. + +The `assets/helm-values.yaml` file in this skill ships sane defaults.
Adjust before install for production (resource limits, image pull policy, RBAC scope). + +### Path B — Raw manifests + +The operator ships YAML manifests pinned to a specific release. Do not download from `master` for production. + +```bash +# Replace <version> with the pinned operator version (release tag) from Step 1. +OPERATOR_URL="https://github.com/Altinity/clickhouse-operator/raw/<version>/deploy/operator/clickhouse-operator-install-bundle.yaml" + +curl -fsSL "$OPERATOR_URL" -o clickhouse-operator-install-bundle.yaml + +# Inspect before applying. +less clickhouse-operator-install-bundle.yaml + +kubectl apply -f clickhouse-operator-install-bundle.yaml +``` + +> **Verify the manifest URL before running.** The repo path and bundle filename can change between releases. Always pin to a specific tag and never use `master` for production. + +### Verify operator readiness (both paths) + +```bash +kubectl -n clickhouse-operator get pods +kubectl -n clickhouse-operator rollout status deployment/clickhouse-operator +kubectl get crd | grep clickhouse +``` + +Expect to see CRDs: +- `clickhouseinstallations.clickhouse.altinity.com` +- `clickhouseinstallationtemplates.clickhouse.altinity.com` +- `clickhouseoperatorconfigurations.clickhouse.altinity.com` +- `clickhousekeeperinstallations.clickhouse-keeper.altinity.com` (for Keeper-on-K8s) + +--- + +## Step 4 — Create the ClickHouseInstallation (CHI) + +The CHI is the user-facing resource. The operator reconciles it into StatefulSets, Services, ConfigMaps, and PVCs. + +Use `assets/installation.yaml` as the starting template. It defines: + +- 1 cluster, 1 shard, 1 replica (development default) +- Keeper reference (external Keeper service or in-CHI Keeper depending on topology) +- Persistent volume claim template using the chosen StorageClass +- Server image tag from the build skill +- Resource requests/limits commented for production tuning + +Procedure: + +1. Copy `assets/installation.yaml` to the working directory. +2.
Substitute placeholders: + - `${CH_NAMESPACE}` — target namespace for the cluster. + - `${CH_SERVER_IMAGE}` — server image tag. + - `${CH_STORAGE_CLASS}` — storage class for PVCs. + - `${CH_STORAGE_SIZE}` — per-replica disk size (e.g. `100Gi`). + - `${CH_DEFAULT_PASSWORD_SHA256}` — sha256 of the default user password (production only). +3. For multi-shard / multi-replica, set `clusters[0].layout.shardsCount` and `replicasCount`. +4. Show the user the rendered YAML before applying. + +```bash +kubectl create namespace ${CH_NAMESPACE} # if it does not already exist +kubectl apply -n ${CH_NAMESPACE} -f installation.yaml +``` + +--- + +## Step 5 — Production vs Development Defaults + +| Setting | Development | Production | +|----------------------|--------------------------------------------|---------------------------------------------------------------------| +| Operator version | Latest stable | Pinned to a specific release | +| Image tag | Pinned acceptable | Pinned Altinity Stable Build | +| Replicas per shard | 1 | ≥2 | +| Keeper | Single Keeper acceptable | 3-node Keeper (separate StatefulSet or via clickhouse-keeper-operator) | +| Storage | Default StorageClass OK | Explicit StorageClass with backup policy | +| Resource requests | Unset acceptable | Set CPU and memory requests; set memory limits | +| `imagePullPolicy` | `IfNotPresent` | `IfNotPresent` with pinned tag (never `latest`) | +| Default user | Empty password OK | sha256 password or certificate auth | +| Service exposure | `ClusterIP` | `ClusterIP` + Ingress / LoadBalancer with TLS (TLS out of MVP scope) | +| PodDisruptionBudget | Skip | Set | +| Anti-affinity | Skip | Required across replicas in the same shard | + +--- + +## Step 6 — Wait for Reconciliation + +```bash +# Watch the CHI status. +kubectl -n ${CH_NAMESPACE} get chi -w + +# Inspect operator events. +kubectl -n ${CH_NAMESPACE} describe chi + +# Pods. 
+kubectl -n ${CH_NAMESPACE} get pods -l clickhouse.altinity.com/chi=<chi-name> +``` + +Expect the CHI status to transition to `Completed`. Each ClickHouse pod should be `Ready`. + +--- + +## Step 7 — Validate + +Hand off to `altinity-deploy-clickhouse-smoke-test` against the new endpoint. + +```bash +# Find the service (replace <chi-name> with the name of the CHI you applied). +kubectl -n ${CH_NAMESPACE} get svc -l clickhouse.altinity.com/chi=<chi-name> +``` + +Use the cluster service (e.g. `clickhouse-<chi-name>`) on port 8123 (HTTP) or 9000 (native) from inside the cluster. For ad-hoc local access: + +```bash +kubectl -n ${CH_NAMESPACE} port-forward svc/clickhouse-<chi-name> 8123:8123 9000:9000 +``` + +Do not declare success until smoke tests pass. + +--- + +## Cross-Module Triggers + +| Condition | Next skill | +|------------------------------------------|-----------------------------------------| +| CHI reaches `Completed` | `altinity-deploy-clickhouse-smoke-test` | +| Production intent | Flag TLS, RBAC, backup, monitoring as follow-ups | +| User asks about scaling / topology change | Update CHI spec; operator reconciles | diff --git a/altinity-deploy-clickhouse/skills/altinity-deploy-clickhouse-kubernetes/assets/README.md.template b/altinity-deploy-clickhouse/skills/altinity-deploy-clickhouse-kubernetes/assets/README.md.template new file mode 100644 index 0000000..5e22719 --- /dev/null +++ b/altinity-deploy-clickhouse/skills/altinity-deploy-clickhouse-kubernetes/assets/README.md.template @@ -0,0 +1,152 @@ +# ClickHouse on Kubernetes — ${INSTALL_LABEL} + +This directory holds the manifests and notes for a ClickHouse-on-Kubernetes +install created by the `altinity-deploy-clickhouse-kubernetes` skill. + +Keep this directory if you want to reapply, modify, or tear down the install +later — the manifests here are the source of truth.
+ +## What's installed + + +- **Kubernetes**: kind cluster `${KIND_CLUSTER_NAME}` + (kubeconfig context: `kind-${KIND_CLUSTER_NAME}`) + + + +- **Kubernetes**: remote cluster (kubeconfig context: `${KUBE_CONTEXT}`) + + +- **Operator**: Altinity clickhouse-operator `${OPERATOR_VERSION}` in namespace `clickhouse-operator` + + +- **ClickHouse**: ${CH_TOPOLOGY} in namespace `${CH_NAMESPACE}` + - CHI name: `${CHI_NAME}` + - Image: `${CH_SERVER_IMAGE}` + + + +- **Keeper**: ${KEEPER_TOPOLOGY} in namespace `${KEEPER_NAMESPACE}` + - Image: `${KEEPER_IMAGE}` + + +## Files + +- `kind/cluster.yaml` — kind cluster config (kind installs only) +- `operator/clickhouse-operator-install-bundle.yaml` — operator manifests, pinned +- `chi/installation.yaml` — ClickHouseInstallation custom resource +- `notes/` — scratch space for logs, queries, ad-hoc notes + +## Start + + +If the kind cluster is not running: + +```bash +kind create cluster --config kind/cluster.yaml +``` + +A new kind cluster has empty state, so reapply the operator and CHI: + +```bash +kubectl apply -f operator/clickhouse-operator-install-bundle.yaml +kubectl -n clickhouse-operator rollout status deployment/clickhouse-operator +kubectl create namespace ${CH_NAMESPACE} +kubectl apply -n ${CH_NAMESPACE} -f chi/installation.yaml +``` + + + +The cluster persists; reapply only if the operator or CHI was deleted: + +```bash +kubectl --context ${KUBE_CONTEXT} apply -f operator/clickhouse-operator-install-bundle.yaml +kubectl --context ${KUBE_CONTEXT} apply -n ${CH_NAMESPACE} -f chi/installation.yaml +``` + + +## Stop / tear down + +Stop only ClickHouse (keep the cluster and operator running): + +```bash +kubectl delete -n ${CH_NAMESPACE} -f chi/installation.yaml +``` + +Remove the operator and CRDs as well: + +```bash +kubectl delete -f operator/clickhouse-operator-install-bundle.yaml +``` + + +Delete the entire kind cluster (fastest reset): + +```bash +kind delete cluster --name ${KIND_CLUSTER_NAME} +``` + + + +## Connect to 
ClickHouse + +Port-forward the ClickHouse service to your host: + +```bash +kubectl -n ${CH_NAMESPACE} port-forward svc/clickhouse-${CHI_NAME} 8123:8123 9000:9000 +``` + +In another terminal: + +```bash +# HTTP interface +curl 'http://localhost:8123/?query=SELECT+version()' + +# Native client (if clickhouse-client is installed locally) +clickhouse-client --host=localhost --port=9000 + +# Or exec straight into the pod +kubectl -n ${CH_NAMESPACE} exec -it chi-${CHI_NAME}-${CH_CLUSTER_NAME}-0-0-0 \ + -- clickhouse-client +``` + +Useful sanity checks once connected: + +```sql +SELECT version(); +SELECT * FROM system.clusters; +SELECT name FROM system.databases; +``` + + + +## Connect to ClickHouse Keeper + +Keeper speaks the ZooKeeper wire protocol on port 9181 (Keeper's default). + +```bash +kubectl -n ${KEEPER_NAMESPACE} port-forward svc/${KEEPER_SERVICE} 9181:9181 +``` + +Inspect via `clickhouse-keeper-client` from inside a pod: + +```bash +kubectl -n ${KEEPER_NAMESPACE} exec -it ${KEEPER_POD} \ + -- clickhouse-keeper-client --host localhost --port 9181 +``` + +Quick health probe via 4-letter words: + +```bash +echo mntr | nc localhost 9181 +echo ruok | nc localhost 9181 # expect "imok" +``` + +From inside ClickHouse, the Keeper connection state is in +`system.zookeeper_connection` and znodes are browseable via `system.zookeeper`: + +```sql +SELECT * FROM system.zookeeper_connection; +SELECT name FROM system.zookeeper WHERE path = '/'; +``` + diff --git a/altinity-deploy-clickhouse/skills/altinity-deploy-clickhouse-kubernetes/assets/helm-values.yaml b/altinity-deploy-clickhouse/skills/altinity-deploy-clickhouse-kubernetes/assets/helm-values.yaml new file mode 100644 index 0000000..45294f8 --- /dev/null +++ b/altinity-deploy-clickhouse/skills/altinity-deploy-clickhouse-kubernetes/assets/helm-values.yaml @@ -0,0 +1,39 @@ +# Altinity clickhouse-operator Helm values. +# Adjust before installing. 
Verify the chart's expected schema against the +# Altinity clickhouse-operator chart documentation — keys can change across +# chart versions. + +operator: + # Pinned image tag. Match to the operator version chosen in Step 1. + image: + repository: altinity/clickhouse-operator + tag: "" # e.g. "0.24.0" — never leave empty for production + pullPolicy: IfNotPresent + + # Resource requests/limits for the operator pod itself. + # Production: keep these conservative — the operator is lightweight. + resources: + requests: + cpu: 100m + memory: 128Mi + limits: + memory: 256Mi + + # RBAC scope. + # cluster-wide: operator watches CHIs in all namespaces (default). + # namespaced : operator only watches its own namespace (set to true + # if cluster-admin RBAC is restricted). + rbac: + create: true + namespaced: false + +# Metrics endpoint for Prometheus scraping. Production: enable. +metrics: + enabled: true + serviceMonitor: + enabled: false # set true if Prometheus Operator is installed + +# Pod placement. +nodeSelector: {} +tolerations: [] +affinity: {} diff --git a/altinity-deploy-clickhouse/skills/altinity-deploy-clickhouse-kubernetes/assets/installation.yaml b/altinity-deploy-clickhouse/skills/altinity-deploy-clickhouse-kubernetes/assets/installation.yaml new file mode 100644 index 0000000..9a003f7 --- /dev/null +++ b/altinity-deploy-clickhouse/skills/altinity-deploy-clickhouse-kubernetes/assets/installation.yaml @@ -0,0 +1,93 @@ +# ClickHouseInstallation (CHI) — single-shard, single-replica starting template. +# Substitute placeholders below before applying. +# +# Placeholders: +# ${CH_NAMESPACE} target namespace (also pass via -n) +# ${CH_SERVER_IMAGE} fully-qualified server image tag from build skill +# ${CH_STORAGE_CLASS} StorageClass for PVCs +# ${CH_STORAGE_SIZE} per-replica disk size, e.g. 
100Gi +# ${CH_DEFAULT_PASSWORD_SHA256} sha256 of default user password (production only) +# +# Production checklist: +# - shardsCount and replicasCount as required (replicasCount >= 2) +# - Keeper running with 3 nodes (separate CHK/CRD or external Keeper) +# - resources.requests / limits set (uncomment block below) +# - podDistribution / anti-affinity set +# - PodDisruptionBudget defined alongside this CHI +# - default user secured (uncomment users block below) + +apiVersion: clickhouse.altinity.com/v1 +kind: ClickHouseInstallation +metadata: + name: ch + namespace: ${CH_NAMESPACE} +spec: + defaults: + templates: + podTemplate: clickhouse-pod + dataVolumeClaimTemplate: data + logVolumeClaimTemplate: log + + configuration: + # Production: uncomment to set the default user password (sha256). + # users: + # default/password_sha256_hex: ${CH_DEFAULT_PASSWORD_SHA256} + # default/networks/ip: + # - "::1" + # - "127.0.0.1" + + # Keeper / ZooKeeper reference. For production, point at a 3-node Keeper. + # zookeeper: + # nodes: + # - host: keeper-0.keeper.${CH_NAMESPACE}.svc.cluster.local + # - host: keeper-1.keeper.${CH_NAMESPACE}.svc.cluster.local + # - host: keeper-2.keeper.${CH_NAMESPACE}.svc.cluster.local + + clusters: + - name: main + layout: + shardsCount: 1 + replicasCount: 1 # production: >= 2 + + templates: + podTemplates: + - name: clickhouse-pod + spec: + containers: + - name: clickhouse + image: ${CH_SERVER_IMAGE} + imagePullPolicy: IfNotPresent + # Production: uncomment and tune. + # resources: + # requests: + # cpu: "2" + # memory: "8Gi" + # limits: + # memory: "8Gi" + # Production: enable anti-affinity so replicas of the same shard + # don't land on the same node. 
+ # affinity: + # podAntiAffinity: + # requiredDuringSchedulingIgnoredDuringExecution: + # - labelSelector: + # matchExpressions: + # - key: clickhouse.altinity.com/chi + # operator: In + # values: [ch] + # topologyKey: kubernetes.io/hostname + + volumeClaimTemplates: + - name: data + spec: + storageClassName: ${CH_STORAGE_CLASS} + accessModes: [ReadWriteOnce] + resources: + requests: + storage: ${CH_STORAGE_SIZE} + - name: log + spec: + storageClassName: ${CH_STORAGE_CLASS} + accessModes: [ReadWriteOnce] + resources: + requests: + storage: 10Gi diff --git a/altinity-deploy-clickhouse/skills/altinity-deploy-clickhouse-overview/SKILL.md b/altinity-deploy-clickhouse/skills/altinity-deploy-clickhouse-overview/SKILL.md new file mode 100644 index 0000000..7cc5171 --- /dev/null +++ b/altinity-deploy-clickhouse/skills/altinity-deploy-clickhouse-overview/SKILL.md @@ -0,0 +1,110 @@ +--- +name: altinity-deploy-clickhouse-overview +description: Plans and dispatches a ClickHouse deployment. Use when the user asks to install, set up, deploy, stand up, or provision ClickHouse — in Docker, Kubernetes, on bare metal, or on Altinity.Cloud — and routes to the right specialist deploy skill. +author: Altinity Inc +version: 0.0.1 +license: Apache-2.0 +--- + +# Deploy ClickHouse — Overview + +Entry point for ClickHouse deployments. Determine target environment, build channel, and topology, then route to the specialist skill. Do not start installing anything from this skill. + +--- + +## Action Mode + +These deploy skills follow a **hybrid action mode**: + +- Read-only checks (e.g. `docker ps`, `kubectl get nodes`, repo reachability) run automatically. +- Mutating commands (`docker compose up`, `helm install`, `kubectl apply`, package installs) require explicit user confirmation before execution. +- Always print the exact command and expected effect before asking for confirmation. + +--- + +## Step 1 — Gather Requirements + +Ask the user (one prompt, all questions): + +1. 
**Target environment** — Docker (Compose), Kubernetes, bare metal, or Altinity.Cloud? +2. **Deployment intent** — **production** or **development/demo**? This is load-bearing: it changes defaults for persistence, replication, resource limits, security, and image pinning. See *Production vs Development* below. +3. **Topology** — single-node (dev/demo), or clustered (shards × replicas + Keeper)? +4. **Build flavor** — which of the four supported ClickHouse builds: + - **Altinity Stable Builds** — Altinity's long-term-support, security-backported track. **Default when the user has no preference.** Production deployments and any workload that doesn't specifically need Antalya-only features. + - **Altinity Antalya Builds** — feature-forward Altinity track. Pick for new application development that needs Iceberg-backed data lakes, Hybrid Tables, swarm clusters, OAuth/OIDC, fast Parquet reads, or other cutting-edge capabilities. Altinity's own docs note Antalya is *not* for production use; choose deliberately. + - **Altinity FIPS Builds** — FIPS 140-3 compatible cryptography. Pick when the user has an explicit FIPS compliance requirement. + - **ClickHouse Official Builds** — upstream from ClickHouse, Inc. Pick when the user explicitly wants to track upstream. + + If the user has no preference, default to **Altinity Stable** and tell them so — don't silently pick. If the user describes a workload that needs Antalya-track features, surface Antalya as the right fit and explain the feature-forward support tradeoff so they can choose deliberately. Offer `altinity-deploy-clickhouse-builds` for a full comparison. +5. **Distribution form** — container image, Linux package (DEB / RPM), or binary tarball? Determined by the target environment in most cases (Docker/K8s ⇒ container), but call it out for bare-metal. +6. **Machine architecture** — x86_64 (Intel / AMD) or aarch64 (ARM, including Apple Silicon, AWS Graviton, Ampere)? 
Not every build flavor publishes artifacts for every architecture — `altinity-deploy-clickhouse-builds` resolves this. +7. **Version** — specific version, latest LTS, or "let me recommend"? +8. **Scale hint** — expected data volume, ingest rate, query concurrency (rough is fine; informs sizing). + +If the user has not stated a target, deployment intent, or build flavor, do not guess. Ask. + +--- + +## Production vs Development + +The intent answer in Step 1 propagates to every downstream installer skill. Apply these defaults unless the user overrides: + +| Concern | Development / Demo | Production | +|----------------------|---------------------------------------------|-------------------------------------------------------------| +| Image / package tag | Latest stable acceptable | Pinned to specific Altinity Stable Build version | +| Persistence | Named volumes OK; ephemeral acceptable | Named volumes / PVCs with backups; never `tmpfs` | +| Replication / Keeper | Single Keeper, single replica acceptable | At least 3 Keepers, ≥2 replicas per shard | +| Resource limits | Unset or generous defaults | Explicit CPU / memory requests + limits | +| Default user / auth | `default` with no password OK locally | Strong password or certificate auth; rotate `default` user | +| Network exposure | localhost only / dev network | Restricted to required CIDRs; TLS for client and inter-node | +| Logs / monitoring | Defaults | Hooked to centralized logging + metrics | +| Smoke test depth | Basic connectivity + version | Full smoke test including replication and writeability | + +When the user says "production," do not silently apply dev defaults to save steps. If a production-grade default cannot yet be configured (e.g. TLS is out of scope for the MVP), call it out explicitly as a follow-up. + +--- + +## Step 2 — Select the Build + +Always route through `altinity-deploy-clickhouse-builds` to lock in: + +- **Build flavor** — Official, Antalya, Altinity Stable, or Altinity FIPS. 
+- **Version / channel** — pinned version, LTS line, or latest stable. +- **Distribution form** — container image, DEB / RPM package, or binary tarball. +- **Architecture** — x86_64 or aarch64. Not every flavor × form × architecture combination exists. +- **Concrete coordinates** — image tag, repo URL, or tarball URL. + +The build skill educates the user about the differences between flavors when they're unsure, then returns a concrete reference (e.g. `altinity/clickhouse-server:<version>`) that downstream installer skills consume. **Altinity Stable** is the default flavor when the user states no preference; pick Antalya only when the workload genuinely needs feature-forward capabilities, FIPS only when there is an explicit compliance requirement, and ClickHouse Official only when the user explicitly wants to track upstream. + +--- + +## Step 3 — Route to the Installer + +| Target | Skill | +|------------------------------------|--------------------------------------------| +| Docker / Docker Compose | `altinity-deploy-clickhouse-docker` | +| Kubernetes (clickhouse-operator) | `altinity-deploy-clickhouse-kubernetes` | +| Bare metal (apt/yum/tar + systemd) | *(planned: `altinity-deploy-clickhouse-bare-metal`)* | +| Altinity.Cloud | *(planned: `altinity-deploy-clickhouse-altinity-cloud`)* | + +**Kubernetes + development/demo intent + no existing cluster:** chain through `altinity-expert-kubernetes-desktop` first to provision a local Kubernetes cluster (kind / k3d / minikube on Linux), then proceed to `altinity-deploy-clickhouse-kubernetes` against that cluster. If the user already has a cluster (managed service, kubeadm, existing local cluster), skip the desktop-cluster step and go directly to `altinity-deploy-clickhouse-kubernetes`. + +If the user picks a planned-but-unbuilt target, say so and offer the closest available alternative.
+ +--- + +## Step 4 — Validate the Install + +After any installer skill completes, run `altinity-deploy-clickhouse-smoke-test` against the new deployment. Do not declare the deployment successful without it. + +--- + +## Report + +When the deployment is done, summarize: + +- Target environment and topology +- Build channel and version installed +- Connection coordinates (host, port, user) +- Smoke-test result +- Next steps the user should consider (backup setup, RBAC, monitoring, TLS) — these are out of scope for the MVP skills but worth flagging. diff --git a/altinity-deploy-clickhouse/skills/altinity-deploy-clickhouse-smoke-test/SKILL.md b/altinity-deploy-clickhouse/skills/altinity-deploy-clickhouse-smoke-test/SKILL.md new file mode 100644 index 0000000..388838e --- /dev/null +++ b/altinity-deploy-clickhouse/skills/altinity-deploy-clickhouse-smoke-test/SKILL.md @@ -0,0 +1,158 @@ +--- +name: altinity-deploy-clickhouse-smoke-test +description: Runs post-deploy smoke tests against a freshly deployed ClickHouse to validate the install. Checks connectivity, version, basic INSERT/SELECT roundtrip, system health, and replication when clustered. Use after any deploy skill completes, or to verify an existing install before declaring it ready. +author: Altinity Inc +version: 0.0.1 +license: Apache-2.0 +--- + +# Smoke Test — Post-Deploy Validation + +Confirm that a freshly deployed ClickHouse is actually serving traffic, accepting writes, and (for clustered installs) replicating. Don't declare a deployment successful without a clean run of this skill. + +--- + +## Action Mode + +Hybrid: + +- All `SELECT` queries from `checks.sql` run automatically. +- The write roundtrip `CREATE TABLE` / `INSERT` / `SELECT` / `DROP TABLE` requires explicit user confirmation. The test database is named `_altinity_smoke_test` and is dropped at the end. Print the exact SQL before asking. 
+ +If the user declines the write roundtrip, run the read-only checks only and clearly mark the report as "read-only smoke test." + +--- + +## Step 1 — Verify Inputs + +The deploy skill that called this one should pass: + +- **Endpoint** — host, HTTP port (8123), native port (9000) +- **User / password** +- **Topology** — single-node or clustered (shards × replicas) +- **Cluster name** — only for clustered installs (the value of the `cluster` macro or a name from `system.clusters`) +- **Deployment intent** — production or development + +If invoked standalone, ask the user for these. + +--- + +## Step 2 — Connectivity + +Run automatically: + +```sql +SELECT + hostName() AS hostname, + version() AS version, + getMacro('cluster') AS cluster_macro, + getMacro('shard') AS shard_macro, + getMacro('replica') AS replica_macro, + formatReadableTimeDelta(uptime()) AS uptime +``` + +If this fails, stop and report: the deploy did not produce a working server. The one exception is a `No macro ... in config` error: `getMacro` raises it when a macro is not defined (typical for a single-node install without a macros file). In that case re-run the query without the three `getMacro` columns and treat the macros as empty. + +--- + +## Step 3 — Read-Only Health Checks + +Run all queries from `checks.sql`. They cover: + +1. Server uptime and version. +2. `system.clusters` — confirm expected shards/replicas if clustered. +3. `system.zookeeper` — confirm Keeper / ZooKeeper connectivity. +4. `system.replicas` — confirm no read-only replicas if replicated tables exist. +5. `system.errors` over the last hour — flag any non-trivial errors that occurred during boot. +6. `system.metrics` — sample of memory/connections/queue depth. +7. `system.disks` — confirm disks are mounted and writable. + +Report each block with a severity tag: `OK`, `Minor`, `Moderate`, `Major`, `Critical`. + +--- + +## Step 4 — Write Roundtrip + +After confirmation, run the following in order. Stop at the first failure.
+ +```sql +CREATE DATABASE IF NOT EXISTS _altinity_smoke_test; + +CREATE TABLE _altinity_smoke_test.ping +( + ts DateTime DEFAULT now(), + n UInt64 +) +ENGINE = MergeTree +ORDER BY ts; + +INSERT INTO _altinity_smoke_test.ping (n) +SELECT number FROM numbers(1000); + +SELECT count() AS rows, max(n) AS max_n +FROM _altinity_smoke_test.ping; +-- expect: rows = 1000, max_n = 999 + +DROP TABLE _altinity_smoke_test.ping; +DROP DATABASE _altinity_smoke_test; +``` + +For clustered deployments, also test a `Replicated*` table when the cluster name is known: + +```sql +CREATE DATABASE IF NOT EXISTS _altinity_smoke_test +ON CLUSTER '{cluster}'; + +CREATE TABLE _altinity_smoke_test.ping_repl +ON CLUSTER '{cluster}' +( + ts DateTime DEFAULT now(), + n UInt64 +) +ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/_altinity_smoke_test/ping_repl', '{replica}') +ORDER BY ts; + +INSERT INTO _altinity_smoke_test.ping_repl (n) +SELECT number FROM numbers(1000); + +-- Wait briefly for replication, then check from each replica. +SELECT + hostName(), + count() AS rows +FROM clusterAllReplicas('{cluster}', _altinity_smoke_test.ping_repl) +GROUP BY hostName(); + +DROP TABLE _altinity_smoke_test.ping_repl ON CLUSTER '{cluster}' SYNC; +DROP DATABASE _altinity_smoke_test ON CLUSTER '{cluster}' SYNC; +``` + +If the `cluster` macro was empty or undefined in Step 2 (note that `getMacro('cluster')` raises an error, rather than returning an empty string, when the macro is undefined), skip the `Replicated*` block and note "single-node, replication test skipped." + +--- + +## Step 5 — Production-Only Extra Checks + +When deployment intent is **production**, also check: + +- At least 2 replicas per shard reported in `system.clusters`. +- Default user is not passwordless — `SELECT name, auth_type FROM system.users WHERE name = 'default'`. +- Memory and CPU limits visible to the server look reasonable (`SELECT * FROM system.asynchronous_metrics WHERE metric IN ('OSMemoryTotal','CGroupMaxCPU')`). +- No `[Major]` or `[Critical]` rows in the report.
+If any of these fail, mark the smoke test **Failed** even if the write roundtrip succeeded. + +--- + +## Report + +Produce a single summary the deploy skill can include in its own output: + +- Connection coordinates and version +- Topology summary (single-node / N shards × M replicas) +- Read-only checks: per-block status +- Write roundtrip: passed / skipped / failed +- Replicated roundtrip: passed / skipped / failed +- Production extras (production intent only): passed / failed +- Overall: **PASS** / **PASS (read-only)** / **FAIL** + +A `FAIL` result blocks declaring the deployment successful. diff --git a/altinity-deploy-clickhouse/skills/altinity-deploy-clickhouse-smoke-test/checks.sql b/altinity-deploy-clickhouse/skills/altinity-deploy-clickhouse-smoke-test/checks.sql new file mode 100644 index 0000000..77fe115 --- /dev/null +++ b/altinity-deploy-clickhouse/skills/altinity-deploy-clickhouse-smoke-test/checks.sql @@ -0,0 +1,119 @@ +-- Altinity Deploy ClickHouse — post-deploy smoke test (read-only) +-- Run all queries below. Each block is independent; on UNKNOWN_TABLE, skip +-- and note the table is unavailable in this build/version. + +------------------------------------------------------------------------ +-- 1. Server identity and uptime (macros are read from system.macros because getMacro() throws when a macro is undefined; an undefined macro shows as an empty string) +------------------------------------------------------------------------ +SELECT + hostName() AS hostname, + version() AS version, + (SELECT any(substitution) FROM system.macros WHERE macro = 'cluster') AS cluster_macro, + (SELECT any(substitution) FROM system.macros WHERE macro = 'shard') AS shard_macro, + (SELECT any(substitution) FROM system.macros WHERE macro = 'replica') AS replica_macro, + uptime() AS uptime_seconds, + formatReadableTimeDelta(uptime()) AS uptime_human; + +------------------------------------------------------------------------ +-- 2.
Cluster topology (clustered installs) +------------------------------------------------------------------------ +SELECT + cluster, + shard_num, + replica_num, + host_name, + port, + is_local, + errors_count +FROM system.clusters +ORDER BY cluster, shard_num, replica_num +LIMIT 100; + +------------------------------------------------------------------------ +-- 3. Keeper / ZooKeeper connectivity +------------------------------------------------------------------------ +-- Reachable if this returns rows; errors here indicate Keeper is unreachable. +SELECT name, value, ctime, mtime +FROM system.zookeeper +WHERE path = '/' +LIMIT 10; + +------------------------------------------------------------------------ +-- 4. Replica health (only meaningful if Replicated* tables exist) +------------------------------------------------------------------------ +SELECT + database, + table, + is_readonly, + is_session_expired, + future_parts, + parts_to_check, + queue_size, + inserts_in_queue, + merges_in_queue, + absolute_delay, + last_queue_update_exception +FROM system.replicas +ORDER BY (is_readonly, queue_size) DESC +LIMIT 50; + +------------------------------------------------------------------------ +-- 5. Errors in the last hour +------------------------------------------------------------------------ +SELECT + name, + value, + last_error_time, + last_error_message +FROM system.errors +WHERE last_error_time >= now() - INTERVAL 1 HOUR +ORDER BY last_error_time DESC +LIMIT 50; + +------------------------------------------------------------------------ +-- 6. 
Live metrics sample +------------------------------------------------------------------------ +SELECT metric, value +FROM system.metrics +WHERE metric IN ( + 'TCPConnection', + 'HTTPConnection', + 'Query', + 'BackgroundMergesAndMutationsPoolTask', + 'MemoryTracking', + 'ReplicatedFetch', + 'ReplicatedSend', + 'PartsActive', + 'PartMutation' +) +ORDER BY metric; + +------------------------------------------------------------------------ +-- 7. Disks and storage +------------------------------------------------------------------------ +SELECT + name, + path, + formatReadableSize(free_space) AS free, + formatReadableSize(total_space) AS total, + round(100.0 * (total_space - free_space) / total_space, 1) AS used_pct, + type, + is_read_only +FROM system.disks +ORDER BY name; + +------------------------------------------------------------------------ +-- 8. Async metrics — system load signals +------------------------------------------------------------------------ +SELECT metric, value +FROM system.asynchronous_metrics +WHERE metric IN ( + 'OSMemoryTotal', + 'OSMemoryAvailable', + 'CGroupMemoryUsed', + 'CGroupMaxCPU', + 'LoadAverage1', + 'jemalloc.resident', + 'MaxPartCountForPartition' +) +ORDER BY metric; diff --git a/altinity-expert-kubernetes/Makefile b/altinity-expert-kubernetes/Makefile new file mode 100644 index 0000000..917f103 --- /dev/null +++ b/altinity-expert-kubernetes/Makefile @@ -0,0 +1,91 @@ +# Altinity Expert Kubernetes - Skills Build System +# Builds zips, links into ~/.claude/skills, and lists skills in this domain. +# Scope: altinity-expert-kubernetes/skills/*/SKILL.md only. + +DOMAIN_DIR := $(shell pwd) +BUILD_DIR := $(DOMAIN_DIR)/releases +SKILL_DIRS := $(shell find skills -maxdepth 2 -name "SKILL.md" ! 
-path "*/.system/*" -exec dirname {} \; 2>/dev/null | sort) +SKILL_ZIPS := $(foreach dir,$(SKILL_DIRS),$(BUILD_DIR)/$(notdir $(dir)).zip) + +CLAUDE_SKILLS_DIR ?= $(HOME)/.claude/skills + +.PHONY: all clean list help link unlink relink links + +all: $(BUILD_DIR) $(SKILL_ZIPS) + @echo "Built $(words $(SKILL_ZIPS)) skill packages in $(BUILD_DIR)/" + +$(BUILD_DIR): + @mkdir -p $(BUILD_DIR) +# ZIP_template(dir): rule for one skill zip. Depends on every file in the skill (not just SKILL.md) so asset edits trigger a rebuild; the output dir is order-only; the old zip is removed first so deleted files do not linger in the archive. +define ZIP_template +$(BUILD_DIR)/$(notdir $(1)).zip: $(1)/SKILL.md $(shell find $(1) -type f ! -name ".DS_Store" ! -name "*.pyc" 2>/dev/null) | $(BUILD_DIR) + @echo "Packaging $(notdir $(1))..." + @cd $(1) && rm -f $(BUILD_DIR)/$(notdir $(1)).zip && zip -r $(BUILD_DIR)/$(notdir $(1)).zip . -x "*.DS_Store" -x "*__MACOSX*" -x "*.git*" -x "*__pycache__*" -x "*.pyc" +endef + +$(foreach dir,$(SKILL_DIRS),$(eval $(call ZIP_template,$(dir)))) + +clean: + @echo "Cleaning build directory..." + @rm -rf $(BUILD_DIR) + +list: + @echo "Expert Kubernetes skills found:" + @$(foreach dir,$(SKILL_DIRS),echo " - $(notdir $(dir)) ($(dir))";) + +link: | $(CLAUDE_SKILLS_DIR) + @$(foreach dir,$(SKILL_DIRS), \ + target="$(CLAUDE_SKILLS_DIR)/$(notdir $(dir))"; \ + src="$(DOMAIN_DIR)/$(dir)"; \ + if [ -L "$$target" ]; then \ + cur=$$(readlink "$$target"); \ + if [ "$$cur" = "$$src" ]; then \ + echo "ok $(notdir $(dir))"; \ + else \ + echo "CONFLICT $(notdir $(dir)) -> $$cur (run 'make relink' to replace)"; \ + fi; \ + elif [ -e "$$target" ]; then \ + echo "SKIP $(notdir $(dir)) (real dir/file exists, not touching)"; \ + else \ + ln -s "$$src" "$$target" && echo "linked $(notdir $(dir))"; \ + fi;) + +unlink: + @$(foreach dir,$(SKILL_DIRS), \ + target="$(CLAUDE_SKILLS_DIR)/$(notdir $(dir))"; \ + if [ -L "$$target" ]; then \ + rm "$$target" && echo "unlinked $(notdir $(dir))"; \ + fi;) +# relink: unlink first, then link via a sub-make so that 'make -j relink' cannot run the two halves concurrently. +relink: unlink ; @$(MAKE) --no-print-directory link + +links: + @echo "Expert Kubernetes skills in $(CLAUDE_SKILLS_DIR):" + @$(foreach dir,$(SKILL_DIRS), \ + target="$(CLAUDE_SKILLS_DIR)/$(notdir $(dir))"; \ + if [ -L "$$target" ]; then \ + echo " $(notdir $(dir)) -> $$(readlink "$$target")"; \ + else \ + echo " $(notdir $(dir)) (not linked)"; \ + fi;) + +$(CLAUDE_SKILLS_DIR): +
@mkdir -p $(CLAUDE_SKILLS_DIR) + +help: + @echo "Altinity Expert Kubernetes - Skills Build System" + @echo "" + @echo "Usage:" + @echo " make Build all skill zip files" + @echo " make all Same as 'make'" + @echo " make clean Remove all built zip files" + @echo " make list List all detected expert-kubernetes skills" + @echo " make link Symlink all expert-kubernetes skills into ~/.claude/skills" + @echo " make unlink Remove those symlinks (leaves real dirs alone)" + @echo " make relink unlink + link" + @echo " make links Show current link state" + @echo " make help Show this help" + @echo "" + @echo "Output: releases/.zip" + @echo "" + @echo "Override link destination: make link CLAUDE_SKILLS_DIR=./.claude/skills" diff --git a/altinity-expert-kubernetes/skills/altinity-expert-kubernetes-desktop/SKILL.md b/altinity-expert-kubernetes/skills/altinity-expert-kubernetes-desktop/SKILL.md new file mode 100644 index 0000000..44603e8 --- /dev/null +++ b/altinity-expert-kubernetes/skills/altinity-expert-kubernetes-desktop/SKILL.md @@ -0,0 +1,279 @@ +--- +name: altinity-expert-kubernetes-desktop +description: Provisions a local Kubernetes cluster on a Linux host for development and demo. Presents the user with kind, k3d, and minikube and their tradeoffs, then installs the chosen tool (with confirmation; falls back to printed commands if the user declines auto-install) and creates a single-node cluster (multi-node opt-in). Use whenever the user needs a local Kubernetes cluster for development or demo and does not already have one running — including as a precursor to skills that deploy software onto Kubernetes (e.g. altinity-deploy-clickhouse-kubernetes). Linux-only; for production use a managed Kubernetes service or kubeadm. +author: Altinity Inc +version: 0.0.1 +license: Apache-2.0 +--- + +# Local Kubernetes (Dev Cluster) + +Stand up a working single-node Kubernetes cluster on the user's Linux host so that downstream skills (or the user directly) have something to deploy into. 
The user picks the tool (kind, k3d, or minikube); this skill walks through the install, cluster creation, and verification, then hands off. + +This skill is tool-agnostic about *what* gets deployed onto the cluster afterwards. It exists so any skill or workflow that needs "a working local Kubernetes" can chain through it without re-implementing the install / create / verify dance. + +--- + +## Action Mode + +Hybrid: + +- Read-only checks (`uname`, `docker info`, `kubectl version`, `kind/k3d/minikube version`, port and RAM probes) run automatically. +- Mutating steps (download and install kind/k3d/minikube/kubectl binaries, cluster creation via `kind create cluster` / `k3d cluster create` / `minikube start`, `kubectl config use-context`, cluster deletion) require explicit user confirmation. Always print the exact command first. +- If the user declines auto-install of a tool, **print the manual install commands** and pause for the user to run them, then resume verification. + +--- + +## Step 1 — Verify Inputs + +Confirm with the user (or take from the calling skill): + +- **Deployment intent** — must be **development / demo**. If the user signals production, stop and recommend a managed Kubernetes service or `kubeadm` instead; this skill is not for production. +- **Cluster name** — default `altinity-dev` so it doesn't collide with the user's other clusters. +- **Topology** — default **single-node** (control-plane only). If the user wants multi-node (testing anti-affinity, PDBs, replica scheduling), they need to say so. +- **Architecture** — `uname -m` resolves to `x86_64` (→ `amd64`) or `aarch64` (→ `arm64`); all three tools support both on Linux. + +If anything's missing, ask. Do not proceed with placeholders. 
+ +--- + +## Step 2 — Verify Host + +Run automatically: + +```bash +uname -s -m +docker version --format '{{.Server.Version}}' 2>/dev/null || echo "Docker not running" +free -h | awk '/^Mem:/ {print $2, $7}' # total / available memory +df -h / | awk 'NR==2 {print $4, "free on /"}' +``` + +Stop and report if: + +- OS is not Linux (this skill is Linux-only; on macOS / Windows the user should use Docker Desktop's Kubernetes or Rancher Desktop instead). +- Docker daemon is not running and the user has not installed an alternative container runtime that the chosen tool supports. +- Available memory is below ~2 GiB for single-node or ~4 GiB for multi-node — warn before proceeding. +- Less than ~5 GiB free on `/` — warn; container images and persistent volumes can fill disk quickly. + +--- + +## Step 3 — Present the Tool Menu and Get the User's Pick + +Show the user this comparison table before they choose. Do not pick silently — local-k8s preference is a real call. + +| Tool | Startup (single-node) | RAM idle | Multi-node | Kubernetes flavor | Best for | +|-------------|------------------------|----------|----------------------|------------------------|-------------------------------------------------------| +| **kind** | ~30 s | ~1.5 GiB | first-class (config) | vanilla upstream k8s | Operator / controller development, CI parity with upstream k8s | +| **k3d** | ~10 s | ~0.5 GiB | first-class (flags) | k3s (slightly trimmed) | Lightest / fastest; good when RAM is tight | +| **minikube**| ~60 s | ~2 GiB | yes (flag) | vanilla upstream k8s | Familiar workflows; supports a VM driver if no Docker | + +Recommendations to relay: + +- **Pick kind** if the user is testing a Kubernetes operator or controller, or wants the closest parity with how operators are tested upstream. +- **Pick k3d** if memory is tight or fast iteration matters more than k8s parity. 
Watch for the small set of k3s differences (no in-tree cloud providers, simplified networking) — most application manifests are unaffected. +- **Pick minikube** if the user already knows it, or if they need the VM driver (e.g., running on a host where Docker isn't an option). + +If the user has no preference, **recommend kind** as the default and say so — don't pick silently. + +--- + +## Step 4 — Install the Chosen Tool and kubectl + +For each binary (the chosen cluster tool + `kubectl`), do: + +1. Check if it's already installed (`command -v <tool>`). +2. If missing, **propose the install command** and ask for confirmation. +3. If the user confirms, run the install command. +4. If the user declines, **print the commands** and pause so the user can run them manually; then re-verify before continuing. + +### Install location + +Prefer `~/.local/bin` (no `sudo`) if it's on PATH: + +```bash +mkdir -p ~/.local/bin +case ":$PATH:" in *":$HOME/.local/bin:"*) INSTALL_DIR="$HOME/.local/bin" ;; *) INSTALL_DIR="/usr/local/bin" ;; esac +``` + +Use `sudo install` for `/usr/local/bin`, plain `install` for `~/.local/bin`. Show the user which directory will be used and confirm before any `sudo` step. 
+ +### Install commands + +Architecture resolution (run once): + +```bash +ARCH=$(uname -m | sed 's/x86_64/amd64/;s/aarch64/arm64/') +``` + +**kubectl** (needed regardless of cluster tool): + +```bash +KVER=$(curl -L -s https://dl.k8s.io/release/stable.txt) +curl -fsSL -o kubectl "https://dl.k8s.io/release/${KVER}/bin/linux/${ARCH}/kubectl" +chmod +x kubectl +[ "$INSTALL_DIR" = "/usr/local/bin" ] && sudo install kubectl "$INSTALL_DIR/" || install kubectl "$INSTALL_DIR/" +rm kubectl +kubectl version --client +``` + +**kind**: + +```bash +KIND_VER=$(curl -fsSL https://api.github.com/repos/kubernetes-sigs/kind/releases/latest | jq -r .tag_name) +curl -fsSL -o kind "https://kind.sigs.k8s.io/dl/${KIND_VER}/kind-linux-${ARCH}" +chmod +x kind +[ "$INSTALL_DIR" = "/usr/local/bin" ] && sudo install kind "$INSTALL_DIR/" || install kind "$INSTALL_DIR/" +rm kind +kind version +``` + +**k3d**: + +```bash +# Official one-liner installer (downloads latest, into /usr/local/bin by default — uses sudo internally). +curl -fsSL https://raw.githubusercontent.com/k3d-io/k3d/main/install.sh | bash +k3d version +``` + +**minikube**: + +```bash +curl -fsSL -o minikube "https://storage.googleapis.com/minikube/releases/latest/minikube-linux-${ARCH}" +chmod +x minikube +[ "$INSTALL_DIR" = "/usr/local/bin" ] && sudo install minikube "$INSTALL_DIR/" || install minikube "$INSTALL_DIR/" +rm minikube +minikube version +``` + +After each install, run the tool's version command (`<tool> version`; for kubectl use `kubectl version --client`, since no cluster exists yet) and report the result. + +--- + +## Step 5 — Create the Cluster + +Use the user-confirmed cluster name (default `altinity-dev`). 
+ +### Single-node (default) + +**kind:** + +```bash +kind create cluster --name altinity-dev +``` + +**k3d:** + +```bash +k3d cluster create altinity-dev +``` + +**minikube:** + +```bash +minikube start --profile altinity-dev --driver=docker +``` + +### Multi-node (opt-in) + +**kind** — uses the `assets/kind-multinode.yaml` shipped with this skill (run the command from the skill's `assets/` directory, or pass the file's full path to `--config`): + +```bash +kind create cluster --name altinity-dev --config kind-multinode.yaml +``` + +**k3d:** + +```bash +k3d cluster create altinity-dev --servers 1 --agents 2 +``` + +**minikube:** + +```bash +minikube start --profile altinity-dev --driver=docker --nodes=3 +``` + +Cluster creation takes 30–90 seconds. Stream output so the user sees progress; don't suppress it. + +--- + +## Step 6 — Verify the Cluster + +Run automatically: + +```bash +kubectl cluster-info +kubectl get nodes +kubectl get storageclass +kubectl get pods -A +``` + +Confirm: + +- `kubectl cluster-info` reports a reachable control plane. +- `kubectl get nodes` shows the expected nodes in `Ready` status. +- A default `StorageClass` exists. The name depends on the tool: + - **kind** → `standard` (rancher.io/local-path) + - **k3d** → `local-path` (rancher.io/local-path) + - **minikube** → `standard` (k8s.io/minikube-hostpath) +- System pods (`kube-system` namespace) are all `Running` / `Completed`. + +Record the `StorageClass` name; downstream skills that deploy persistent workloads will need it. + +If any verification fails, stop and report. Do not hand off a broken cluster. + +--- + +## Step 7 — Handoff + +The cluster is ready. 
Report a handoff summary back to whichever skill or workflow invoked this one: + +``` +cluster_tool: kind | k3d | minikube +cluster_name: altinity-dev +kubeconfig_context: <context name, e.g. kind-altinity-dev> +node_count: 1 | 3 +storage_class: standard | local-path +arch: amd64 | arm64 +notes: <optional free-form notes> +``` + +Then return to the calling skill, or — if no caller — tell the user the cluster is ready and offer common next steps (deploying a workload, installing an operator, etc.). A typical consumer is `altinity-deploy-clickhouse-kubernetes`, which expects `kubeconfig_context` and `storage_class` plugged into its inputs. + +--- + +## Lifecycle and Cleanup + +These commands stop, restart, and delete the cluster. Each is a mutating action — confirm before running. + +| Action | kind | k3d | minikube | +|----------------------|--------------------------------------------|-------------------------------------------|-------------------------------------------| +| Stop (preserve data) | (kind has no stop; use `docker stop`) | `k3d cluster stop altinity-dev` | `minikube stop --profile altinity-dev` | +| Start (resume) | `docker start <node-container>` | `k3d cluster start altinity-dev` | `minikube start --profile altinity-dev` | +| Delete | `kind delete cluster --name altinity-dev` | `k3d cluster delete altinity-dev` | `minikube delete --profile altinity-dev` | +| List clusters | `kind get clusters` | `k3d cluster list` | `minikube profile list` | + +Deleting the cluster also destroys all workloads and data in it. For a dev cluster that's usually the point; for anything you want to keep, back up via `kubectl exec` / volume snapshots / application-specific backup tooling before deletion. + +--- + +## Why Not Production? + +Each of these tools is explicitly a development tool: + +- **kind** runs k8s nodes as Docker containers on a single host; no real HA, no node redundancy. +- **k3d** has the same single-host limitation, plus k3s removes some upstream features that production clusters rely on. 
+- **minikube** is single-host (multi-node is a single-host simulation). + +For production Kubernetes, use a managed service (EKS, GKE, AKS, Altinity.Cloud) or `kubeadm` against real nodes, then run the relevant deploy skills against that cluster directly. + +--- + +## Cross-Module Triggers + +| Condition | Next skill | +|------------------------------------------------------------------------------------------|--------------------------------------------------| +| Cluster verified, caller is the ClickHouse deploy flow | `altinity-deploy-clickhouse-kubernetes` | +| Cluster verified, no specific caller | Report ready; ask the user what to deploy | +| User asked for production K8s | Stop this skill; recommend managed K8s / kubeadm | +| Cluster creation failed (Docker not running, port in use, RAM exhausted) | Stop, report; do not hand off | diff --git a/altinity-expert-kubernetes/skills/altinity-expert-kubernetes-desktop/assets/kind-multinode.yaml b/altinity-expert-kubernetes/skills/altinity-expert-kubernetes-desktop/assets/kind-multinode.yaml new file mode 100644 index 0000000..2a73a2a --- /dev/null +++ b/altinity-expert-kubernetes/skills/altinity-expert-kubernetes-desktop/assets/kind-multinode.yaml @@ -0,0 +1,13 @@ +# kind multi-node cluster config — opt-in for altinity-expert-kubernetes-desktop. +# Creates 1 control-plane + 2 workers so the user can test pod anti-affinity, +# PodDisruptionBudgets, and replica scheduling. +# +# Use: +# kind create cluster --name altinity-dev --config kind-multinode.yaml + +kind: Cluster +apiVersion: kind.x-k8s.io/v1alpha4 +nodes: + - role: control-plane + - role: worker + - role: worker