-
Notifications
You must be signed in to change notification settings - Fork 799
288 lines (258 loc) · 10.3 KB
/
release-canary.yml
File metadata and controls
288 lines (258 loc) · 10.3 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
# Canary workflow: each job installs OpenShell through one distribution
# channel and then runs `openshell status` against it.
name: Release Canary

on:
  # Manual runs from the Actions tab.
  workflow_dispatch:
  # Fires when a "Release Dev" run finishes, whatever its outcome; every job
  # below checks the run's conclusion itself before doing any work.
  workflow_run:
    workflows:
      - Release Dev
    types:
      - completed

# Read-only token. NOTE(review): `actions: read` looks like it is there for
# the artifact download from the triggering run in the snap job - confirm.
permissions:
  actions: read
  contents: read

defaults:
  run:
    shell: bash

jobs:
# Installs OpenShell on a macOS runner with the VM driver selected and checks
# that `openshell status` succeeds.
macos:
  name: macOS Homebrew
  # Run on manual dispatch, or when the triggering workflow run succeeded.
  if: ${{ github.event_name == 'workflow_dispatch' || github.event.workflow_run.conclusion == 'success' }}
  runs-on: macos-latest-xlarge
  timeout-minutes: 20
  steps:
    - name: Ensure VM driver
      run: |
        set -euo pipefail
        # Publish the driver choice through launchd's environment.
        # NOTE(review): presumably so services started by the installer
        # inherit it - confirm against install.sh.
        launchctl setenv OPENSHELL_DRIVERS vm

    - name: Install and check status
      env:
        # Resolve the ref in `env:` instead of splicing the expression into
        # the script text, so the shell only ever sees it as data.
        # workflow_run events install from the commit that triggered the
        # run; manual dispatches fall back to github.sha.
        INSTALL_SH_URL: https://raw.githubusercontent.com/NVIDIA/OpenShell/${{ github.event.workflow_run.head_sha || github.sha }}/install.sh
      run: |
        set -euo pipefail
        curl -LsSf "${INSTALL_SH_URL}" | sh
        openshell status
# Installs OpenShell on an Ubuntu runner with the Docker driver selected and
# checks that `openshell status` succeeds.
ubuntu:
  name: Ubuntu Docker
  # Run on manual dispatch, or when the triggering workflow run succeeded.
  if: ${{ github.event_name == 'workflow_dispatch' || github.event.workflow_run.conclusion == 'success' }}
  runs-on: ubuntu-latest
  timeout-minutes: 20
  steps:
    - name: Ensure Docker
      run: |
        set -euo pipefail
        # Only install Docker when the runner image does not already ship it.
        if ! command -v docker >/dev/null 2>&1; then
          sudo apt-get update
          sudo apt-get install -y docker.io
        fi
        # Fall back to `service` when systemctl cannot start the daemon.
        sudo systemctl start docker || sudo service docker start
        # Select the docker driver before the installer runs.
        mkdir -p "${HOME}/.config/openshell"
        printf 'OPENSHELL_DRIVERS=docker\n' > "${HOME}/.config/openshell/gateway.env"
        # Fails the step early if the daemon is not reachable.
        docker info

    - name: Install and check status
      env:
        # Resolve the ref in `env:` instead of splicing the expression into
        # the script text, so the shell only ever sees it as data.
        # workflow_run events install from the commit that triggered the
        # run; manual dispatches fall back to github.sha.
        INSTALL_SH_URL: https://raw.githubusercontent.com/NVIDIA/OpenShell/${{ github.event.workflow_run.head_sha || github.sha }}/install.sh
      run: |
        set -euo pipefail
        curl -LsSf "${INSTALL_SH_URL}" | sh
        openshell status
# Boots a privileged Fedora container with systemd as PID 1, starts root's
# systemd user manager inside it, then runs the installer with the podman
# driver selected and checks `openshell status`.
fedora:
  name: Fedora RPM
  # Run on manual dispatch, or when the triggering workflow run succeeded.
  if: ${{ github.event_name == 'workflow_dispatch' || github.event.workflow_run.conclusion == 'success' }}
  runs-on: linux-amd64-cpu8
  timeout-minutes: 20
  env:
    # Container name is unique per run and per attempt.
    FEDORA_CANARY_CONTAINER: openshell-fedora-canary-${{ github.run_id }}-${{ github.run_attempt }}
  steps:
    - name: Start Fedora systemd container and root user manager
      run: |
        set -euo pipefail
        # Install the prerequisites first, then replace the shell with
        # systemd so it becomes the container's init process.
        docker run --detach \
          --name "${FEDORA_CANARY_CONTAINER}" \
          --privileged \
          --cgroupns=host \
          --tmpfs /run \
          --tmpfs /tmp \
          --volume /sys/fs/cgroup:/sys/fs/cgroup:rw \
          fedora:latest \
          bash -lc 'dnf install -y curl dbus-daemon podman systemd && exec /usr/sbin/init'
        # Poll for up to 120 one-second iterations until systemd answers;
        # bail out early if the container has already exited.
        for _ in $(seq 1 120); do
          if docker exec "${FEDORA_CANARY_CONTAINER}" systemctl list-units --no-pager >/dev/null 2>&1; then
            break
          fi
          if [ "$(docker inspect -f '{{.State.Running}}' "${FEDORA_CANARY_CONTAINER}")" != "true" ]; then
            echo "::error::Fedora systemd container exited before systemd became reachable"
            docker logs "${FEDORA_CANARY_CONTAINER}" >&2 || true
            exit 1
          fi
          sleep 1
        done
        # The loop also falls through on timeout, so probe once more to tell
        # "ready" apart from "gave up".
        if ! docker exec "${FEDORA_CANARY_CONTAINER}" systemctl list-units --no-pager >/dev/null 2>&1; then
          echo "::error::Fedora systemd container did not become reachable within 120s"
          docker logs "${FEDORA_CANARY_CONTAINER}" >&2 || true
          exit 1
        fi
        # The quoted heredoc is fed to bash inside the container unexpanded;
        # the variables it uses come from the `env` prefix below.
        docker exec --interactive "${FEDORA_CANARY_CONTAINER}" env \
          HOME=/root \
          XDG_RUNTIME_DIR=/run/user/0 \
          DBUS_SESSION_BUS_ADDRESS=unix:path=/run/user/0/bus \
          bash -s <<'EOF'
        set -euo pipefail
        # install.sh manages the RPM gateway as a systemd user unit. This
        # container is booted with systemd as PID 1, but it still has no
        # login session. Start root's user manager explicitly so the
        # installer can test service restart and gateway registration
        # instead of its "restart later" fallback.
        mkdir -p "${XDG_RUNTIME_DIR}"
        chmod 700 "${XDG_RUNTIME_DIR}"
        systemctl start user-runtime-dir@0.service || true
        systemctl start user@0.service
        # Retry for up to 30 iterations until the user manager responds.
        for _ in $(seq 1 30); do
          if systemctl --user daemon-reload; then
            break
          fi
          sleep 1
        done
        # Final probe: on failure, dump the user manager's state and exit.
        if ! systemctl --user daemon-reload; then
          systemctl status user@0.service --no-pager >&2 || true
          journalctl -u user@0.service --no-pager -n 80 >&2 || true
          systemctl --user status --no-pager >&2 || true
          exit 1
        fi
        EOF

    - name: Install and check status
      env:
        # Resolve the ref in `env:` instead of splicing the expression into
        # the script text, so the shell only ever sees it as data.
        # workflow_run events install from the commit that triggered the
        # run; manual dispatches fall back to github.sha.
        INSTALL_SH_URL: https://raw.githubusercontent.com/NVIDIA/OpenShell/${{ github.event.workflow_run.head_sha || github.sha }}/install.sh
      run: |
        set -euo pipefail
        # Forward the runner-side URL into the container's environment; the
        # quoted heredoc expands it there.
        docker exec --interactive "${FEDORA_CANARY_CONTAINER}" env \
          HOME=/root \
          XDG_RUNTIME_DIR=/run/user/0 \
          DBUS_SESSION_BUS_ADDRESS=unix:path=/run/user/0/bus \
          INSTALL_SH_URL="${INSTALL_SH_URL}" \
          bash -s <<'EOF'
        set -euo pipefail
        mkdir -p "${HOME}/.config/openshell"
        printf 'OPENSHELL_DRIVERS=podman\n' > "${HOME}/.config/openshell/gateway.env"
        podman info
        curl -LsSf "${INSTALL_SH_URL}" | sh
        openshell status
        EOF

    - name: Stop Fedora systemd container
      # Always clean up, including after a failed or cancelled step.
      if: always()
      run: |
        # Best effort: a missing container must not fail the job.
        docker rm -f "${FEDORA_CANARY_CONTAINER}" >/dev/null 2>&1 || true
# Installs the snap built by the triggering run, wires up its interfaces, and
# checks `openshell status` against the snap's local gateway.
ubuntu-snap:
  name: Ubuntu Snap
  # No workflow_dispatch branch here: the snap is downloaded from the
  # triggering run (see `run-id` below), which only workflow_run events have.
  if: github.event.workflow_run.conclusion == 'success'
  runs-on: ubuntu-latest
  timeout-minutes: 20
  steps:
    - name: Install snapd
      run: |
        set -euo pipefail
        sudo apt-get update
        sudo apt-get install -y snapd
        sudo systemctl enable --now snapd.socket
        sudo systemctl start snapd
        # Block until snapd has finished seeding.
        sudo snap wait system seed.loaded

    - name: Install Docker snap
      run: |
        set -euo pipefail
        sudo snap install docker

    - name: Download snap from release-dev artifacts
      uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1
      with:
        # Pull from the run that triggered this workflow, not from this run.
        run-id: ${{ github.event.workflow_run.id }}
        github-token: ${{ github.token }}
        pattern: snap-linux-amd64
        merge-multiple: true
        path: release/

    - name: 'Install snap (dangerous — from release, not store)'
      run: |
        set -euo pipefail
        sudo snap install ./release/*.snap --dangerous

    - name: Connect interfaces
      run: |
        set -euo pipefail
        # The docker plug names its slot explicitly; the remaining plugs are
        # connected without one.
        sudo snap connect openshell:docker docker:docker-daemon
        for plug in log-observe system-observe ssh-keys; do
          sudo snap connect "openshell:${plug}"
        done

    - name: Register snap gateway and check status
      run: |
        set -euo pipefail
        openshell --version
        sudo snap services openshell
        # Register the gateway on its local port, select it, then query it.
        openshell gateway add http://127.0.0.1:17670 --local --name snap-docker
        openshell gateway select snap-docker
        openshell status
# Deploys the OpenShell Helm chart into a throwaway kind cluster, port-forwards
# the gateway service, installs the CLI, and checks `openshell status`
# against the forwarded gateway.
kubernetes:
  name: Kubernetes Helm (kind)
  # Run on manual dispatch, or when the triggering workflow run succeeded.
  if: ${{ github.event_name == 'workflow_dispatch' || github.event.workflow_run.conclusion == 'success' }}
  runs-on: ubuntu-latest
  timeout-minutes: 20
  env:
    KIND_CLUSTER_NAME: release-canary-${{ github.run_id }}
    RELEASE_NAME: openshell
    RELEASE_NAMESPACE: openshell
    KIND_GATEWAY_NAME: kind
  steps:
    - name: Install Helm
      uses: azure/setup-helm@dda3372f752e03dde6b3237bc9431cdc2f7a02a2 # v5.0.0

    - name: Create kind cluster
      uses: helm/kind-action@ef37e7f390d99f746eb8b610417061a60e82a6cc # v1.14.0
      with:
        cluster_name: ${{ env.KIND_CLUSTER_NAME }}
        wait: 120s

    - name: Install OpenShell Helm chart from GHCR OCI
      run: |
        set -euo pipefail
        # TLS is disabled on the server; the CLI below registers the gateway
        # over plain http through the port-forward.
        helm install "$RELEASE_NAME" oci://ghcr.io/nvidia/openshell/helm-chart \
          --version 0.0.0-dev \
          --namespace "$RELEASE_NAMESPACE" --create-namespace \
          --set server.disableTls=true \
          --wait --timeout 5m

    - name: Verify gateway pod is Ready
      run: |
        set -euo pipefail
        kubectl wait --namespace "$RELEASE_NAMESPACE" \
          --for=condition=Ready pod \
          --selector="app.kubernetes.io/name=openshell,app.kubernetes.io/instance=${RELEASE_NAME}" \
          --timeout=300s

    - name: Port-forward gateway service
      run: |
        set -euo pipefail
        # Run the forward in the background so later steps can use it; keep
        # its output and PID in the workspace.
        nohup kubectl port-forward --namespace "$RELEASE_NAMESPACE" \
          "svc/${RELEASE_NAME}" 8080:8080 \
          > port-forward.log 2>&1 &
        echo $! > port-forward.pid
        # Probe the local port with bash's /dev/tcp for up to 30 iterations.
        for _ in $(seq 1 30); do
          if (echo > /dev/tcp/127.0.0.1/8080) >/dev/null 2>&1; then
            echo "port-forward is reachable"
            exit 0
          fi
          sleep 1
        done
        echo "port-forward did not become reachable" >&2
        cat port-forward.log >&2
        exit 1

    - name: Install OpenShell CLI
      env:
        # Resolve the ref in `env:` instead of splicing the expression into
        # the script text, so the shell only ever sees it as data.
        # workflow_run events install from the commit that triggered the
        # run; manual dispatches fall back to github.sha.
        INSTALL_SH_URL: https://raw.githubusercontent.com/NVIDIA/OpenShell/${{ github.event.workflow_run.head_sha || github.sha }}/install.sh
      run: |
        set -euo pipefail
        mkdir -p "${HOME}/.config/openshell"
        printf 'OPENSHELL_DRIVERS=docker\n' > "${HOME}/.config/openshell/gateway.env"
        curl -LsSf "${INSTALL_SH_URL}" | sh

    - name: Register kind gateway and check status
      run: |
        set -euo pipefail
        openshell gateway add http://127.0.0.1:8080 --local --name "$KIND_GATEWAY_NAME"
        openshell status

    - name: Diagnostics on failure
      if: failure()
      run: |
        # Keep going after individual failures so every section is printed.
        set +e
        echo "--- helm status ---"
        helm status "$RELEASE_NAME" --namespace "$RELEASE_NAMESPACE"
        echo "--- helm get manifest ---"
        helm get manifest "$RELEASE_NAME" --namespace "$RELEASE_NAMESPACE"
        echo "--- get all ---"
        kubectl get all --namespace "$RELEASE_NAMESPACE"
        echo "--- describe pods ---"
        kubectl describe pods --namespace "$RELEASE_NAMESPACE"
        echo "--- pod logs ---"
        kubectl logs --namespace "$RELEASE_NAMESPACE" \
          --selector="app.kubernetes.io/name=openshell,app.kubernetes.io/instance=${RELEASE_NAME}" \
          --tail=200 --all-containers --prefix
        echo "--- port-forward log ---"
        cat port-forward.log 2>/dev/null
        echo "--- openshell gateway list ---"
        openshell gateway list 2>/dev/null
        echo "--- openshell version ---"
        openshell --version 2>/dev/null