-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathDockerfile.repro
More file actions
48 lines (43 loc) · 2.31 KB
/
Dockerfile.repro
File metadata and controls
48 lines (43 loc) · 2.31 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
# Reproduction image for Tandem-RLVR (CSSLab/Tandem-RLVR).
#
# Build: docker build -f Dockerfile.repro -t tandem-rlvr:repro .
# Run: docker run --gpus all -it tandem-rlvr:repro bash
# Train: bash verl/run_tandem_native_grpo_deepscaler.sh
#
# Layers are merged into three RUNs to avoid pip-uninstall bloat
# (uninstalled files stay in the previous layer otherwise).
# CUDA 12.4.1 + cuDNN development image on Ubuntu 22.04; the prebuilt
# flash-attn / flashinfer wheels installed further down are cu12 / cu124 builds.
FROM nvidia/cuda:12.4.1-cudnn-devel-ubuntu22.04

# Build-time only: keeps apt/debconf from prompting during `docker build`.
# Declared as ARG (not ENV) so it is visible to the RUN steps of this stage
# but is not baked into the environment of running containers.
ARG DEBIAN_FRONTEND=noninteractive

# pip behaviour for the build and for any later `pip` use inside the container:
# do not keep a download cache in the image, and skip the "new pip available" check.
ENV PIP_NO_CACHE_DIR=1 \
    PIP_DISABLE_PIP_VERSION_CHECK=1
# System layer: Python 3.10 (interpreter, headers, distutils, pip) plus the
# tools needed to fetch sources (git, wget, CA certificates).
# `python` and `python3` are both registered as alternatives pointing at
# python3.10, and the apt package index is removed in the same layer that
# created it so it does not persist in the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
        ca-certificates \
        git \
        python3-pip \
        python3.10 \
        python3.10-dev \
        python3.10-distutils \
        wget \
    && update-alternatives --install /usr/bin/python python /usr/bin/python3.10 1 \
    && update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.10 1 \
    && rm -rf /var/lib/apt/lists/*
# Python dependency layer, done as three chained `pip install` calls in one RUN:
#   1. vllm / torch stack and the training dependencies from PyPI,
#   2. a prebuilt flash-attn wheel (cu12, torch 2.6, cp310),
#   3. a prebuilt flashinfer wheel (cu124, torch 2.6, abi3).
# Every requirement containing shell metacharacters (`<`, `>`, `[`, `]`) is
# quoted so the shell passes it to pip verbatim; an unquoted `ray[default]`
# would be treated as a glob pattern and could match a file in the working dir.
RUN pip install \
        vllm==0.8.5 torch==2.6.0 torchvision==0.21.0 torchaudio==2.6.0 \
        tensordict==0.6.2 "numpy<2.0.0" "pyarrow>=15.0.0" pandas \
        transformers==4.57.3 accelerate datasets \
        "ray[default]" codetiming hydra-core wandb dill pybind11 mathruler math-verify \
        "nvidia-ml-py>=12.560.30" "fastapi[standard]>=0.115.0" \
        "optree>=0.13.0" "pydantic>=2.9" \
    && pip install \
        https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu12torch2.6cxx11abiFALSE-cp310-cp310-linux_x86_64.whl \
    && pip install \
        https://github.com/flashinfer-ai/flashinfer/releases/download/v0.2.2.post1/flashinfer_python-0.2.2.post1+cu124torch2.6-cp38-abi3-linux_x86_64.whl
WORKDIR /workspace

# Optional git ref (branch, tag or commit SHA) of Tandem-RLVR to check out.
# Empty (the default) keeps whatever the repository's default branch points at;
# pass `--build-arg TANDEM_RLVR_REF=<sha>` for a pinned, repeatable build.
ARG TANDEM_RLVR_REF=""

# Project layer, in one RUN:
#   1. clone Tandem-RLVR (and optionally check out TANDEM_RLVR_REF),
#   2. locate the installed vllm package directory and overlay the repository's
#      vllm_source/vllm/ files on top of it,
#   3. install verl from the checkout in editable mode,
#   4. smoke-test that the patched vllm exposes the Tandem classes and that
#      verl imports, so a broken overlay fails the build instead of the run.
# `tail -1` keeps only the last stdout line of the lookup in case importing
# vllm prints anything before the path. The `test -d` guard aborts the build if
# the lookup produced no usable directory; without it an empty VLLM_DIR would
# make `cp` copy the sources into "/".
RUN git clone https://github.com/CSSLab/Tandem-RLVR.git \
    && cd Tandem-RLVR \
    && if [ -n "$TANDEM_RLVR_REF" ]; then git checkout "$TANDEM_RLVR_REF"; fi \
    && VLLM_DIR=$(python -c "import vllm, os; print(os.path.dirname(vllm.__file__))" | tail -1) \
    && { test -d "$VLLM_DIR" || { echo "could not locate installed vllm package (got: '$VLLM_DIR')" >&2; exit 1; }; } \
    && cp -r vllm_source/vllm/* "$VLLM_DIR/" \
    && pip install -e verl \
    && python -c "import vllm; print('vllm:', vllm.__version__)" \
    && python -c "from vllm.config import TandemConfig; print('TandemConfig OK')" \
    && python -c "from vllm.v1.worker.tandem import TandemModelManager; print('TandemModelManager OK')" \
    && python -c "from vllm.v1.sample.tandem_sampler import TandemSampler; print('TandemSampler OK')" \
    && python -c "import verl; print('verl:', verl.__file__)"
# Start containers inside the cloned repository so repo-relative paths
# (e.g. the training script named in the header) resolve without a `cd`.
WORKDIR /workspace/Tandem-RLVR
# Default to a bash shell (exec form); any command given to `docker run`
# replaces it.
CMD ["bash"]