Tandem-RLVR/Dockerfile.repro at main · CSSLab/Tandem-RLVR · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
# Reproduction image for Tandem-RLVR (CSSLab/Tandem-RLVR).
#
# Build:   docker build -f Dockerfile.repro -t tandem-rlvr:repro .
# Run:     docker run --gpus all -it tandem-rlvr:repro bash
# Train:   bash verl/run_tandem_native_grpo_deepscaler.sh
#
# Layers are merged into three RUNs to avoid pip-uninstall bloat
# (uninstalled files stay in the previous layer otherwise).

# Base image: CUDA 12.4.1 + cuDNN "devel" variant on Ubuntu 22.04 (per the tag).
FROM nvidia/cuda:12.4.1-cudnn-devel-ubuntu22.04

# Build-time only: keeps apt/debconf from prompting during `docker build`.
# Declared as ARG (not ENV) so it is visible to the RUN steps of this stage
# but is not baked into the environment of running containers.
ARG DEBIAN_FRONTEND=noninteractive

# pip behaviour for every pip invocation, at build time and inside the
# container: do not keep a download cache and do not check for new pip versions.
ENV PIP_NO_CACHE_DIR=1 \
    PIP_DISABLE_PIP_VERSION_CHECK=1

# System layer: Python 3.10 (interpreter, headers, distutils, pip) plus the
# tools used to fetch sources (git, wget, CA certificates).
# Both `python` and `python3` are registered as alternatives that point at
# /usr/bin/python3.10. The apt package index is removed in this same RUN so it
# never lands in the layer.
RUN apt-get update \
 && apt-get install -y --no-install-recommends \
      ca-certificates \
      git \
      python3-pip \
      python3.10 \
      python3.10-dev \
      python3.10-distutils \
      wget \
 && for name in python python3; do \
      update-alternatives --install "/usr/bin/$name" "$name" /usr/bin/python3.10 1 \
        || exit 1; \
    done \
 && rm -rf /var/lib/apt/lists/*

# Python dependency layer: three pip invocations chained in one RUN.
#   1. The PyPI requirements (vllm / torch / transformers pinned exactly,
#      the rest bounded or unpinned as listed).
#   2. A prebuilt flash-attn 2.7.4.post1 wheel (cu12, torch 2.6, cp310 per its filename).
#   3. A prebuilt flashinfer 0.2.2.post1 wheel (cu124, torch 2.6, abi3 per its filename).
# Every requirement that carries extras (`[...]`) or comparison operators is
# quoted so the shell can never interpret it as a glob pattern or a redirection.
# The install order is kept as-is.
RUN pip install \
      vllm==0.8.5 \
      torch==2.6.0 \
      torchvision==0.21.0 \
      torchaudio==2.6.0 \
      tensordict==0.6.2 \
      "numpy<2.0.0" \
      "pyarrow>=15.0.0" \
      pandas \
      transformers==4.57.3 \
      accelerate \
      datasets \
      "ray[default]" \
      codetiming \
      hydra-core \
      wandb \
      dill \
      pybind11 \
      mathruler \
      math-verify \
      "nvidia-ml-py>=12.560.30" \
      "fastapi[standard]>=0.115.0" \
      "optree>=0.13.0" \
      "pydantic>=2.9" \
 && pip install \
      https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu12torch2.6cxx11abiFALSE-cp310-cp310-linux_x86_64.whl \
 && pip install \
      https://github.com/flashinfer-ai/flashinfer/releases/download/v0.2.2.post1/flashinfer_python-0.2.2.post1+cu124torch2.6-cp38-abi3-linux_x86_64.whl

# Project layer: fetch Tandem-RLVR, overlay its modified vLLM sources onto the
# pip-installed vllm package, install verl in editable mode, then smoke-test
# the imports the overlay is expected to provide.
#
# WORKDIR creates /workspace/Tandem-RLVR and makes it the current directory,
# so the clone goes into "." and no `cd` is needed inside the RUN.
WORKDIR /workspace/Tandem-RLVR

# Git ref (branch, tag or commit SHA) to build from. Defaults to `main`;
# override with `--build-arg TANDEM_RLVR_REF=<sha>` to pin a reproducible build.
ARG TANDEM_RLVR_REF=main

RUN git clone https://github.com/CSSLab/Tandem-RLVR.git . \
 && git checkout "$TANDEM_RLVR_REF" \
 # Locate the installed vllm package directory. stderr is discarded and only
 # the last stdout line is kept, so anything printed before the path is ignored.
 && VLLM_DIR=$(python -c "import vllm, os; print(os.path.dirname(vllm.__file__))" 2>/dev/null | tail -1) \
 # The pipe hides a failing `python`, which would leave VLLM_DIR empty and turn
 # the cp target below into "/". Stop here with a clear message instead.
 && if [ ! -d "$VLLM_DIR" ]; then \
      echo "ERROR: could not locate the installed vllm package (got '$VLLM_DIR')" >&2; \
      exit 1; \
    fi \
 # Overlay the repository's vLLM sources on top of the installed package.
 && cp -r vllm_source/vllm/* "$VLLM_DIR/" \
 && pip install -e verl \
 # Smoke tests: fail the build if the patched vllm or verl cannot be imported.
 && python -c "import vllm; print('vllm:', vllm.__version__)" \
 && python -c "from vllm.config import TandemConfig; print('TandemConfig OK')" \
 && python -c "from vllm.v1.worker.tandem import TandemModelManager; print('TandemModelManager OK')" \
 && python -c "from vllm.v1.sample.tandem_sampler import TandemSampler; print('TandemSampler OK')" \
 && python -c "import verl; print('verl:', verl.__file__)"

# Start containers inside the cloned repository so that repo-relative paths
# (e.g. verl/run_tandem_native_grpo_deepscaler.sh from the header) resolve
# without an extra `cd`.
WORKDIR /workspace/Tandem-RLVR
# Default command in exec form: launch bash unless a command is given to `docker run`.
CMD ["bash"]