Skip to content

Commit e24d7c5

Browse files
author
Beichen Huang
committed
init
0 parents  commit e24d7c5

3,333 files changed

Lines changed: 816229 additions & 0 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.buildkite/check-wheel-size.py

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
# SPDX-License-Identifier: Apache-2.0
2+
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
3+
4+
import os
5+
import sys
6+
import zipfile
7+
8+
# Maximum allowed wheel size, taken from the VLLM_MAX_SIZE_MB environment
# variable; falls back to 500 MiB when the variable is not set.
# The overall quota is 800 MiB, so please spend it wisely
# (see https://github.com/pypi/support/issues/6326).
# Keep this value in sync with the one in the Dockerfile.
VLLM_MAX_SIZE_MB = int(os.getenv("VLLM_MAX_SIZE_MB", "500"))
13+
14+
15+
def print_top_10_largest_files(zip_file):
    """Report the ten biggest members of a zip archive, largest first.

    Each member is printed on its own line together with its uncompressed
    size divided by 1024 * 1024.
    """
    bytes_per_mb = 1024 * 1024
    with zipfile.ZipFile(zip_file, "r") as archive:
        members = []
        for name in archive.namelist():
            members.append((name, archive.getinfo(name).file_size))
        # Stable sort: members of equal size keep their archive order.
        ranked = sorted(members, key=lambda entry: entry[1], reverse=True)
        for name, size in ranked[:10]:
            print(f"{name}: {size / bytes_per_mb:.2f} MBs uncompressed.")
22+
23+
24+
def check_wheel_size(directory):
    """Walk *directory* and compare every ``.whl`` file with the size limit.

    Returns 1 as soon as a wheel larger than ``VLLM_MAX_SIZE_MB`` is found
    (after printing its largest members); returns 0 when no wheel exceeds
    the limit.
    """
    for parent, _, entries in os.walk(directory):
        for entry in entries:
            if not entry.endswith(".whl"):
                continue

            wheel_path = os.path.join(parent, entry)
            wheel_size_mb = os.path.getsize(wheel_path) / (1024 * 1024)

            if wheel_size_mb <= VLLM_MAX_SIZE_MB:
                print(
                    f"Wheel {wheel_path} is within the allowed size "
                    f"({wheel_size_mb:.2f} MB)."
                )
                continue

            # Oversized wheel: explain why, show what takes up the space,
            # and stop at the first offender.
            print(
                f"Not allowed: Wheel {wheel_path} is larger "
                f"({wheel_size_mb:.2f} MB) than the limit "
                f"({VLLM_MAX_SIZE_MB} MB)."
            )
            print_top_10_largest_files(wheel_path)
            return 1
    return 0
45+
46+
47+
if __name__ == "__main__":
    # The directory to scan is a mandatory positional argument.
    if len(sys.argv) < 2:
        print("Usage: python check-wheel-size.py <directory>")
        sys.exit(1)

    # The result of the check becomes the process exit status.
    sys.exit(check_wheel_size(sys.argv[1]))

.buildkite/generate_index.py

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
# SPDX-License-Identifier: Apache-2.0
2+
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
3+
4+
import argparse
5+
import os
6+
7+
# HTML index page listing the x86_64 and aarch64 wheels. The
# ``*_html_escaped`` placeholders are used inside the href attributes and the
# plain placeholders as the visible link text.
template = """<!DOCTYPE html>
<html>
<body>
<h1>Links for vLLM</h1>
<a href="../{x86_wheel_html_escaped}">{x86_wheel}</a><br/>
<a href="../{arm_wheel_html_escaped}">{arm_wheel}</a><br/>
</body>
</html>
"""
16+
17+
# Command line: a single required --wheel argument holding the wheel path.
parser = argparse.ArgumentParser()
parser.add_argument("--wheel", help="The wheel path.", required=True)
args = parser.parse_args()

# Only the file name is used in the generated links.
filename = os.path.basename(args.wheel)

# Derive both wheel names *before* opening index.html: opening with "w"
# truncates the file, so validating first keeps an unsupported wheel from
# leaving an empty index behind.
# sync the abi tag with .buildkite/scripts/upload-wheels.sh
if "x86_64" in filename:
    x86_wheel = filename
    arm_wheel = filename.replace("x86_64", "aarch64").replace(
        "manylinux1", "manylinux2014"
    )
elif "aarch64" in filename:
    x86_wheel = filename.replace("aarch64", "x86_64").replace(
        "manylinux2014", "manylinux1"
    )
    arm_wheel = filename
else:
    raise ValueError(f"Unsupported wheel: {filename}")

with open("index.html", "w") as f:
    # cloudfront requires escaping the '+' character
    f.write(
        template.format(
            x86_wheel=x86_wheel,
            x86_wheel_html_escaped=x86_wheel.replace("+", "%2B"),
            arm_wheel=arm_wheel,
            arm_wheel_html_escaped=arm_wheel.replace("+", "%2B"),
        )
    )

# Report success only after the file has actually been written.
print(f"Generated index.html for {args.wheel}")
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
# For vllm script, with -t option (tensor parallel size).
2+
# bash .buildkite/lm-eval-harness/run-lm-eval-gsm-vllm-baseline.sh -m deepseek-ai/DeepSeek-V2-Lite-Chat -b "auto" -l 1000 -f 5 -t 2
3+
model_name: "deepseek-ai/DeepSeek-V2-Lite-Chat"
4+
tasks:
5+
- name: "gsm8k"
6+
metrics:
7+
- name: "exact_match,strict-match"
8+
value: 0.671
9+
- name: "exact_match,flexible-extract"
10+
value: 0.664
11+
limit: 1000
12+
num_fewshot: 5
13+
trust_remote_code: True
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
# For hf script, without -t option (tensor parallel size).
2+
# bash .buildkite/lm-eval-harness/run-lm-eval-gsm-hf-baseline.sh -m nm-testing/Meta-Llama-3-70B-Instruct-FBGEMM-nonuniform -b auto -l 1000 -f 5
3+
model_name: "nm-testing/Meta-Llama-3-70B-Instruct-FBGEMM-nonuniform"
4+
tasks:
5+
- name: "gsm8k"
6+
metrics:
7+
- name: "exact_match,strict-match"
8+
value: 0.905
9+
- name: "exact_match,flexible-extract"
10+
value: 0.905
11+
limit: 1000
12+
num_fewshot: 5
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
# For hf script, without -t option (tensor parallel size).
2+
# bash .buildkite/lm-eval-harness/run-lm-eval-gsm-hf-baseline.sh -m meta-llama/Meta-Llama-3-70B-Instruct -b 32 -l 250 -f 5
3+
model_name: "meta-llama/Meta-Llama-3-70B-Instruct"
4+
tasks:
5+
- name: "gsm8k"
6+
metrics:
7+
- name: "exact_match,strict-match"
8+
value: 0.892
9+
- name: "exact_match,flexible-extract"
10+
value: 0.892
11+
limit: 250
12+
num_fewshot: 5
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
# For vllm script, with -t option (tensor parallel size).
2+
# bash .buildkite/lm-eval-harness/run-lm-eval-gsm-vllm-baseline.sh -m nm-testing/Meta-Llama-3-8B-Instruct-W8A8-FP8-Channelwise-compressed-tensors -b auto -l 1000 -f 5 -t 1
3+
model_name: "nm-testing/Meta-Llama-3-8B-Instruct-W8A8-FP8-Channelwise-compressed-tensors"
4+
tasks:
5+
- name: "gsm8k"
6+
metrics:
7+
- name: "exact_match,strict-match"
8+
value: 0.752
9+
- name: "exact_match,flexible-extract"
10+
value: 0.754
11+
limit: 1000
12+
num_fewshot: 5
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
# For vllm script, with -t option (tensor parallel size).
2+
# bash .buildkite/lm-eval-harness/run-lm-eval-gsm-vllm-baseline.sh -m nm-testing/Meta-Llama-3-8B-Instruct-FBGEMM-nonuniform -b auto -l 1000 -f 5 -t 1
3+
model_name: "nm-testing/Meta-Llama-3-8B-Instruct-FBGEMM-nonuniform"
4+
tasks:
5+
- name: "gsm8k"
6+
metrics:
7+
- name: "exact_match,strict-match"
8+
value: 0.753
9+
- name: "exact_match,flexible-extract"
10+
value: 0.753
11+
limit: 1000
12+
num_fewshot: 5
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
# For vllm script, with -t option (tensor parallel size).
2+
# bash .buildkite/lm-eval-harness/run-lm-eval-gsm-vllm-baseline.sh -m nm-testing/Meta-Llama-3-8B-FP8-compressed-tensors-test -b 32 -l 1000 -f 5 -t 1
3+
model_name: "nm-testing/Meta-Llama-3-8B-FP8-compressed-tensors-test"
4+
tasks:
5+
- name: "gsm8k"
6+
metrics:
7+
- name: "exact_match,strict-match"
8+
value: 0.755
9+
- name: "exact_match,flexible-extract"
10+
value: 0.755
11+
limit: 1000
12+
num_fewshot: 5
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
# For vllm script, with -t option (tensor parallel size).
2+
# bash .buildkite/lm-eval-harness/run-lm-eval-gsm-vllm-baseline.sh -m neuralmagic/Meta-Llama-3-8B-Instruct-FP8 -b 32 -l 1000 -f 5 -t 1
3+
model_name: "neuralmagic/Meta-Llama-3-8B-Instruct-FP8"
4+
tasks:
5+
- name: "gsm8k"
6+
metrics:
7+
- name: "exact_match,strict-match"
8+
value: 0.753
9+
- name: "exact_match,flexible-extract"
10+
value: 0.753
11+
limit: 1000
12+
num_fewshot: 5
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
# For vllm script, with -t option (tensor parallel size).
2+
# bash .buildkite/lm-eval-harness/run-lm-eval-gsm-vllm-baseline.sh -m nm-testing/Meta-Llama-3-8B-Instruct-W8-Channel-A8-Dynamic-Asym-Per-Token-Test -b "auto" -l 250 -f 5 -t 1
3+
model_name: "nm-testing/Meta-Llama-3-8B-Instruct-W8-Channel-A8-Dynamic-Asym-Per-Token-Test"
4+
tasks:
5+
- name: "gsm8k"
6+
metrics:
7+
- name: "exact_match,strict-match"
8+
value: 0.764
9+
- name: "exact_match,flexible-extract"
10+
value: 0.764
11+
limit: 250
12+
num_fewshot: 5

0 commit comments

Comments
 (0)