Skip to content
This repository was archived by the owner on Aug 7, 2025. It is now read-only.
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions benchmarks/benchmark-ab.py
Original file line number Diff line number Diff line change
Expand Up @@ -229,7 +229,7 @@ def warm_up():
click.secho("\n\nExecuting warm-up ...", fg="green")

ab_cmd = (
f"ab -c {execution_params['concurrency']} -s 300 -n {execution_params['requests']/10} -k -p "
f"ab -c {execution_params['concurrency']} -l -s 300 -n {execution_params['requests']/10} -k -p "
f"{execution_params['tmp_dir']}/benchmark/input -T {execution_params['content_type']} "
f"{execution_params['inference_url']}/{execution_params['inference_model_url']} > "
f"{execution_params['result_file']}"
Expand All @@ -247,7 +247,7 @@ def run_benchmark():

click.secho("\n\nExecuting inference performance tests ...", fg="green")
ab_cmd = (
f"ab -c {execution_params['concurrency']} -s 300 -n {execution_params['requests']} -k -p "
f"ab -c {execution_params['concurrency']} -l -s 300 -n {execution_params['requests']} -k -p "
f"{execution_params['tmp_dir']}/benchmark/input -T {execution_params['content_type']} "
f"{execution_params['inference_url']}/{execution_params['inference_model_url']} > "
f"{execution_params['result_file']}"
Expand Down
1 change: 1 addition & 0 deletions benchmarks/benchmark_config_torch_compile_gpu.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ models:
- "bert_torch_compile_gpu.yaml"
- "resnet50_torch_compile_gpu.yaml"
- "sam_fast_torch_compile_gpu.yaml"
- "gpt_fast_torch_compile_gpu.yaml"

# benchmark on "cpu" or "gpu".
# "cpu" is set if "hardware" is not specified
Expand Down
3 changes: 3 additions & 0 deletions benchmarks/benchmark_model_dependencies.sh
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,6 @@ export SEGMENT_ANYTHING_FAST_USE_FLASH_4=0

echo "Installed dependencies and set environment variables for SAM Fast"

# Install dependencies for GPT Fast
pip install sentencepiece huggingface_hub

18 changes: 18 additions & 0 deletions benchmarks/models_config/gpt_fast_torch_compile_gpu.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
---
gpt_fast:
7b_int4:
benchmark_engine: "ab"
url: https://torchserve.pytorch.org/mar_files/gpt_fast_7b_int4.mar
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please clearly specify the model in the name, e.g. Llama-2-7b-hf.

workers:
- 4
batch_delay: 100
batch_size:
- 1
input: "./examples/large_models/gpt_fast/request.json"
requests: 100
concurrency: 4
backend_profiling: False
exec_env: "local"
processors:
- "cpu"
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

cpu should be removed.

- "gpus": "all"