Skip to content
This repository was archived by the owner on Aug 7, 2025. It is now read-only.
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions benchmarks/benchmark-ab.py
Original file line number Diff line number Diff line change
Expand Up @@ -229,7 +229,7 @@ def warm_up():
click.secho("\n\nExecuting warm-up ...", fg="green")

ab_cmd = (
f"ab -c {execution_params['concurrency']} -s 300 -n {execution_params['requests']/10} -k -p "
f"ab -c {execution_params['concurrency']} -l -s 300 -n {execution_params['requests']/10} -k -p "
f"{execution_params['tmp_dir']}/benchmark/input -T {execution_params['content_type']} "
f"{execution_params['inference_url']}/{execution_params['inference_model_url']} > "
f"{execution_params['result_file']}"
Expand All @@ -247,7 +247,7 @@ def run_benchmark():

click.secho("\n\nExecuting inference performance tests ...", fg="green")
ab_cmd = (
f"ab -c {execution_params['concurrency']} -s 300 -n {execution_params['requests']} -k -p "
f"ab -c {execution_params['concurrency']} -l -s 300 -n {execution_params['requests']} -k -p "
f"{execution_params['tmp_dir']}/benchmark/input -T {execution_params['content_type']} "
f"{execution_params['inference_url']}/{execution_params['inference_model_url']} > "
f"{execution_params['result_file']}"
Expand Down
1 change: 1 addition & 0 deletions benchmarks/benchmark_config_torch_compile_gpu.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ models:
- "bert_torch_compile_gpu.yaml"
- "resnet50_torch_compile_gpu.yaml"
- "sam_fast_torch_compile_gpu.yaml"
- "gpt_fast_torch_compile_gpu.yaml"

# benchmark on "cpu" or "gpu".
# "cpu" is set if "hardware" is not specified
Expand Down
3 changes: 3 additions & 0 deletions benchmarks/benchmark_model_dependencies.sh
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,6 @@ export SEGMENT_ANYTHING_FAST_USE_FLASH_4=0

echo "Installed dependencies and set environment variables for SAM Fast"

# Install dependencies for GPT Fast
pip install sentencepiece huggingface_hub

18 changes: 18 additions & 0 deletions benchmarks/models_config/gpt_fast_torch_compile_gpu.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
---
gpt_fast:
7b_int4:
benchmark_engine: "ab"
url: https://torchserve.pytorch.org/mar_files/gpt_fast_7b_int4.mar
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please clearly specify the model in the name, e.g. Llama-2-7b-hf.

workers:
- 4
batch_delay: 100
batch_size:
- 1
input: "./examples/large_models/gpt_fast/request.json"
requests: 100
concurrency: 4
backend_profiling: False
exec_env: "local"
processors:
- "cpu"
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

cpu should be removed.

- "gpus": "all"