From 3db212fe189a39338d4efbe280501f121677098c Mon Sep 17 00:00:00 2001 From: Sean Shi Date: Tue, 20 Feb 2024 17:51:10 -0800 Subject: [PATCH 01/14] prepare allowing a csv input --- scripts/throughput_benchmarks.py | 36 ++++++++++++++++++++++++++------ 1 file changed, 30 insertions(+), 6 deletions(-) diff --git a/scripts/throughput_benchmarks.py b/scripts/throughput_benchmarks.py index c689d8cc5..f7be6dc35 100644 --- a/scripts/throughput_benchmarks.py +++ b/scripts/throughput_benchmarks.py @@ -3,6 +3,7 @@ import os import queue import random +import re import threading import time import traceback @@ -206,11 +207,26 @@ def send_requests( return results -def generate_prompt(num, hf_model): - random.seed(1) - text = lorem.words(num // 2) # Roughly 2 tokens per lorem word - tokenizer = AutoTokenizer.from_pretrained(hf_model) - return tokenizer.decode(tokenizer.encode(text)[: num - 2]) +# TODO test this +def read_input_file(input_file: str) -> List[str]: + # Only supports csvs for now + if re.match(r".*\.csv$", input_file): + with open(input_file, "r", newline="") as file: + reader = csv.reader(file) + # May have to ignore first line + return [row[0] for row in reader] + raise ValueError(f"Unsupported file type for input file {input_file}") + + +def generate_prompt(num, hf_model, inputs: Optional[List]): + # TODO handle inputs + if inputs is not None: + raise NotImplementedError + else: + random.seed(1) + text = lorem.words(num // 2) # Roughly 2 tokens per lorem word + tokenizer = AutoTokenizer.from_pretrained(hf_model) + return tokenizer.decode(tokenizer.encode(text)[: num - 2]) def generate_output_token_counts(mean, std, num, input_token_count): @@ -231,8 +247,14 @@ def run_benchmark( concurrency: int, verbose: bool, local_port: int, + input_file: Optional[str] = None, ): - prompt = generate_prompt(config.input_token_count, hf_model) + + inputs = None + + if input_file is not None: + inputs = read_input_file(input_file) + prompt = 
generate_prompt(config.input_token_count, hf_model, inputs) prompt_num_tokens = config.input_token_count @@ -346,6 +368,7 @@ def run_benchmarks( input_token_count: int, output_token_count_mean: int, num_trials: int = 50, + input_file: Optional[str] = None, output_file: Optional[str] = None, use_localhost: bool = False, concurrency: int = 1, @@ -396,6 +419,7 @@ def run_benchmarks_concurrency_range( input_token_count: int, output_token_count_mean: int, num_trials_per_concurrency: int = 5, + input_file: Optional[str] = None, output_file: Optional[str] = None, use_localhost: bool = False, concurrency_min: int = 1, From 140e6a6c059bba0691d50a39598ced1fa9fdc209 Mon Sep 17 00:00:00 2001 From: Sean Shi Date: Tue, 20 Feb 2024 17:53:58 -0800 Subject: [PATCH 02/14] randomly select input --- scripts/throughput_benchmarks.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/scripts/throughput_benchmarks.py b/scripts/throughput_benchmarks.py index f7be6dc35..d218ebfe3 100644 --- a/scripts/throughput_benchmarks.py +++ b/scripts/throughput_benchmarks.py @@ -219,9 +219,8 @@ def read_input_file(input_file: str) -> List[str]: def generate_prompt(num, hf_model, inputs: Optional[List]): - # TODO handle inputs if inputs is not None: - raise NotImplementedError + return random.choice(inputs) else: random.seed(1) text = lorem.words(num // 2) # Roughly 2 tokens per lorem word From e5b45df812af2172b699c96146b1c568b53daf99 Mon Sep 17 00:00:00 2001 From: Sean Shi Date: Wed, 21 Feb 2024 12:41:35 -0800 Subject: [PATCH 03/14] pass some args through --- scripts/throughput_benchmarks.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/scripts/throughput_benchmarks.py b/scripts/throughput_benchmarks.py index d218ebfe3..3b5736b61 100644 --- a/scripts/throughput_benchmarks.py +++ b/scripts/throughput_benchmarks.py @@ -397,6 +397,7 @@ def run_benchmarks( concurrency, verbose, local_port, + input_file, ) all_statistics.append(statistics) except Exception: @@ -439,6 +440,7 @@ def 
run_benchmarks_concurrency_range( input_token_count, output_token_count_mean, num_trials_per_concurrency * concurrency, + input_file, output_file, use_localhost, concurrency, From 37fbb77f478ec88ea3aae39bcc78d0bd40931bd7 Mon Sep 17 00:00:00 2001 From: Sean Shi Date: Wed, 21 Feb 2024 12:45:51 -0800 Subject: [PATCH 04/14] log output token count percentiles --- scripts/throughput_benchmarks.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/scripts/throughput_benchmarks.py b/scripts/throughput_benchmarks.py index 3b5736b61..4c4d16697 100644 --- a/scripts/throughput_benchmarks.py +++ b/scripts/throughput_benchmarks.py @@ -278,7 +278,8 @@ def run_benchmark( elapsed = end - start results = [result for result in results if result is not None] - num_sampled_tokens = sum([result["num_completion_tokens"] for result in results]) + sampled_token_counts = [result["num_completion_tokens"] for result in results] + num_sampled_tokens = sum(sampled_token_counts) num_prompt_tokens = prompt_num_tokens * len(results) n = len(results) time_to_process_prompt = [] @@ -314,6 +315,10 @@ def run_benchmark( p90_time_to_first_token = np.percentile(time_to_first_token, 90) p95_time_to_first_token = np.percentile(time_to_first_token, 95) p99_time_to_first_token = np.percentile(time_to_first_token, 99) + p50_sampled_token_counts = np.percentile(sampled_token_counts, 50) + p90_sampled_token_counts = np.percentile(sampled_token_counts, 90) + p95_sampled_token_counts = np.percentile(sampled_token_counts, 95) + p99_sampled_token_counts = np.percentile(sampled_token_counts, 99) statistics = { "concurrency": concurrency, @@ -351,6 +356,14 @@ def run_benchmark( "total_num_tokens": total_num_tokens, "total_num_sampled_tokens": num_sampled_tokens, } + if input_file is not None: + sampled_token_counts_statistics = { + "p50_sampled_token_counts": p50_sampled_token_counts, + "p90_sampled_token_counts": p90_sampled_token_counts, + "p95_sampled_token_counts": 
p95_sampled_token_counts, + "p99_sampled_token_counts": p99_sampled_token_counts, + } + statistics.update(sampled_token_counts_statistics) if verbose: print(f"Statistics: {statistics}") From 3222ffec9519a5e724b5b2cc6d7ce651a3804de9 Mon Sep 17 00:00:00 2001 From: Sean Shi Date: Wed, 21 Feb 2024 12:55:58 -0800 Subject: [PATCH 05/14] debug + ignore first line in file --- scripts/throughput_benchmarks.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/scripts/throughput_benchmarks.py b/scripts/throughput_benchmarks.py index 4c4d16697..23a7aab71 100644 --- a/scripts/throughput_benchmarks.py +++ b/scripts/throughput_benchmarks.py @@ -19,6 +19,7 @@ AUTH_USER_ID = os.getenv("AUTH_USER_ID") GATEWAY_URL = os.getenv("GATEWAY_URL") +DEBUG = os.getenv("DEBUG") app = typer.Typer(name="throughput-benchmarks", add_completion=False) MAX_CONTEXT_WINDOW = 4096 @@ -88,6 +89,8 @@ def send_request(url, request, user=None): if payload.startswith("data:"): payload_data = payload.lstrip("data:").rstrip("/n") payload_json = json.loads(payload_data) + if DEBUG: + print(payload_json) return { "payload": payload_json, @@ -214,13 +217,16 @@ def read_input_file(input_file: str) -> List[str]: with open(input_file, "r", newline="") as file: reader = csv.reader(file) # May have to ignore first line - return [row[0] for row in reader] + return [row[0] for row in reader][1:] raise ValueError(f"Unsupported file type for input file {input_file}") def generate_prompt(num, hf_model, inputs: Optional[List]): if inputs is not None: - return random.choice(inputs) + choice = random.choice(inputs) + if DEBUG: + print(f"Using input {choice}") + return choice else: random.seed(1) text = lorem.words(num // 2) # Roughly 2 tokens per lorem word From ebe948841694b38c0b848e94bc071e8bcd22051d Mon Sep 17 00:00:00 2001 From: Sean Shi Date: Wed, 21 Feb 2024 13:03:13 -0800 Subject: [PATCH 06/14] lazy try except lmao --- scripts/throughput_benchmarks.py | 5 ++++- 1 file changed, 4 
insertions(+), 1 deletion(-) diff --git a/scripts/throughput_benchmarks.py b/scripts/throughput_benchmarks.py index 23a7aab71..43f2eee0d 100644 --- a/scripts/throughput_benchmarks.py +++ b/scripts/throughput_benchmarks.py @@ -90,7 +90,10 @@ def send_request(url, request, user=None): payload_data = payload.lstrip("data:").rstrip("/n") payload_json = json.loads(payload_data) if DEBUG: - print(payload_json) + try: + print(payload_json["output"]["text"]) + except KeyError: + pass return { "payload": payload_json, From d629d91fb6e0149879790a6ff1e8a26eb71578fa Mon Sep 17 00:00:00 2001 From: Sean Shi Date: Wed, 21 Feb 2024 13:03:52 -0800 Subject: [PATCH 07/14] oops --- scripts/throughput_benchmarks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/throughput_benchmarks.py b/scripts/throughput_benchmarks.py index 43f2eee0d..9838e496f 100644 --- a/scripts/throughput_benchmarks.py +++ b/scripts/throughput_benchmarks.py @@ -220,7 +220,7 @@ def read_input_file(input_file: str) -> List[str]: with open(input_file, "r", newline="") as file: reader = csv.reader(file) # May have to ignore first line - return [row[0] for row in reader][1:] + return [row[1] for row in reader][1:] raise ValueError(f"Unsupported file type for input file {input_file}") From e50af0d28684610ce6deccef344bbb566aa9044f Mon Sep 17 00:00:00 2001 From: Sean Shi Date: Wed, 21 Feb 2024 13:04:59 -0800 Subject: [PATCH 08/14] oops x2 --- scripts/throughput_benchmarks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/throughput_benchmarks.py b/scripts/throughput_benchmarks.py index 9838e496f..efdc7ca97 100644 --- a/scripts/throughput_benchmarks.py +++ b/scripts/throughput_benchmarks.py @@ -91,7 +91,7 @@ def send_request(url, request, user=None): payload_json = json.loads(payload_data) if DEBUG: try: - print(payload_json["output"]["text"]) + print(payload_json["output"]["text"], end="") except KeyError: pass From 2ee933958dba345a5ef6982e79fe51e22e18ccfb Mon Sep 17 
00:00:00 2001 From: Sean Shi Date: Wed, 21 Feb 2024 13:06:36 -0800 Subject: [PATCH 09/14] oops x3 --- scripts/throughput_benchmarks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/throughput_benchmarks.py b/scripts/throughput_benchmarks.py index efdc7ca97..110abfd46 100644 --- a/scripts/throughput_benchmarks.py +++ b/scripts/throughput_benchmarks.py @@ -91,7 +91,7 @@ def send_request(url, request, user=None): payload_json = json.loads(payload_data) if DEBUG: try: - print(payload_json["output"]["text"], end="") + print(payload_json["output"]["text"], end="", flush=True) except KeyError: pass From cfb7e1a6d70339783f0ffc1ecd6be3773d24235c Mon Sep 17 00:00:00 2001 From: Sean Shi Date: Wed, 21 Feb 2024 13:13:56 -0800 Subject: [PATCH 10/14] . --- scripts/throughput_benchmarks.py | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/throughput_benchmarks.py b/scripts/throughput_benchmarks.py index 110abfd46..a3b726d4d 100644 --- a/scripts/throughput_benchmarks.py +++ b/scripts/throughput_benchmarks.py @@ -229,6 +229,7 @@ def generate_prompt(num, hf_model, inputs: Optional[List]): choice = random.choice(inputs) if DEBUG: print(f"Using input {choice}") + print("---") return choice else: random.seed(1) From ff9c6ee743caa84bf935c5ce671c4e95d68ee367 Mon Sep 17 00:00:00 2001 From: Sean Shi Date: Wed, 21 Feb 2024 13:22:43 -0800 Subject: [PATCH 11/14] oops prompt sample is reused --- scripts/throughput_benchmarks.py | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/scripts/throughput_benchmarks.py b/scripts/throughput_benchmarks.py index a3b726d4d..a3f03187e 100644 --- a/scripts/throughput_benchmarks.py +++ b/scripts/throughput_benchmarks.py @@ -9,7 +9,7 @@ import traceback from dataclasses import dataclass from enum import Enum -from typing import List, Optional +from typing import List, Optional, Union import numpy as np import requests @@ -59,6 +59,9 @@ def from_value(cls, value): def 
send_request(url, request, user=None): + if DEBUG: + print(f"Using input {request}") + print("---") start = time.time() response = requests.post( url, @@ -175,17 +178,21 @@ def generate_request( def send_requests( model: str, - prompt: str, + prompt: Union[str, List[str]], output_token_counts: List[int], use_localhost: bool, concurrency: int, framework: InferenceFramework, local_port: int = 5005, ): + if type(prompt) == str: + prompt = [prompt] thread_results: queue.Queue = queue.Queue() requests_queue: queue.Queue = queue.Queue() - for output_token_count in output_token_counts: - request = generate_request(framework, prompt, output_token_count, use_localhost) + for i, output_token_count in enumerate(output_token_counts): + request = generate_request( + framework, prompt[i % len(prompt)], output_token_count, use_localhost + ) requests_queue.put(request) threads = [] for i in range(concurrency): @@ -224,12 +231,11 @@ def read_input_file(input_file: str) -> List[str]: raise ValueError(f"Unsupported file type for input file {input_file}") -def generate_prompt(num, hf_model, inputs: Optional[List]): +def generate_prompt( + num, hf_model, inputs: Optional[List], num_samples: int = 1 +) -> Union[str, List[str]]: if inputs is not None: - choice = random.choice(inputs) - if DEBUG: - print(f"Using input {choice}") - print("---") + choice = random.sample(inputs, min(num_samples, len(inputs))) return choice else: random.seed(1) @@ -263,7 +269,7 @@ def run_benchmark( if input_file is not None: inputs = read_input_file(input_file) - prompt = generate_prompt(config.input_token_count, hf_model, inputs) + prompt = generate_prompt(config.input_token_count, hf_model, inputs, num_trials) prompt_num_tokens = config.input_token_count From 8c13b3f0bb6f8acb1807d7b011692a9b5d28923c Mon Sep 17 00:00:00 2001 From: Sean Shi Date: Thu, 22 Feb 2024 02:40:51 +0000 Subject: [PATCH 12/14] revert the changes to main, I'm gonna just have it take in a distribution of output token counts --- 
scripts/throughput_benchmarks.py | 76 +++++--------------------------- 1 file changed, 11 insertions(+), 65 deletions(-) diff --git a/scripts/throughput_benchmarks.py b/scripts/throughput_benchmarks.py index a3f03187e..c689d8cc5 100644 --- a/scripts/throughput_benchmarks.py +++ b/scripts/throughput_benchmarks.py @@ -3,13 +3,12 @@ import os import queue import random -import re import threading import time import traceback from dataclasses import dataclass from enum import Enum -from typing import List, Optional, Union +from typing import List, Optional import numpy as np import requests @@ -19,7 +18,6 @@ AUTH_USER_ID = os.getenv("AUTH_USER_ID") GATEWAY_URL = os.getenv("GATEWAY_URL") -DEBUG = os.getenv("DEBUG") app = typer.Typer(name="throughput-benchmarks", add_completion=False) MAX_CONTEXT_WINDOW = 4096 @@ -59,9 +57,6 @@ def from_value(cls, value): def send_request(url, request, user=None): - if DEBUG: - print(f"Using input {request}") - print("---") start = time.time() response = requests.post( url, @@ -92,11 +87,6 @@ def send_request(url, request, user=None): if payload.startswith("data:"): payload_data = payload.lstrip("data:").rstrip("/n") payload_json = json.loads(payload_data) - if DEBUG: - try: - print(payload_json["output"]["text"], end="", flush=True) - except KeyError: - pass return { "payload": payload_json, @@ -178,21 +168,17 @@ def generate_request( def send_requests( model: str, - prompt: Union[str, List[str]], + prompt: str, output_token_counts: List[int], use_localhost: bool, concurrency: int, framework: InferenceFramework, local_port: int = 5005, ): - if type(prompt) == str: - prompt = [prompt] thread_results: queue.Queue = queue.Queue() requests_queue: queue.Queue = queue.Queue() - for i, output_token_count in enumerate(output_token_counts): - request = generate_request( - framework, prompt[i % len(prompt)], output_token_count, use_localhost - ) + for output_token_count in output_token_counts: + request = generate_request(framework, prompt, 
output_token_count, use_localhost) requests_queue.put(request) threads = [] for i in range(concurrency): @@ -220,28 +206,11 @@ def send_requests( return results -# TODO test this -def read_input_file(input_file: str) -> List[str]: - # Only supports csvs for now - if re.match(r".*\.csv$", input_file): - with open(input_file, "r", newline="") as file: - reader = csv.reader(file) - # May have to ignore first line - return [row[1] for row in reader][1:] - raise ValueError(f"Unsupported file type for input file {input_file}") - - -def generate_prompt( - num, hf_model, inputs: Optional[List], num_samples: int = 1 -) -> Union[str, List[str]]: - if inputs is not None: - choice = random.sample(inputs, min(num_samples, len(inputs))) - return choice - else: - random.seed(1) - text = lorem.words(num // 2) # Roughly 2 tokens per lorem word - tokenizer = AutoTokenizer.from_pretrained(hf_model) - return tokenizer.decode(tokenizer.encode(text)[: num - 2]) +def generate_prompt(num, hf_model): + random.seed(1) + text = lorem.words(num // 2) # Roughly 2 tokens per lorem word + tokenizer = AutoTokenizer.from_pretrained(hf_model) + return tokenizer.decode(tokenizer.encode(text)[: num - 2]) def generate_output_token_counts(mean, std, num, input_token_count): @@ -262,14 +231,8 @@ def run_benchmark( concurrency: int, verbose: bool, local_port: int, - input_file: Optional[str] = None, ): - - inputs = None - - if input_file is not None: - inputs = read_input_file(input_file) - prompt = generate_prompt(config.input_token_count, hf_model, inputs, num_trials) + prompt = generate_prompt(config.input_token_count, hf_model) prompt_num_tokens = config.input_token_count @@ -294,8 +257,7 @@ def run_benchmark( elapsed = end - start results = [result for result in results if result is not None] - sampled_token_counts = [result["num_completion_tokens"] for result in results] - num_sampled_tokens = sum(sampled_token_counts) + num_sampled_tokens = sum([result["num_completion_tokens"] for result in 
results]) num_prompt_tokens = prompt_num_tokens * len(results) n = len(results) time_to_process_prompt = [] @@ -331,10 +293,6 @@ def run_benchmark( p90_time_to_first_token = np.percentile(time_to_first_token, 90) p95_time_to_first_token = np.percentile(time_to_first_token, 95) p99_time_to_first_token = np.percentile(time_to_first_token, 99) - p50_sampled_token_counts = np.percentile(sampled_token_counts, 50) - p90_sampled_token_counts = np.percentile(sampled_token_counts, 90) - p95_sampled_token_counts = np.percentile(sampled_token_counts, 95) - p99_sampled_token_counts = np.percentile(sampled_token_counts, 99) statistics = { "concurrency": concurrency, @@ -372,14 +330,6 @@ def run_benchmark( "total_num_tokens": total_num_tokens, "total_num_sampled_tokens": num_sampled_tokens, } - if input_file is not None: - sampled_token_counts_statistics = { - "p50_sampled_token_counts": p50_sampled_token_counts, - "p90_sampled_token_counts": p90_sampled_token_counts, - "p95_sampled_token_counts": p95_sampled_token_counts, - "p99_sampled_token_counts": p99_sampled_token_counts, - } - statistics.update(sampled_token_counts_statistics) if verbose: print(f"Statistics: {statistics}") @@ -396,7 +346,6 @@ def run_benchmarks( input_token_count: int, output_token_count_mean: int, num_trials: int = 50, - input_file: Optional[str] = None, output_file: Optional[str] = None, use_localhost: bool = False, concurrency: int = 1, @@ -426,7 +375,6 @@ def run_benchmarks( concurrency, verbose, local_port, - input_file, ) all_statistics.append(statistics) except Exception: @@ -448,7 +396,6 @@ def run_benchmarks_concurrency_range( input_token_count: int, output_token_count_mean: int, num_trials_per_concurrency: int = 5, - input_file: Optional[str] = None, output_file: Optional[str] = None, use_localhost: bool = False, concurrency_min: int = 1, @@ -469,7 +416,6 @@ def run_benchmarks_concurrency_range( input_token_count, output_token_count_mean, num_trials_per_concurrency * concurrency, - input_file, 
output_file, use_localhost, concurrency, From 49219b31ee44fa6fa874b7c719a25fa34d7d4a94 Mon Sep 17 00:00:00 2001 From: Sean Shi Date: Thu, 22 Feb 2024 02:56:13 +0000 Subject: [PATCH 13/14] output token count distribution --- scripts/throughput_benchmarks.py | 52 ++++++++++++++++++++++++++++---- 1 file changed, 46 insertions(+), 6 deletions(-) diff --git a/scripts/throughput_benchmarks.py b/scripts/throughput_benchmarks.py index c689d8cc5..5830c577b 100644 --- a/scripts/throughput_benchmarks.py +++ b/scripts/throughput_benchmarks.py @@ -220,6 +220,31 @@ def generate_output_token_counts(mean, std, num, input_token_count): output[i] = min(output[i], MAX_CONTEXT_WINDOW - input_token_count) return output +def generate_output_token_counts_from_existing(distribution: List[int], num: int, input_token_count: int): + assert len(distribution) > 0, "Can't have a distribution with 0 tokens" + output = [] + # Sample without replacement so that we don't have as much variance + for _ in range(num // len(distribution)): + random.shuffle(distribution) + output.extend(distribution) + random.shuffle(distribution) + output.extend(distribution[:num % len(distribution)]) + assert len(output) == num + + for i in range(len(output)): + output[i] = min(output[i], MAX_CONTEXT_WINDOW - input_token_count) + return output + + +def read_distribution_from_file(fpath: str): + # Assumes the distribution is some json-formatted string that represents a list + try: + with open(fpath, "r") as fin: + return json.load(fin) + except FileNotFoundError: + print(f"File not found. 
Exiting.") + raise + def run_benchmark( model: str, @@ -231,17 +256,23 @@ def run_benchmark( concurrency: int, verbose: bool, local_port: int, + output_token_count_distribution: Optional[List] = None, ): prompt = generate_prompt(config.input_token_count, hf_model) prompt_num_tokens = config.input_token_count - output_token_counts = generate_output_token_counts( - config.output_token_count_mean, - config.output_token_count_std, - num_trials, - config.input_token_count, - ) + if output_token_count_distribution is not None: + output_token_counts = generate_output_token_counts_from_existing( + output_token_count_distribution, num_trials, config.input_token_count + ) + else: + output_token_counts = generate_output_token_counts( + config.output_token_count_mean, + config.output_token_count_std, + num_trials, + config.input_token_count, + ) start = time.time() results = send_requests( @@ -352,10 +383,16 @@ def run_benchmarks( verbose: bool = False, hf_model: Optional[str] = None, local_port: int = 5005, + output_token_count_distribution_file: Optional[str] = None, ): """Run benchmarks.""" all_statistics = [] config = BenchmarkConfig(input_token_count, output_token_count_mean) + + output_token_count_distribution = None + if output_token_count_distribution_file is not None: + output_token_count_distribution = read_distribution_from_file(output_token_count_distribution_file) + try: if verbose: print(f"Running benchmark for config {config}") @@ -375,6 +412,7 @@ def run_benchmarks( concurrency, verbose, local_port, + output_token_count_distribution, ) all_statistics.append(statistics) except Exception: @@ -404,6 +442,7 @@ def run_benchmarks_concurrency_range( verbose: bool = False, hf_model: Optional[str] = None, local_port: int = 5005, + output_token_count_distribution_file: Optional[str] = None, ): if output_file is not None: # Create empty file @@ -422,6 +461,7 @@ def run_benchmarks_concurrency_range( verbose, hf_model, local_port, + output_token_count_distribution_file, ) 
From e9771f7fadaebf0c36621a0b08865455dd97d3b8 Mon Sep 17 00:00:00 2001 From: Sean Shi Date: Mon, 26 Feb 2024 12:43:25 -0800 Subject: [PATCH 14/14] rename var to be more clear --- scripts/throughput_benchmarks.py | 31 ++++++++++++++++++------------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/scripts/throughput_benchmarks.py b/scripts/throughput_benchmarks.py index 5830c577b..d67614a5f 100644 --- a/scripts/throughput_benchmarks.py +++ b/scripts/throughput_benchmarks.py @@ -220,7 +220,10 @@ def generate_output_token_counts(mean, std, num, input_token_count): output[i] = min(output[i], MAX_CONTEXT_WINDOW - input_token_count) return output -def generate_output_token_counts_from_existing(distribution: List[int], num: int, input_token_count: int): + +def generate_output_token_counts_from_existing( + distribution: List[int], num: int, input_token_count: int +): assert len(distribution) > 0, "Can't have a distribution with 0 tokens" output = [] # Sample without replacement so that we don't have as much variance @@ -228,7 +231,7 @@ def generate_output_token_counts_from_existing(distribution: List[int], num: int random.shuffle(distribution) output.extend(distribution) random.shuffle(distribution) - output.extend(distribution[:num % len(distribution)]) + output.extend(distribution[: num % len(distribution)]) assert len(output) == num for i in range(len(output)): @@ -242,7 +245,7 @@ def read_distribution_from_file(fpath: str): with open(fpath, "r") as fin: return json.load(fin) except FileNotFoundError: - print(f"File not found. Exiting.") + print("File not found. 
Exiting.") raise @@ -256,15 +259,15 @@ def run_benchmark( concurrency: int, verbose: bool, local_port: int, - output_token_count_distribution: Optional[List] = None, + response_token_count_distribution: Optional[List] = None, ): prompt = generate_prompt(config.input_token_count, hf_model) prompt_num_tokens = config.input_token_count - if output_token_count_distribution is not None: + if response_token_count_distribution is not None: output_token_counts = generate_output_token_counts_from_existing( - output_token_count_distribution, num_trials, config.input_token_count + response_token_count_distribution, num_trials, config.input_token_count ) else: output_token_counts = generate_output_token_counts( @@ -383,15 +386,17 @@ def run_benchmarks( verbose: bool = False, hf_model: Optional[str] = None, local_port: int = 5005, - output_token_count_distribution_file: Optional[str] = None, + response_token_count_distribution_file: Optional[str] = None, ): """Run benchmarks.""" all_statistics = [] config = BenchmarkConfig(input_token_count, output_token_count_mean) - output_token_count_distribution = None - if output_token_count_distribution_file is not None: - output_token_count_distribution = read_distribution_from_file(output_token_count_distribution_file) + response_token_count_distribution = None + if response_token_count_distribution_file is not None: + response_token_count_distribution = read_distribution_from_file( + response_token_count_distribution_file + ) try: if verbose: @@ -412,7 +417,7 @@ def run_benchmarks( concurrency, verbose, local_port, - output_token_count_distribution, + response_token_count_distribution, ) all_statistics.append(statistics) except Exception: @@ -442,7 +447,7 @@ def run_benchmarks_concurrency_range( verbose: bool = False, hf_model: Optional[str] = None, local_port: int = 5005, - output_token_count_distribution_file: Optional[str] = None, + response_token_count_distribution_file: Optional[str] = None, ): if output_file is not None: # Create 
empty file @@ -461,7 +466,7 @@ def run_benchmarks_concurrency_range( verbose, hf_model, local_port, - output_token_count_distribution_file, + response_token_count_distribution_file, )