From 4a0cd6230a3ae7001461863ab1af45f3d82267b0 Mon Sep 17 00:00:00 2001 From: Mikhail Korobov Date: Fri, 29 Jul 2022 20:38:32 +0500 Subject: [PATCH] clean up AggStats * n_results is renamed to n_success; * n_extracted_queries is removed, because it's always the same as n_results (i.e. n_success); * n_input_queries is removed: it wasn't really the number of input queries (it was the number of processed queries), and it can be computed from other stats: success + fatal errors; * added a short comment which explains each stat value --- zyte_api/aio/client.py | 5 +---- zyte_api/stats.py | 26 ++++++++++++++------------ 2 files changed, 15 insertions(+), 16 deletions(-) diff --git a/zyte_api/aio/client.py b/zyte_api/aio/client.py index e82c832..f939b59 100644 --- a/zyte_api/aio/client.py +++ b/zyte_api/aio/client.py @@ -114,14 +114,11 @@ async def request(): try: # Try to make a request result = await request() - self.agg_stats.n_extracted_queries += 1 + self.agg_stats.n_success += 1 except Exception: self.agg_stats.n_fatal_errors += 1 raise - finally: - self.agg_stats.n_input_queries += 1 - self.agg_stats.n_results += 1 return result def request_parallel_as_completed(self, diff --git a/zyte_api/stats.py b/zyte_api/stats.py index d2d804f..1789ee5 100644 --- a/zyte_api/stats.py +++ b/zyte_api/stats.py @@ -24,16 +24,13 @@ class AggStats: def __init__(self): self.time_connect_stats = Statistics() self.time_total_stats = Statistics() - self.n_results = 0 - self.n_fatal_errors = 0 - self.n_attempts = 0 - self.n_429 = 0 - self.n_errors = 0 + self.n_success = 0 # number of successful results returned to the user + self.n_fatal_errors = 0 # number of errors returned to the user, after all retries - self.n_input_queries = 0 - self.n_extracted_queries = 0 # Queries answered without any type of error - self.n_query_responses = 0 + self.n_attempts = 0 # total amount of requests made to Zyte API, including retries + self.n_429 = 0 # number of 429 (throttling) responses + self.n_errors = 
0 # number of errors, including errors which were retried self.status_codes = Counter() self.exception_types = Counter() @@ -47,8 +44,8 @@ def __str__(self): self.n_errors - self.n_fatal_errors, self.n_fatal_errors, self.error_ratio(), - self.n_extracted_queries, - self.n_input_queries, + self.n_success, + self.n_processed, self.success_ratio() ) @@ -66,7 +63,7 @@ def summary(self): self.n_fatal_errors, self.n_errors - self.n_fatal_errors) + "Successful URLs: {} of {}\n".format( - self.n_extracted_queries, self.n_input_queries) + + self.n_success, self.n_processed) + "Success ratio: {:0.1%}\n".format(self.success_ratio()) ) @@ -80,7 +77,12 @@ def error_ratio(self): @zero_on_division_error def success_ratio(self): - return self.n_extracted_queries / self.n_input_queries + return self.n_success / self.n_processed + + @property + def n_processed(self): + """ Total number of processed URLs """ + return self.n_success + self.n_fatal_errors @attr.s