From 6350223d185604c131ea9054419bd47c1b7d5618 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A1n=20Chaves?= Date: Fri, 16 Sep 2022 09:46:34 +0200 Subject: [PATCH] =?UTF-8?q?Zyte=20Data=20API=20=E2=86=92=20Zyte=20API?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.rst | 6 +++--- docs/command_line.rst | 20 ++++++++++---------- docs/index.rst | 4 ++-- docs/install.rst | 4 ++-- setup.py | 2 +- zyte_api/__init__.py | 2 +- zyte_api/__main__.py | 12 ++++++------ zyte_api/aio/__init__.py | 2 +- zyte_api/aio/client.py | 4 ++-- zyte_api/aio/retry.py | 3 +-- zyte_api/errors.py | 2 +- 11 files changed, 30 insertions(+), 31 deletions(-) diff --git a/README.rst b/README.rst index d20d06b..75dc378 100644 --- a/README.rst +++ b/README.rst @@ -18,7 +18,7 @@ python-zyte-api :target: https://codecov.io/gh/zytedata/zyte-api :alt: Coverage report -Python client libraries for `Zyte Data API`_. +Python client libraries for `Zyte API`_. Command-line utility and asyncio-based library are provided by this package. @@ -34,7 +34,7 @@ Installation API key ======= -Make sure you have an API key for the `Zyte Data API`_ service. +Make sure you have an API key for the `Zyte API`_ service. You can set ``ZYTE_API_KEY`` environment variable with the key to avoid passing it around explicitly. @@ -46,4 +46,4 @@ License is BSD 3-clause. * Source code: https://github.com/zytedata/python-zyte-api * Issue tracker: https://github.com/zytedata/python-zyte-api/issues -.. _Zyte Data API: https://docs.zyte.com/zyte-api/get-started.html +.. _Zyte API: https://docs.zyte.com/zyte-api/get-started.html diff --git a/docs/command_line.rst b/docs/command_line.rst index 0bddd20..c8cd39b 100644 --- a/docs/command_line.rst +++ b/docs/command_line.rst @@ -20,9 +20,9 @@ Then run a script, to get the results: .. note:: You may use ``python -m zyte_api`` instead of ``zyte-api``. -Requests to get browser HTML from those input URLs will be sent to Zyte Data -API, using up to 20 parallel connections, and the API responses will be stored -in the ``res.jsonl`` `JSON Lines`_ file, 1 response per line. +Requests to get browser HTML from those input URLs will be sent to Zyte API, +using up to 20 parallel connections, and the API responses will be stored in +the ``res.jsonl`` `JSON Lines`_ file, 1 response per line. .. _JSON Lines: https://jsonlines.org/ @@ -34,7 +34,7 @@ the content belongs to. If you need more flexibility, you can customize the requests by creating a JSON Lines file with queries: a JSON object per line. You can pass any -`Zyte Data API`_ options there. For example, you could create the following +`Zyte API`_ options there. For example, you could create the following ``requests.jsonl`` file: .. code-block:: json @@ -46,7 +46,7 @@ a JSON Lines file with queries: a JSON object per line. You can pass any See `API docs`_ for a description of all supported parameters. .. _API docs: https://docs.zyte.com/zyte-api/openapi.html -.. _Zyte Data API: https://docs.zyte.com/zyte-api/get-started.html +.. _Zyte API: https://docs.zyte.com/zyte-api/get-started.html To get results for this ``requests.jsonl`` file, run: @@ -76,11 +76,11 @@ throttling errors. They are handled by CLI automatically, but they make extraction less efficient; please tune the concurrency options to not hit the throttling errors (HTTP 429) often. -You may be also limited by the website speed. The Zyte Data API tries not to hit -any individual website too hard, but it could be better to limit this on -a client side as well. If you're extracting data from a single website, -it could make sense to decrease the amount of parallel requests; it can ensure -higher success ratio overall. +You may be also limited by the website speed. The Zyte API tries not to hit any +individual website too hard, but it could be better to limit this on a client +side as well. If you're extracting data from a single website, it could make +sense to decrease the amount of parallel requests; it can ensure higher success +ratio overall. If you're extracting data from multiple websites, it makes sense to spread the load across time: if you have websites A, B and C, don't send requests in diff --git a/docs/index.rst b/docs/index.rst index f066ab3..e34d233 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -2,7 +2,7 @@ python-zyte-api =============== -Python client libraries for `Zyte Data API`_. +Python client libraries for `Zyte API`_. Command-line utility and asyncio-based library are provided by this package. @@ -25,4 +25,4 @@ Command-line utility and asyncio-based library are provided by this package. changelog license -.. _Zyte Data API: https://docs.zyte.com/zyte-api/get-started.html \ No newline at end of file +.. _Zyte API: https://docs.zyte.com/zyte-api/get-started.html \ No newline at end of file diff --git a/docs/install.rst b/docs/install.rst index 7816f09..587b4fd 100644 --- a/docs/install.rst +++ b/docs/install.rst @@ -13,8 +13,8 @@ Installation API key ======= -Make sure you have an API key for the `Zyte Data API`_ service. +Make sure you have an API key for the `Zyte API`_ service. You can set ``ZYTE_API_KEY`` environment variable with the key to avoid passing it around explicitly. -.. _Zyte Data API: https://docs.zyte.com/zyte-api/get-started.html +.. _Zyte API: https://docs.zyte.com/zyte-api/get-started.html diff --git a/setup.py b/setup.py index d97d3eb..353c5b1 100755 --- a/setup.py +++ b/setup.py @@ -14,7 +14,7 @@ def get_version(): setup( name='zyte-api', version=get_version(), - description='Python interface to Zyte Data API', + description='Python interface to Zyte API', long_description=open('README.rst').read() + "\n\n" + open('CHANGES.rst').read(), long_description_content_type='text/x-rst', author='Zyte Group Ltd', diff --git a/zyte_api/__init__.py b/zyte_api/__init__.py index 74f9aaa..a903afd 100644 --- a/zyte_api/__init__.py +++ b/zyte_api/__init__.py @@ -1,3 +1,3 @@ """ -Python client libraries and command line utilities for Zyte Data API +Python client libraries and command line utilities for Zyte API """ \ No newline at end of file diff --git a/zyte_api/__main__.py b/zyte_api/__main__.py index b47b5f4..5ad2407 100644 --- a/zyte_api/__main__.py +++ b/zyte_api/__main__.py @@ -1,4 +1,4 @@ -""" Basic command-line interface for Zyte Data APIs. """ +""" Basic command-line interface for Zyte API. """ import argparse import json @@ -77,11 +77,11 @@ def read_input(input_fp, intype): def _main(program_name='zyte-api'): - """ Process urls from input file through Zyte Data API """ + """ Process urls from input file through Zyte API """ p = argparse.ArgumentParser( prog=program_name, description=""" - Process input URLs from a file using Zyte Data API. + Process input URLs from a file using Zyte API. """, ) p.add_argument("input", @@ -107,11 +107,11 @@ def _main(program_name='zyte-api'): help="number of connections to the API server " "(default: %(default)s)") p.add_argument("--api-key", - help="Zyte Data API key. " + help="Zyte API key. " "You can also set %s environment variable instead " "of using this option." % ENV_VARIABLE) p.add_argument("--api-url", - help="Zyte Data API endpoint (default: %(default)s)", + help="Zyte API endpoint (default: %(default)s)", default=API_URL) p.add_argument("--loglevel", "-L", default="INFO", choices=["DEBUG", "INFO", "WARNING", "ERROR"], @@ -130,7 +130,7 @@ def _main(program_name='zyte-api'): queries = queries[:args.limit] logger.info(f"Loaded {len(queries)} urls from {args.input.name}; shuffled: {args.shuffle}") - logger.info(f"Running Zyte Data API (connections: {args.n_conn})") + logger.info(f"Running Zyte API (connections: {args.n_conn})") loop = asyncio.get_event_loop() coro = run(queries, diff --git a/zyte_api/aio/__init__.py b/zyte_api/aio/__init__.py index db16ed4..b69b052 100644 --- a/zyte_api/aio/__init__.py +++ b/zyte_api/aio/__init__.py @@ -1,3 +1,3 @@ """ -Asyncio client for Zyte Data API +Asyncio client for Zyte API """ \ No newline at end of file diff --git a/zyte_api/aio/client.py b/zyte_api/aio/client.py index f939b59..82162eb 100644 --- a/zyte_api/aio/client.py +++ b/zyte_api/aio/client.py @@ -1,5 +1,5 @@ """ -Asyncio client for Zyte Data API +Asyncio client for Zyte API """ import asyncio @@ -127,7 +127,7 @@ def request_parallel_as_completed(self, endpoint: str = 'extract', session: Optional[aiohttp.ClientSession] = None, ) -> Iterator[asyncio.Future]: - """ Send multiple requests to Zyte Data API in parallel. + """ Send multiple requests to Zyte API in parallel. Return an `asyncio.as_completed` iterator. ``queries`` is a list of requests to process (dicts). diff --git a/zyte_api/aio/retry.py b/zyte_api/aio/retry.py index aaaa959..8777a52 100644 --- a/zyte_api/aio/retry.py +++ b/zyte_api/aio/retry.py @@ -1,8 +1,7 @@ # -*- coding: utf-8 -*- """ -Zyte Data Extraction retrying logic. +Zyte API retrying logic. -TODO: add sync support; only aio is supported at the moment. TODO: Implement retry logic for temparary errors (520) using the proposed retry-after header. """ import asyncio diff --git a/zyte_api/errors.py b/zyte_api/errors.py index eab2183..b608bf1 100644 --- a/zyte_api/errors.py +++ b/zyte_api/errors.py @@ -6,7 +6,7 @@ @attr.s(auto_attribs=True) class ParsedError: - """ Parsed error from Zyte Data API """ + """ Parsed error from Zyte API """ response_body: bytes data: Optional[dict] parse_error: Optional[str]