Skip to content

Commit aaf8065

Browse files
(DS-2816)[API] feat: create Pydantic v2 models for Titelive products provided by Data
1 parent f17414d commit aaf8065

File tree

5 files changed

+142
-25
lines changed

5 files changed

+142
-25
lines changed

api/src/pcapi/connectors/big_query/queries/base.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,20 @@
11
import typing
22
from collections.abc import Mapping
33

4+
import pydantic as pydantic_v2
45
import pydantic.v1 as pydantic_v1
56

67
import pcapi.connectors.big_query as big_query_connector
78

89

9-
Row = typing.TypeVar("Row", bound=pydantic_v1.BaseModel)
10+
# A row model may be built on either pydantic generation (v1 or v2).
AnyBaseModel = pydantic_v1.BaseModel | pydantic_v2.BaseModel

# Type variable for the concrete row model, bounded by the v1/v2 union above.
Row = typing.TypeVar("Row", bound=AnyBaseModel)
# Generator that yields rows and neither receives sent values nor returns one.
RowIterator = typing.Generator[Row, None, None]
1114

1215

1316
class MalformedRow(Exception):
14-
def __init__(self, msg: str, index: int, model: type[pydantic_v1.BaseModel], raw_query: str):
17+
def __init__(self, msg: str, index: int, model: type[AnyBaseModel], raw_query: str):
1518
self.index = index
1619
self.model = model
1720
self.raw_query = raw_query
@@ -30,13 +33,13 @@ def execute(self, page_size: int = 1_000, **parameters: typing.Any) -> RowIterat
3033
for index, row in enumerate(rows):
3134
try:
3235
yield self.model(**typing.cast(Mapping, row))
33-
except (pydantic_v1.ValidationError, TypeError) as err:
36+
except (pydantic_v1.ValidationError, pydantic_v2.ValidationError, TypeError) as err:
3437
raise MalformedRow(msg=str(row), index=index, model=self.model, raw_query=self.raw_query) from err
3538

3639
@property
def raw_query(self) -> str:
    """Query text for this query object; this base implementation always raises.

    `execute` forwards this value to `MalformedRow` when a row cannot be
    turned into a model instance.
    NOTE(review): presumably the SQL sent to BigQuery -- confirm against the
    part of `execute` that runs the query.
    """
    raise NotImplementedError()
3942

4043
@property
def model(self) -> type[AnyBaseModel]:
    """Pydantic class (v1 or v2) used for each row; this base implementation always raises.

    `execute` calls it with the row's items as keyword arguments and wraps
    validation failures in `MalformedRow`.
    """
    raise NotImplementedError()

api/src/pcapi/connectors/big_query/queries/product.py

Lines changed: 119 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,26 +1,123 @@
1+
import datetime
12
import logging
23
import typing
4+
from typing import Any
35

4-
from pydantic.v1.class_validators import validator
6+
import pydantic as pydantic_v2
57

68
from pcapi import settings
79
from pcapi.connectors.big_query.queries.base import BaseQuery
8-
from pcapi.connectors.serialization.titelive_serializers import TiteLiveBookArticle
9-
from pcapi.connectors.serialization.titelive_serializers import TiteliveMusicArticle
1010
from pcapi.connectors.titelive import TiteliveBase
11+
from pcapi.utils import date as date_utils
1112

1213

1314
logger = logging.getLogger(__name__)
1415

1516

16-
class BigQueryTiteliveBookProductModel(TiteLiveBookArticle):
17+
def _format_gtl_code(code: str) -> str:
18+
# A GTL id is 8 characters long.
19+
# Each pair represents a GTL level.
20+
# The first 2 characters are level 1, the next 2 are level 2, etc.
21+
# - example: 05030000 corresponds to a level 1 GTL of 05 and a level 2 of 03. So "Tourism & Travel World".
22+
23+
# We receive gtl_ids without leading zeros, and sometimes without trailing ones.
24+
# We must add them to have an 8-character code.
25+
26+
# We start by adding the missing zeros to the left.
27+
# If we receive a code with an odd number of characters, we must add a zero to the left.
28+
# '5030000' -> '05030000'
29+
# Otherwise, we don't add anything.
30+
# '110400' -> '110400'
31+
32+
if len(code) % 2 == 1:
33+
code = "0" + code
34+
35+
# Then we add the missing zeros to the right to have 8 characters.
36+
# '050300' -> '05030000'
37+
# '110400' -> '11040000'
38+
code = code.ljust(8, "0")
39+
return code
40+
41+
42+
class GenreTitelive(pydantic_v2.BaseModel):
    """One GTL genre entry: an 8-character code and its `libelle`."""

    # Must be exactly 8 characters; `validate_code` pads the raw value first.
    code: str = pydantic_v2.Field(min_length=8, max_length=8)
    # Only constrained to be a string. NOTE(review): presumably the genre's display label -- confirm.
    libelle: str

    @pydantic_v2.field_validator("code", mode="before")
    @classmethod
    def validate_code(cls, code: str) -> str:
        """Pad the incoming code with `_format_gtl_code` before the length constraints apply."""
        return _format_gtl_code(code)
50+
51+
52+
class TiteliveGtl(pydantic_v2.BaseModel):
    """Wrapper around the GTL genres attached to a product."""

    # Genres keyed by a string; optional, defaults to None.
    # NOTE(review): the meaning of the keys is not visible here -- confirm against the upstream payload.
    first: dict[str, GenreTitelive] | None = None
54+
55+
56+
class BigQueryTiteliveProductBaseModel(pydantic_v2.BaseModel):
    """Fields and input clean-up shared by the Titelive product models.

    Before field validation, every empty string in a dict payload is replaced
    by ``None``. Dedicated validators then normalize the publication date,
    the GTL block and the two image flags.
    """

    # Aliased fields (`image`, `image_4`) may also be populated by field name.
    model_config = pydantic_v2.ConfigDict(populate_by_name=True)

    ean: str
    titre: str
    recto_uuid: str | None = None
    verso_uuid: str | None = None
    has_image: bool = pydantic_v2.Field(alias="image", default=False)
    has_verso_image: bool = pydantic_v2.Field(alias="image_4", default=False)

    resume: str | None = None
    codesupport: str | None = None
    gtl: TiteliveGtl | None = None
    dateparution: datetime.date | None = None
    editeur: str | None = None
    prix: float | None = None

    # Required, and exactly 13 characters long.
    gencod: str = pydantic_v2.Field(min_length=13, max_length=13)

    @pydantic_v2.model_validator(mode="before")
    @classmethod
    def parse_empty_strings_as_none(cls, data: Any) -> Any:
        """Replace every ``""`` value of a dict payload with ``None``.

        Anything that is not a dict is handed back untouched.
        """
        if not isinstance(data, dict):
            return data
        return {key: (None if value == "" else value) for key, value in data.items()}

    @pydantic_v2.field_validator("dateparution", mode="before")
    @classmethod
    def parse_dates(cls, v: Any) -> Any:
        """Delegate parsing of the raw publication date to `date_utils.parse_french_date`."""
        parsed = date_utils.parse_french_date(v)
        return parsed

    @pydantic_v2.field_validator("gtl", mode="before")
    @classmethod
    def validate_gtl(cls, gtl: TiteliveGtl | list) -> TiteliveGtl | None:
        """Treat a list payload as "no GTL"; pass any other value through."""
        return None if isinstance(gtl, list) else gtl

    @pydantic_v2.field_validator("has_image", "has_verso_image", mode="before")
    @classmethod
    def validate_image(cls, image: str | int | None) -> bool:
        """Turn the raw image marker into a boolean.

        The API currently sends 0 (int) when there is no image and "1" (str)
        when there is one. It has been flaky in the past, so this is
        deliberately defensive: ``None`` and every form of 0 mean ``False``,
        every form of 1 means ``True``.

        Raises:
            ValueError: if the value converts to an integer other than 0 or 1
                (``int()`` itself also raises on unparseable input).
        """
        if image is None:
            return False
        flag = int(image)
        if flag not in (0, 1):
            raise ValueError(f"unhandled image value {image}")
        return flag == 1
104+
105+
106+
class BigQueryTiteliveBookProductModel(BigQueryTiteliveProductBaseModel):
21107
auteurs_multi: list[str]
22-
23-
@validator("auteurs_multi", pre=True)
108+
langueiso: str | None = None
109+
taux_tva: str | None = None
110+
id_lectorat: str | None = None
111+
112+
@pydantic_v2.field_validator("taux_tva", mode="before")
@classmethod
def validate_code_tva(cls, value: typing.Literal[0] | str | None) -> str | None:
    """Map a raw `taux_tva` equal to 0 to ``None``; return any other value unchanged."""
    return None if value == 0 else value
118+
119+
@pydantic_v2.field_validator("auteurs_multi", mode="before")
120+
@classmethod
24121
def validate_auteurs_multi(cls, auteurs_multi: typing.Any) -> list:
25122
if isinstance(auteurs_multi, list):
26123
return auteurs_multi
@@ -62,11 +159,16 @@ class BigQueryTiteliveBookProductDeltaQuery(BaseQuery):
62159
model = BigQueryTiteliveBookProductModel
63160

64161

65-
class BigQueryTiteliveMusicProductModel(TiteliveMusicArticle):
66-
ean: str
67-
titre: str
68-
recto_uuid: str | None
69-
verso_uuid: str | None
162+
class BigQueryTiteliveMusicProductModel(BigQueryTiteliveProductBaseModel):
    """Music-specific fields on top of `BigQueryTiteliveProductBaseModel`.

    Every field here is optional and defaults to ``None``.
    """

    label: str | None = None
    compositeur: str | None = None
    # The music delta query selects `performer as interprete`.
    interprete: str | None = None
    # The music delta query selects `nb_discs as nb_galettes`.
    nb_galettes: str | None = None
    # The music delta query selects `artist as artiste`.
    artiste: str | None = None
    # The music delta query selects `comment as commentaire`.
    commentaire: str | None = None
    # The music delta query selects `explicit_content as contenu_explicite`.
    contenu_explicite: int | None = None
    # The music delta query selects `availability as dispo`.
    dispo: int | None = None
    # The music delta query selects `distributor as distributeur`.
    distributeur: str | None = None
70172

71173

72174
class BigQueryTiteliveMusicProductDeltaQuery(BaseQuery):
@@ -87,7 +189,10 @@ class BigQueryTiteliveMusicProductDeltaQuery(BaseQuery):
87189
performer as interprete,
88190
nb_discs as nb_galettes,
89191
artist as artiste,
90-
192+
comment as commentaire,
193+
explicit_content as contenu_explicite,
194+
availability as dispo,
195+
distributor as distributeur,
91196
recto_uuid,
92197
verso_uuid,
93198
image,

api/src/pcapi/core/providers/titelive_book_search.py

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,13 @@
11
import logging
22
import textwrap
3+
from typing import Protocol
4+
from typing import cast
35

46
import pydantic.v1 as pydantic
57

68
import pcapi.core.fraud.models as fraud_models
9+
from pcapi.connectors.big_query.queries.product import BigQueryTiteliveBookProductModel
10+
from pcapi.connectors.big_query.queries.product import BigQueryTiteliveProductBaseModel
711
from pcapi.connectors.serialization.titelive_serializers import GenreTitelive
812
from pcapi.connectors.serialization.titelive_serializers import TiteLiveBookArticle
913
from pcapi.connectors.serialization.titelive_serializers import TiteLiveBookWork
@@ -156,15 +160,20 @@ def extract_eans_from_titelive_response(json_response: list[dict]) -> set[str]:
156160
EMPTY_GTL = GenreTitelive(code="".zfill(8), libelle="Empty GTL")
157161

158162

159-
def get_gtl_id(article: TiteliveArticle) -> str:
163+
class HasCode(Protocol):
    """Structural type for any object that exposes a string `code` attribute.

    `get_gtl_id` casts genre objects to this protocol before reading `.code`.
    """

    code: str
165+
166+
167+
def get_gtl_id(article: TiteliveArticle | BigQueryTiteliveProductBaseModel) -> str:
    """Return the greatest GTL code attached to *article*.

    Falls back to ``EMPTY_GTL.code`` when the article has no ``gtl`` or its
    ``gtl.first`` mapping is missing or empty. Codes are compared as strings;
    the greatest one is taken to be the most precise genre.
    """
    genres = article.gtl.first if article.gtl else None
    if not genres:
        return EMPTY_GTL.code
    # The cast only informs the type checker that each genre exposes `.code`.
    return max(cast(HasCode, genre).code for genre in genres.values())
165172

166173

167-
def get_ineligibility_reasons(article: TiteLiveBookArticle, title: str) -> list[str] | None:
174+
def get_ineligibility_reasons(
175+
article: TiteLiveBookArticle | BigQueryTiteliveBookProductModel, title: str
176+
) -> list[str] | None:
168177
# Ouvrage avec pierres ou encens, jeux de société ou escape game en coffrets,
169178
# marchandisage : jouets, goodies, peluches, posters, papeterie, etc...
170179
reasons = []

api/src/pcapi/core/providers/titelive_bq_music_search.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ def fill_product_specifics(
5959
artist=bq_product.artiste,
6060
author=bq_product.compositeur,
6161
comment=bq_product.commentaire,
62-
contenu_explicite=bq_product.contenu_explicite,
62+
contenu_explicite=str(bq_product.contenu_explicite),
6363
date_parution=str(bq_product.dateparution) if bq_product.dateparution else None,
6464
dispo=bq_product.dispo,
6565
distributeur=bq_product.distributeur,

api/tests/providers/test_titelive_music_product_synchro_with_bigquery.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -168,7 +168,7 @@ def test_titelive_sync_failure_event(self, mock_gcp_data, mock_gcp_backend):
168168
def test_sync_skips_unallowed_format(self, mock_execute, mock_gcp_data, mock_gcp_backend):
169169
providers_factories.ProviderFactory.create(name=providers_constants.TITELIVE_ENRICHED_BY_DATA)
170170
fixture = copy.deepcopy(fixtures.MUSIC_SEARCH_FIXTURE)
171-
fixture["result"][1]["article"]["1"]["codesupport"] = 35
171+
fixture["result"][1]["article"]["1"]["codesupport"] = "35"
172172
bq_products = self._prepare_iterator_from_fixture(fixture)
173173
mock_execute.return_value = iter(bq_products)
174174

0 commit comments

Comments
 (0)