Skip to content

Commit e5c36e4

Browse files
committed
update mal, anilist, kitsu import
1 parent f7fdb6b commit e5c36e4

File tree

8 files changed

+229
-95
lines changed

8 files changed

+229
-95
lines changed

src/app/database.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
from datetime import timedelta
33

44
from django.apps import apps
5+
from django.db import models
56
from django.db.models import Count, F
67
from django.db.models.functions import TruncDate
78

@@ -45,6 +46,14 @@ def get_fields(model):
4546
return [f.name for f in model._meta.fields] # noqa: SLF001
4647

4748

49+
def get_unique_constraint_fields(model):
    """Return the fields of the first unique constraint declared on the model.

    Only the first ``UniqueConstraint`` found in the model's constraints is
    considered. Returns ``None`` when the model declares none.
    """
    unique_constraints = (
        constraint
        for constraint in model._meta.constraints  # noqa: SLF001
        if isinstance(constraint, models.UniqueConstraint)
    )
    first_match = next(unique_constraints, None)
    if first_match is None:
        return None
    return first_match.fields
55+
56+
4857
def get_properties(model):
    """Return the names of every attribute on the model that is a property."""
    property_names = []
    # dir() yields names in sorted order, so the result is sorted as well
    for attr_name in dir(model):
        if isinstance(getattr(model, attr_name), property):
            property_names.append(attr_name)
    return property_names

src/integrations/helpers.py

Lines changed: 101 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,107 @@
1-
from simple_history.utils import bulk_create_with_history
1+
from simple_history.utils import bulk_create_with_history, bulk_update_with_history
22

3+
import app
34

4-
def bulk_chunk_import(media_list, model, user):
5-
"""Bulk import media in chunks.
65

7-
Fixes bulk_create_with_history limit
8-
https://github.com/jazzband/django-simple-history/issues/1216#issuecomment-1903240831
9-
"""
10-
chunk_size = 500
11-
for i in range(0, len(media_list), chunk_size):
12-
bulk_create_with_history(
13-
media_list[i : i + chunk_size],
6+
def bulk_chunk_import(bulk_media, model, user, mode):
    """Bulk import media using the strategy selected by ``mode``.

    Args:
        bulk_media: Unsaved model instances to import.
        model: Model class the instances belong to.
        user: User handed to the history-aware bulk helpers.
        mode: ``"new"`` to only create records that do not exist yet, or
            ``"overwrite"`` to create missing records and update existing ones.

    Returns:
        The count returned by the selected helper.

    Raises:
        ValueError: If ``mode`` is neither ``"new"`` nor ``"overwrite"``.
    """
    if mode == "new":
        return bulk_create_new_with_history(bulk_media, model, user)

    if mode == "overwrite":
        return bulk_create_update_with_history(bulk_media, model, user)

    # An unrecognised mode used to fall through to the return statement with
    # the result variable never assigned; fail with an explicit error instead.
    msg = f"Unknown import mode: {mode!r}"
    raise ValueError(msg)
19+
20+
21+
def bulk_create_new_with_history(bulk_media, model, user):
    """Bulk create only the records that do not exist yet.

    Records are identified by the model's unique-constraint fields, read from
    each instance through the ``<field>_id`` attribute. A record is skipped
    when its key already exists in the database or was already seen earlier
    in ``bulk_media``; the first occurrence wins.

    Args:
        bulk_media: Unsaved model instances to import.
        model: Model class the instances belong to.
        user: Default user recorded on the created history entries.

    Returns:
        The number of records passed on for creation.
    """
    # Filtering is done up front because bulk_create_with_history returns all
    # objects even if they weren't created due to conflicts, so its return
    # value cannot be used to count what was really inserted.
    unique_fields = app.database.get_unique_constraint_fields(model)
    seen_keys = set(model.objects.values_list(*unique_fields))

    new_records = []
    for record in bulk_media:
        record_key = tuple(getattr(record, field + "_id") for field in unique_fields)
        if record_key in seen_keys:
            continue
        # Remember the key so a later duplicate inside the same batch is not
        # inserted twice, which would violate the unique constraint.
        seen_keys.add(record_key)
        new_records.append(record)

    bulk_create_with_history(
        new_records,
        model,
        batch_size=500,
        default_user=user,
    )

    return len(new_records)
45+
46+
47+
def bulk_create_update_with_history(
    bulk_media,
    model,
    user,
):
    """Bulk create new records and update existing ones with history tracking.

    Records are matched to existing rows by the model's unique-constraint
    fields, read from each instance through the ``<field>_id`` attribute.
    Matching records get the existing primary key and are updated; the rest
    are created. If several unmatched records share the same key, only the
    first is created.

    Args:
        bulk_media: Unsaved model instances to import.
        model: Model class the instances belong to.
        user: Default user recorded on the history entries.

    Returns:
        The number of created objects plus the value returned by
        ``bulk_update_with_history`` for the updated ones.
    """
    unique_fields = app.database.get_unique_constraint_fields(model)
    # Everything except the identifying fields and the primary key is updatable
    update_fields = [
        field
        for field in app.database.get_fields(model)
        if field not in unique_fields and field != "id"
    ]

    def unique_key(record):
        """Build the tuple of unique-field values identifying a record."""
        return tuple(getattr(record, field + "_id") for field in unique_fields)

    # Each unique field is filtered independently, so this can match more rows
    # than strictly needed; exact matching happens through the lookup below.
    existing_rows = model.objects.filter(
        **{
            f"{field}__in": [getattr(record, field + "_id") for record in bulk_media]
            for field in unique_fields
        },
    ).values(*unique_fields, "id")

    # Map each existing unique-field combination to its primary key
    existing_lookup = {
        tuple(row[field] for field in unique_fields): row["id"]
        for row in existing_rows
    }

    # Split records into new and existing based on the unique constraint
    create_objs = []
    update_objs = []
    queued_keys = set()

    for record in bulk_media:
        record_key = unique_key(record)
        if record_key in existing_lookup:
            # Set the primary key so the instance targets the existing row
            record.id = existing_lookup[record_key]
            update_objs.append(record)
        elif record_key not in queued_keys:
            queued_keys.add(record_key)
            create_objs.append(record)
        # Otherwise the record duplicates one already queued for creation;
        # inserting it too would violate the unique constraint, so skip it.

    # Bulk create new records
    num_created = 0
    if create_objs:
        created_objs = bulk_create_with_history(
            create_objs,
            model,
            batch_size=500,
            default_user=user,
        )
        num_created = len(created_objs)

    # Bulk update existing records
    num_updated = 0
    if update_objs and update_fields:
        num_updated = bulk_update_with_history(
            update_objs,
            model,
            fields=update_fields,
            batch_size=500,
            default_user=user,
        )

    return num_created + num_updated

src/integrations/imports/anilist.py

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
logger = logging.getLogger(__name__)
1111

1212

13-
def importer(username, user):
13+
def importer(username, user, mode):
1414
"""Import anime and manga ratings from Anilist."""
1515
query = """
1616
query ($userName: String){
@@ -94,19 +94,21 @@ def importer(username, user):
9494
response["data"]["anime"],
9595
"anime",
9696
user,
97+
mode,
9798
)
9899

99100
manga_imported, manga_warnings = import_media(
100101
response["data"]["manga"],
101102
"manga",
102103
user,
104+
mode,
103105
)
104106

105107
warning_messages = anime_warnings + manga_warnings
106108
return anime_imported, manga_imported, "\n".join(warning_messages)
107109

108110

109-
def import_media(media_data, media_type, user):
111+
def import_media(media_data, media_type, user, mode):
110112
"""Import media of a specific type from Anilist."""
111113
logger.info("Importing %s from Anilist", media_type)
112114

@@ -123,10 +125,7 @@ def import_media(media_data, media_type, user):
123125
)
124126

125127
model = apps.get_model(app_label="app", model_name=media_type)
126-
num_before = model.objects.filter(user=user).count()
127-
helpers.bulk_chunk_import(bulk_media, model, user)
128-
num_after = model.objects.filter(user=user).count()
129-
num_imported = num_after - num_before
128+
num_imported = helpers.bulk_chunk_import(bulk_media, model, user, mode)
130129

131130
logger.info("Imported %s %s", num_imported, media_type)
132131

src/integrations/imports/kitsu.py

Lines changed: 42 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -15,13 +15,50 @@
1515
KITSU_PAGE_LIMIT = 500
1616

1717

18-
def import_by_user_id(kitsu_id, user):
18+
def get_kitsu_id(username):
    """Look up the Kitsu user ID that belongs to a username.

    Raises ``ValueError`` when the lookup returns no user or more than one.
    """
    response = app.providers.services.api_request(
        "KITSU",
        "GET",
        f"{KITSU_API_BASE_URL}/users",
        params={"filter[name]": username},
    )
    matches = response["data"]

    if not matches:
        msg = f"User {username} not found."
        raise ValueError(msg)

    if len(matches) > 1:
        msg = (
            f"Multiple users found for {username}, please use your user ID. "
            "User IDs can be found in the URL when viewing your Kitsu profile."
        )
        raise ValueError(msg)

    return matches[0]["id"]
39+
40+
41+
def importer(kitsu_id, user, mode):
    """Import anime and manga ratings from Kitsu by user ID or username.

    Returns the number of anime imported, the number of manga imported and
    the warning messages joined by newlines.
    """
    # A non-numeric identifier is treated as a username and resolved to an ID
    user_id = kitsu_id if kitsu_id.isdigit() else get_kitsu_id(kitsu_id)

    anime_entries = get_media_response(user_id, "anime")
    anime_count, anime_warnings = import_media(anime_entries, "anime", user, mode)

    manga_entries = get_media_response(user_id, "manga")
    manga_count, manga_warnings = import_media(manga_entries, "manga", user, mode)

    all_warnings = anime_warnings + manga_warnings
    return anime_count, manga_count, "\n".join(all_warnings)
@@ -51,7 +88,7 @@ def get_media_response(kitsu_id, media_type):
5188
return all_data
5289

5390

54-
def importer(response, media_type, user):
91+
def import_media(response, media_type, user, mode):
5592
"""Import media from Kitsu and return the number of items imported."""
5693
logger.info("Importing %s from Kitsu", media_type)
5794

@@ -85,10 +122,7 @@ def importer(response, media_type, user):
85122
else:
86123
bulk_data.append(instance)
87124

88-
num_before = model.objects.filter(user=user).count()
89-
helpers.bulk_chunk_import(bulk_data, model, user)
90-
num_after = model.objects.filter(user=user).count()
91-
num_imported = num_after - num_before
125+
num_imported = helpers.bulk_chunk_import(bulk_data, model, user, mode)
92126

93127
logger.info("Imported %s %s", num_imported, media_type)
94128

src/integrations/imports/mal.py

Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -12,14 +12,14 @@
1212
base_url = "https://api.myanimelist.net/v2/users"
1313

1414

15-
def importer(username, user):
15+
def importer(username, user, mode):
    """Import anime and manga from MyAnimeList.

    Returns the anime result followed by the manga result, each as returned
    by ``import_media``.
    """
    # Anime is imported first, then manga
    anime_count, manga_count = (
        import_media(username, user, media_type, mode)
        for media_type in ("anime", "manga")
    )
    return anime_count, manga_count
2020

2121

22-
def import_media(username, user, media_type):
22+
def import_media(username, user, media_type, mode):
2323
"""Import media of a specific type from MyAnimeList."""
2424
logger.info("Fetching %s from MyAnimeList", media_type)
2525
params = {
@@ -32,11 +32,7 @@ def import_media(username, user, media_type):
3232
bulk_media = add_media_list(media_data, media_type, user)
3333

3434
model = apps.get_model(app_label="app", model_name=media_type)
35-
num_before = model.objects.filter(user=user).count()
36-
helpers.bulk_chunk_import(bulk_media, model, user)
37-
num_after = model.objects.filter(user=user).count()
38-
39-
num_imported = num_after - num_before
35+
num_imported = helpers.bulk_chunk_import(bulk_media, model, user, mode)
4036
logger.info("Imported %s %s", num_imported, media_type)
4137

4238
return num_imported

0 commit comments

Comments
 (0)