diff --git a/contentcuration/contentcuration/tests/testdata.py b/contentcuration/contentcuration/tests/testdata.py index 9c9fdc6068..7c3fc814d8 100644 --- a/contentcuration/contentcuration/tests/testdata.py +++ b/contentcuration/contentcuration/tests/testdata.py @@ -165,7 +165,7 @@ def node(data, parent=None): # noqa: C901 extra_fields = data["extra_fields"] else: extra_fields = { - "mastery_model": data["mastery_model"], + "mastery_model": data.get("mastery_model", "m_of_n"), "randomize": True, "m": data.get("m") or 0, "n": data.get("n") or 0, diff --git a/contentcuration/contentcuration/tests/utils/test_nodes.py b/contentcuration/contentcuration/tests/utils/test_nodes.py index 75451d9144..06c59eacbf 100644 --- a/contentcuration/contentcuration/tests/utils/test_nodes.py +++ b/contentcuration/contentcuration/tests/utils/test_nodes.py @@ -1,4 +1,5 @@ import datetime +import uuid from time import sleep import mock @@ -7,10 +8,15 @@ from django.db.models import F from django.db.models import Max from django.test import SimpleTestCase +from le_utils.constants import content_kinds +from le_utils.constants import format_presets from ..base import StudioTestCase +from contentcuration.models import File +from contentcuration.tests import testdata from contentcuration.tests.helpers import mock_class_instance from contentcuration.utils.nodes import calculate_resource_size +from contentcuration.utils.nodes import generate_diff from contentcuration.utils.nodes import ResourceSizeHelper from contentcuration.utils.nodes import SlowCalculationError from contentcuration.utils.nodes import STALE_MAX_CALCULATION_SIZE @@ -140,3 +146,269 @@ def test_small(self): size, stale = calculate_resource_size(self.root) self.assertEqual(10, size) self.assertFalse(stale) + + +class GenerateTreesDiffTestCase(StudioTestCase): + def setUp(self): + super(GenerateTreesDiffTestCase, self).setUpBase() + self.channel.staging_tree = self.channel.main_tree.copy() + self.channel.save() + + self.main_tree = self.channel.main_tree + self.staging_tree = self.channel.staging_tree + + def _get_stat(self, diff, stat_name): + """ + Helper function to get a specific stat from the diff. + """ + for stat in diff.get("stats", []): + if stat.get("field") == stat_name: + return stat + raise ValueError(f"Stat '{stat_name}' not found in diff.") + + def _create_dummy_files( + self, + contentnode=None, + assessment_item=None, + file_size=1000, + num_files=1, + preset=None, + ): + """ + Helper function to create a file associated with a content node or assessment item. + """ + for _ in range(num_files): + file = File.objects.create( + file_size=file_size, + preset_id=preset, + contentnode=contentnode, + assessment_item=assessment_item, + ) + file.save() + + def _create_dummy_resources(self, count=1, kind=content_kinds.VIDEO, parent=None): + """ + Helper function to create dummy resources under a given parent node. + """ + num_children = parent.get_children().count() if parent else 0 + for i in range(count): + testdata.node( + { + "kind_id": kind, + "title": f"Test {kind.capitalize()} {i}", + "sort_order": num_children + i + 1, + }, + parent=parent, + ) + + def _create_dummy_exercise(self, count=1, parent=None, num_assesments=1): + """ + Helper function to create dummy exercises with a specified number of assessment items. + """ + num_children = parent.get_children().count() if parent else 0 + for i in range(count): + testdata.node( + { + "kind_id": content_kinds.EXERCISE, + "mastery_model": "do_all", + "title": f"Test Exercise {i}", + "sort_order": num_children + i + 1, + "assessment_items": [ + { + "type": "single_selection", + "question": f"Question {j + 1}?", + "assessment_id": uuid.uuid4(), + "answers": [ + { + "answer": f"Answer {k + 1}", + "correct": k == 0, # First answer is correct + "help_text": "", + } + for k in range(2) + ], + } + for j in range(num_assesments) + ], + }, + parent=parent, + ) + + def test_generate_diff_for_same_tree(self): + diff = generate_diff(self.main_tree.id, self.main_tree.id) + stats = diff.get("stats", []) + for stat in stats: + self.assertTrue(stat["original"] == stat["changed"]) + + def test_generate_diff_for_equal_trees(self): + diff = generate_diff(self.main_tree.id, self.staging_tree.id) + stats = diff.get("stats", []) + for stat in stats: + if stat["field"] == "date_created": + # date_created is not expected to be the same + continue + + self.assertTrue(stat["original"] == stat["changed"]) + + def test_generate_diff_for_resources_files_sizes(self): + count_new_files = 3 + count_size_per_file = 1000 + + staging_video_resource = ( + self.staging_tree.get_descendants().filter(kind=content_kinds.VIDEO).first() + ) + self._create_dummy_files( + contentnode=staging_video_resource, + file_size=count_size_per_file, + num_files=count_new_files, + ) + + # How many new files were added times the size of each file + expected_difference = count_new_files * count_size_per_file + + diff = generate_diff(self.staging_tree.id, self.main_tree.id) + file_size_in_bytes_stat = self._get_stat(diff, "file_size_in_bytes") + + self.assertEqual(file_size_in_bytes_stat.get("difference"), expected_difference) + + def test_generate_diff_for_assesments_files_sizes(self): + count_new_files = 3 + count_size_per_file = 1000 + + staging_exercise_resource = ( + self.staging_tree.get_descendants() + .filter(kind=content_kinds.EXERCISE) + .first() + ) + staging_assessment_item = staging_exercise_resource.assessment_items.first() + + self._create_dummy_files( + assessment_item=staging_assessment_item, + file_size=count_size_per_file, + num_files=count_new_files, + ) + + # How many new files were added times the size of each file + expected_difference = count_new_files * count_size_per_file + + diff = generate_diff(self.staging_tree.id, self.main_tree.id) + file_size_in_bytes_stat = self._get_stat(diff, "file_size_in_bytes") + + self.assertEqual(file_size_in_bytes_stat.get("difference"), expected_difference) + + def test_generate_diff_for_all_files_sizes(self): + count_new_files = 3 + count_size_per_file = 1000 + + staging_exercise_resource = ( + self.staging_tree.get_descendants() + .filter(kind=content_kinds.EXERCISE) + .first() + ) + staging_assessment_item = staging_exercise_resource.assessment_items.first() + + self._create_dummy_files( + contentnode=staging_exercise_resource, + file_size=count_size_per_file, + num_files=count_new_files, + ) + + self._create_dummy_files( + assessment_item=staging_assessment_item, + file_size=count_size_per_file, + num_files=count_new_files, + ) + + resource_files_size = count_new_files * count_size_per_file + assessment_files_size = count_new_files * count_size_per_file + + expected_difference = resource_files_size + assessment_files_size + + diff = generate_diff(self.staging_tree.id, self.main_tree.id) + file_size_in_bytes_stat = self._get_stat(diff, "file_size_in_bytes") + + self.assertEqual(file_size_in_bytes_stat.get("difference"), expected_difference) + + def test_generate_diff_for_num_resources(self): + # Creating files just to test that it doesnt affect the num_resources stat + count_new_files = 4 + staging_exercise_resource = ( + self.staging_tree.get_descendants() + .filter(kind=content_kinds.EXERCISE) + .first() + ) + staging_assessment_item = staging_exercise_resource.assessment_items.first() + self._create_dummy_files( + contentnode=staging_exercise_resource, + file_size=1000, + num_files=count_new_files, + ) + self._create_dummy_files( + assessment_item=staging_assessment_item, + file_size=1000, + num_files=count_new_files, + ) + + count_new_resources = 5 + self._create_dummy_resources( + count=count_new_resources, + kind=content_kinds.VIDEO, + parent=self.staging_tree, + ) + + diff = generate_diff(self.staging_tree.id, self.main_tree.id) + count_resources_stat = self._get_stat(diff, "count_resources") + + self.assertEqual(count_resources_stat.get("difference"), count_new_resources) + + def test_generate_diff_for_num_assessment_items(self): + count_new_exercises = 3 + count_assessment_items_per_exercise = 2 + + self._create_dummy_exercise( + count=count_new_exercises, + parent=self.staging_tree, + num_assesments=count_assessment_items_per_exercise, + ) + + expected_difference = count_new_exercises * count_assessment_items_per_exercise + + diff = generate_diff(self.staging_tree.id, self.main_tree.id) + count_questions_stat = self._get_stat(diff, "count_questions") + self.assertEqual(count_questions_stat.get("difference"), expected_difference) + + def test_generate_diff_for_num_subtitle_files(self): + count_new_subtitle_files = 3 + staging_video_resource = ( + self.staging_tree.get_descendants().filter(kind=content_kinds.VIDEO).first() + ) + + for i in range(count_new_subtitle_files): + self._create_dummy_files( + contentnode=staging_video_resource, + file_size=1000, + num_files=1, + preset=format_presets.VIDEO_SUBTITLE, + ) + + diff = generate_diff(self.staging_tree.id, self.main_tree.id) + count_subtitles_stat = self._get_stat(diff, "count_subtitles") + + self.assertEqual( + count_subtitles_stat.get("difference"), count_new_subtitle_files + ) + + def test_generate_diff_for_resources_types(self): + new_resources = { + content_kinds.VIDEO: 3, + content_kinds.TOPIC: 2, + content_kinds.EXERCISE: 1, + } + for kind, count in new_resources.items(): + self._create_dummy_resources( + count=count, kind=kind, parent=self.staging_tree + ) + diff = generate_diff(self.staging_tree.id, self.main_tree.id) + for kind, name in content_kinds.choices: + stat = self._get_stat(diff, f"count_{kind}s") + expected_count = new_resources.get(kind, 0) + self.assertEqual(stat.get("difference"), expected_count) diff --git a/contentcuration/contentcuration/utils/nodes.py b/contentcuration/contentcuration/utils/nodes.py index 200c640a9c..aa424ff27e 100644 --- a/contentcuration/contentcuration/utils/nodes.py +++ b/contentcuration/contentcuration/utils/nodes.py @@ -9,6 +9,8 @@ from django.core.exceptions import ValidationError from django.core.files.storage import default_storage from django.db.models import Count +from django.db.models import Exists +from django.db.models import OuterRef from django.db.models import Sum from django.utils import timezone from le_utils.constants import completion_criteria @@ -208,6 +210,36 @@ def get_diff(updated, original): return None +def _get_file_sizes(descendants_qs): + resource_sizes = {} + assesments_sizes = {} + + if descendants_qs is not None: + descendants_exists_subquery = descendants_qs.filter( + id=OuterRef("contentnode_id") + ) + + resource_sizes = File.objects.filter( + Exists(descendants_exists_subquery) + ).aggregate( + resource_size=Sum("file_size"), + ) + + assesments_sizes = AssessmentItem.objects.filter( + Exists(descendants_exists_subquery) + ).aggregate( + assessment_size=Sum("files__file_size"), + assessment_count=Count("id", distinct=True), + ) + + file_size = (resource_sizes.get("resource_size") or 0) + ( + assesments_sizes.get("assessment_size") or 0 + ) + assessment_count = assesments_sizes.get("assessment_count") or 0 + + return file_size, assessment_count + + def generate_diff(updated_id, original_id): updated = ContentNode.filter_by_pk(pk=updated_id).first() original = ContentNode.filter_by_pk(pk=original_id).first() @@ -228,34 +260,9 @@ def generate_diff(updated_id, original_id): else {} ) - original_file_sizes = ( - main_descendants.aggregate( - resource_size=Sum("files__file_size"), - assessment_size=Sum("assessment_items__files__file_size"), - assessment_count=Count("assessment_items"), - ) - if original - else {} - ) + original_file_size, original_question_count = _get_file_sizes(main_descendants) - updated_file_sizes = ( - updated_descendants.aggregate( - resource_size=Sum("files__file_size"), - assessment_size=Sum("assessment_items__files__file_size"), - assessment_count=Count("assessment_items"), - ) - if updated - else {} - ) - - original_file_size = (original_file_sizes.get("resource_size") or 0) + ( - original_file_sizes.get("assessment_size") or 0 - ) - updated_file_size = (updated_file_sizes.get("resource_size") or 0) + ( - updated_file_sizes.get("assessment_size") or 0 - ) - original_question_count = original_file_sizes.get("assessment_count") or 0 - updated_question_count = updated_file_sizes.get("assessment_count") or 0 + updated_file_size, updated_question_count = _get_file_sizes(updated_descendants) original_resource_count = ( original.get_descendants().exclude(kind_id="topic").count() if original else 0