diff --git a/contentcuration/contentcuration/models.py b/contentcuration/contentcuration/models.py index f193921afb..4265644ab4 100644 --- a/contentcuration/contentcuration/models.py +++ b/contentcuration/contentcuration/models.py @@ -2250,11 +2250,13 @@ def mark_complete(self): # noqa C901 | ( # A non-blank question ~Q(question="") - # Non-blank answers - & ~Q(answers="[]") - # With either an input question or one answer marked as correct + # Non-blank answers, unless it is a free response question + # (which is allowed to have no answers) + & (~Q(answers="[]") | Q(type=exercises.FREE_RESPONSE)) + # With either an input or free response question or one answer marked as correct & ( Q(type=exercises.INPUT_QUESTION) + | Q(type=exercises.FREE_RESPONSE) | Q(answers__iregex=r'"correct":\s*true') ) ) diff --git a/contentcuration/contentcuration/tests/test_contentnodes.py b/contentcuration/contentcuration/tests/test_contentnodes.py index 420ff69b2b..0c9e444300 100644 --- a/contentcuration/contentcuration/tests/test_contentnodes.py +++ b/contentcuration/contentcuration/tests/test_contentnodes.py @@ -1223,6 +1223,29 @@ def test_create_exercise_invalid_assessment_item_no_answers(self): new_obj.mark_complete() self.assertFalse(new_obj.complete) + def test_create_exercise_valid_assessment_item_free_response_no_answers(self): + licenses = list( + License.objects.filter( + copyright_holder_required=False, is_custom=False + ).values_list("pk", flat=True) + ) + channel = testdata.channel() + new_obj = ContentNode( + title="yes", + kind_id=content_kinds.EXERCISE, + parent=channel.main_tree, + license_id=licenses[0], + extra_fields=self.new_extra_fields, + ) + new_obj.save() + AssessmentItem.objects.create( + contentnode=new_obj, + question="This is a question", + type=exercises.FREE_RESPONSE, + ) + new_obj.mark_complete() + self.assertTrue(new_obj.complete) + def test_create_exercise_invalid_assessment_item_no_correct_answers(self): licenses = list( License.objects.filter( diff 
--git a/contentcuration/contentcuration/tests/test_exportchannel.py b/contentcuration/contentcuration/tests/test_exportchannel.py index 57599c0942..5c850597d7 100644 --- a/contentcuration/contentcuration/tests/test_exportchannel.py +++ b/contentcuration/contentcuration/tests/test_exportchannel.py @@ -15,6 +15,7 @@ from kolibri_content.router import get_active_content_database from kolibri_content.router import set_active_content_database from le_utils.constants import exercises +from le_utils.constants import format_presets from le_utils.constants.labels import accessibility_categories from le_utils.constants.labels import learning_activities from le_utils.constants.labels import levels @@ -33,6 +34,7 @@ from .testdata import tree from contentcuration import models as cc from contentcuration.models import CustomTaskMetadata +from contentcuration.utils.assessment.qti.archive import hex_to_qti_id from contentcuration.utils.celery.tasks import generate_task_signature from contentcuration.utils.publish import ChannelIncompleteError from contentcuration.utils.publish import convert_channel_thumbnail @@ -209,6 +211,48 @@ def setUp(self): ai.contentnode = legacy_exercise ai.save() + # Add an exercise with free response question to test QTI generation + qti_extra_fields = { + "options": { + "completion_criteria": { + "model": "mastery", + "threshold": { + "m": 1, + "n": 2, + "mastery_model": exercises.M_OF_N, + }, + } + } + } + qti_exercise = create_node( + { + "kind_id": "exercise", + "title": "QTI Free Response Exercise", + "extra_fields": qti_extra_fields, + } + ) + qti_exercise.complete = True + qti_exercise.parent = current_exercise.parent + qti_exercise.save() + + # Create a free response assessment item + cc.AssessmentItem.objects.create( + contentnode=qti_exercise, + assessment_id=uuid.uuid4().hex, + type=exercises.FREE_RESPONSE, + question="What is the capital of France?", + answers=json.dumps([{"answer": "Paris", "correct": True}]), + hints=json.dumps([]), + 
raw_data="{}", + order=4, + randomize=False, + ) + + for ai in current_exercise.assessment_items.all()[:2]: + ai.id = None + ai.contentnode = qti_exercise + ai.save() + first_topic = self.content_channel.main_tree.get_descendants().first() # Add a publishable topic to ensure it does not inherit but that its children do @@ -400,7 +444,7 @@ def test_inherited_language(self): parent_id=first_topic_node_id )[1:]: if child.kind == "topic": - self.assertIsNone(child.lang_id) + self.assertEqual(child.lang_id, self.content_channel.language_id) self.assertEqual(child.children.first().lang_id, "fr") else: self.assertEqual(child.lang_id, "fr") @@ -558,6 +602,46 @@ def test_publish_no_modify_legacy_exercise_extra_fields(self): {"mastery_model": exercises.M_OF_N, "randomize": True, "m": 1, "n": 2}, ) + def test_qti_exercise_generates_qti_archive(self): + """Test that exercises with free response questions generate QTI archive files.""" + qti_exercise = cc.ContentNode.objects.get(title="QTI Free Response Exercise") + + # Check that a QTI archive file was created + qti_files = qti_exercise.files.filter(preset_id=format_presets.QTI_ZIP) + self.assertEqual( + qti_files.count(), + 1, + "QTI exercise should have exactly one QTI archive file", + ) + + qti_file = qti_files.first() + self.assertIsNotNone( + qti_file.file_on_disk, "QTI file should have file_on_disk content" + ) + self.assertTrue( + qti_file.original_filename.endswith(".zip"), + "QTI file should be a zip archive", + ) + + def test_qti_archive_contains_manifest_and_assessment_ids(self): + + published_qti_exercise = kolibri_models.ContentNode.objects.get( + title="QTI Free Response Exercise" + ) + assessment_ids = ( + published_qti_exercise.assessmentmetadata.first().assessment_item_ids + ) + + # Expect three assessment IDs: the free response item plus the two items copied in setUp + self.assertEqual( + len(assessment_ids), 3, "Should have exactly three assessment IDs" + ) + + # Each assessment ID, in order, should match the QTI id of the corresponding 
assessment item + qti_exercise = cc.ContentNode.objects.get(title="QTI Free Response Exercise") + for i, ai in enumerate(qti_exercise.assessment_items.order_by("order")): + self.assertEqual(assessment_ids[i], hex_to_qti_id(ai.assessment_id)) + class EmptyChannelTestCase(StudioTestCase): @classmethod diff --git a/contentcuration/contentcuration/tests/testdata.py b/contentcuration/contentcuration/tests/testdata.py index e938b3b237..4a0db4fbe8 100644 --- a/contentcuration/contentcuration/tests/testdata.py +++ b/contentcuration/contentcuration/tests/testdata.py @@ -217,7 +217,9 @@ def tree(parent=None): def channel(name="testchannel"): channel_creator = user() - channel = cc.Channel.objects.create(name=name, actor_id=channel_creator.id) + channel = cc.Channel.objects.create( + name=name, actor_id=channel_creator.id, language_id="en" + ) channel.save() channel.main_tree = tree() diff --git a/contentcuration/contentcuration/tests/utils/qti/__init__.py b/contentcuration/contentcuration/tests/utils/qti/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/contentcuration/contentcuration/tests/utils/qti/test_assessment_items.py b/contentcuration/contentcuration/tests/utils/qti/test_assessment_items.py new file mode 100644 index 0000000000..6bf2f71e51 --- /dev/null +++ b/contentcuration/contentcuration/tests/utils/qti/test_assessment_items.py @@ -0,0 +1,504 @@ +import unittest + +from contentcuration.utils.assessment.qti.assessment_item import AssessmentItem +from contentcuration.utils.assessment.qti.assessment_item import CorrectResponse +from contentcuration.utils.assessment.qti.assessment_item import DefaultValue +from contentcuration.utils.assessment.qti.assessment_item import ItemBody +from contentcuration.utils.assessment.qti.assessment_item import MapEntry +from contentcuration.utils.assessment.qti.assessment_item import Mapping +from contentcuration.utils.assessment.qti.assessment_item import OutcomeDeclaration +from 
contentcuration.utils.assessment.qti.assessment_item import ResponseDeclaration +from contentcuration.utils.assessment.qti.assessment_item import ResponseProcessing +from contentcuration.utils.assessment.qti.assessment_item import Value +from contentcuration.utils.assessment.qti.constants import BaseType +from contentcuration.utils.assessment.qti.constants import Cardinality +from contentcuration.utils.assessment.qti.html import Blockquote +from contentcuration.utils.assessment.qti.html import Br +from contentcuration.utils.assessment.qti.html import Div +from contentcuration.utils.assessment.qti.html import P +from contentcuration.utils.assessment.qti.html import Strong +from contentcuration.utils.assessment.qti.interaction_types.simple import ( + ChoiceInteraction, +) +from contentcuration.utils.assessment.qti.interaction_types.simple import SimpleChoice +from contentcuration.utils.assessment.qti.interaction_types.text_based import ( + ExtendedTextInteraction, +) +from contentcuration.utils.assessment.qti.interaction_types.text_based import ( + TextEntryInteraction, +) +from contentcuration.utils.assessment.qti.prompt import Prompt + + +class QTIAssessmentItemTests(unittest.TestCase): + def test_true_false_question(self): + expected_xml = """ + + +true + + + + +1 + + + +

This is a True/False question?

+ +True +False + +
+
+""".replace( + "\n", "" + ) + + # Construct the QTI elements + response_declaration = ResponseDeclaration( + identifier="RESPONSE", + cardinality=Cardinality.SINGLE, + base_type=BaseType.IDENTIFIER, + correct_response=CorrectResponse(value=[Value(value="true")]), + ) + + outcome_declaration = OutcomeDeclaration( + identifier="SCORE", + cardinality=Cardinality.SINGLE, + base_type=BaseType.FLOAT, + default_value=DefaultValue(value=[Value(value="1")]), + ) + + true_choice = SimpleChoice(identifier="true", children=["True"]) + false_choice = SimpleChoice(identifier="false", children=["False"]) + choice_interaction = ChoiceInteraction( + response_identifier="RESPONSE", + max_choices=1, + answers=[true_choice, false_choice], + ) + + item_body = ItemBody( + children=[ + P(children=["This is a True/False question?"]), + choice_interaction, + ] + ) + response_processing = ResponseProcessing( + template="https://purl.imsglobal.org/spec/qti/v3p0/rptemplates/match_correct" + ) + + assessment_item = AssessmentItem( + identifier="beginnersguide007", + title="BG true false example ", + language="EN-US", + time_dependent=False, + item_body=item_body, + response_declaration=[response_declaration], + outcome_declaration=[outcome_declaration], + response_processing=response_processing, + ) + + # Generate the XML + generated_xml = assessment_item.to_xml_string() + + # Compare the generated XML with the expected XML + self.assertEqual(generated_xml.strip(), expected_xml.strip()) + + def test_multiple_choice_question(self): + expected_xml = """ + + +A +C +D + + + + +1 + + + +

QTI 3 is a new version released in 2022.

+ + +

Which of the following features are new to QTI 3?

+

Pick 3 choices.

+
+Shared Vocabulary +Pineapple Flavored +Catalogs for candidate-specific content. +Conformance features definitions +A subset of HTML5 elements +
+
+ +
""".replace( + "\n", "" + ) + response_declaration = ResponseDeclaration( + identifier="RESPONSE", + cardinality=Cardinality.MULTIPLE, + base_type=BaseType.IDENTIFIER, + correct_response=CorrectResponse( + value=[ + Value(value="A"), + Value(value="C"), + Value(value="D"), + ] + ), + ) + + outcome_declaration = OutcomeDeclaration( + identifier="SCORE", + cardinality=Cardinality.SINGLE, + base_type=BaseType.FLOAT, + default_value=DefaultValue(value=[Value(value="1")]), + ) + + prompt = Prompt( + children=[ + P( + children=[ + "Which of the following features are ", + Strong(children=["new"]), + " to QTI 3?", + ] + ), + P(children=["Pick 3 choices."]), + ] + ) + choice_a = SimpleChoice(identifier="A", children=["Shared Vocabulary"]) + choice_b = SimpleChoice(identifier="B", children=["Pineapple Flavored"]) + choice_c = SimpleChoice( + identifier="C", + children=["Catalogs for candidate-specific content."], + ) + choice_d = SimpleChoice( + identifier="D", children=["Conformance features definitions"] + ) + choice_e = SimpleChoice(identifier="E", children=["A subset of HTML5 elements"]) + choice_interaction = ChoiceInteraction( + response_identifier="RESPONSE", + max_choices=3, + answers=[choice_a, choice_b, choice_c, choice_d, choice_e], + prompt=prompt, + ) + + item_body = ItemBody( + children=[ + P(children=["QTI 3 is a new version released in 2022."]), + choice_interaction, + ] + ) + response_processing = ResponseProcessing( + template="https://purl.imsglobal.org/spec/qti/v3p0/rptemplates/match_correct" + ) + + assessment_item = AssessmentItem( + identifier="beginnersguide008", + title="BG Choice example", + language="EN-US", + time_dependent=False, + item_body=item_body, + response_declaration=[response_declaration], + outcome_declaration=[outcome_declaration], + response_processing=response_processing, + ) + + generated_xml = assessment_item.to_xml_string() + self.assertEqual(generated_xml.strip(), expected_xml.strip()) + + def test_long_text_question(self): + 
expected_xml = """ + + + +

Read this postcard from your English pen-friend, Sam.

+
+
+

Here is a postcard of my town. Please send me
+a postcard from your town. What size is your Town?
+What is the nicest part of your town?
+Where do you go in the evenings?

+

Sam

+
+
+ +Write Sam a postcard. Answer the questions. Write 23–30 words + +
+
""".replace( + "\n", "" + ) + response_declaration = ResponseDeclaration( + identifier="RESPONSE", + cardinality=Cardinality.SINGLE, + base_type=BaseType.STRING, + ) + + outcome_declaration = OutcomeDeclaration( + identifier="SCORE", + cardinality=Cardinality.SINGLE, + base_type=BaseType.FLOAT, + ) + + prompt_text = "Write Sam a postcard. Answer the questions. Write 23–30 words" + + extended_text_interaction = ExtendedTextInteraction( + response_identifier="RESPONSE", + prompt=Prompt(children=[prompt_text]), + ) + + item_body = ItemBody( + children=[ + P(children=["Read this postcard from your English pen-friend, Sam."]), + Div( + children=[ + Blockquote( + class_="postcard", + children=[ + P( + children=[ + "Here is a postcard of my town. Please send me", + Br(), + "a postcard from your town. What size is your Town?", + Br(), + "What is the nicest part of your town?", + Br(), + "Where do you go in the evenings?", + ] + ), + P(children=["Sam"]), + ], + ) + ] + ), + extended_text_interaction, + ] + ) + + assessment_item = AssessmentItem( + identifier="beginnersguide009", + title="BG Postcard example", + language="en-US", + time_dependent=False, + item_body=item_body, + response_declaration=[response_declaration], + outcome_declaration=[outcome_declaration], + ) + + generated_xml = assessment_item.to_xml_string() + self.assertEqual(generated_xml.strip(), expected_xml.strip()) + + def test_missing_word_question(self): + expected_xml = """ + + +York + + + + + + + + +

Identify the missing word in this famous quote from Shakespeare's Richard III.

+
+
+

Now is the winter of our discontent
+Made glorious summer by this sun of ; +
+And all the clouds that lour'd upon our house
+In the deep bosom of the ocean buried.

+
+
+
+ +
""".replace( + "\n", "" + ) + + response_declaration = ResponseDeclaration( + identifier="RESPONSE", + cardinality=Cardinality.SINGLE, + base_type=BaseType.STRING, + correct_response=CorrectResponse(value=[Value(value="York")]), + mapping=Mapping( + default_value=0, + map_entries=[ + MapEntry(map_key="York", mapped_value=1, case_sensitive=True), + MapEntry(map_key="york", mapped_value=0.5), + ], + ), + ) + + outcome_declaration = OutcomeDeclaration( + identifier="SCORE", + cardinality=Cardinality.SINGLE, + base_type=BaseType.FLOAT, + ) + + text_entry_interaction = TextEntryInteraction(response_identifier="RESPONSE") + + item_body = ItemBody( + children=[ + P( + children=[ + "Identify the missing word in this famous quote from Shakespeare's Richard III." + ] + ), + Div( + children=[ + Blockquote( + class_="postcard", + children=[ + P( + children=[ + "Now is the winter of our discontent", + Br(), + "Made glorious summer by this sun of ", + text_entry_interaction, + ";", + Br(), + "And all the clouds that lour'd upon our house", + Br(), + "In the deep bosom of the ocean buried.", + ] + ), + ], + ) + ] + ), + ] + ) + + response_processing = ResponseProcessing( + template="https://purl.imsglobal.org/spec/qti/v3p0/rptemplates/map_response" + ) + + assessment_item = AssessmentItem( + identifier="beginnersguide010", + title="BG Missing Word example", + language="en-US", + time_dependent=False, + item_body=item_body, + response_declaration=[response_declaration], + outcome_declaration=[outcome_declaration], + response_processing=response_processing, + ) + + generated_xml = assessment_item.to_xml_string() + self.assertEqual(generated_xml.strip(), expected_xml.strip()) + + def test_numerical_entry_question(self): + expected_xml = """ + + +42.5 + + + + +0.0 + + + +

Calculate the value of x when 2x + 5 = 90:

+

+
+
""".replace( + "\n", "" + ) + + response_declaration = ResponseDeclaration( + identifier="RESPONSE", + cardinality=Cardinality.SINGLE, + base_type=BaseType.FLOAT, + correct_response=CorrectResponse(value=[Value(value="42.5")]), + ) + + outcome_declaration = OutcomeDeclaration( + identifier="SCORE", + cardinality=Cardinality.SINGLE, + base_type=BaseType.FLOAT, + default_value=DefaultValue(value=[Value(value="0.0")]), + ) + + text_entry_interaction = TextEntryInteraction( + response_identifier="RESPONSE", + expected_length=10, + pattern_mask="^[0-9]*\\.?[0-9]+$", + placeholder_text="Enter a number", + ) + + assessment_item = AssessmentItem( + identifier="numerical-entry-item", + title="Numerical Entry Question", + language="en-US", + time_dependent=False, + item_body=ItemBody( + children=[ + P(children=["Calculate the value of x when 2x + 5 = 90:"]), + P(children=[text_entry_interaction]), + ] + ), + response_declaration=[response_declaration], + outcome_declaration=[outcome_declaration], + ) + + generated_xml = assessment_item.to_xml_string() + self.assertEqual(generated_xml.strip(), expected_xml.strip()) diff --git a/contentcuration/contentcuration/tests/utils/qti/test_fields.py b/contentcuration/contentcuration/tests/utils/qti/test_fields.py new file mode 100644 index 0000000000..40e4a9c0e5 --- /dev/null +++ b/contentcuration/contentcuration/tests/utils/qti/test_fields.py @@ -0,0 +1,332 @@ +import unittest + +from contentcuration.utils.assessment.qti.fields import validate_data_uri +from contentcuration.utils.assessment.qti.fields import validate_local_href_path +from contentcuration.utils.assessment.qti.fields import validate_local_src_path +from contentcuration.utils.assessment.qti.fields import validate_local_srcset + + +class TestValidateDataUri(unittest.TestCase): + def test_valid_data_uris(self): + valid_uris = [ + "data:text/plain;base64,SGVsbG8=", + 
"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8/5+hHgAHggJ/PchI7wAAAABJRU5ErkJggg==", + "data:text/plain,Hello%20World", + "data:,Hello", + "data:text/html,

Hello

", + 'data:application/json,{"key":"value"}', + "data:text/css,body{color:red}", + "data:image/svg+xml,", + "data:text/plain;charset=utf-8,Hello", + "data:text/plain;charset=utf-8;base64,SGVsbG8=", + ] + + for uri in valid_uris: + with self.subTest(uri=uri): + result = validate_data_uri(uri) + self.assertEqual(result, uri, f"Should return the same URI: {uri}") + + def test_invalid_data_uris(self): + """Test invalid data URI formats""" + invalid_uris = [ + "not-a-data-uri", + "data:", + "data", + "http://example.com", + "https://example.com/image.png", + "ftp://example.com/file.txt", + "file:///path/to/file", + "", + "data:text/plain", + "ata:text/plain,Hello", + ] + + for uri in invalid_uris: + with self.subTest(uri=uri): + with self.assertRaises(ValueError) as cm: + validate_data_uri(uri) + self.assertIn("Invalid data URI format", str(cm.exception)) + + +class TestValidateLocalHrefPath(unittest.TestCase): + def test_valid_relative_paths(self): + """Test valid relative paths""" + valid_paths = [ + "relative/path.jpg", + "../path.jpg", + "./file.png", + "file.txt", + "images/photo.jpg", + "docs/readme.md", + "assets/style.css", + "#fragment", + "?query=value", + "#fragment?query=value", + "path/to/file.html#section", + "subdir/../file.txt", + ] + + for path in valid_paths: + with self.subTest(path=path): + result = validate_local_href_path(path) + self.assertEqual(result, path, f"Should return the same path: {path}") + + def test_valid_data_uris_in_href(self): + data_uris = [ + "data:text/plain,Hello", + "data:image/png;base64,iVBORw0KGgo=", + ] + + for uri in data_uris: + with self.subTest(uri=uri): + result = validate_local_href_path(uri) + self.assertEqual(result, uri) + + def test_invalid_absolute_urls(self): + absolute_urls = [ + "http://example.com", + "https://example.com/path", + "ftp://example.com/file", + "mailto:test@example.com", + "tel:+1234567890", + "//example.com/path", + "/absolute/path", + "/", + ] + + for url in absolute_urls: + with 
self.subTest(url=url): + with self.assertRaises(ValueError) as cm: + validate_local_href_path(url) + self.assertIn("Absolute URLs not allowed", str(cm.exception)) + + def test_invalid_data_uris_in_href(self): + """Test that invalid data URIs are rejected""" + with self.assertRaises(ValueError) as cm: + validate_local_href_path("data:invalid") + self.assertIn("Invalid data URI format", str(cm.exception)) + + +class TestValidateLocalSrcPath(unittest.TestCase): + def test_valid_src_paths(self): + """Test valid src paths (must have actual file paths)""" + valid_paths = [ + "relative/path.jpg", + "../path.jpg", + "./file.png", + "file.txt", + "images/photo.jpg", + "subdir/../file.txt", + ] + + for path in valid_paths: + with self.subTest(path=path): + result = validate_local_src_path(path) + self.assertEqual(result, path) + + def test_valid_data_uris_in_src(self): + data_uris = [ + "data:text/plain,Hello", + "data:image/png;base64,iVBORw0KGgo=", + ] + + for uri in data_uris: + with self.subTest(uri=uri): + result = validate_local_src_path(uri) + self.assertEqual(result, uri) + + def test_invalid_empty_paths(self): + """Test rejection of empty paths and fragment-only""" + invalid_paths = ["#fragment", "?query=value", "#fragment?query=value"] + + for path in invalid_paths: + with self.subTest(path=path): + with self.assertRaises(ValueError) as cm: + validate_local_src_path(path) + self.assertIn("Invalid local src path", str(cm.exception)) + + def test_absolute_urls_rejected(self): + """Test that absolute URLs are still rejected""" + with self.assertRaises(ValueError) as cm: + validate_local_src_path("http://example.com/image.jpg") + self.assertIn("Absolute URLs not allowed", str(cm.exception)) + + +class TestValidateLocalSrcset(unittest.TestCase): + def test_empty_srcset(self): + empty_values = ["", " ", "\t", "\n"] + + for value in empty_values: + with self.subTest(value=repr(value)): + result = validate_local_srcset(value) + self.assertEqual(result, value) + + def 
test_single_image_srcset(self): + valid_srcsets = [ + "image.jpg 2x", + "image.jpg 1.5x", + "image.jpg 100w", + "image.jpg 50h", + "image.jpg 0.5x", + "path/to/image.png 2x", + "../images/photo.jpg 1x", + "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8/5+hHgAHggJ/PchI7wAAAABJRU5ErkJggg== 2x", + ] + + for srcset in valid_srcsets: + with self.subTest(srcset=srcset): + result = validate_local_srcset(srcset) + self.assertEqual(result, srcset) + + def test_data_uri_in_srcset(self): + valid_data_srcsets = [ + "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8/5+hHgAHggJ/PchI7wAAAABJRU5ErkJggg== 1x", + "data:text/plain,Hello%20World 2x", + "data:image/svg+xml, 1.5x", + 'data:application/json,{"key":"value"} 100w', + ] + + for srcset in valid_data_srcsets: + with self.subTest(srcset=srcset): + result = validate_local_srcset(srcset) + self.assertEqual(result, srcset) + + def test_multiple_images_srcset(self): + valid_srcsets = [ + "small.jpg 1x, large.jpg 2x", + "img-320.jpg 320w, img-640.jpg 640w, img-1280.jpg 1280w", + "portrait.jpg 480h, landscape.jpg 960h", + "image1.jpg 1x, image2.jpg 1.5x, image3.jpg 2x", + "a.jpg 1x,b.jpg 2x", # minimal spacing + ] + + for srcset in valid_srcsets: + with self.subTest(srcset=srcset): + result = validate_local_srcset(srcset) + self.assertEqual(result, srcset) + + def test_mixed_data_uri_and_regular_paths(self): + valid_mixed_srcsets = [ + "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8/5+hHgAHggJ/PchI7wAAAABJRU5ErkJggg== 1x, large.jpg 2x", + "small.jpg 1x, data:image/svg+xml, 2x", + "icon.png 1x, data:text/plain,fallback 2x, large.png 3x", + ] + + for srcset in valid_mixed_srcsets: + with self.subTest(srcset=srcset): + result = validate_local_srcset(srcset) + self.assertEqual(result, srcset) + + def test_multiple_data_uris_in_srcset(self): + valid_multi_data_srcsets = [ + "data:image/png;base64,ABC123 1x, 
data:image/png;base64,DEF456 2x", + "data:text/plain,Small,Image 1x, data:text/plain,Large,Image 2x", + "data:image/svg+xml, 1x, data:image/svg+xml, 2x, data:image/svg+xml, 3x", # noqa: E501 + 'data:application/json,{"size":"small"} 100w, data:application/json,{"size":"large"} 200w', + ] + + for srcset in valid_multi_data_srcsets: + with self.subTest(srcset=srcset): + result = validate_local_srcset(srcset) + self.assertEqual(result, srcset) + + def test_complex_mixed_srcsets(self): + complex_srcsets = [ + "thumb.jpg 1x, data:image/png;base64,MID123 1.5x, data:image/svg+xml, 2x, large.jpg 3x", + "data:text/plain,Icon,1 50w, regular-100.jpg 100w, data:text/plain,Icon,2 150w, regular-200.jpg 200w", + ] + + for srcset in complex_srcsets: + with self.subTest(srcset=srcset): + result = validate_local_srcset(srcset) + self.assertEqual(result, srcset) + + def test_invalid_descriptors(self): + """Test rejection of invalid descriptors""" + invalid_srcsets = [ + "image.jpg 2", # missing unit + "image.jpg x", # missing number + "image.jpg 2z", # invalid unit + "image.jpg 2.x", # malformed number + "image.jpg .x", # malformed number + "image.jpg 2xx", # double unit + "image.jpg -2x", # negative number + "image.jpg 2 x", # space in descriptor + ] + + for srcset in invalid_srcsets: + with self.subTest(srcset=srcset): + with self.assertRaises(ValueError): + validate_local_srcset(srcset) + + def test_invalid_urls_in_srcset(self): + invalid_srcsets = [ + "http://example.com/image.jpg 2x", + "https://cdn.example.com/img.png 1x, local.jpg 2x", + "/absolute/path.jpg 1x", + ] + + for srcset in invalid_srcsets: + with self.subTest(srcset=srcset): + with self.assertRaises(ValueError): + validate_local_srcset(srcset) + + def test_empty_srcset_entries(self): + invalid_srcsets = [ + "image.jpg 2x, ,other.jpg 1x", + ", image.jpg 2x", + "image.jpg 2x,", + ] + + for srcset in invalid_srcsets: + with self.subTest(srcset=srcset): + with self.assertRaises(ValueError): + 
validate_local_srcset(srcset) + + def test_missing_path_in_srcset(self): + invalid_srcsets = [ + "#fragment 2x", + "?query=value 1x", + ] + + for srcset in invalid_srcsets: + with self.subTest(srcset=srcset): + with self.assertRaises(ValueError): + validate_local_srcset(srcset) + + +class TestEdgeCases(unittest.TestCase): + def test_unicode_paths_href(self): + unicode_paths = ["café/ñ.jpg", "文件/图片.png", "файл.txt"] + + for path in unicode_paths: + with self.subTest(path=path): + result = validate_local_href_path(path) + self.assertEqual(result, path) + + def test_unicode_paths_src(self): + unicode_paths = ["café/ñ.jpg", "文件/图片.png", "файл.txt"] + + for path in unicode_paths: + with self.subTest(path=path): + result = validate_local_src_path(path) + self.assertEqual(result, path) + + def test_very_long_paths(self): + long_path = "a/" * 1000 + "file.txt" + + # Should handle long paths gracefully + result = validate_local_href_path(long_path) + self.assertEqual(result, long_path) + + def test_special_characters_in_data_uri(self): + special_data_uris = [ + "data:text/plain,Hello%20World%21", + "data:text/plain,<>&\"'", + 'data:application/json,{"key":"value"}', + ] + + for uri in special_data_uris: + with self.subTest(uri=uri): + result = validate_data_uri(uri) + self.assertEqual(result, uri) diff --git a/contentcuration/contentcuration/tests/utils/qti/test_html.py b/contentcuration/contentcuration/tests/utils/qti/test_html.py new file mode 100644 index 0000000000..dc5d162bc7 --- /dev/null +++ b/contentcuration/contentcuration/tests/utils/qti/test_html.py @@ -0,0 +1,776 @@ +import unittest + +from contentcuration.utils.assessment.qti.base import TextNode +from contentcuration.utils.assessment.qti.html import A +from contentcuration.utils.assessment.qti.html import Abbr +from contentcuration.utils.assessment.qti.html import Address +from contentcuration.utils.assessment.qti.html import Article +from contentcuration.utils.assessment.qti.html import Aside +from 
contentcuration.utils.assessment.qti.html import Audio +from contentcuration.utils.assessment.qti.html import B +from contentcuration.utils.assessment.qti.html import Bdi +from contentcuration.utils.assessment.qti.html import Bdo +from contentcuration.utils.assessment.qti.html import BdoDir +from contentcuration.utils.assessment.qti.html import Blockquote +from contentcuration.utils.assessment.qti.html import Br +from contentcuration.utils.assessment.qti.html import Caption +from contentcuration.utils.assessment.qti.html import Cite +from contentcuration.utils.assessment.qti.html import Code +from contentcuration.utils.assessment.qti.html import Col +from contentcuration.utils.assessment.qti.html import Colgroup +from contentcuration.utils.assessment.qti.html import Dd +from contentcuration.utils.assessment.qti.html import Details +from contentcuration.utils.assessment.qti.html import Dfn +from contentcuration.utils.assessment.qti.html import Div +from contentcuration.utils.assessment.qti.html import Dl +from contentcuration.utils.assessment.qti.html import Dt +from contentcuration.utils.assessment.qti.html import Em +from contentcuration.utils.assessment.qti.html import Figcaption +from contentcuration.utils.assessment.qti.html import Figure +from contentcuration.utils.assessment.qti.html import Footer +from contentcuration.utils.assessment.qti.html import H1 +from contentcuration.utils.assessment.qti.html import H2 +from contentcuration.utils.assessment.qti.html import H3 +from contentcuration.utils.assessment.qti.html import H4 +from contentcuration.utils.assessment.qti.html import H5 +from contentcuration.utils.assessment.qti.html import H6 +from contentcuration.utils.assessment.qti.html import Header +from contentcuration.utils.assessment.qti.html import Hr +from contentcuration.utils.assessment.qti.html import HTMLElement +from contentcuration.utils.assessment.qti.html import I +from contentcuration.utils.assessment.qti.html import Img +from 
contentcuration.utils.assessment.qti.html import Kbd +from contentcuration.utils.assessment.qti.html import Label +from contentcuration.utils.assessment.qti.html import Li +from contentcuration.utils.assessment.qti.html import Nav +from contentcuration.utils.assessment.qti.html import Object +from contentcuration.utils.assessment.qti.html import Ol +from contentcuration.utils.assessment.qti.html import OlType +from contentcuration.utils.assessment.qti.html import P +from contentcuration.utils.assessment.qti.html import Param +from contentcuration.utils.assessment.qti.html import Picture +from contentcuration.utils.assessment.qti.html import Pre +from contentcuration.utils.assessment.qti.html import Q +from contentcuration.utils.assessment.qti.html import Rp +from contentcuration.utils.assessment.qti.html import Rt +from contentcuration.utils.assessment.qti.html import Ruby +from contentcuration.utils.assessment.qti.html import Samp +from contentcuration.utils.assessment.qti.html import Section +from contentcuration.utils.assessment.qti.html import Small +from contentcuration.utils.assessment.qti.html import Source +from contentcuration.utils.assessment.qti.html import Span +from contentcuration.utils.assessment.qti.html import Strong +from contentcuration.utils.assessment.qti.html import Sub +from contentcuration.utils.assessment.qti.html import Summary +from contentcuration.utils.assessment.qti.html import Sup +from contentcuration.utils.assessment.qti.html import Table +from contentcuration.utils.assessment.qti.html import TBody +from contentcuration.utils.assessment.qti.html import Td +from contentcuration.utils.assessment.qti.html import TFoot +from contentcuration.utils.assessment.qti.html import Th +from contentcuration.utils.assessment.qti.html import THead +from contentcuration.utils.assessment.qti.html import Tr +from contentcuration.utils.assessment.qti.html import Track +from contentcuration.utils.assessment.qti.html import TrackKind +from 
contentcuration.utils.assessment.qti.html import Ul +from contentcuration.utils.assessment.qti.html import Var +from contentcuration.utils.assessment.qti.html import Video + + +class HTMLDataClassTests(unittest.TestCase): + def test_break_elements(self): + br_element = Br() + self.assertEqual(br_element.to_xml_string(), "
") + + hr_element = Hr() + self.assertEqual(hr_element.to_xml_string(), "
") + + def test_display_elements(self): + label_element = Label(children=["Test Label"], for_="test") + self.assertEqual( + label_element.to_xml_string(), '' + ) + + summary_element = Summary(children=["Test Summary"]) + self.assertEqual( + summary_element.to_xml_string(), "Test Summary" + ) + + figcaption_element = Figcaption(children=["Test Figcaption"]) + self.assertEqual( + figcaption_element.to_xml_string(), + "
Test Figcaption
", + ) + + def test_details_validation(self): + summary_element = Summary(children=["Test Summary"]) + + # Valid case: Summary as first child + valid_details = Details(children=[summary_element, "Test Content"]) + self.assertEqual( + valid_details.to_xml_string(), + "
Test SummaryTest Content
", + ) + + # Invalid case: No Summary element + with self.assertRaises(ValueError): + Details(children=["Test Content"]) + + # Invalid case: Summary not as first child + with self.assertRaises(ValueError): + Details(children=["Test Content", summary_element]) + + # Invalid case: Multiple Summary elements + second_summary = Summary(children=["Second Summary"]) + with self.assertRaises(ValueError): + Details(children=[summary_element, "Test Content", second_summary]) + + def test_figure_elements(self): + figure_element = Figure(children=["Test Figure"]) + self.assertEqual(figure_element.to_xml_string(), "
Test Figure
") + + figcaption_element = Figcaption(children=["Test Caption"]) + figure_with_caption = Figure(children=[figcaption_element, "Test Content"]) + self.assertEqual( + figure_with_caption.to_xml_string(), + "
Test Caption
Test Content
", + ) + + figure_with_caption_last = Figure(children=["Test Content", figcaption_element]) + self.assertEqual( + figure_with_caption_last.to_xml_string(), + "
Test Content
Test Caption
", + ) + + with self.assertRaises(ValueError): + Figure( + children=[figcaption_element, Figcaption(children=["Second Caption"])] + ) + + with self.assertRaises(ValueError): + Figure(children=["Before", figcaption_element, "After"]) + + def test_embed_elements(self): + img_element = Img(alt="Test Alt", src="test.jpg") + self.assertEqual( + img_element.to_xml_string(), 'Test Alt' + ) + + param_element = Param(name="test_param", value="test_value") + self.assertEqual( + param_element.to_xml_string(), + '', + ) + + object_element = Object(children=["Test Object"], params=[param_element]) + self.assertEqual( + object_element.to_xml_string(), + 'Test Object', + ) + + picture_source_element = Source(srcset="test.jpg 2x") + self.assertEqual( + picture_source_element.to_xml_string(), '' + ) + + picture_element = Picture(children=[picture_source_element], img=img_element) + self.assertEqual( + picture_element.to_xml_string(), + 'Test Alt', + ) + + def test_flow_elements(self): + blockquote_element = Blockquote( + children=["Test Blockquote"], cite="http://test.com" + ) + self.assertEqual( + blockquote_element.to_xml_string(), + '
Test Blockquote
', + ) + + div_element = Div(children=["Test Div"]) + self.assertEqual(div_element.to_xml_string(), "
Test Div
") + + article_element = Article(children=["Test Article"]) + self.assertEqual( + article_element.to_xml_string(), "
Test Article
" + ) + + section_element = Section(children=["Test Section"]) + self.assertEqual( + section_element.to_xml_string(), "
Test Section
" + ) + + nav_element = Nav(children=["Test Nav"]) + self.assertEqual(nav_element.to_xml_string(), "") + + aside_element = Aside(children=["Test Aside"]) + self.assertEqual(aside_element.to_xml_string(), "") + + header_element = Header(children=["Test Header"]) + self.assertEqual(header_element.to_xml_string(), "
Test Header
") + + footer_element = Footer(children=["Test Footer"]) + self.assertEqual(footer_element.to_xml_string(), "") + + address_element = Address(children=["Test Address"]) + self.assertEqual( + address_element.to_xml_string(), "
Test Address
" + ) + + def test_media_elements(self): + track_element = Track(src="test.vtt", kind=TrackKind.SUBTITLES) + self.assertEqual( + track_element.to_xml_string(), '' + ) + + media_source_element = Source(src="test.mp4") + self.assertEqual( + media_source_element.to_xml_string(), '' + ) + + audio_element = Audio(children=["Test Audio"], src="test.mp3") + self.assertEqual( + audio_element.to_xml_string(), + '', + ) + + video_element = Video(children=["Test Video"], src="test.mp4") + self.assertEqual( + video_element.to_xml_string(), + '', + ) + + def test_sequence_elements(self): + li_element = Li(children=["Test Li"]) + self.assertEqual(li_element.to_xml_string(), "
  • Test Li
  • ") + + ol_element = Ol(children=[li_element], type=OlType.NUMBERS) + self.assertEqual( + ol_element.to_xml_string(), '
    1. Test Li
    ' + ) + + ul_element = Ul(children=[li_element]) + self.assertEqual(ul_element.to_xml_string(), "") + + dt_element = Dt(children=["Test Dt"]) + self.assertEqual(dt_element.to_xml_string(), "
    Test Dt
    ") + + dd_element = Dd(children=["Test Dd"]) + self.assertEqual(dd_element.to_xml_string(), "
    Test Dd
    ") + + dl_element = Dl(children=[dt_element, dd_element]) + self.assertEqual( + dl_element.to_xml_string(), "
    Test Dt
    Test Dd
    " + ) + + def test_table_elements(self): + caption_element = Caption(children=["Test Caption"]) + self.assertEqual( + caption_element.to_xml_string(), "Test Caption" + ) + + col_element = Col() + self.assertEqual(col_element.to_xml_string(), '') + + colgroup_element = Colgroup(children=[col_element]) + self.assertEqual( + colgroup_element.to_xml_string(), '' + ) + + td_element = Td(children=["Test Td"]) + self.assertEqual(td_element.to_xml_string(), "Test Td") + + th_element = Th(children=["Test Th"]) + self.assertEqual(th_element.to_xml_string(), "Test Th") + + tr_element = Tr(children=[th_element, td_element]) + self.assertEqual( + tr_element.to_xml_string(), "Test ThTest Td" + ) + + tbody_element = TBody(children=[tr_element]) + self.assertEqual( + tbody_element.to_xml_string(), + "Test ThTest Td", + ) + + thead_element = THead(children=[tr_element]) + self.assertEqual( + thead_element.to_xml_string(), + "Test ThTest Td", + ) + + tfoot_element = TFoot(children=[tr_element]) + self.assertEqual( + tfoot_element.to_xml_string(), + "Test ThTest Td", + ) + + table_element = Table( + children=[ + caption_element, + colgroup_element, + thead_element, + tbody_element, + tfoot_element, + ] + ) + expected_html = '
    Test Caption
    Test ThTest Td
    Test ThTest Td
    Test ThTest Td
    ' # noqa: E501 + self.assertEqual(table_element.to_xml_string(), expected_html) + + def test_text_elements(self): + a_element = A(children=["Test A"], href="file.html") + self.assertEqual(a_element.to_xml_string(), 'Test A') + + p_element = P(children=["Test P"]) + self.assertEqual(p_element.to_xml_string(), "

    Test P

    ") + + span_element = Span(children=["Test Span"]) + self.assertEqual(span_element.to_xml_string(), "Test Span") + + h1_element = H1(children=["Test H1"]) + self.assertEqual(h1_element.to_xml_string(), "

    Test H1

    ") + + h2_element = H2(children=["Test H2"]) + self.assertEqual(h2_element.to_xml_string(), "

    Test H2

    ") + + h3_element = H3(children=["Test H3"]) + self.assertEqual(h3_element.to_xml_string(), "

    Test H3

    ") + + h4_element = H4(children=["Test H4"]) + self.assertEqual(h4_element.to_xml_string(), "

    Test H4

    ") + + h5_element = H5(children=["Test H5"]) + self.assertEqual(h5_element.to_xml_string(), "
    Test H5
    ") + + h6_element = H6(children=["Test H6"]) + self.assertEqual(h6_element.to_xml_string(), "
    Test H6
    ") + + pre_element = Pre(children=["Test Pre"]) + self.assertEqual(pre_element.to_xml_string(), "
    Test Pre
    ") + + em_element = Em(children=["Test Em"]) + self.assertEqual(em_element.to_xml_string(), "Test Em") + + code_element = Code(children=["Test Code"]) + self.assertEqual(code_element.to_xml_string(), "Test Code") + + kbd_element = Kbd(children=["Test Kbd"]) + self.assertEqual(kbd_element.to_xml_string(), "Test Kbd") + + i_element = I(children=["Test I"]) + self.assertEqual(i_element.to_xml_string(), "Test I") + + dfn_element = Dfn(children=["Test Dfn"]) + self.assertEqual(dfn_element.to_xml_string(), "Test Dfn") + + abbr_element = Abbr(children=["Test Abbr"]) + self.assertEqual(abbr_element.to_xml_string(), "Test Abbr") + + strong_element = Strong(children=["Test Strong"]) + self.assertEqual(strong_element.to_xml_string(), "Test Strong") + + sup_element = Sup(children=["Test Sup"]) + self.assertEqual(sup_element.to_xml_string(), "Test Sup") + + sub_element = Sub(children=["Test Sub"]) + self.assertEqual(sub_element.to_xml_string(), "Test Sub") + + var_element = Var(children=["Test Var"]) + self.assertEqual(var_element.to_xml_string(), "Test Var") + + small_element = Small(children=["Test Small"]) + self.assertEqual(small_element.to_xml_string(), "Test Small") + + samp_element = Samp(children=["Test Samp"]) + self.assertEqual(samp_element.to_xml_string(), "Test Samp") + + b_element = B(children=["Test B"]) + self.assertEqual(b_element.to_xml_string(), "Test B") + + cite_element = Cite(children=["Test Cite"]) + self.assertEqual(cite_element.to_xml_string(), "Test Cite") + + q_element = Q(children=["Test Q"]) + self.assertEqual(q_element.to_xml_string(), "Test Q") + + bdo_element = Bdo(dir=BdoDir.LTR, children=["Test Bdo"]) + self.assertEqual(bdo_element.to_xml_string(), 'Test Bdo') + + bdi_element = Bdi(children=["Test Bdi"]) + self.assertEqual(bdi_element.to_xml_string(), "Test Bdi") + + rt_element = Rt(children=["Test Rt"]) + self.assertEqual(rt_element.to_xml_string(), "Test Rt") + + rp_element = Rp(text="(") + self.assertEqual(rp_element.to_xml_string(), "(") 
+ + ruby_element = Ruby(children=["Test Ruby"]) + self.assertEqual(ruby_element.to_xml_string(), "Test Ruby") + + +class TestHTMLStringIntegration(unittest.TestCase): + def test_complex_html_parsing(self): + complex_html = """ +
    +

    This is a complex paragraph with emphasis and a + link to example.

    + Test image +
    + +
      +
    1. Numbered item one
    2. +
    3. Numbered item two
    4. +
    +

    Final paragraph with
    line break.

    + """ + + # Parse the HTML + elements = HTMLElement.from_html_string(complex_html) + + # Should have 4 root elements: div, ul, ol, p + self.assertEqual( + len(elements), 4, f"Expected 4 root elements, got {len(elements)}" + ) + + # Test first element: div with complex content + div_element = elements[0] + self.assertIsInstance(div_element, Div) + self.assertEqual(div_element.class_, "container") + self.assertEqual(div_element.id_, "main") + + # Div should have 2 children: p and img + self.assertEqual(len(div_element.children), 2) + + # Test paragraph inside div + p_element = div_element.children[0] + self.assertIsInstance(p_element, P) + + # Paragraph should have mixed content: text, strong, text, em, text, a, text + p_children = p_element.children + self.assertEqual(len(p_children), 7) + + # Find and test the strong element + strong_element = p_children[1] + self.assertEqual(len(strong_element.children), 1) + self.assertIsInstance(strong_element.children[0], TextNode) + self.assertEqual(strong_element.children[0].text, "complex") + + # Find and test the em element + em_element = p_children[3] + self.assertEqual(len(em_element.children), 1) + self.assertEqual(em_element.children[0].text, "emphasis") + + # Find and test the link element + a_element = p_children[5] + self.assertEqual(str(a_element.href), "file.html#anchor") + self.assertEqual(len(a_element.children), 1) + self.assertEqual(a_element.children[0].text, "link to example") + + # Test image element + img_element = div_element.children[1] + self.assertIsInstance(img_element, Img) + self.assertEqual(str(img_element.src), "image.jpg") + self.assertEqual(img_element.alt, "Test image") + self.assertEqual(img_element.width, 300) + self.assertEqual(img_element.height, 200) + + # Test second element: unordered list + ul_element = elements[1] + self.assertIsInstance(ul_element, Ul) + self.assertEqual(len(ul_element.children), 3) + + # Test first list item + li1 = ul_element.children[0] + 
self.assertIsInstance(li1, Li) + li1_children = li1.children + # Should have: TextNode("First "), Strong("bold"), TextNode(" item") + self.assertEqual(len(li1_children), 3) + + # Find strong in first list item + li1_strong = li1_children[1] + self.assertEqual(li1_strong.children[0].text, "bold") + + # Test second list item with link + li2 = ul_element.children[1] + self.assertIsInstance(li2, Li) + li2_link = li2.children[1] + self.assertEqual(li2_link.href, "page2.html") + + # Test third element: ordered list + ol_element = elements[2] + self.assertIsInstance(ol_element, Ol) + self.assertEqual(len(ol_element.children), 2) + + # Test ordered list items + ol_li1 = ol_element.children[0] + self.assertIsInstance(ol_li1, Li) + + ol_li2 = ol_element.children[1] + self.assertIsInstance(ol_li2, Li) + ol_li2_em = ol_li2.children[1] + self.assertEqual(ol_li2_em.children[0].text, "two") + + # Test fourth element: paragraph with line break + final_p = elements[3] + self.assertIsInstance(final_p, P) + br_element = final_p.children[1] + self.assertIsInstance(br_element, Br) + + def test_simple_html_parsing(self): + """Test parsing simple HTML elements""" + + simple_html = "

    Hello world!

    " + elements = HTMLElement.from_html_string(simple_html) + + self.assertEqual(len(elements), 1) + p = elements[0] + self.assertIsInstance(p, P) + self.assertEqual(len(p.children), 3) + + # Check strong element + strong = p.children[1] + self.assertIsInstance(strong, Strong) + self.assertEqual(strong.children[0].text, "world") + + def test_empty_and_self_closing_elements(self): + """Test parsing empty elements and self-closing tags""" + + html = """ +

    + test +
    +
    + """ + + elements = HTMLElement.from_html_string(html) + self.assertEqual(len(elements), 4) + + # Empty paragraph + self.assertIsInstance(elements[0], P) + self.assertEqual(len(elements[0].children), 0) + + # Image with attributes + self.assertIsInstance(elements[1], Img) + self.assertEqual(elements[1].src, "test.jpg") + self.assertEqual(elements[1].alt, "test") + + # Line break + self.assertIsInstance(elements[2], Br) + + # Div with empty span + self.assertIsInstance(elements[3], Div) + self.assertEqual(len(elements[3].children), 1) + self.assertIsInstance(elements[3].children[0], Span) + self.assertEqual(len(elements[3].children[0].children), 0) + + def test_roundtrip_conversion(self): + """Test that HTML -> Pydantic -> XML maintains structure""" + + original_html = """ +

    Test bold and italic text.

    + + """ + + # Parse to Pydantic objects + elements = HTMLElement.from_html_string(original_html) + + # Convert back to XML strings + xml_output = "".join(elem.to_xml_string() for elem in elements) + + self.assertEqual( + "".join(m.strip() for m in original_html.split("\n")), xml_output.strip() + ) + + def test_attribute_type_conversion(self): + """Test that attributes are properly converted to correct types""" + + html = """ +
    + Link + Alt text +
    + """ + + elements = HTMLElement.from_html_string(html) + div = elements[0] + + # Test div attributes + self.assertEqual(div.class_, "test-class") + self.assertEqual(div.id_, "test-id") + + # Test link attributes + a = div.children[0] + self.assertEqual(a.href, "file.html?query=test") + + # Test image attributes + img = div.children[1] + self.assertEqual(img.src, "image.png") + self.assertEqual(img.alt, "Alt text") + self.assertEqual(img.width, 100) + self.assertEqual(img.height, 50) + + +class TestFileDependencies(unittest.TestCase): + def test_img_src_dependencies(self): + img = Img(src="image.jpg", alt="Test image") + dependencies = img.get_file_dependencies() + self.assertEqual(dependencies, ["image.jpg"]) + + def test_img_srcset_dependencies(self): + img = Img( + src="fallback.jpg", + srcset="small.jpg 480w, medium.jpg 800w, large.jpg 1200w", + alt="Responsive image", + ) + dependencies = img.get_file_dependencies() + self.assertEqual( + set(dependencies), {"fallback.jpg", "small.jpg", "medium.jpg", "large.jpg"} + ) + + def test_img_srcset_with_density_descriptors(self): + img = Img( + src="image.jpg", + srcset="image.jpg 1x, image@2x.jpg 2x, image@3x.jpg 3x", + alt="High DPI image", + ) + dependencies = img.get_file_dependencies() + self.assertEqual( + set(dependencies), {"image.jpg", "image@2x.jpg", "image@3x.jpg"} + ) + + def test_a_href_dependencies(self): + a = A(href="document.pdf", children=["Download PDF"]) + dependencies = a.get_file_dependencies() + self.assertEqual(dependencies, ["document.pdf"]) + + def test_audio_src_dependencies(self): + audio = Audio(src="audio.mp3", children=["Audio not supported"]) + dependencies = audio.get_file_dependencies() + self.assertEqual(dependencies, ["audio.mp3"]) + + def test_video_src_dependencies(self): + video = Video(src="video.mp4", children=["Video not supported"]) + dependencies = video.get_file_dependencies() + self.assertEqual(dependencies, ["video.mp4"]) + + def test_source_src_dependencies(self): + 
source = Source(src="video.webm") + dependencies = source.get_file_dependencies() + self.assertEqual(dependencies, ["video.webm"]) + + def test_source_srcset_dependencies(self): + source = Source(srcset="banner-480.jpg 480w, banner-800.jpg 800w") + dependencies = source.get_file_dependencies() + self.assertEqual(set(dependencies), {"banner-480.jpg", "banner-800.jpg"}) + + def test_track_src_dependencies(self): + track = Track(src="subtitles.vtt", kind="subtitles") + dependencies = track.get_file_dependencies() + self.assertEqual(dependencies, ["subtitles.vtt"]) + + def test_blockquote_cite_dependencies(self): + blockquote = Blockquote( + cite="https://example.com/source.html", children=["Quote text"] + ) + dependencies = blockquote.get_file_dependencies() + # HttpUrl attributes are not included in file dependencies as they're external + self.assertEqual(dependencies, []) + + def test_nested_element_dependencies(self): + img = Img(src="nested.jpg", alt="Nested image") + link = A(href="page.html", children=["Link text"]) + div = Div(children=[img, link, "Some text"]) + + dependencies = div.get_file_dependencies() + self.assertEqual(set(dependencies), {"nested.jpg", "page.html"}) + + def test_complex_nested_dependencies(self): + # Create a complex structure with multiple file dependencies + img1 = Img(src="image1.jpg", alt="Image 1") + img2 = Img( + src="image2.png", + srcset="image2-small.png 480w, image2-large.png 1200w", + alt="Image 2", + ) + link = A(href="document.pdf", children=["Download"]) + audio = Audio(src="background.mp3", children=["Audio"]) + + source1 = Source(src="video.webm") + source2 = Source(src="video.mp4") + video = Video(children=[source1, source2, "Video not supported"]) + + root_div = Div(children=[img1, img2, link, audio, video]) + + dependencies = root_div.get_file_dependencies() + expected = [ + "image1.jpg", + "image2.png", + "image2-small.png", + "image2-large.png", + "document.pdf", + "background.mp3", + "video.webm", + "video.mp4", + ] 
+ self.assertEqual(set(dependencies), set(expected)) + + def test_picture_element_dependencies(self): + source1 = Source(srcset="mobile.jpg 480w, tablet.jpg 800w") + source2 = Source(srcset="desktop.jpg 1200w") + img = Img(src="fallback.jpg", alt="Picture") + picture = Picture(children=[source1, source2], img=img) + + dependencies = picture.get_file_dependencies() + expected = ["mobile.jpg", "tablet.jpg", "desktop.jpg", "fallback.jpg"] + self.assertEqual(set(dependencies), set(expected)) + + def test_table_with_dependencies(self): + img_cell = Td(children=[Img(src="table-image.jpg", alt="Table image")]) + link_cell = Td(children=[A(href="table-link.html", children=["Link"])]) + row = Tr(children=[img_cell, link_cell]) + table = Table(children=[row]) + + dependencies = table.get_file_dependencies() + self.assertEqual(set(dependencies), {"table-image.jpg", "table-link.html"}) + + def test_no_dependencies(self): + p = P(children=["Just text content"]) + dependencies = p.get_file_dependencies() + self.assertEqual(dependencies, []) + + def test_empty_srcset(self): + # Test that empty srcset doesn't break anything + img = Img(src="image.jpg", alt="Image") + dependencies = img.get_file_dependencies() + self.assertEqual(dependencies, ["image.jpg"]) + + def test_duplicate_dependencies_removed(self): + # Test that duplicate file paths are only included once + img1 = Img(src="same.jpg", alt="Image 1") + img2 = Img(src="same.jpg", alt="Image 2") + div = Div(children=[img1, img2]) + + dependencies = div.get_file_dependencies() + self.assertEqual(dependencies, ["same.jpg"]) + + def test_mixed_srcset_formats(self): + # Test srcset with mixed width and density descriptors + img = Img( + src="base.jpg", + srcset="small.jpg 300w, medium.jpg 1.5x, large.jpg 2x", + alt="Mixed srcset", + ) + dependencies = img.get_file_dependencies() + self.assertEqual( + set(dependencies), {"base.jpg", "small.jpg", "medium.jpg", "large.jpg"} + ) diff --git 
a/contentcuration/contentcuration/tests/utils/qti/test_imsmanifest.py b/contentcuration/contentcuration/tests/utils/qti/test_imsmanifest.py new file mode 100644 index 0000000000..949b88ffdd --- /dev/null +++ b/contentcuration/contentcuration/tests/utils/qti/test_imsmanifest.py @@ -0,0 +1,204 @@ +import unittest + +from contentcuration.utils.assessment.qti.imsmanifest import Dependency +from contentcuration.utils.assessment.qti.imsmanifest import File +from contentcuration.utils.assessment.qti.imsmanifest import Item +from contentcuration.utils.assessment.qti.imsmanifest import Manifest +from contentcuration.utils.assessment.qti.imsmanifest import Metadata +from contentcuration.utils.assessment.qti.imsmanifest import Organization +from contentcuration.utils.assessment.qti.imsmanifest import Organizations +from contentcuration.utils.assessment.qti.imsmanifest import Resource +from contentcuration.utils.assessment.qti.imsmanifest import Resources + + +class TestManifestXMLOutput(unittest.TestCase): + def test_metadata_to_xml_string(self): + metadata = Metadata(schema="test_schema", schemaversion="1.0") + expected_xml = "test_schema1.0" + self.assertEqual(metadata.to_xml_string(), expected_xml) + + metadata = Metadata() + expected_xml = "" + self.assertEqual(metadata.to_xml_string(), expected_xml) + + def test_item_to_xml_string(self): + item = Item(identifier="item1", identifierref="ref1") + expected_xml = '' + self.assertEqual(item.to_xml_string(), expected_xml) + + item = Item() + expected_xml = "" + self.assertEqual(item.to_xml_string(), expected_xml) + + def test_organization_to_xml_string(self): + item1 = Item(identifier="item1") + item2 = Item(identifier="item2") + organization = Organization( + identifier="org1", + structure="hierarchical", + title="Test Org", + item=[item1, item2], + ) + expected_xml = '' # noqa: E501 + self.assertEqual(organization.to_xml_string(), expected_xml) + + organization = Organization() + expected_xml = "" + 
self.assertEqual(organization.to_xml_string(), expected_xml) + + def test_organizations_to_xml_string(self): + org1 = Organization(identifier="org1") + org2 = Organization(identifier="org2") + organizations = Organizations(organizations=[org1, org2]) + expected_xml = '' + self.assertEqual(organizations.to_xml_string(), expected_xml) + organizations = Organizations() + expected_xml = "" + self.assertEqual(organizations.to_xml_string(), expected_xml) + + def test_file_to_xml_string(self): + file = File(href="test.html") + expected_xml = '' + self.assertEqual(file.to_xml_string(), expected_xml) + file = File() + expected_xml = "" + self.assertEqual(file.to_xml_string(), expected_xml) + + def test_resource_to_xml_string(self): + file1 = File(href="file1.html") + file2 = File(href="file2.html") + resource = Resource( + identifier="res1", type_="webcontent", href="res.zip", files=[file1, file2] + ) + expected_xml = '' + self.assertEqual(resource.to_xml_string(), expected_xml) + + resource = Resource(identifier="res1", type_="webcontent") + expected_xml = '' + self.assertEqual(resource.to_xml_string(), expected_xml) + + def test_resources_to_xml_string(self): + res1 = Resource(identifier="res1", type_="webcontent") + res2 = Resource(identifier="res2", type_="imscp") + resources = Resources(resources=[res1, res2]) + expected_xml = '' + self.assertEqual(resources.to_xml_string(), expected_xml) + resources = Resources() + expected_xml = "" + self.assertEqual(resources.to_xml_string(), expected_xml) + + def test_imsmanifest_to_xml_string(self): + metadata = Metadata(schema="test_schema", schemaversion="1.0") + organizations = Organizations(organizations=[Organization(identifier="org1")]) + resources = Resources( + resources=[Resource(identifier="res1", type_="webcontent")] + ) + manifest = Manifest( + identifier="manifest1", + version="1.0", + metadata=metadata, + organizations=organizations, + resources=resources, + ) + expected_xml = ( + "' # noqa: E501 + "test_schema1.0" + 
'' + '' + "" + ) + self.assertEqual(manifest.to_xml_string(), expected_xml) + + manifest = Manifest(identifier="democracy_manifest") + expected_xml = ( + '' + "" + "" + "" + "" + ) + self.assertEqual(manifest.to_xml_string(), expected_xml) + + def test_imsmanifest_full_integration(self): + manifest = Manifest( + identifier="level1-T1-test-entry", + version="1.0", + metadata=Metadata(schema="QTI Package", schemaversion="3.0.0"), + organizations=Organizations(), + resources=Resources( + resources=[ + Resource( + identifier="t1-test-entry-item1", + type_="imsqti_item_xmlv3p0", + href="items/choice-single-cardinality.xml", + files=[File(href="items/choice-single-cardinality.xml")], + dependencies=[Dependency(identifierref="image_resource_1")], + ), + Resource( + type_="webcontent", + identifier="image_resource_1", + href="items/images/badger.svg", + files=[File(href="items/images/badger.svg")], + ), + Resource( + identifier="t1-test-entry-item2", + type_="imsqti_item_xmlv3p0", + href="items/choice-multiple-cardinality.xml", + files=[File(href="items/choice-multiple-cardinality.xml")], + ), + Resource( + identifier="t1-test-entry-item3", + type_="imsqti_item_xmlv3p0", + href="items/text-entry.xml", + files=[File(href="items/text-entry.xml")], + ), + Resource( + identifier="t1-test-entry-item4", + type_="imsqti_item_xmlv3p0", + href="items/extended-text.xml", + files=[File(href="items/extended-text.xml")], + ), + Resource( + identifier="t1-test-entry", + type_="imsqti_test_xmlv3p0", + href="assessment.xml", + files=[File(href="assessment.xml")], + ), + ] + ), + ) + + expected_xml = ( + '' # noqa: E501 + "QTI Package3.0.0" + "" + "" + '' + '' + '' + "" + '' + '' + "" + '' + '' + "" + '' + '' + "" + '' + '' + "" + '' + '' + "" + "" + "" + ) + self.assertEqual(manifest.to_xml_string(), expected_xml) diff --git a/contentcuration/contentcuration/tests/utils/qti/test_mathml.py b/contentcuration/contentcuration/tests/utils/qti/test_mathml.py new file mode 100644 index 
0000000000..0bace05336 --- /dev/null +++ b/contentcuration/contentcuration/tests/utils/qti/test_mathml.py @@ -0,0 +1,1613 @@ +""" +This test suite was initially generated using Gemini 2.5 Pro Preview. +It was then manually refined to ensure correctness and completeness. +This was then supplemented with additional tests to cover missing edge cases +and validations using Claude Sonnet 4. + +Gemini prompt: +Please write a comprehensive test suite for this, assuming that everything defined +in these files can be imported from `contentcuration.utils.assessment.qti.mathml`. +I am more concerned with integration level testing - checking that appropriately +composed objects produce the correct MathML output when the to_xml_string method +is invoked, and that conversely, appropriate object structures are created +using the from_string method. + + +Claude prompt: +I have these files that define Pydantic objects for generating and validating MathML. +Here are my current tests for this. Please tell me what the tests cover well, and what is missing. +Formulate recommendations to supplement these tests, where testing conformance to the +MathML Core schema is most important, and testing specific quirks of the implementation is not at all important. +Where possible, generate a separate artifact for each separate additional set of tests, +so that I can choose which ones I want to include more easily. 
+""" +import unittest + +from pydantic import ValidationError + +from contentcuration.utils.assessment.qti.base import TextNode +from contentcuration.utils.assessment.qti.constants import Dir +from contentcuration.utils.assessment.qti.mathml import Annotation +from contentcuration.utils.assessment.qti.mathml import AnnotationXml +from contentcuration.utils.assessment.qti.mathml import Math +from contentcuration.utils.assessment.qti.mathml import MathMLDisplay +from contentcuration.utils.assessment.qti.mathml import MathMLElement +from contentcuration.utils.assessment.qti.mathml import MathMLForm +from contentcuration.utils.assessment.qti.mathml import Mfrac +from contentcuration.utils.assessment.qti.mathml import Mi +from contentcuration.utils.assessment.qti.mathml import Mn +from contentcuration.utils.assessment.qti.mathml import Mo +from contentcuration.utils.assessment.qti.mathml import Mrow +from contentcuration.utils.assessment.qti.mathml import Mspace +from contentcuration.utils.assessment.qti.mathml import Msubsup +from contentcuration.utils.assessment.qti.mathml import Mtable +from contentcuration.utils.assessment.qti.mathml import Mtd +from contentcuration.utils.assessment.qti.mathml import Mtr +from contentcuration.utils.assessment.qti.mathml import Semantics +from contentcuration.utils.assessment.qti.mathml.base import MathMLGroupingElement +from contentcuration.utils.assessment.qti.mathml.base import MathMLLayoutElement +from contentcuration.utils.assessment.qti.mathml.base import MathMLScriptElement +from contentcuration.utils.assessment.qti.mathml.base import MathMLTokenElement +from contentcuration.utils.assessment.qti.mathml.core import Merror +from contentcuration.utils.assessment.qti.mathml.core import Mmultiscripts +from contentcuration.utils.assessment.qti.mathml.core import Mover +from contentcuration.utils.assessment.qti.mathml.core import Mphantom +from contentcuration.utils.assessment.qti.mathml.core import Mprescripts +from 
contentcuration.utils.assessment.qti.mathml.core import Mroot +from contentcuration.utils.assessment.qti.mathml.core import Ms +from contentcuration.utils.assessment.qti.mathml.core import Msqrt +from contentcuration.utils.assessment.qti.mathml.core import Mstyle +from contentcuration.utils.assessment.qti.mathml.core import Msub +from contentcuration.utils.assessment.qti.mathml.core import Msup +from contentcuration.utils.assessment.qti.mathml.core import Mtext +from contentcuration.utils.assessment.qti.mathml.core import Munder +from contentcuration.utils.assessment.qti.mathml.core import Munderover + + +class TestFieldValidation(unittest.TestCase): + """Tests for field validation using the annotated types and enums.""" + + def test_length_percentage_valid_values(self): + valid_values = [ + "0", # unitless zero + "10px", # pixels + "2em", + "1.5em", # em units + "0.5rem", # rem units + "2pt", + "12pt", # points + "1in", + "2.5in", # inches + "1cm", + "10mm", # metric + "50%", + "100%", + "0%", + "150%", # percentages + "+10px", + "-5px", # signed values + "0.1vh", + "50vw", + "10vmin", + "20vmax", # viewport units + "1ch", + "2ex", # character units + ] + + for value in valid_values: + with self.subTest(value=value): + # Test on mathsize attribute + obj = Mi(mathsize=value, children=["x"]) + self.assertEqual(obj.mathsize, value) + + # Test on width attribute of Mspace + space_obj = Mspace(width=value) + self.assertEqual(space_obj.width, value) + + def test_length_percentage_invalid_values(self): + invalid_values = [ + "10", # number without unit (except 0) + "px", # unit without number + "10 px", # space in value + "10px ", # trailing space + " 10px", # leading space + "10px;", # invalid character + "10xyz", # invalid unit + "auto", # keyword values not allowed + "inherit", # keyword values not allowed + "", # empty string + "10px 20px", # multiple values + ] + + for value in invalid_values: + with self.subTest(value=value): + with 
self.assertRaises(ValidationError): + Mi(mathsize=value, children=["x"]) + + def test_color_value_valid_values(self): + valid_values = [ + "red", + "blue", + "green", + "black", + "white", # named colors + "transparent", + "currentColor", # special keywords + "#f00", + "#ff0000", + "#FF0000", # hex colors (3,6 chars) + "#ffff", + "#ffffffff", # hex with alpha (4,8 chars) + "rgb(255,0,0)", + "rgb(255, 0, 0)", # rgb function + "rgba(255,0,0,0.5)", + "rgba(255, 0, 0, 1)", # rgba function + "hsl(0,100%,50%)", + "hsl(0, 100%, 50%)", # hsl function + "hsla(0,100%,50%,0.5)", # hsla function + ] + + for value in valid_values: + with self.subTest(value=value): + obj = Mi(mathcolor=value, children=["x"]) + self.assertEqual(obj.mathcolor, value) + + def test_color_value_invalid_values(self): + """ + Note that we do not validate color names against a predefined list, + as this would require a comprehensive list of valid CSS color names. + Instead, we focus on the format of the color value. + We also do not validate that number values in rgb/rgba are within 0-255 range, + as CSS allows values outside this range (e.g., rgb(300, -50, 500)). 
+ """ + invalid_values = [ + "#ff", # too short hex + "#fffffffff", # too long hex + "#gggggg", # invalid hex characters + "rgb()", # empty rgb + "hsl()", # empty hsl + "", # empty string + "rgb(255 0 0)", # space instead of comma (CSS4 syntax) + ] + + for value in invalid_values: + with self.subTest(value=value): + with self.assertRaises(ValidationError): + Mi(mathcolor=value, children=["x"]) + + def test_script_level_valid_values(self): + valid_values = [ + "0", + "1", + "2", + "-1", + "-2", # basic integers + "+1", + "+2", + "+10", # explicit positive + "-10", + "-100", # negative + ] + + for value in valid_values: + with self.subTest(value=value): + obj = Mi(scriptlevel=value, children=["x"]) + self.assertEqual(obj.scriptlevel, value) + + def test_script_level_invalid_values(self): + """Test invalid ScriptLevel values.""" + invalid_values = [ + "1.5", # decimal not allowed + "one", # word not allowed + "", # empty string + " 1", # leading space + "1 ", # trailing space + "++1", # double sign + "+-1", # mixed signs + ] + + for value in invalid_values: + with self.subTest(value=value): + with self.assertRaises(ValidationError): + Mi(scriptlevel=value, children=["x"]) + + def test_enum_validation(self): + """Test enum field validation.""" + # Valid enum values + math_obj = Math(display=MathMLDisplay.BLOCK, children=[]) + self.assertEqual(math_obj.display, MathMLDisplay.BLOCK) + + mo_obj = Mo(form=MathMLForm.INFIX, children=["+"]) + self.assertEqual(mo_obj.form, MathMLForm.INFIX) + + # Invalid enum values should raise ValidationError + with self.assertRaises(ValidationError): + Math(display="invalid_display", children=[]) + + with self.assertRaises(ValidationError): + Mo(form="invalid_form", children=["+"]) + + def test_boolean_attribute_validation(self): + """Test boolean attribute handling.""" + # Valid boolean values + mo_obj = Mo(fence=True, separator=False, children=["|"]) + self.assertTrue(mo_obj.fence) + self.assertFalse(mo_obj.separator) + + # Boolean 
attributes should accept actual booleans + mo_obj2 = Mo(stretchy=True, symmetric=False, children=["("]) + self.assertTrue(mo_obj2.stretchy) + self.assertFalse(mo_obj2.symmetric) + + +class TestElementConstraints(unittest.TestCase): + """Tests for MathML element structural constraints and children requirements.""" + + def test_token_elements_children_constraints(self): + """Test that token elements only accept TextType children.""" + text_node = "content" + math_element = Mi(children=["x"]) # Invalid child for token elements + + # Valid: token elements with TextType children + token_classes = [Mi, Mn, Mo, Mtext, Ms, Annotation] + + for token_class in token_classes: + with self.subTest(element=token_class.__name__): + # Valid: TextType children + element = token_class(children=[text_node]) + self.assertEqual(len(element.children), 1) + self.assertIsInstance(element.children[0], TextNode) + + # Invalid: MathML element children should fail + with self.assertRaises( + ValidationError, + msg=f"{token_class.__name__} should reject MathML element children", + ): + token_class(children=[math_element]) + + # Mspace should not have children (it's empty) + mspace = Mspace() + self.assertFalse( + hasattr(mspace, "children") or len(getattr(mspace, "children", [])) > 0 + ) + + def test_elements_with_exactly_two_children(self): + """Test elements that require exactly 2 children.""" + child1 = Mi(children=["a"]) + child2 = Mn(children=["1"]) + child3 = Mi(children=["b"]) + + # These elements should accept exactly 2 children + two_child_classes = [ + (Mfrac, "fraction"), + (Mroot, "root"), + (Msub, "subscript"), + (Msup, "superscript"), + (Munder, "under"), + (Mover, "over"), + ] + + for element_class, description in two_child_classes: + with self.subTest(element=element_class.__name__): + # Valid: exactly 2 children + element = element_class(children=[child1, child2]) + self.assertEqual( + len(element.children), + 2, + f"{description} element should have exactly 2 children", + ) + 
+ # Invalid: 1 child should fail + with self.assertRaises( + ValidationError, msg=f"{description} should reject 1 child" + ): + element_class(children=[child1]) + + # Invalid: 3 children should fail + with self.assertRaises( + ValidationError, msg=f"{description} should reject 3 children" + ): + element_class(children=[child1, child2, child3]) + + def test_elements_with_exactly_three_children(self): + """Test elements that require exactly 3 children.""" + child1 = Mi(children=["base"]) + child2 = Mn(children=["sub"]) + child3 = Mn(children=["sup"]) + child4 = Mi(children=["extra"]) + + # These elements should accept exactly 3 children + three_child_classes = [ + (Msubsup, "subscript-superscript"), + (Munderover, "under-over"), + ] + + for element_class, description in three_child_classes: + with self.subTest(element=element_class.__name__): + # Valid: exactly 3 children + element = element_class(children=[child1, child2, child3]) + self.assertEqual( + len(element.children), + 3, + f"{description} element should have exactly 3 children", + ) + + # Invalid: 2 children should fail + with self.assertRaises( + ValidationError, msg=f"{description} should reject 2 children" + ): + element_class(children=[child1, child2]) + + # Invalid: 4 children should fail + with self.assertRaises( + ValidationError, msg=f"{description} should reject 4 children" + ): + element_class(children=[child1, child2, child3, child4]) + + def test_table_structure_constraints(self): + """Test table element structural requirements.""" + # Valid table structure + cell_content = Mi(children=["cell"]) + mtd = Mtd(children=[cell_content]) + self.assertEqual(len(mtd.children), 1) + + # Mtr should contain Mtd elements + mtr = Mtr(children=[mtd]) + self.assertEqual(len(mtr.children), 1) + self.assertIsInstance(mtr.children[0], Mtd) + + # Mtable should contain Mtr elements + mtable = Mtable(children=[mtr]) + self.assertEqual(len(mtable.children), 1) + self.assertIsInstance(mtable.children[0], Mtr) + + # 
Invalid: Mtr with non-Mtd children should fail + non_mtd_element = Mi(children=["invalid"]) + with self.assertRaises( + ValidationError, msg="Mtr should reject non-Mtd children" + ): + Mtr(children=[non_mtd_element]) + + # Invalid: Mtable with non-Mtr children should fail + non_mtr_element = Mtd(children=[cell_content]) + with self.assertRaises( + ValidationError, msg="Mtable should reject non-Mtr children" + ): + Mtable(children=[non_mtr_element]) + + def test_semantics_element_constraints(self): + """Test Semantics element structure.""" + # First child should be presentation content + presentation = Mi(children=["x"]) + annotation = Annotation(encoding="text/plain", children=["variable x"]) + annotation_xml = AnnotationXml( + encoding="application/mathml+xml", children=[presentation] + ) + + # Valid semantics structures + semantics1 = Semantics(children=[presentation, annotation]) + semantics2 = Semantics(children=[presentation, annotation_xml]) + semantics3 = Semantics(children=[presentation, annotation, annotation_xml]) + + self.assertEqual(len(semantics1.children), 2) + self.assertEqual(len(semantics2.children), 2) + self.assertEqual(len(semantics3.children), 3) + + # Invalid: Semantics with no children should fail + with self.assertRaises( + ValidationError, msg="Semantics should require at least one child" + ): + Semantics(children=[]) + + # Invalid: Semantics with only annotations (no presentation content) should fail + with self.assertRaises( + ValidationError, + msg="Semantics should require presentation content as first child", + ): + Semantics(children=[annotation]) + + def test_mmultiscripts_structure(self): + """Test Mmultiscripts element structure constraints.""" + base = Mi(children=["F"]) + sub1 = Mn(children=["1"]) + sup1 = Mn(children=["2"]) + + # Basic multiscripts structure + mmultiscripts = Mmultiscripts(children=[base, sub1, sup1]) + self.assertEqual(len(mmultiscripts.children), 3) + + # With prescripts + prescripts = Mprescripts() + pre_sub 
= Mn(children=["0"]) + pre_sup = Mn(children=["3"]) + + mmultiscripts_with_pre = Mmultiscripts( + children=[base, sub1, sup1, prescripts, pre_sub, pre_sup] + ) + self.assertEqual(len(mmultiscripts_with_pre.children), 6) + + def test_mmultiscripts_validation(self): + """Test Mmultiscripts validation rules.""" + base = Mi(children=["F"]) + sub1 = Mn(children=["1"]) + sup1 = Mn(children=["2"]) + sub2 = Mn(children=["3"]) + sup2 = Mn(children=["4"]) + prescripts = Mprescripts() + + # Test: Empty mmultiscripts should fail + with self.assertRaises( + ValidationError, msg="Empty mmultiscripts should be invalid" + ): + Mmultiscripts(children=[]) + + # Test: Odd number of scripts (without prescripts) should fail + with self.assertRaises( + ValidationError, msg="Odd number of scripts should be invalid" + ): + Mmultiscripts(children=[base, sub1]) # Missing superscript + + # Test: Scripts must come in pairs after base + with self.assertRaises(ValidationError, msg="Scripts must be paired"): + Mmultiscripts( + children=[base, sub1, sup1, sub2] + ) # Missing final superscript + + # Test: Post-scripts must be in pairs when prescripts present + with self.assertRaises(ValidationError, msg="Post-scripts must be paired"): + Mmultiscripts( + children=[base, sub1, prescripts, sub2, sup2] + ) # Odd post-scripts + + # Test: Pre-scripts must be in pairs when prescripts present + with self.assertRaises(ValidationError, msg="Pre-scripts must be paired"): + Mmultiscripts( + children=[base, sub1, sup1, prescripts, sub2] + ) # Odd pre-scripts + + # Test: Multiple prescripts should fail + with self.assertRaises( + ValidationError, msg="Multiple prescripts should be invalid" + ): + Mmultiscripts(children=[base, sub1, sup1, prescripts, prescripts]) + + # Test: Valid cases should pass + # Valid: Base only + Mmultiscripts(children=[base]) + + # Valid: Base with paired scripts + Mmultiscripts(children=[base, sub1, sup1]) + + # Valid: Base with multiple paired scripts + Mmultiscripts(children=[base, 
sub1, sup1, sub2, sup2]) + + # Valid: Base with prescripts and paired pre-scripts + Mmultiscripts(children=[base, prescripts, sub1, sup1]) + + # Valid: Base with post-scripts and pre-scripts + Mmultiscripts(children=[base, sub1, sup1, prescripts, sub2, sup2]) + + def test_empty_elements_validation(self): + """Test elements that can be empty vs those that cannot.""" + # Elements that can be empty + empty_allowed_classes = [ + (Mrow, "row"), + (Mstyle, "style"), + (Merror, "error"), + (Mphantom, "phantom"), + (Msqrt, "square root"), + (Math, "math root"), + ] + + for element_class, description in empty_allowed_classes: + with self.subTest(element=element_class.__name__): + element = element_class(children=[]) + self.assertEqual( + len(element.children), + 0, + f"{description} element should allow empty children", + ) + + # Mspace is inherently empty (no children attribute with content) + mspace = Mspace(width="1em", height="1em") + self.assertIsNotNone(mspace) + + def test_mixed_content_validation(self): + """Test elements that accept mixed content (text + elements).""" + text_before = "Before " + element = Mi(children=["x"]) + text_after = " after" + + # These elements should accept mixed content + mixed_content_classes = [ + (Mrow, "row"), + (Mstyle, "style"), + (Merror, "error"), + (Mphantom, "phantom"), + ] + + for element_class, description in mixed_content_classes: + with self.subTest(element=element_class.__name__): + mixed_element = element_class( + children=[text_before, element, text_after] + ) + self.assertEqual( + len(mixed_element.children), + 3, + f"{description} element should accept mixed content", + ) + self.assertIsInstance(mixed_element.children[0], TextNode) + self.assertIsInstance(mixed_element.children[1], Mi) + self.assertIsInstance(mixed_element.children[2], TextNode) + + def test_annotation_xml_element_name(self): + """Test that AnnotationXml serializes with correct element name.""" + annotation_xml = 
AnnotationXml(encoding="application/mathml+xml") + expected_name = "annotation-xml" + actual_name = annotation_xml.element_name() + self.assertEqual(actual_name, expected_name) + + def test_mtable_with_complex_structure(self): + """Test complex table structures.""" + # Create a 2x2 table + cell1 = Mtd(children=[Mi(children=["a"])]) + cell2 = Mtd(children=[Mn(children=["1"])]) + cell3 = Mtd(children=[Mi(children=["b"])]) + cell4 = Mtd(children=[Mn(children=["2"])]) + + row1 = Mtr(children=[cell1, cell2]) + row2 = Mtr(children=[cell3, cell4]) + + table = Mtable(children=[row1, row2]) + + self.assertEqual(len(table.children), 2) + self.assertEqual(len(table.children[0].children), 2) + self.assertEqual(len(table.children[1].children), 2) + + def test_element_inheritance_hierarchy(self): + """Test that elements inherit from correct base classes.""" + inheritance_tests = [ + (Mi(children=["x"]), MathMLTokenElement, "token"), + ( + Mfrac( + children=[ + Mi(children=["a"]), + Mn(children=["1"]), + ] + ), + MathMLLayoutElement, + "layout", + ), + ( + Msub( + children=[ + Mi(children=["x"]), + Mn(children=["1"]), + ] + ), + MathMLScriptElement, + "script", + ), + (Mstyle(children=[]), MathMLGroupingElement, "grouping"), + ] + + for element, expected_base, description in inheritance_tests: + with self.subTest( + element=type(element).__name__, base=expected_base.__name__ + ): + self.assertIsInstance( + element, + expected_base, + f"{type(element).__name__} should be a {description} element", + ) + + +class TestMathMLSerialization(unittest.TestCase): + """Tests for object -> to_xml_string() using direct string comparison.""" + + def test_simple_mi(self): + obj = Mi(children=["x"]) + xml_str = obj.to_xml_string() + expected_xml_str = "<mi>x</mi>" + self.assertEqual(xml_str, expected_xml_str) + + def test_simple_mn_with_attribute(self): + obj = Mn(children=["123"], dir_=Dir.RTL) + xml_str = obj.to_xml_string() + expected_xml_str = '<mn dir="rtl">123</mn>' + self.assertEqual(xml_str, expected_xml_str) + + 
def test_mo_with_boolean_attribute(self): + obj = Mo(children=["+"], fence=True, separator=False) + xml_str = obj.to_xml_string() + expected_xml_str = '+' + self.assertEqual(xml_str, expected_xml_str) + + def test_mi_with_enum_attribute(self): + obj = Mi(children=["X"]) + xml_str = obj.to_xml_string() + expected_xml_str = "X" + self.assertEqual(xml_str, expected_xml_str) + + def test_math_element_with_attributes(self): + obj = Math( + display=MathMLDisplay.BLOCK, + alttext="Equation", + children=[Mi(children=["y"])], + ) + xml_str = obj.to_xml_string() + expected_xml_str = 'y' + self.assertEqual(xml_str, expected_xml_str) + + def test_mrow_nested_elements(self): + obj = Mrow( + children=[ + Mi(children=["a"]), + Mo(children=["+"]), + Mn(children=["1"]), + ], + id_="eq1", + class_="equation-style", + ) + xml_str = obj.to_xml_string() + expected_xml_str = 'a+1' + self.assertEqual(xml_str, expected_xml_str) + + def test_mfrac(self): + obj = Mfrac( + children=[ + Mi( + children=["numerator"], + ), + Mn(children=["denominator"]), + ] + ) + xml_str = obj.to_xml_string() + expected_xml_str = "numeratordenominator" + self.assertEqual(xml_str, expected_xml_str) + + def test_msubsup(self): + obj = Msubsup( + children=[ + Mi(children=["X"]), + Mn(children=["s"]), + Mn(children=["p"]), + ] + ) + xml_str = obj.to_xml_string() + expected_xml_str = "Xsp" + self.assertEqual(xml_str, expected_xml_str) + + def test_mtable_mtr_mtd(self): + obj = Mtable( + children=[ + Mtr( + children=[ + Mtd( + children=[ + Mi( + children=["R1C1"], + ) + ] + ), + Mtd( + children=[ + Mi( + children=["R1C2"], + ) + ] + ), + ] + ), + Mtr( + children=[ + Mtd(children=[Mn(children=["1"])]), + Mtd(children=[Mn(children=["2"])]), + ] + ), + ] + ) + xml_str = obj.to_xml_string() + expected_xml_str = "R1C1R1C212" # noqa: E501 + self.assertEqual(xml_str, expected_xml_str) + + def test_mixed_content_serialization(self): + obj = Mrow( + children=[ + "TextBefore", + Mi(children=["x"]), + "TextBetween", + 
Mn(children=["123"]), + "TextAfter", + ] + ) + xml_str = obj.to_xml_string() + expected_xml_str = ( + "TextBeforexTextBetween123TextAfter" + ) + self.assertEqual(xml_str, expected_xml_str) + + def test_semantics_annotation(self): + obj = Semantics( + children=[ + Mi(children=["x"]), + Annotation( + encoding="text/plain", + children=["Content of annotation"], + ), + ] + ) + xml_str = obj.to_xml_string() + expected_xml_str = 'xContent of annotation' # noqa: E501 + self.assertEqual(xml_str, expected_xml_str) + + def test_annotation_xml(self): + obj = AnnotationXml( + encoding="application/mathml+xml", + children=[ + Mrow( + children=[ + Mi( + children=["alt"], + ), + Mo(children=["="]), + Mn(children=["1"]), + ] + ) + ], + ) + xml_str = obj.to_xml_string() + expected_xml_str = 'alt=1' # noqa: E501 + self.assertEqual(xml_str, expected_xml_str) + + +class TestMathMLDeserialization(unittest.TestCase): + """Tests for from_string() -> object""" + + def test_simple_mi_from_string(self): + xml_str = "y" + result = Mi.from_string(xml_str) + self.assertEqual(len(result), 1) + obj = result[0] + self.assertIsInstance(obj, Mi) + self.assertEqual(len(obj.children), 1) + self.assertIsInstance(obj.children[0], TextNode) + self.assertEqual(obj.children[0].text, "y") + + def test_mo_from_string_with_attributes(self): + xml_str = '+ ' + result = Mo.from_string(xml_str) + self.assertEqual(len(result), 1) + obj = result[0] + self.assertIsInstance(obj, Mo) + self.assertTrue(obj.fence) + self.assertEqual(obj.lspace, "8px") + self.assertEqual(obj.children[0].text, "+ ") + + def test_mrow_nested_from_string(self): + xml_str = ( + 'a+1' + ) + result = Mrow.from_string(xml_str) + self.assertEqual(len(result), 1) + obj = result[0] + self.assertIsInstance(obj, Mrow) + self.assertEqual(obj.id_, "r1") + self.assertEqual(obj.class_, "test-class") + + self.assertEqual(len(obj.children), 3) + self.assertIsInstance(obj.children[0], Mi) + self.assertEqual(obj.children[0].children[0].text, "a") + 
self.assertIsInstance(obj.children[1], Mo) + self.assertEqual(obj.children[1].children[0].text, "+") + self.assertIsInstance(obj.children[2], Mn) + self.assertEqual(obj.children[2].children[0].text, "1") + + def test_mfrac_from_string(self): + xml_str = "ND" + result = Mfrac.from_string(xml_str) + self.assertEqual(len(result), 1) + obj = result[0] + self.assertIsInstance(obj, Mfrac) + self.assertEqual(len(obj.children), 2) + self.assertIsInstance(obj.children[0], Mi) + self.assertEqual(obj.children[0].children[0].text, "N") + self.assertIsInstance(obj.children[1], Mn) + self.assertEqual(obj.children[1].children[0].text, "D") + + def test_mixed_content_deserialization(self): + xml_str = "Prefix v Infix 42 Suffix" + result = Mrow.from_string(xml_str) + self.assertEqual(len(result), 1) + obj = result[0] + self.assertIsInstance(obj, Mrow) + + self.assertEqual(len(obj.children), 5) + self.assertIsInstance(obj.children[0], TextNode) + self.assertEqual(obj.children[0].text, "Prefix ") + self.assertIsInstance(obj.children[1], Mi) + self.assertEqual(obj.children[1].children[0].text, "v") + self.assertIsInstance(obj.children[2], TextNode) + self.assertEqual(obj.children[2].text, " Infix ") + self.assertIsInstance(obj.children[3], Mn) + self.assertEqual(obj.children[3].children[0].text, "42") + self.assertIsInstance(obj.children[4], TextNode) + self.assertEqual(obj.children[4].text, " Suffix") + + def test_semantics_annotation_from_string(self): + xml_str = ( + "" + " E" + ' E = mc^2' + "" + ) + result = Semantics.from_string(xml_str) + self.assertEqual(len(result), 1) + obj = result[0] + self.assertIsInstance(obj, Semantics) + self.assertEqual(len(obj.children), 2) + + self.assertIsInstance(obj.children[0], Mi) + self.assertEqual(obj.children[0].children[0].text, "E") + + ann_obj = obj.children[1] + self.assertIsInstance(ann_obj, Annotation) + self.assertEqual(ann_obj.encoding, "text/latex") + self.assertEqual(len(ann_obj.children), 1) + 
self.assertIsInstance(ann_obj.children[0], TextNode) + self.assertEqual(ann_obj.children[0].text, "E = mc^2") + + def test_annotation_xml_from_string(self): + xml_str = ( + '' + " alt=0" + "" + ) + result = AnnotationXml.from_string(xml_str) + self.assertEqual(len(result), 1) + obj = result[0] + self.assertIsInstance(obj, AnnotationXml) + self.assertEqual(obj.encoding, "application/mathml+xml") + self.assertEqual(len(obj.children), 1) + mrow_child = obj.children[0] + self.assertIsInstance(mrow_child, Mrow) + self.assertEqual(len(mrow_child.children), 3) + self.assertIsInstance(mrow_child.children[0], Mi) + self.assertEqual(mrow_child.children[0].children[0].text, "alt") + + def test_from_string_multiple_root_elements(self): + xml_str = "a1" + result = MathMLElement.from_string(xml_str) + self.assertEqual(len(result), 2) + self.assertIsInstance(result[0], Mi) + self.assertEqual(result[0].children[0].text, "a") + self.assertIsInstance(result[1], Mn) + self.assertEqual(result[1].children[0].text, "1") + + +class TestErrorHandling(unittest.TestCase): + def test_from_string_invalid_xml(self): + xml_str = "x" + with self.assertRaisesRegex(ValueError, "Invalid Markup: mismatched tag"): + Mi.from_string(xml_str) + + def test_from_string_unregistered_tag(self): + xml_str = "content" + + with self.assertRaisesRegex( + ValueError, "No registered class found for tag: unregisteredtag" + ): + MathMLElement.from_string(xml_str) + + def test_attribute_validation_error_on_creation(self): + with self.assertRaises(ValueError): # Pydantic's ValidationError + Mi(mathvariant="not-a-valid-variant", children=["x"]) + + +class TestComplexMathematicalExpressions(unittest.TestCase): + """Tests for complex, realistic mathematical expressions.""" + + def test_quadratic_formula(self): + """Test the quadratic formula: x = (-b ± √(b²-4ac)) / 2a""" + # Create: x = (-b ± √(b²-4ac)) / 2a + + # Left side: x = + x = Mi(children=["x"]) + equals = Mo(children=["="]) + + # Right side numerator: -b ± 
√(b²-4ac) + minus_b = Mrow( + children=[ + Mo(children=["-"]), + Mi(children=["b"]), + ] + ) + + plus_minus = Mo(children=["±"]) + + # b²-4ac inside square root + b_squared = Msup( + children=[ + Mi(children=["b"]), + Mn(children=["2"]), + ] + ) + + four_ac = Mrow( + children=[ + Mn(children=["4"]), + Mi(children=["a"]), + Mi(children=["c"]), + ] + ) + + discriminant = Mrow(children=[b_squared, Mo(children=["-"]), four_ac]) + + sqrt_discriminant = Msqrt(children=[discriminant]) + + numerator = Mrow(children=[minus_b, plus_minus, sqrt_discriminant]) + + # Denominator: 2a + denominator = Mrow( + children=[ + Mn(children=["2"]), + Mi(children=["a"]), + ] + ) + + # Complete fraction + fraction = Mfrac(children=[numerator, denominator]) + + # Complete equation + equation = Mrow(children=[x, equals, fraction]) + + # Test serialization + xml_str = equation.to_xml_string() + self.assertIn("", xml_str) + self.assertIn("", xml_str) + self.assertIn("", xml_str) + + # Test round-trip + result = Mrow.from_string(xml_str) + self.assertEqual(len(result), 1) + self.assertIsInstance(result[0], Mrow) + + def test_integral_with_limits(self): + """Test definite integral: ∫₀^∞ e^(-x²) dx""" + + # Integral symbol with limits + integral_symbol = Mo(children=["∫"]) + lower_limit = Mn(children=["0"]) + upper_limit = Mo(children=["∞"]) + + integral_with_limits = Msubsup( + children=[integral_symbol, lower_limit, upper_limit] + ) + + # e^(-x²) + e = Mi(children=["e"]) + + # -x² + minus = Mo(children=["-"]) + x_squared = Msup( + children=[ + Mi(children=["x"]), + Mn(children=["2"]), + ] + ) + negative_x_squared = Mrow(children=[minus, x_squared]) + + # e^(-x²) + exponential = Msup(children=[e, negative_x_squared]) + + # dx + differential = Mrow( + children=[ + Mi(children=["d"]), + Mi(children=["x"]), + ] + ) + + # Complete integral + integral = Mrow(children=[integral_with_limits, exponential, differential]) + + # Test structure + xml_str = integral.to_xml_string() + self.assertIn("", 
xml_str) + self.assertIn("∫", xml_str) + self.assertIn("∞", xml_str) + + def test_matrix_expression(self): + """Test 2x2 matrix with expressions in cells.""" + + # Matrix elements + # Row 1: [cos θ, -sin θ] + cos_theta = Mrow( + children=[ + Mo(children=["cos"]), + Mi(children=["θ"]), + ] + ) + + minus_sin_theta = Mrow( + children=[ + Mo(children=["-"]), + Mo(children=["sin"]), + Mi(children=["θ"]), + ] + ) + + row1_cell1 = Mtd(children=[cos_theta]) + row1_cell2 = Mtd(children=[minus_sin_theta]) + row1 = Mtr(children=[row1_cell1, row1_cell2]) + + # Row 2: [sin θ, cos θ] + sin_theta = Mrow( + children=[ + Mo(children=["sin"]), + Mi(children=["θ"]), + ] + ) + + row2_cell1 = Mtd(children=[sin_theta]) + row2_cell2 = Mtd(children=[cos_theta]) + row2 = Mtr(children=[row2_cell1, row2_cell2]) + + # Complete matrix + matrix = Mtable(children=[row1, row2]) + + # Test structure + self.assertEqual(len(matrix.children), 2) + self.assertEqual(len(matrix.children[0].children), 2) + self.assertEqual(len(matrix.children[1].children), 2) + + def test_summation_with_complex_expression(self): + """Test summation: Σ(k=1 to n) k²/(k+1)""" + + # Summation symbol + sigma = Mo(children=["Σ"]) + + # Lower limit: k=1 + k_equals_1 = Mrow( + children=[ + Mi(children=["k"]), + Mo(children=["="]), + Mn(children=["1"]), + ] + ) + + # Upper limit: n + n = Mi(children=["n"]) + + # Summation with limits + summation = Munderover(children=[sigma, k_equals_1, n]) + + # Expression being summed: k²/(k+1) + k_squared = Msup( + children=[ + Mi(children=["k"]), + Mn(children=["2"]), + ] + ) + + k_plus_1 = Mrow( + children=[ + Mi(children=["k"]), + Mo(children=["+"]), + Mn(children=["1"]), + ] + ) + + fraction = Mfrac(children=[k_squared, k_plus_1]) + + # Complete expression + complete_sum = Mrow(children=[summation, fraction]) + + # Test serialization + xml_str = complete_sum.to_xml_string() + self.assertIn("", xml_str) + self.assertIn("Σ", xml_str) + self.assertIn("", xml_str) + + def 
test_chemical_equation(self): + """Test chemical equation: H₂ + ½O₂ → H₂O""" + + # H₂ + h2 = Mrow( + children=[ + Mi(children=["H"]), + Msub( + children=[ + Mrow(children=[]), # Empty base for subscript positioning + Mn(children=["2"]), + ] + ), + ] + ) + + # Plus sign + plus = Mo(children=["+"]) + + # ½O₂ + half = Mfrac( + children=[ + Mn(children=["1"]), + Mn(children=["2"]), + ] + ) + + o2 = Mrow( + children=[ + Mi(children=["O"]), + Msub(children=[Mrow(children=[]), Mn(children=["2"])]), + ] + ) + + half_o2 = Mrow(children=[half, o2]) + + # Arrow + arrow = Mo(children=["→"]) + + # H₂O + h2o = Mrow( + children=[ + Mi(children=["H"]), + Msub(children=[Mrow(children=[]), Mn(children=["2"])]), + Mi(children=["O"]), + ] + ) + + # Complete equation + equation = Mrow(children=[h2, plus, half_o2, arrow, h2o]) + + # Test structure + xml_str = equation.to_xml_string() + self.assertIn("→", xml_str) + self.assertIn("", xml_str) + self.assertIn("", xml_str) + + def test_nested_fractions(self): + """Test deeply nested fractions: (a/b) / (c/d) = ad/bc""" + + # a/b + a_over_b = Mfrac( + children=[ + Mi(children=["a"]), + Mi(children=["b"]), + ] + ) + + # c/d + c_over_d = Mfrac( + children=[ + Mi(children=["c"]), + Mi(children=["d"]), + ] + ) + + # (a/b) / (c/d) + complex_fraction = Mfrac(children=[a_over_b, c_over_d]) + + # = + equals = Mo(children=["="]) + + # ad + ad = Mrow( + children=[ + Mi(children=["a"]), + Mi(children=["d"]), + ] + ) + + # bc + bc = Mrow( + children=[ + Mi(children=["b"]), + Mi(children=["c"]), + ] + ) + + # ad/bc + result_fraction = Mfrac(children=[ad, bc]) + + # Complete equation + equation = Mrow(children=[complex_fraction, equals, result_fraction]) + + # Test nesting depth + xml_str = equation.to_xml_string() + # Should have nested mfrac elements + frac_count = xml_str.count("") + self.assertEqual(frac_count, 4) + + def test_multiscript_notation(self): + """Test multiscript notation: ₁₁²³⁵U²³⁸""" + + # Base element + u = Mi(children=["U"]) + + # 
Pre-subscripts and pre-superscripts + prescripts = Mprescripts() + + # Create multiscripts element + # Format: base, post-sub, post-sup, prescripts, pre-sub, pre-sup + multiscripts = Mmultiscripts( + children=[ + u, # base + Mn(children=["238"]), # post-subscript + Mrow(children=[]), # no post-superscript + prescripts, + Mn(children=["92"]), # pre-subscript (atomic number) + Mrow(children=[]), # no pre-superscript + ] + ) + + xml_str = multiscripts.to_xml_string() + self.assertIn("", xml_str) + self.assertIn("", xml_str) + + def test_equation_with_semantics(self): + """Test equation with semantic annotations.""" + + # E = mc² + e = Mi(children=["E"]) + equals = Mo(children=["="]) + m = Mi(children=["m"]) + c_squared = Msup( + children=[ + Mi(children=["c"]), + Mn(children=["2"]), + ] + ) + + equation = Mrow(children=[e, equals, m, c_squared]) + + # Add semantic annotation + latex_annotation = Annotation( + encoding="application/x-tex", children=["E = mc^2"] + ) + + text_annotation = Annotation( + encoding="text/plain", + children=["Einstein's mass-energy equivalence"], + ) + + semantics = Semantics(children=[equation, latex_annotation, text_annotation]) + + # Test structure + self.assertEqual(len(semantics.children), 3) + self.assertIsInstance(semantics.children[0], Mrow) + self.assertIsInstance(semantics.children[1], Annotation) + self.assertIsInstance(semantics.children[2], Annotation) + + def test_styled_expression(self): + """Test expression with styling applied.""" + + # Create expression: f(x) = x² + 1 + f = Mi(children=["f"]) + x_arg = Mi(children=["x"]) + function_call = Mrow( + children=[ + f, + Mo(children=["("]), + x_arg, + Mo(children=[")"]), + ] + ) + + equals = Mo(children=["="]) + + x_squared = Msup( + children=[ + Mi(children=["x"]), + Mn(children=["2"]), + ] + ) + + plus = Mo(children=["+"]) + one = Mn(children=["1"]) + + expression = Mrow(children=[x_squared, plus, one]) + + # Wrap in styled container + styled_expression = Mstyle( + 
mathcolor="blue", + mathsize="14pt", + children=[function_call, equals, expression], + ) + + # Test styling attributes + self.assertEqual(styled_expression.mathcolor, "blue") + self.assertEqual(styled_expression.mathsize, "14pt") + + +class TestEdgeCasesAndCompliance(unittest.TestCase): + """Tests for edge cases, boundary conditions, and MathML Core compliance.""" + + def test_unicode_content_handling(self): + """Test proper handling of Unicode mathematical symbols.""" + unicode_symbols = [ + "α", + "β", + "γ", + "π", + "∑", + "∫", + "∞", + "≤", + "≥", + "≠", + "∂", + "∇", + "√", + "∈", + "∉", + "⊂", + "⊃", + "∪", + "∩", + "→", + ] + + for symbol in unicode_symbols: + with self.subTest(symbol=symbol): + # Test in Mi element + mi = Mi(children=[symbol]) + xml_str = mi.to_xml_string() + self.assertIn(symbol, xml_str) + + # Test round-trip + result = Mi.from_string(xml_str) + self.assertEqual(result[0].children[0].text, symbol) + + def test_empty_elements_compliance(self): + """Test MathML Core compliance for empty elements.""" + + # Elements that can be empty + empty_allowed = [ + Math(children=[]), + Mrow(children=[]), + Msqrt(children=[]), + Mstyle(children=[]), + Merror(children=[]), + Mphantom(children=[]), + ] + + for element in empty_allowed: + with self.subTest(element=type(element).__name__): + xml_str = element.to_xml_string() + # Should produce valid XML + self.assertTrue(xml_str.startswith("<")) + self.assertTrue(xml_str.endswith(">")) + + def test_whitespace_handling(self): + """Test proper whitespace handling in text content.""" + + # Leading/trailing whitespace in text content + text_with_spaces = " x " + mi = Mi(children=[text_with_spaces]) + xml_str = mi.to_xml_string() + + # Round-trip test + result = Mi.from_string(xml_str) + self.assertEqual(result[0].children[0].text, text_with_spaces) + + # Mixed whitespace in Mtext + text_content = "This is\tsome\ntext with\r\nvarious whitespace" + mtext = Mtext(children=[text_content]) + xml_str = 
mtext.to_xml_string() + + result = Mtext.from_string(xml_str) + self.assertEqual(result[0].children[0].text, text_content.replace("\r", "")) + + def test_special_characters_in_content(self): + """Test handling of XML special characters in content.""" + + special_chars = ["&", "<", ">", '"', "'"] + + for char in special_chars: + with self.subTest(char=char): + mtext = Mtext(children=[f"Before{char}After"]) + xml_str = mtext.to_xml_string() + + # Should not contain unescaped special characters + if char == "&": + self.assertIn("&", xml_str) + elif char == "<": + self.assertIn("<", xml_str) + elif char == ">": + self.assertIn(">", xml_str) + + # Round-trip should preserve original content + result = Mtext.from_string(xml_str) + self.assertEqual(result[0].children[0].text, f"Before{char}After") + + def test_display_attribute_compliance(self): + """Test Math element display attribute compliance.""" + + # Test both valid display values + for display_value in [MathMLDisplay.BLOCK, MathMLDisplay.INLINE]: + with self.subTest(display=display_value): + math = Math(display=display_value, children=[]) + xml_str = math.to_xml_string() + self.assertIn(f'display="{display_value.value}"', xml_str) + + def test_length_percentage_edge_cases(self): + """Test edge cases for length-percentage values.""" + + # Edge cases that should be valid + valid_edge_cases = [ + "0", # Unitless zero + "0px", # Zero with unit + "+0", # Explicit positive zero + "-0", # Negative zero + "0.0px", # Decimal zero + ".5em", # Leading decimal point + "100%", # Full percentage + "0%", # Zero percentage + "+50%", # Explicit positive percentage + ] + + for value in valid_edge_cases: + with self.subTest(value=value): + try: + mspace = Mspace(width=value) + self.assertEqual(mspace.width, value) + except ValidationError: + self.fail(f"Valid edge case {value} was rejected") + + def test_extremely_long_content(self): + """Test handling of very long text content.""" + + # Create very long text content + long_text = 
"x" * 10000 + mtext = Mtext(children=[long_text]) + + # Should handle without issues + xml_str = mtext.to_xml_string() + self.assertIn(long_text, xml_str) + + # Round-trip test + result = Mtext.from_string(xml_str) + self.assertEqual(result[0].children[0].text, long_text) + + def test_deeply_nested_structures(self): + """Test deeply nested element structures.""" + + # Create deeply nested structure: ((((x)))) + content = Mi(children=["x"]) + + # Nest 10 levels deep + for i in range(10): + content = Mrow(children=[content]) + + # Should serialize without issues + xml_str = content.to_xml_string() + + # Count nesting depth + open_count = xml_str.count("") + close_count = xml_str.count("") + self.assertEqual(open_count, 10) + self.assertEqual(close_count, 10) + + def test_mixed_content_edge_cases(self): + """Test edge cases in mixed content.""" + + # Empty text nodes mixed with elements + mrow = Mrow( + children=[ + "", + Mi(children=["x"]), + "", + Mo(children=["+"]), + "", + Mn(children=["1"]), + ] + ) + + xml_str = mrow.to_xml_string() + + # Should strip empty text nodes + result = Mrow.from_string(xml_str) + self.assertEqual(len(result[0].children), 3) + + def test_attribute_value_edge_cases(self): + """Test edge cases for attribute values.""" + + # Very long attribute values + long_alttext = "A" * 1000 + math = Math(alttext=long_alttext, children=[]) + xml_str = math.to_xml_string() + self.assertIn(long_alttext, xml_str) + + # Attribute values with special characters + special_alttext = 'Text with "quotes" and &ersands' + math = Math(alttext=special_alttext, children=[]) + xml_str = math.to_xml_string() + + # Should properly escape in XML + result = Math.from_string(xml_str) + self.assertEqual(result[0].alttext, special_alttext) + + def test_script_element_edge_cases(self): + """Test edge cases for script elements.""" + + # Script elements with minimal content + base = Mi(children=["x"]) + empty_script = Mi(children=[""]) + + msub = Msub(children=[base, 
empty_script]) + xml_str = msub.to_xml_string() + + # Should handle empty script content + result = Msub.from_string(xml_str) + self.assertEqual(len(result[0].children), 2) + + def test_namespace_compliance(self): + """Test MathML namespace handling if supported.""" + + # Basic elements should work without explicit namespace in this implementation + mi = Mi(children=["x"]) + xml_str = mi.to_xml_string() + + # Should produce valid MathML-compatible XML + self.assertTrue(xml_str.startswith("")) + + def test_boolean_attribute_edge_cases(self): + """Test edge cases for boolean attributes.""" + + # Test all boolean attributes on Mo element + mo = Mo( + fence=True, + largeop=False, + movablelimits=True, + separator=False, + stretchy=True, + symmetric=False, + children=["∑"], + ) + + xml_str = mo.to_xml_string() + + # All boolean values should serialize + self.assertIn('fence="true"', xml_str) + self.assertIn('largeop="false"', xml_str) + self.assertIn('movablelimits="true"', xml_str) + self.assertIn('separator="false"', xml_str) + self.assertIn('stretchy="true"', xml_str) + self.assertIn('symmetric="false"', xml_str) + + def test_semantics_edge_cases(self): + """Test edge cases for semantic elements.""" + + # Semantics with only presentation content (no annotations) + presentation = Mi(children=["E"]) + ann1 = Annotation(encoding="text/plain", children=["First"]) + semantics = Semantics(children=[presentation, ann1]) + + xml_str = semantics.to_xml_string() + result = Semantics.from_string(xml_str) + self.assertEqual(len(result[0].children), 2) + + # Multiple annotations of same type + ann2 = Annotation(encoding="text/plain", children=["Second"]) + + semantics_multi = Semantics(children=[presentation, ann1, ann2]) + xml_str = semantics_multi.to_xml_string() + self.assertEqual(xml_str.count("10") + + value_element_with_attributes = Value( + value="5", + field_identifier="part1", + base_type=BaseType.INTEGER, + ) + self.assertEqual( + 
value_element_with_attributes.to_xml_string(), + '5', + ) + + def test_correct_response_element(self): + correct_response_element = CorrectResponse( + value=[Value(value="A"), Value(value="B")] + ) + self.assertEqual( + correct_response_element.to_xml_string(), + "AB", + ) + + def test_response_declaration_element(self): + response_declaration_element = ResponseDeclaration( + identifier="RESPONSE_1", + cardinality=Cardinality.SINGLE, + base_type=BaseType.IDENTIFIER, + correct_response=CorrectResponse(value=[Value(value="choiceA")]), + ) + expected_xml = 'choiceA' # noqa: E501 + self.assertEqual(response_declaration_element.to_xml_string(), expected_xml) + + def test_outcome_declaration_element(self): + outcome_declaration_element = OutcomeDeclaration( + identifier="SCORE", + cardinality=Cardinality.SINGLE, + base_type=BaseType.FLOAT, + ) + expected_xml = '' + self.assertEqual(outcome_declaration_element.to_xml_string(), expected_xml) + + def test_response_processing_element(self): + response_processing_element = ResponseProcessing( + template="https://example.com/response_processing.xml" + ) + self.assertEqual( + response_processing_element.to_xml_string(), + '', + ) + + def test_assessment_item_element(self): + item_body = ItemBody(children=[P(children=["Test Item Body Content"])]) + assessment_item_element = AssessmentItem( + identifier="item_1", + title="Test Assessment Item", + language="en-US", + item_body=item_body, + ) + expected_xml = '

    Test Item Body Content

    ' # noqa: E501 + self.assertEqual(assessment_item_element.to_xml_string(), expected_xml) + + def test_prompt_element(self): + prompt_element = Prompt(children=["This is the prompt text."]) + self.assertEqual( + prompt_element.to_xml_string(), + "This is the prompt text.", + ) + + def test_simple_choice_element(self): + simple_choice_element = SimpleChoice( + identifier="choice1", children=["Choice 1"] + ) + self.assertEqual( + simple_choice_element.to_xml_string(), + 'Choice 1', + ) + + def test_choice_interaction_element(self): + choice1 = SimpleChoice(identifier="choice1", children=["Choice 1"]) + choice2 = SimpleChoice(identifier="choice2", children=["Choice 2"]) + choice_interaction_element = ChoiceInteraction( + answers=[choice1, choice2], + response_identifier="RESPONSE", + prompt=Prompt(children=["Select the correct answer."]), + ) + expected_xml = 'Select the correct answer.Choice 1Choice 2' # noqa: E501 + self.assertEqual(choice_interaction_element.to_xml_string(), expected_xml) + + def test_text_entry_interaction_element(self): + text_entry_interaction = TextEntryInteraction( + response_identifier="textEntry1", + expected_length=10, + placeholder_text="Enter your answer", + ) + expected_xml = '' + self.assertEqual(text_entry_interaction.to_xml_string(), expected_xml) + + def test_extended_text_interaction_element(self): + extended_text_interaction = ExtendedTextInteraction( + response_identifier="extendedText1", + placeholder_text="Enter your essay here.", + prompt=Prompt(children=["What is truth?"]), + ) + expected_xml = 'What is truth?' 
# noqa: E501 + self.assertEqual(extended_text_interaction.to_xml_string(), expected_xml) diff --git a/contentcuration/contentcuration/tests/utils/test_exercise_creation.py b/contentcuration/contentcuration/tests/utils/test_exercise_creation.py index 37f4330a4b..deceb2d980 100644 --- a/contentcuration/contentcuration/tests/utils/test_exercise_creation.py +++ b/contentcuration/contentcuration/tests/utils/test_exercise_creation.py @@ -1,3 +1,7 @@ +# flake8: noqa: E501 +# Ignore line length issues in this file +# Black will autoformat where possible, so this is not too egregious +# but will allow our long strings where necessary. import json import os import re @@ -16,7 +20,9 @@ from contentcuration.tests.base import StudioTestCase from contentcuration.tests.testdata import fileobj_exercise_graphie from contentcuration.tests.testdata import fileobj_exercise_image -from contentcuration.utils.publish import create_perseus_exercise +from contentcuration.utils.assessment.perseus import PerseusExerciseGenerator +from contentcuration.utils.assessment.qti.archive import hex_to_qti_id +from contentcuration.utils.assessment.qti.archive import QTIExerciseGenerator class TestPerseusExerciseCreation(StudioTestCase): @@ -37,8 +43,8 @@ def setUp(self): # Create an exercise node self.exercise_node = ContentNode.objects.create( title="Test Exercise", - node_id="exercise-node-id", - content_id="exercise-content-id", + node_id="1234567890abcdef1234567890abcded", + content_id="fedcba0987654321fedcba0987654321", kind_id=content_kinds.EXERCISE, parent=self.channel.main_tree, extra_fields=json.dumps( @@ -58,9 +64,6 @@ def setUp(self): ), ) - # Create a kolibri node representation (only needs id for testing) - self.kolibri_node = type("KolibriNode", (), {"id": "kolibri-node-id"}) - def _create_assessment_item( self, item_type, question_text, answers, hints=None, assessment_id=None ): @@ -81,6 +84,16 @@ def _create_assessment_item( ) return item + def _create_perseus_zip(self, exercise_data): 
def test_multiple_images_index_mismatch_regression(self):
    """Regression test for index mismatch bug in process_image_strings method.

    When content is modified inside the re.finditer loop, subsequent matches
    point to invalid positions due to string length changes, resulting in
    malformed image processing.

    The question mixes two resized images with one unresized image so the
    replacements have different lengths. The test then checks that all three
    markdown image references survive intact and that exactly the two resized
    images are recorded with their requested dimensions.
    """
    # Three distinct source images so each markdown reference has its own URL.
    image1 = fileobj_exercise_image(size=(100, 100), color="red")
    image2 = fileobj_exercise_image(size=(200, 200), color="blue")
    image3 = fileobj_exercise_image(size=(300, 300), color="green")

    image1_url = exercises.CONTENT_STORAGE_FORMAT.format(image1.filename())
    image2_url = exercises.CONTENT_STORAGE_FORMAT.format(image2.filename())
    image3_url = exercises.CONTENT_STORAGE_FORMAT.format(image3.filename())

    # Resized / original / resized: the replacements differ in length, which
    # is what exposed the stale-match-offset problem.
    question_text = (
        f"First image (resized): ![img1]({image1_url} =50x50)\n"
        f"Second image (original): ![img2]({image2_url})\n"
        f"Third image (resized): ![img3]({image3_url} =70x70)"
    )

    item = self._create_assessment_item(
        exercises.SINGLE_SELECTION,
        question_text,
        [{"answer": "Answer", "correct": True, "order": 1}],
    )

    # Associate all images with the assessment item
    for img in [image1, image2, image3]:
        img.assessment_item = item
        img.save()

    exercise_data = {
        "mastery_model": exercises.M_OF_N,
        "randomize": True,
        "n": 1,
        "m": 1,
        "all_assessment_items": [item.assessment_id],
        "assessment_mapping": {item.assessment_id: exercises.SINGLE_SELECTION},
    }

    # Create the Perseus exercise
    self._create_perseus_zip(exercise_data)
    exercise_file = self.exercise_node.files.get(preset_id=format_presets.EXERCISE)
    zip_file, _ = self._validate_perseus_zip(exercise_file)

    # Get the Perseus item JSON content
    item_json = json.loads(
        zip_file.read(f"{item.assessment_id}.json").decode("utf-8")
    )
    question_content = item_json["question"]["content"]

    # Extract every markdown image reference: (alt text, link target).
    markdown_pattern = r"!\[([^\]]*)\]\(([^)]+)\)"
    matches = re.findall(markdown_pattern, question_content)

    # If the bug exists, malformed content yields a different number of
    # well-formed references.
    self.assertEqual(
        len(matches),
        3,
        f"Expected 3 image references, found {len(matches)} in content: {question_content}",
    )

    # Alt texts must still be img1, img2, img3 in document order.
    for position, (alt_text, _target) in enumerate(matches, start=1):
        expected_alt = f"img{position}"
        self.assertEqual(
            alt_text,
            expected_alt,
            f"Image {position} alt text malformed: got '{alt_text}', expected '{expected_alt}'",
        )

    # Only the two resized images are expected to carry dimensions.
    question_images = item_json["question"]["images"]

    self.assertEqual(
        len(question_images),
        2,
        f"Expected 2 image entries with dimensions, found {len(question_images)}: {list(question_images.keys())}",
    )

    # Require both requested sizes to be present. The previous loop only
    # failed when a width of 50 or 70 came with a different height, so it
    # passed vacuously when neither size was recorded at all.
    actual_dimensions = sorted(
        (image_data["width"], image_data["height"])
        for image_data in question_images.values()
    )
    self.assertEqual(
        actual_dimensions,
        [(50, 50), (70, 70)],
        f"Expected one 50x50 and one 70x70 image entry, found {actual_dimensions}",
    )
test_exercise_with_image_deleted_file_object(self): image_file.delete() # Create the Perseus exercise - create_perseus_exercise( - self.exercise_node, self.kolibri_node, exercise_data, user_id=self.user.id - ) + self._create_perseus_zip(exercise_data) # Verify that a file was created exercise_file = self.exercise_node.files.get(preset_id=format_presets.EXERCISE) @@ -392,9 +492,7 @@ def test_exercise_with_graphie(self): } # Create the Perseus exercise - create_perseus_exercise( - self.exercise_node, self.kolibri_node, exercise_data, user_id=self.user.id - ) + self._create_perseus_zip(exercise_data) # Verify that a file was created exercise_file = self.exercise_node.files.get(preset_id=format_presets.EXERCISE) @@ -431,7 +529,7 @@ def test_exercise_with_graphie(self): def test_formula_processing(self): """Test that formulas are properly processed in exercises""" # Create a question with LaTeX formulas - question_text = "Solve: $\\frac{x}{2} = 3$" + question_text = "Solve: $$\\frac{x}{2} = 3$$" item = self._create_assessment_item( exercises.INPUT_QUESTION, question_text, @@ -449,10 +547,43 @@ def test_formula_processing(self): } # Create the Perseus exercise - create_perseus_exercise( - self.exercise_node, self.kolibri_node, exercise_data, user_id=self.user.id + self._create_perseus_zip(exercise_data) + + # Verify that a file was created + exercise_file = self.exercise_node.files.get(preset_id=format_presets.EXERCISE) + + # Validate the zip file + zip_file, _ = self._validate_perseus_zip(exercise_file) + + # Check that the formula was properly processed + item_json = json.loads( + zip_file.read(f"{item.assessment_id}.json").decode("utf-8") + ) + self.assertIn("$\\frac{x}{2} = 3$", item_json["question"]["content"]) + + def test_multiple_formula_processing(self): + """Test that formulas are properly processed in exercises""" + # Create a question with LaTeX formulas + question_text = "Solve: $$\\frac{x}{2} = 3$$ or maybe $$\\frac{y}{2} = 7$$" + item = 
self._create_assessment_item( + exercises.INPUT_QUESTION, + question_text, + [{"answer": "6", "correct": True, "order": 1}], ) + # Create the exercise data + exercise_data = { + "mastery_model": exercises.M_OF_N, + "randomize": True, + "n": 1, + "m": 1, + "all_assessment_items": [item.assessment_id], + "assessment_mapping": {item.assessment_id: exercises.INPUT_QUESTION}, + } + + # Create the Perseus exercise + self._create_perseus_zip(exercise_data) + # Verify that a file was created exercise_file = self.exercise_node.files.get(preset_id=format_presets.EXERCISE) @@ -463,7 +594,10 @@ def test_formula_processing(self): item_json = json.loads( zip_file.read(f"{item.assessment_id}.json").decode("utf-8") ) - self.assertIn("\\frac{x}{2} = 3", item_json["question"]["content"]) + self.assertIn( + "Solve: $\\frac{x}{2} = 3$ or maybe $\\frac{y}{2} = 7$", + item_json["question"]["content"], + ) def test_multiple_question_types(self): """Test creating an exercise with multiple question types""" @@ -526,9 +660,7 @@ def test_multiple_question_types(self): } # Create the Perseus exercise - create_perseus_exercise( - self.exercise_node, self.kolibri_node, exercise_data, user_id=self.user.id - ) + self._create_perseus_zip(exercise_data) # Verify that a file was created exercise_file = self.exercise_node.files.get(preset_id=format_presets.EXERCISE) @@ -661,6 +793,7 @@ def _test_image_resizing_in_field(self, field_type): # Create the assessment item item_type = exercises.SINGLE_SELECTION + item = self._create_assessment_item(item_type, question_text, answers, hints) # Associate the image with the assessment item @@ -678,9 +811,7 @@ def _test_image_resizing_in_field(self, field_type): } # Create the Perseus exercise - create_perseus_exercise( - self.exercise_node, self.kolibri_node, exercise_data, user_id=self.user.id - ) + self._create_perseus_zip(exercise_data) # Get the exercise file exercise_file = self.exercise_node.files.get(preset_id=format_presets.EXERCISE) @@ -820,9 +951,7 @@ 
def test_image_with_zero_width(self):
    """Build a Perseus archive from a question that requests a zero-width resize.

    The same source image is referenced twice: once with ``=0x150`` (zero
    width) and once with ``=200x151``. The test passes when archive creation
    completes and the zip contains exactly one entry under ``images/``.
    """
    source_image = fileobj_exercise_image(size=(400, 300), color="red")
    source_url = exercises.CONTENT_STORAGE_FORMAT.format(source_image.filename())

    # Two references to one image: a degenerate 0x150 request and a valid one.
    markdown_lines = [
        f"First image: ![shape1]({source_url} =0x150)",
        f"Second image: ![shape2]({source_url} =200x151)",
    ]
    question_text = "\n".join(markdown_lines)

    answers = [{"answer": "Answer", "correct": True, "order": 1}]
    item = self._create_assessment_item(
        exercises.SINGLE_SELECTION, question_text, answers
    )

    # Attach the image file to the item.
    source_image.assessment_item = item
    source_image.save()

    item_id = item.assessment_id
    exercise_data = {
        "mastery_model": exercises.M_OF_N,
        "randomize": True,
        "n": 1,
        "m": 1,
        "all_assessment_items": [item_id],
        "assessment_mapping": {item_id: exercises.SINGLE_SELECTION},
    }

    self._create_perseus_zip(exercise_data)

    # Open the generated archive through the shared structural validator.
    exercise_file = self.exercise_node.files.get(preset_id=format_presets.EXERCISE)
    zip_file, _ = self._validate_perseus_zip(exercise_file)

    # Collect every archive member stored under the images/ prefix.
    image_files = list(
        filter(lambda name: name.startswith("images/"), zip_file.namelist())
    )

    self.assertEqual(
        len(image_files),
        1,
        f"Expected 1 resized images, found {len(image_files)}: {image_files}",
    )
generator""" + generator = QTIExerciseGenerator( + self.exercise_node, + exercise_data, + self.channel.id, + "en-US", + user_id=self.user.id, + ) + return generator.create_exercise_archive() + + def _normalize_xml(self, xml_string): + return "".join(x.strip() for x in xml_string.split("\n")) + + def _validate_qti_zip_structure(self, exercise_file): + """Helper to validate basic structure of the QTI Content Package""" + # Use Django's storage backend to read the file + with storage.open(exercise_file.file_on_disk.name, "rb") as f: + zip_data = f.read() + + zip_file = zipfile.ZipFile(BytesIO(zip_data)) + + # Check that the imsmanifest.xml file exists + assert ( + "imsmanifest.xml" in zip_file.namelist() + ), "imsmanifest.xml not found in zip file" + + return zip_file + + def test_basic_qti_exercise_creation(self): + """Test the basic creation of a QTI exercise with a single question""" + # Create a simple multiple choice question with 32-char hex ID + assessment_id = "1234567890abcdef1234567890abcdef" + item = self._create_assessment_item( + exercises.SINGLE_SELECTION, + "What is 2+2?", + [ + {"answer": "4", "correct": True, "order": 1}, + {"answer": "3", "correct": False, "order": 2}, + {"answer": "5", "correct": False, "order": 3}, + ], + assessment_id=assessment_id, + ) + + # Create the exercise data structure + exercise_data = { + "mastery_model": exercises.M_OF_N, + "randomize": True, + "n": 5, + "m": 3, + "all_assessment_items": [item.assessment_id], + "assessment_mapping": {item.assessment_id: exercises.SINGLE_SELECTION}, + } + + # Call the function to create the QTI exercise + self._create_qti_zip(exercise_data) + + # Verify that a file was created for the node + exercise_file = self.exercise_node.files.get(preset_id=format_presets.QTI_ZIP) + self.assertIsNotNone(exercise_file) + self.assertEqual(exercise_file.file_format_id, "zip") + + # Validate the contents of the zip file + zip_file = self._validate_qti_zip_structure(exercise_file) + + # Check that the 
assessment item XML file exists + expected_item_file = "items/KEjRWeJCrze8SNFZ4kKvN7w.xml" + self.assertIn(expected_item_file, zip_file.namelist()) + + # Get the actual QTI item XML content + actual_item_xml = zip_file.read(expected_item_file).decode("utf-8") + + # Expected QTI item XML content + expected_item_xml = """ + + + + choice_0 + + + + + + +

    What is 2+2?

    +
    +

    4

    +

    3

    +

    5

    +
    +
    + +
    """ + + # Compare normalized XML + self.assertEqual( + self._normalize_xml(expected_item_xml), + self._normalize_xml(actual_item_xml), + ) + + # Get the actual IMS manifest content + actual_manifest_xml = zip_file.read("imsmanifest.xml").decode("utf-8") + + # Expected IMS manifest XML content + expected_manifest_xml = """ + + + QTI Package + 3.0.0 + + + + + + + +""" + + # Compare normalized XML + self.assertEqual( + self._normalize_xml(expected_manifest_xml), + self._normalize_xml(actual_manifest_xml), + ) + + def test_multiple_selection_question(self): + """Test QTI generation for multiple selection questions""" + assessment_id = "abcdef1234567890abcdef1234567890" + item = self._create_assessment_item( + exercises.MULTIPLE_SELECTION, + "Select all prime numbers:", + [ + {"answer": "2", "correct": True, "order": 1}, + {"answer": "3", "correct": True, "order": 2}, + {"answer": "4", "correct": False, "order": 3}, + {"answer": "5", "correct": True, "order": 4}, + ], + assessment_id=assessment_id, + ) + + exercise_data = { + "mastery_model": exercises.M_OF_N, + "randomize": True, + "n": 1, + "m": 1, + "all_assessment_items": [item.assessment_id], + "assessment_mapping": {item.assessment_id: exercises.MULTIPLE_SELECTION}, + } + + self._create_qti_zip(exercise_data) + exercise_file = self.exercise_node.files.get(preset_id=format_presets.QTI_ZIP) + zip_file = self._validate_qti_zip_structure(exercise_file) + + qti_id = hex_to_qti_id(assessment_id) + + # Check the QTI XML for multiple selection specifics + expected_item_file = f"items/{qti_id}.xml" + actual_item_xml = zip_file.read(expected_item_file).decode("utf-8") + + # Expected QTI item XML content for multiple selection + expected_item_xml = """ + + + + choice_0 + choice_1 + choice_3 + + + + + + +

    Select all prime numbers:

    +
    +

    2

    +

    3

    +

    4

    +

    5

    +
    +
    + +
    """ + + # Compare normalized XML + self.assertEqual( + self._normalize_xml(expected_item_xml), + self._normalize_xml(actual_item_xml), + ) + + def test_free_response_question(self): + assessment_id = "fedcba0987654321fedcba0987654321" + item = self._create_assessment_item( + exercises.FREE_RESPONSE, + "What is the capital of France?", + [{"answer": "Paris", "correct": True, "order": 1}], + assessment_id=assessment_id, + ) + + exercise_data = { + "mastery_model": exercises.M_OF_N, + "randomize": True, + "n": 1, + "m": 1, + "all_assessment_items": [item.assessment_id], + "assessment_mapping": {item.assessment_id: exercises.FREE_RESPONSE}, + } + + self._create_qti_zip(exercise_data) + exercise_file = self.exercise_node.files.get(preset_id=format_presets.QTI_ZIP) + zip_file = self._validate_qti_zip_structure(exercise_file) + + # Check the QTI XML for text entry specifics + expected_item_file = "items/K_ty6CYdlQyH-3LoJh2VDIQ.xml" + actual_item_xml = zip_file.read(expected_item_file).decode("utf-8") + + # Expected QTI item XML content for text entry + expected_item_xml = """ + + + + Paris + + + + +
    +

    What is the capital of France?

    +

    +
    +
    + +
    """ + + # Compare normalized XML + self.assertEqual( + self._normalize_xml(expected_item_xml), + self._normalize_xml(actual_item_xml), + ) + + def test_free_response_question_with_maths(self): + assessment_id = "fedcba0987654321fedcba0987654321" + item = self._create_assessment_item( + exercises.FREE_RESPONSE, + "$$\\sum_n^sxa^n$$\n\n What does this even mean?", + [{"answer": "Nothing", "correct": True, "order": 1}], + assessment_id=assessment_id, + ) + + exercise_data = { + "mastery_model": exercises.M_OF_N, + "randomize": True, + "n": 1, + "m": 1, + "all_assessment_items": [item.assessment_id], + "assessment_mapping": {item.assessment_id: exercises.FREE_RESPONSE}, + } + + self._create_qti_zip(exercise_data) + exercise_file = self.exercise_node.files.get(preset_id=format_presets.QTI_ZIP) + zip_file = self._validate_qti_zip_structure(exercise_file) + + # Check the QTI XML for text entry specifics + expected_item_file = "items/K_ty6CYdlQyH-3LoJh2VDIQ.xml" + actual_item_xml = zip_file.read(expected_item_file).decode("utf-8") + + # Expected QTI item XML content for text entry + expected_item_xml = """ + + + + Nothing + + + + +
    + + + + ns + x + an + + \\sum_n^sxa^n + + +

    What does this even mean?

    +

    +
    +
    + +
    """ + + # Compare normalized XML + self.assertEqual( + self._normalize_xml(expected_item_xml), + self._normalize_xml(actual_item_xml), + ) + + def test_perseus_question_rejection(self): + """Test that Perseus questions are properly rejected""" + assessment_id = "aaaa1111bbbb2222cccc3333dddd4444" + # Create a mock Perseus question + item = AssessmentItem.objects.create( + contentnode=self.exercise_node, + assessment_id=assessment_id, + type=exercises.PERSEUS_QUESTION, + raw_data='{"question": {"content": "Perseus content"}}', + order=1, + ) + + exercise_data = { + "mastery_model": exercises.M_OF_N, + "randomize": True, + "n": 1, + "m": 1, + "all_assessment_items": [item.assessment_id], + "assessment_mapping": {item.assessment_id: exercises.PERSEUS_QUESTION}, + } + + # Should raise ValueError for Perseus questions + with self.assertRaises(ValueError) as context: + self._create_qti_zip(exercise_data) + + self.assertIn("Perseus questions are not supported", str(context.exception)) + + def test_exercise_with_image(self): + """Test QTI exercise generation with images""" + assessment_id = "1111aaaa2222bbbb3333cccc4444dddd" + image_file = fileobj_exercise_image() + + # Create a question with image + image_url = exercises.CONTENT_STORAGE_FORMAT.format(f"{image_file.filename()}") + question_text = f"Identify the shape: ![shape]({image_url})" + item = self._create_assessment_item( + exercises.SINGLE_SELECTION, + question_text, + [ + {"answer": "Circle", "correct": True, "order": 1}, + {"answer": "Square", "correct": False, "order": 2}, + ], + assessment_id=assessment_id, + ) + + # Associate the image with the assessment item + image_file.assessment_item = item + image_file.save() + + exercise_data = { + "mastery_model": exercises.M_OF_N, + "randomize": True, + "n": 1, + "m": 1, + "all_assessment_items": [item.assessment_id], + "assessment_mapping": {item.assessment_id: exercises.SINGLE_SELECTION}, + } + + self._create_qti_zip(exercise_data) + exercise_file = 
self.exercise_node.files.get(preset_id=format_presets.QTI_ZIP) + zip_file = self._validate_qti_zip_structure(exercise_file) + + # Check that the image file was included in the zip + image_path = f"items/images/{image_file.filename()}" + self.assertIn(image_path, zip_file.namelist()) + + # Get the actual manifest content + actual_manifest_xml = zip_file.read("imsmanifest.xml").decode("utf-8") + + # Expected manifest should include the image file dependency + expected_manifest_xml = f""" + + + QTI Package + 3.0.0 + + + + + + + + +""" + + # Compare normalized XML + self.assertEqual( + self._normalize_xml(expected_manifest_xml), + self._normalize_xml(actual_manifest_xml), + ) + + self.assertEqual(exercise_file.checksum, "51ba0d6e3c7f30239265c5294abe6ac5") + + def test_question_with_mathematical_content(self): + """Test QTI generation for questions containing mathematical formulas converted to MathML""" + assessment_id = "dddddddddddddddddddddddddddddddd" + item = self._create_assessment_item( + exercises.SINGLE_SELECTION, + "Solve the equation $$\\frac{x}{2} = 3$$ for x. 
What is the value of x?", + [ + {"answer": "6", "correct": True, "order": 1}, + {"answer": "3", "correct": False, "order": 2}, + {"answer": "1.5", "correct": False, "order": 3}, + {"answer": "9", "correct": False, "order": 4}, + ], + assessment_id=assessment_id, + ) + + exercise_data = { + "mastery_model": exercises.M_OF_N, + "randomize": True, + "n": 1, + "m": 1, + "all_assessment_items": [item.assessment_id], + "assessment_mapping": {item.assessment_id: exercises.SINGLE_SELECTION}, + } + + self._create_qti_zip(exercise_data) + exercise_file = self.exercise_node.files.get(preset_id=format_presets.QTI_ZIP) + zip_file = self._validate_qti_zip_structure(exercise_file) + + qti_id = hex_to_qti_id(assessment_id) + + # Check the QTI XML for mathematical content conversion to MathML + expected_item_file = f"items/{qti_id}.xml" + actual_item_xml = zip_file.read(expected_item_file).decode("utf-8") + + # Expected QTI item XML content with MathML conversion + expected_item_xml = f""" + + + + choice_0 + + + + + + +

    Solve the equation x2=3\\frac{{x}}{{2}} = 3 for x. What is the value of x?

    +
    +

    6

    +

    3

    +

    1.5

    +

    9

    +
    +
    + +
    """ + + # Compare normalized XML + self.assertEqual( + self._normalize_xml(expected_item_xml), + self._normalize_xml(actual_item_xml), + ) + + def test_multiple_question_types_mixed(self): + """Test creating a QTI exercise with multiple supported question types""" + # Create different types of supported questions with 32-char hex IDs + assessment_id1 = "1111111111111111111111111111111a" + assessment_id2 = "2222222222222222222222222222222b" + assessment_id3 = "3333333333333333333333333333333c" + + qti_id1 = hex_to_qti_id(assessment_id1) + qti_id2 = hex_to_qti_id(assessment_id2) + qti_id3 = hex_to_qti_id(assessment_id3) + + item1 = self._create_assessment_item( + exercises.SINGLE_SELECTION, + "What is 2+2?", + [ + {"answer": "4", "correct": True, "order": 1}, + {"answer": "5", "correct": False, "order": 2}, + ], + assessment_id=assessment_id1, + ) + + item2 = self._create_assessment_item( + exercises.MULTIPLE_SELECTION, + "Select all even numbers:", + [ + {"answer": "2", "correct": True, "order": 1}, + {"answer": "3", "correct": False, "order": 2}, + {"answer": "4", "correct": True, "order": 3}, + {"answer": "5", "correct": False, "order": 4}, + ], + assessment_id=assessment_id2, + ) + + item3 = self._create_assessment_item( + exercises.INPUT_QUESTION, + "What is the capital of Spain?", + [{"answer": "Madrid", "correct": True, "order": 1}], + assessment_id=assessment_id3, + ) + + exercise_data = { + "mastery_model": exercises.M_OF_N, + "randomize": True, + "n": 3, + "m": 2, + "all_assessment_items": [ + item1.assessment_id, + item2.assessment_id, + item3.assessment_id, + ], + "assessment_mapping": { + item1.assessment_id: exercises.SINGLE_SELECTION, + item2.assessment_id: exercises.MULTIPLE_SELECTION, + item3.assessment_id: exercises.INPUT_QUESTION, + }, + } + + self._create_qti_zip(exercise_data) + exercise_file = self.exercise_node.files.get(preset_id=format_presets.QTI_ZIP) + zip_file = self._validate_qti_zip_structure(exercise_file) + + # Check that all 
def test_unsupported_question_type(self):
    """An assessment item with an unrecognised type must abort QTI generation.

    Archive creation is expected to raise ``ValueError`` whose message
    contains "Unsupported question type".
    """
    unsupported_type = "UNSUPPORTED_TYPE"

    # Created directly through the ORM rather than the helper, with empty
    # answers and hints.
    item = AssessmentItem.objects.create(
        contentnode=self.exercise_node,
        assessment_id="bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb",
        type=unsupported_type,
        question="This is an unsupported question type",
        answers="[]",
        hints="[]",
        raw_data="{}",
        order=1,
    )

    item_id = item.assessment_id
    exercise_data = {
        "mastery_model": exercises.M_OF_N,
        "randomize": True,
        "n": 1,
        "m": 1,
        "all_assessment_items": [item_id],
        "assessment_mapping": {item_id: unsupported_type},
    }

    # The pattern has no regex metacharacters, so this is a plain substring
    # check on the exception text.
    with self.assertRaisesRegex(ValueError, "Unsupported question type"):
        self._create_qti_zip(exercise_data)
+ "m": 1, + "all_assessment_items": [item.assessment_id], + "assessment_mapping": {item.assessment_id: exercises.SINGLE_SELECTION}, + } + + self._create_qti_zip(exercise_data) + exercise_file = self.exercise_node.files.get(preset_id=format_presets.QTI_ZIP) + zip_file = self._validate_qti_zip_structure(exercise_file) + + # Get the actual manifest content + actual_manifest_xml = zip_file.read("imsmanifest.xml").decode("utf-8") + + # Expected exact manifest structure + expected_manifest_xml = """ + + + QTI Package + 3.0.0 + + + + + + + +""" + + # Compare normalized XML + self.assertEqual( + self._normalize_xml(expected_manifest_xml), + self._normalize_xml(actual_manifest_xml), + ) diff --git a/contentcuration/contentcuration/tests/utils/test_markdown.py b/contentcuration/contentcuration/tests/utils/test_markdown.py new file mode 100644 index 0000000000..9dc3cd41b6 --- /dev/null +++ b/contentcuration/contentcuration/tests/utils/test_markdown.py @@ -0,0 +1,198 @@ +import unittest + +from contentcuration.utils.assessment.markdown import render_markdown +from contentcuration.utils.assessment.qti import ElementTreeBase + + +class TexMathTestMixin: + """Mixin providing test methods for TexMath plugin tests""" + + def _assert_conversion(self, markdown_text: str, expected: str): + """Override in subclasses to define assertion behavior""" + raise NotImplementedError("Subclasses must implement _assert_conversion") + + def test_markdown_with_inline_math(self): + """Test conversion of markdown with inline math to HTML + MathML""" + + markdown_text = ( + "What is the answer to this *question*? $$x\cdot y=z^2$$" # noqa W605 + ) + expected = ( + "

    What is the answer to this question? " + '' + "x·y=z2" + 'x\cdot y=z^2' # noqa W605 + "

    \n" + ) + + self._assert_conversion(markdown_text, expected) + + def test_block_math(self): + """Test conversion of block math""" + + markdown_text = ( + "Here's an equation:\n\n$$E = mc^2$$\n\nThat's Einstein's formula." + ) + expected = ( + "

    Here's an equation:

    \n" + '' + "E=mc2" + 'E = mc^2' + "" + "

    That's Einstein's formula.

    \n" + ) + + self._assert_conversion(markdown_text, expected) + + def test_multiline_block_math(self): + """ + Ensure a $$ … $$ block spanning multiple lines is converted to MathML + and the literal $$ delimiters are removed. This currently fails with + the buggy BLOCK_PATTERN because it stops after the first '$'. + """ + markdown_text = ( + "$$\n" + "\\begin{aligned}\n" + "a = b + c \\\\\n" + "$5 = d + e\n" + "\\end{aligned}\n" + "$$" + ) + expected = ( + '' + "a=b+c" + '$5=d+e' + '\n\\begin{aligned}\na = b + c \\\\\n$5 = d + e\n\\end{aligned}\n' + "" + ) + + self._assert_conversion(markdown_text, expected) + + def test_inline_math_with_dollar_inside(self): + """ + Ensure a $$ … $$ inline that contains an internal '$' (e.g. inside + \\text{}) is parsed correctly. With the old BLOCK_PATTERN the first '$' + prematurely terminates the match so the delimiters remain. + """ + markdown_text = "Test this $$\\text{Cost = 1.00 $USD$}$$" + expected = ( + "

    Test this " + '' + "Cost = 1.00 $USD$" + '\\text{Cost = 1.00 $USD$}' + "

    \n" + ) + + self._assert_conversion(markdown_text, expected) + + def test_multiple_math_expressions(self): + """Test multiple math expressions in one document""" + + markdown_text = "First: $$a + b$$, then $$c \\times d$$, finally $$e^f$$." + expected = ( + "

    First: " + 'a+b' + 'a + b' + ", then " + 'c×d' + 'c \\times d' + ", finally " + 'ef' + 'e^f' + ".

    \n" + ) + + self._assert_conversion(markdown_text, expected) + + def test_mixed_inline_and_block(self): + """Test document with both inline and block math""" + + markdown_text = ( + "This is inline math: $$a = b$$\n\n" + "And this is block math:\n\n" + "$$\\sum_{i=1}^{n} x_i = y$$\n\n" + "Back to text with more inline: $$z^2$$" + ) + expected = ( + "

    This is inline math: " + 'a=b' + 'a = b' + "

    \n" + "

    And this is block math:

    \n" + '' + "i=1" + "nxi=y" + '\sum_{i=1}^{n} x_i = y' # noqa W605 + "" + "

    Back to text with more inline: " + 'z2' + 'z^2' + "

    \n" + ) + + self._assert_conversion(markdown_text, expected) + + def test_no_math_content(self): + """Test that regular markdown without math still works""" + + markdown_text = "This is just *regular* markdown with **bold** text." + expected = "

    This is just regular markdown with bold text.

    \n" + + self._assert_conversion(markdown_text, expected) + + def test_simple_inline_math(self): + """Test simple inline math expression""" + + markdown_text = "The variable $$x$$ is unknown." + expected = ( + "

    The variable " + 'x' + 'x' + " is unknown.

    \n" + ) + + self._assert_conversion(markdown_text, expected) + + def test_simple_block_math(self): + """Test simple block math expression""" + + markdown_text = "$$y = mx + b$$" + expected = ( + '' + "y=mx+b" + 'y = mx + b' + "" + ) + + self._assert_conversion(markdown_text, expected) + + +class TestTexMathPlugin(TexMathTestMixin, unittest.TestCase): + """Test direct markdown conversion: markdown → HTML+MathML""" + + def _assert_conversion(self, markdown_text: str, expected: str): + """Test direct markdown to HTML+MathML conversion""" + result = render_markdown(markdown_text) + self.assertEqual(result, expected) + + +class TestTexMathPluginRoundtrip(TexMathTestMixin, unittest.TestCase): + """Test full roundtrip: markdown → HTML+MathML → Pydantic → string""" + + maxDiff = None + + def _assert_conversion(self, markdown_text: str, expected: str): + """Test full roundtrip conversion via Pydantic objects""" + result = render_markdown(markdown_text) + + # Parse to Pydantic objects and back to string + parsed = ElementTreeBase.from_string(result) + roundtrip_result = ( + "".join(e.to_xml_string().strip() for e in parsed) + if isinstance(parsed, list) + else parsed.to_xml_string().strip() + ) + self.assertEqual( + roundtrip_result.replace("\n", "").strip(), + expected.replace("\n", "").strip(), + ) diff --git a/contentcuration/contentcuration/utils/assessment/__init__.py b/contentcuration/contentcuration/utils/assessment/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/contentcuration/contentcuration/utils/assessment/base.py b/contentcuration/contentcuration/utils/assessment/base.py new file mode 100644 index 0000000000..0f668920a0 --- /dev/null +++ b/contentcuration/contentcuration/utils/assessment/base.py @@ -0,0 +1,395 @@ +import hashlib +import json +import logging +import os +import re +import zipfile +from abc import ABC +from abc import abstractmethod +from io import BytesIO +from tempfile import NamedTemporaryFile +from tempfile import 
def resize_image(image_content, width, height):
    """Resize encoded image bytes to ``width`` x ``height``, keeping the format.

    Args:
        image_content: Raw bytes of the encoded source image.
        width: Target width; converted with ``int()`` before resizing.
        height: Target height; converted with ``int()`` before resizing.

    Returns:
        The bytes of the resized image saved in the source image's format,
        or ``None`` if any step fails. Failures are logged as warnings
        rather than raised, so callers must handle ``None``.
    """
    try:
        with Image.open(BytesIO(image_content)) as source:
            # Capture the format first: it is read from the opened source,
            # not from the resized copy.
            source_format = source.format
            target_size = (int(width), int(height))
            scaled = source.resize(target_size, Image.LANCZOS)
            output = BytesIO()
            scaled.save(output, format=source_format)
            return output.getvalue()
    except Exception as e:
        # Deliberate best-effort: report and signal failure with None.
        logging.warning(f"Error resizing image: {str(e)}")
        return None
def write_to_zipfile(self, zf, filepath, content):
    """
    Write ``content`` into the open archive ``zf`` under ``filepath`` with fixed metadata.

    This method is a copy of the write_file_to_zip_with_neutral_metadata function
    from ricecooker. The comment, date_time, and compress_type are read from class
    attributes so that Perseus can override them. This can be updated in future when
    we have a good way to avoid rebuilding perseus files, unless needed.

    Args:
        zf: An open, writable ``zipfile.ZipFile``.
        filepath: Archive member name; backslashes are normalised to forward slashes.
        content: The data to store for this member.
    """
    # Archive member names always use forward slashes, whatever the host OS produced.
    entry = zipfile.ZipInfo(filepath.replace("\\", "/"), date_time=self.ZIP_DATE_TIME)
    entry.create_system = 0
    entry.compress_type = self.ZIP_COMPRESS_TYPE
    entry.comment = self.ZIP_COMMENT
    zf.writestr(entry, content)
+ ) + full_path = os.path.join(self.tempdir, filepath) + if os.path.exists(full_path): + return + os.makedirs(os.path.dirname(full_path), exist_ok=True) + with open(full_path, "wb") as f: + f.write(content) + self.files_to_write.append(full_path) + + def _add_original_image(self, checksum, filename, new_file_path): + """Extract original image handling""" + with storage.open( + models.generate_object_storage_name(checksum, filename), "rb" + ) as imgfile: + original_content = imgfile.read() + self.add_file_to_write(os.path.join(new_file_path, filename), original_content) + + def _get_similar_image(self, filename, width, height): + if filename not in self.resized_images_map: + self.resized_images_map[filename] = {} + return None + if (width, height) in self.resized_images_map[filename]: + return self.resized_images_map[filename][(width, height)] + + for key, resized_image in self.resized_images_map[filename].items(): + if ( + abs(key[0] - width) / width < 0.01 + and abs(key[1] - height) / height < 0.01 + ): + return resized_image + + def _resize_image(self, checksum, ext, filename, width, height, new_file_path): + with storage.open( + models.generate_object_storage_name(checksum, filename), + "rb", + ) as imgfile: + original_content = imgfile.read() + + resized_content = resize_image(original_content, width, height) + + if not resized_content: + logging.warning(f"Failed to resize image {filename}. 
Using original image.") + return + resized_checksum = get_resized_image_checksum(resized_content) + + new_img_ref = f"{resized_checksum}{ext}" + self.resized_images_map[filename][(width, height)] = new_img_ref + self.add_file_to_write( + os.path.join(new_file_path, new_img_ref), resized_content + ) + return new_img_ref + + def _process_single_image( + self, filename, checksum, ext, width, height, new_file_path + ): + if width is None and height is None: + # No resizing needed, just add original + self._add_original_image(checksum, filename, new_file_path) + return filename + + # Try to get similar or create resized image + similar_image = self._get_similar_image(filename, width, height) + if similar_image: + return similar_image + + resized_image = self._resize_image( + checksum, ext, filename, width, height, new_file_path + ) + return resized_image or filename + + def _replace_filename_in_match( + self, content, img_match, old_filename, new_filename + ): + """Extract filename replacement logic""" + start, end = img_match.span() + old_match = content[start:end] + new_match = old_match.replace(old_filename, new_filename) + return content[:start] + new_match + content[end:] + + def _is_valid_image_filename(self, filename): + checksum, ext = os.path.splitext(filename) + + if not ext: + logging.warning( + "While publishing channel `{}` a filename with no extension was encountered: `{}`".format( + self.channel_id, filename + ) + ) + return False + + try: + int(checksum, 16) # Validate hex checksum + return True + except ValueError: + logging.warning( + "while publishing channel `{}` a filename with an improper checksum was encountered: `{}`".format( + self.channel_id, filename + ) + ) + if os.environ.get("BRANCH_ENVIRONMENT", "") != "master": + raise + return False + + def process_image_strings(self, content): + new_file_path = self.get_image_file_path() + new_image_path = f"{self.get_image_ref_prefix()}{new_file_path}" + image_list = [] + processed_files = [] + for 
img_match in re.finditer(image_pattern, content): + # Add any image files that haven't been written to the zipfile + filename = img_match.group(1) + width = float(img_match.group(2)) if img_match.group(2) else None + height = float(img_match.group(3)) if img_match.group(3) else None + checksum, ext = os.path.splitext(filename) + + if not self._is_valid_image_filename(filename): + continue + + if width == 0 or height == 0: + # Can't resize an image to 0 width or height, so just ignore. + continue + + processed_filename = self._process_single_image( + filename, checksum, ext, width, height, new_file_path + ) + processed_files.append( + (img_match, filename, processed_filename, width, height) + ) + + # Process matches in reverse order to avoid index mismatch when modifying content + for img_match, filename, processed_filename, width, height in reversed( + processed_files + ): + content = self._replace_filename_in_match( + content, img_match, filename, processed_filename + ) + if width is not None and height is not None: + image_list.append( + {"name": processed_filename, "width": width, "height": height} + ) + + content = content.replace( + f"${exercises.CONTENT_STORAGE_PLACEHOLDER}", new_image_path + ) + return content, image_list + + def _process_content(self, content): + """ + Process the content to handle images. 
+ + Args: + content: The content string to process + + Returns: + tuple: Processed content and list of image data + """ + return self.process_image_strings(content) + + def _sort_by_order(self, items, item_type): + try: + return sorted(items, key=lambda x: x.get("order")) + except TypeError: + logging.error(f"Unable to sort {item_type}, leaving unsorted.") + return items + + def _process_answers(self, assessment_item): + answer_data = json.loads(assessment_item.answers) + processed_answers = [] + + for answer in answer_data: + if answer["answer"]: + if isinstance(answer["answer"], str): + (answer["answer"], answer_images,) = self._process_content( + answer["answer"], + ) + answer["images"] = answer_images + + processed_answers.append(answer) + + return self._sort_by_order(processed_answers, "answers") + + def _process_hints(self, assessment_item): + hint_data = json.loads(assessment_item.hints) + + for hint in hint_data: + hint["hint"], hint_images = self._process_content( + hint["hint"], + ) + hint["images"] = hint_images + + return self._sort_by_order(hint_data, "hints") + + def process_assessment_item(self, assessment_item): + # Process question + question, question_images = self._process_content( + assessment_item.question, + ) + + # Process answers and hints + processed_answers = self._process_answers(assessment_item) + processed_hints = self._process_hints(assessment_item) + + new_file_path = self.get_image_file_path() + new_image_path = f"{exercises.IMG_PLACEHOLDER}/{new_file_path}" + context = { + "question": question, + "question_images": question_images, + "answers": processed_answers, + "multiple_select": assessment_item.type == exercises.MULTIPLE_SELECTION, + "raw_data": assessment_item.raw_data.replace( + exercises.CONTENT_STORAGE_PLACEHOLDER, new_image_path + ), + "hints": processed_hints, + "randomize": assessment_item.randomize, + } + filepath, file_content = self.create_assessment_item(assessment_item, context) + self.add_file_to_write(filepath, 
def _create_zipfile(self):
    """
    Zip every staged file and attach the archive to the content node.

    Each path in ``self.files_to_write`` is stored in the archive relative to
    ``self.tempdir``. Any existing files on the node with this generator's preset
    are deleted, then a new ``models.File`` is created from the archive.
    """
    filename = "{0}.{ext}".format(self.ccnode.title, ext=self.file_format)
    with NamedTemporaryFile(suffix="zip") as tempf:
        # The archive is written through a separate handle opened by name.
        with zipfile.ZipFile(tempf.name, "w") as zf:
            for file_path in self.files_to_write:
                with open(file_path, "rb") as f:
                    self.write_to_zipfile(
                        zf,
                        os.path.relpath(file_path, self.tempdir),
                        f.read(),
                    )
        # tempf's own handle was never written to, so tempf.tell() would always
        # report 0; ask the filesystem for the size of the finished archive instead.
        file_size = os.path.getsize(tempf.name)

        self.ccnode.files.filter(preset_id=self.preset).delete()

        # Open the archive in a context manager so the read handle is closed
        # once the File object has been created.
        with open(tempf.name, "rb") as archive:
            assessment_file_obj = models.File.objects.create(
                file_on_disk=File(archive, name=filename),
                contentnode=self.ccnode,
                file_format_id=self.file_format,
                preset_id=self.preset,
                original_filename=filename,
                file_size=file_size,
                uploaded_by_id=self.user_id,
            )
        logging.debug(
            "Created exercise for {0} with checksum {1}".format(
                self.ccnode.title, assessment_file_obj.checksum
            )
        )
def _convert(latex, inline=True):
    """
    Convert a LaTeX expression to a MathML string carrying the source as an annotation.

    The children of the generated ``<math>`` element are moved into a semantics
    element, followed by an ``application/x-tex`` annotation containing the
    original LaTeX.

    Args:
        latex: The LaTeX source of the expression.
        inline: Render with display "inline" when True, "block" otherwise.

    Returns:
        str: The serialized MathML markup.
    """
    # Remove the namespace declaration for cleaner output
    markup = convert(latex, display="inline" if inline else "block").replace(
        ' xmlns="http://www.w3.org/1998/Math/MathML"', ""
    )
    # By default latex2mathml encodes operators that don't need to be encoded
    # so we parse it with ElementTree and turn it back into a string here for consistency.
    math_element = ET.fromstring(markup)

    # Create LaTeX annotation
    latex_annotation_element = Annotation(
        encoding="application/x-tex", children=[latex]
    ).to_element()

    semantics_element = Semantics().to_element()
    # Iterate over a snapshot: removing children from an element while iterating
    # over that same element skips every other child.
    for child in list(math_element):
        math_element.remove(child)
        semantics_element.append(child)
    semantics_element.append(latex_annotation_element)
    math_element.append(semantics_element)

    return ET.tostring(math_element, encoding="unicode")
+ """ + # Register parsing rules + md.inline.ruler.before("escape", "math_inline", math_inline_func) + md.block.ruler.before("fence", "math_block", math_block_func) + + # Register renderers + md.add_render_rule("math_inline", render_math_inline) + md.add_render_rule("math_block", render_math_block) + + +md = MarkdownIt("gfm-like").disable("linkify").use(texmath_to_mathml_plugin) + + +def render_markdown(markdown): + return md.render(markdown) diff --git a/contentcuration/contentcuration/utils/assessment/perseus.py b/contentcuration/contentcuration/utils/assessment/perseus.py new file mode 100644 index 0000000000..7ba4e1ce6f --- /dev/null +++ b/contentcuration/contentcuration/utils/assessment/perseus.py @@ -0,0 +1,131 @@ +import json +import re +import zipfile + +from django.core.files.storage import default_storage as storage +from django.template.loader import render_to_string +from le_utils.constants import exercises +from le_utils.constants import file_formats +from le_utils.constants import format_presets + +from contentcuration import models +from contentcuration.utils.assessment.base import ExerciseArchiveGenerator +from contentcuration.utils.parser import extract_value + + +_DOUBLE_DOLLAR_RE = re.compile(r"\$\$(.+?)\$\$", flags=re.DOTALL) + + +class PerseusExerciseGenerator(ExerciseArchiveGenerator): + """ + Exercise zip generator for Perseus format exercises. 
+ """ + + ZIP_DATE_TIME = (2013, 3, 14, 1, 59, 26) + ZIP_COMPRESS_TYPE = zipfile.ZIP_STORED + ZIP_COMMENT = "Perseus file generated during export process".encode() + + file_format = file_formats.PERSEUS + preset = format_presets.EXERCISE + + TEMPLATE_MAP = { + exercises.MULTIPLE_SELECTION: "perseus/multiple_selection.json", + exercises.SINGLE_SELECTION: "perseus/multiple_selection.json", + exercises.INPUT_QUESTION: "perseus/input_question.json", + exercises.PERSEUS_QUESTION: "perseus/perseus_question.json", + "true_false": "perseus/multiple_selection.json", + } + + def _write_raw_perseus_image_files(self, assessment_item): + # For raw perseus JSON questions, the files must be + # specified in advance. + + # Files have been prefetched when the assessment item was + # queried, so take advantage of that. + files = sorted(assessment_item.files.all(), key=lambda x: x.checksum) + image_files = filter( + lambda x: x.preset_id == format_presets.EXERCISE_IMAGE, files + ) + graphie_files = filter( + lambda x: x.preset_id == format_presets.EXERCISE_GRAPHIE, files + ) + images_path = self.get_image_file_path() + for image in image_files: + image_name = "{}/{}.{}".format( + images_path, image.checksum, image.file_format_id + ) + with storage.open( + models.generate_object_storage_name(image.checksum, str(image)), + "rb", + ) as content: + self.add_file_to_write(image_name, content.read()) + + for image in graphie_files: + svg_name = "{}/{}.svg".format(images_path, image.original_filename) + json_name = "{}/{}-data.json".format(images_path, image.original_filename) + with storage.open( + models.generate_object_storage_name(image.checksum, str(image)), + "rb", + ) as content: + content = content.read() + # in Python 3, delimiter needs to be in bytes format + content = content.split(exercises.GRAPHIE_DELIMITER.encode("ascii")) + if len(content) != 2: + raise ValueError( + f"Graphie file '{image.original_filename}' " + f"missing delimiter {exercises.GRAPHIE_DELIMITER!r}" + ) + 
def create_assessment_item(self, assessment_item, processed_data):
    """
    Render one assessment item to its Perseus JSON file.

    Args:
        assessment_item: The assessment item being exported; its ``type`` selects
            the template and its ``assessment_id`` names the output file.
        processed_data: Template context built from the assessment item.

    Returns:
        tuple: ``(filename, content)`` where content is the UTF-8 encoded rendering.

    Raises:
        TypeError: If the item's type has no entry in ``TEMPLATE_MAP``.
    """
    item_type = assessment_item.type
    template_name = self.TEMPLATE_MAP.get(item_type)
    if not template_name:
        raise TypeError(
            f"Unrecognized question type on item {assessment_item.assessment_id}: {assessment_item.type}"
        )

    # Input questions get their answers passed through _process_input_answers first.
    context = processed_data
    if item_type == exercises.INPUT_QUESTION:
        context = self._process_input_answers(processed_data)

    rendered = render_to_string(template_name, context)
    return f"{assessment_item.assessment_id}.json", rendered.encode("utf-8", "ignore")
a/contentcuration/contentcuration/utils/assessment/qti/__init__.py b/contentcuration/contentcuration/utils/assessment/qti/__init__.py new file mode 100644 index 0000000000..c8cb0afb95 --- /dev/null +++ b/contentcuration/contentcuration/utils/assessment/qti/__init__.py @@ -0,0 +1,6 @@ +from .base import ElementTreeBase + + +__all__ = [ + "ElementTreeBase", +] diff --git a/contentcuration/contentcuration/utils/assessment/qti/archive.py b/contentcuration/contentcuration/utils/assessment/qti/archive.py new file mode 100644 index 0000000000..4a29f20c84 --- /dev/null +++ b/contentcuration/contentcuration/utils/assessment/qti/archive.py @@ -0,0 +1,284 @@ +import base64 +from typing import Any +from typing import Dict +from typing import List +from typing import Tuple + +from le_utils.constants import exercises +from le_utils.constants import format_presets + +from contentcuration.utils.assessment.base import ExerciseArchiveGenerator +from contentcuration.utils.assessment.markdown import render_markdown +from contentcuration.utils.assessment.qti.assessment_item import AssessmentItem +from contentcuration.utils.assessment.qti.assessment_item import CorrectResponse +from contentcuration.utils.assessment.qti.assessment_item import ItemBody +from contentcuration.utils.assessment.qti.assessment_item import OutcomeDeclaration +from contentcuration.utils.assessment.qti.assessment_item import ResponseDeclaration +from contentcuration.utils.assessment.qti.assessment_item import ResponseProcessing +from contentcuration.utils.assessment.qti.assessment_item import Value +from contentcuration.utils.assessment.qti.base import ElementTreeBase +from contentcuration.utils.assessment.qti.constants import BaseType +from contentcuration.utils.assessment.qti.constants import Cardinality +from contentcuration.utils.assessment.qti.constants import Orientation +from contentcuration.utils.assessment.qti.constants import ResourceType +from contentcuration.utils.assessment.qti.constants import 
def hex_to_qti_id(hex_string):
    """
    Encode a 32 digit hex to a 22 character base64 encoded id and a K prefix.

    The hex string is decoded to bytes, encoded with the URL-safe base64
    alphabet, and stripped of its trailing ``=`` padding.
    """
    raw = bytes.fromhex(hex_string)
    encoded = base64.urlsafe_b64encode(raw).decode("ascii")
    return "K" + encoded.rstrip("=")
def _create_html_content_from_text(self, text: str) -> FlowContentList:
    """
    Convert markdown text to a list of QTI HTML flow content elements.

    Args:
        text: Markdown source; blank or whitespace-only text produces no content.

    Returns:
        A list of parsed elements (possibly empty). The result is always a list
        so that callers can pass it as ``children`` or append further elements.
    """
    if not text.strip():
        return []
    markup = render_markdown(text)
    parsed = ElementTreeBase.from_string(markup)
    # NOTE(review): from_string appears able to return a single element rather
    # than a list (the markdown roundtrip tests check for both) - confirm.
    # Wrapping the single-element case keeps the declared list return type.
    if isinstance(parsed, list):
        return parsed
    return [parsed]
Dict[str, Any] + ) -> Tuple[Div, ResponseDeclaration]: + prompt = self._create_html_content_from_text(processed_data["question"]) + interaction_element = TextEntryInteraction( + response_identifier="RESPONSE", + expected_length=50, # Default expected length + placeholder_text="Enter your answer here", + ) + # Text entry interaction is an inline element, so wrap it in a paragraph tag. + interaction_element = P(children=[interaction_element]) + # prompt is already a list of elements, so just append the interaction to it. + prompt.append(interaction_element) + interaction = Div(children=prompt) + + correct_values = [ + Value(value=answer["answer"]) + for answer in processed_data["answers"] + if answer["correct"] + ] + + response_declaration = ResponseDeclaration( + identifier="RESPONSE", + cardinality=Cardinality.MULTIPLE + if processed_data["multiple_select"] + else Cardinality.SINGLE, + base_type=BaseType.STRING, + correct_response=CorrectResponse(value=correct_values) + if correct_values + else None, + ) + return interaction, response_declaration + + def _qti_item_filepath(self, assessment_id): + return f"items/{assessment_id}.xml" + + def create_assessment_item( + self, assessment_item, processed_data: Dict[str, Any] + ) -> tuple[str, bytes]: + """Create QTI assessment item XML.""" + + # Skip Perseus questions as they can't be easily converted + if assessment_item.type == exercises.PERSEUS_QUESTION: + raise ValueError( + f"Perseus questions are not supported in QTI format: {assessment_item.assessment_id}" + ) + + if assessment_item.type in choice_interactions: + ( + interaction, + response_declaration, + ) = self._create_choice_interaction_and_response(processed_data) + elif assessment_item.type in text_entry_interactions: + ( + interaction, + response_declaration, + ) = self._create_text_entry_interaction_and_response(processed_data) + else: + raise ValueError(f"Unsupported question type: {assessment_item.type}") + + # Create item body with the interaction + 
def _create_manifest_resources(self) -> List[Resource]:
    """
    Build one manifest resource per stored QTI item.

    Each resource lists the item's own XML file first, followed by one file
    entry per dependency reported by the item (images, etc.).
    """

    def build_resource(qti_item):
        # Query dependencies before computing the item's own path.
        dependencies = qti_item.get_file_dependencies()
        item_path = self._qti_item_filepath(qti_item.identifier)
        manifest_files = [ManifestFile(href=item_path)]
        manifest_files.extend(ManifestFile(href=dep) for dep in dependencies)
        return Resource(
            identifier=qti_item.identifier,
            type_=ResourceType.ASSESSMENT_ITEM.value,
            href=item_path,
            files=manifest_files,
        )

    return [build_resource(qti_item) for qti_item in self.qti_items]
+ self.files_to_write = sorted(self.files_to_write) diff --git a/contentcuration/contentcuration/utils/assessment/qti/assessment_item.py b/contentcuration/contentcuration/utils/assessment/qti/assessment_item.py new file mode 100644 index 0000000000..830044ae79 --- /dev/null +++ b/contentcuration/contentcuration/utils/assessment/qti/assessment_item.py @@ -0,0 +1,237 @@ +from typing import Annotated +from typing import List +from typing import Optional +from typing import Union + +from annotated_types import Len +from pydantic import AnyUrl +from pydantic import Field +from pydantic import model_validator +from pydantic import PositiveInt + +from contentcuration.utils.assessment.qti.base import BaseSequence +from contentcuration.utils.assessment.qti.base import QTIBase +from contentcuration.utils.assessment.qti.base import TextType +from contentcuration.utils.assessment.qti.constants import BaseType +from contentcuration.utils.assessment.qti.constants import Cardinality +from contentcuration.utils.assessment.qti.constants import ExternalScored +from contentcuration.utils.assessment.qti.constants import View +from contentcuration.utils.assessment.qti.fields import BCP47Language +from contentcuration.utils.assessment.qti.fields import LocalHrefPath +from contentcuration.utils.assessment.qti.fields import QTIIdentifier +from contentcuration.utils.assessment.qti.html import BlockContentElement +from contentcuration.utils.assessment.qti.interaction_types.base import BlockInteraction + + +class Value(QTIBase): + """ + Represents a single value within a default value, correct response, + or other value container. + + For record values, both the field-identifier and base-type attributes + are required to identify which field of the record this value belongs to + and what type that field is. 
+ + For non-record values (single, multiple, ordered cardinality), these + attributes are optional and typically not needed as the base-type is + determined by the parent variable declaration. + """ + + value: TextType # The actual value content + field_identifier: Optional[QTIIdentifier] = None # Required only for record values + base_type: Optional[BaseType] = None # Required only for record values + + +ValueType = Annotated[List[Value], Len(min_length=1)] + + +class CorrectResponse(QTIBase): + """Defines the correct response for the interaction.""" + + value: ValueType = Field(default_factory=list) + + +class DefaultValue(QTIBase): + """ + Defines the default value for a variable. Contains one or more + value elements depending on the cardinality of the variable. + """ + + value: ValueType = Field(default_factory=list) + # Human readable interpretation of the default value + interpretation: Optional[str] = None + + +def _validate_value(self, attribute_name="default_value"): + attr_value = getattr(self, attribute_name) + if attr_value is not None: + if self.cardinality == Cardinality.SINGLE: + # Single cardinality should have exactly one value + if len(attr_value.value) > 1: + raise ValueError( + f"Single cardinality cannot have multiple {attribute_name.replace('_', ' ')}s" + ) + elif self.cardinality == Cardinality.RECORD: + # Record cardinality requires field identifiers + for value in attr_value.value: + if not value.field_identifier: + raise ValueError( + f"Record cardinality requires field_identifier in {attribute_name.replace('_', ' ')}" + ) + if not value.base_type: + raise ValueError( + f"Record cardinality requires base_type in {attribute_name.replace('_', ' ')}" + ) + + +class OutcomeDeclaration(QTIBase): + """ + QTI outcome declaration defines an outcome variable, which represents the + result of response processing. Outcomes are typically scores but can also + be other results such as feedback identifiers or completion status. 
class OutcomeDeclaration(QTIBase):
    """
    QTI outcome declaration defines an outcome variable, which represents the
    result of response processing. Outcomes are typically scores but can also
    be other results such as feedback identifiers or completion status.

    After construction the optional default value is checked against the
    declared cardinality by validate_cardinality_compatibility.
    """

    # Identifier of the outcome variable (required)
    identifier: QTIIdentifier
    # Defaults to single cardinality when not given
    cardinality: Cardinality = Cardinality.SINGLE
    base_type: Optional[BaseType] = None
    view: Optional[View] = None
    # Note: this interpretation is a URL, unlike the free-text
    # long_interpretation below
    interpretation: Optional[AnyUrl] = None
    long_interpretation: Optional[str] = None
    normal_maximum: Optional[PositiveInt] = None
    normal_minimum: Optional[float] = None
    mastery_value: Optional[float] = None
    external_scored: Optional[ExternalScored] = None
    variable_identifier_ref: Optional[str] = None
    default_value: Optional[DefaultValue] = None

    @model_validator(mode="after")
    def validate_cardinality_compatibility(self):
        """
        Run the shared cardinality check on ``default_value``.

        Returns the instance itself; a ValueError raised by _validate_value
        propagates to the caller.
        """
        _validate_value(self)
        return self


class ItemBody(QTIBase, BaseSequence):
    """Contains the content of the assessment item"""

    # Block level interactions and block level HTML content; empty by default
    children: List[Union[BlockInteraction, BlockContentElement]] = Field(
        default_factory=list
    )
class ContextDeclaration(QTIBase):
    """
    QTI context declaration defines a 'contextual' variable with global scope to
    an assessment item. Context variables provide contextual information to
    template processing and response processing, such as candidate information,
    test information, and environment information.

    After construction the optional default value is checked against the
    declared cardinality by validate_cardinality_compatibility.
    """

    identifier: QTIIdentifier
    # Required here: unlike OutcomeDeclaration there is no default cardinality
    cardinality: Cardinality
    base_type: Optional[BaseType] = None
    default_value: Optional[DefaultValue] = None

    @model_validator(mode="after")
    def validate_cardinality_compatibility(self):
        """
        Run the shared cardinality check on ``default_value``.

        Returns the instance itself; a ValueError raised by _validate_value
        propagates to the caller.
        """
        _validate_value(self)
        return self


class MapEntry(QTIBase):
    """Entry in a mapping that maps a specific value to a score"""

    # Key (usually an identifier)
    map_key: str
    # Value to map
    mapped_value: float
    # Whether string comparison is case sensitive
    case_sensitive: bool = False


class Mapping(QTIBase):
    """Maps response values to scores for partial credit scoring"""

    # Individual key -> score entries; empty by default
    map_entries: List[MapEntry] = Field(default_factory=list)
    # Score for responses not explicitly mapped
    default_value: float = 0.0
    # Lower bound for mapping results
    lower_bound: Optional[float] = None
    # Upper bound for mapping results
    upper_bound: Optional[float] = None


class AreaMapEntry(QTIBase):
    """Entry in an area mapping that maps a specific area to a score"""

    # Shape of the area (rect, circle, poly, default)
    # NOTE(review): typed as a free string, so the listed shapes are not
    # enforced by this model - confirm whether that is intended.
    shape: str
    # Coordinates defining the area
    coords: str
    # Score for responses in this area
    mapped_value: float


class AreaMapping(QTIBase):
    """Maps areas to scores for graphical interactions"""

    # Individual area -> score entries; empty by default
    area_map_entries: List[AreaMapEntry] = Field(default_factory=list)
    # Score for responses not in any defined area
    default_value: float = 0.0
    # Lower bound for mapping results
    lower_bound: Optional[float] = None
    # Upper bound for mapping results
    upper_bound: Optional[float] = None
class ResponseDeclaration(QTIBase):
    """
    QTI response declaration defines a response variable and optionally its
    correct response value and/or mapping. Response variables capture candidate
    interactions with the assessment item's interactions and are used in response
    processing to determine outcomes.

    After construction the optional correct response is checked against the
    declared cardinality by validate_cardinality_compatibility.
    """

    identifier: QTIIdentifier
    cardinality: Cardinality
    # Required here, whereas the outcome and context declarations leave it optional
    base_type: BaseType
    correct_response: Optional[CorrectResponse] = None
    mapping: Optional[Mapping] = None
    area_mapping: Optional[AreaMapping] = None

    @model_validator(mode="after")
    def validate_cardinality_compatibility(self):
        """
        Run the shared cardinality check on ``correct_response``.

        Returns the instance itself; a ValueError raised by _validate_value
        propagates to the caller.
        """
        _validate_value(self, "correct_response")
        return self


class ResponseProcessing(QTIBase):
    """Represents response processing rules or template reference"""

    # URI reference to a response processing template
    template: Optional[AnyUrl] = None
    # Optional URL that resolves to the template - we additionally enforce that this be local
    # although this is not required by the QTI spec
    template_location: Optional[LocalHrefPath] = None
    # rules deliberately not implemented yet


class AssessmentItem(QTIBase):
    """
    Represents a QTI assessment item

    The namespace fields use a double underscore where the XML attribute
    name contains a colon (for example ``xmlns__xsi``).
    """

    # XML namespace and schema location attributes, fixed by default
    xmlns: str = "http://www.imsglobal.org/xsd/imsqtiasi_v3p0"
    xmlns__xsi: str = "http://www.w3.org/2001/XMLSchema-instance"
    xsi__schemaLocation: str = "http://www.imsglobal.org/xsd/imsqtiasi_v3p0 https://purl.imsglobal.org/spec/qti/v3p0/schema/xsd/imsqti_asiv3p0p1_v1p0.xsd"
    # Required descriptive attributes of the item
    identifier: QTIIdentifier
    title: str
    label: Optional[str] = None
    adaptive: bool = False
    time_dependent: Optional[bool] = None
    # Required; validated as a BCP47 language tag
    language: BCP47Language
    # Tool metadata, fixed by default
    tool_name: str = "kolibri"
    tool_version: str = "0.1"

    # Child elements; every list is empty by default
    context_declaration: List[ContextDeclaration] = Field(default_factory=list)
    response_declaration: List[ResponseDeclaration] = Field(default_factory=list)
    outcome_declaration: List[OutcomeDeclaration] = Field(default_factory=list)
    item_body: Optional[ItemBody] = None
    response_processing: Optional[ResponseProcessing] = None
class TextNode(BaseModel):
    """Class to represent text nodes within XML elements"""

    text: str


class XMLElement(BaseModel, ABC):
    """
    Base class for XML elements.

    Every pydantic field of a subclass is serialized by ``to_element``:
    fields holding XMLElement / TextNode instances (or lists of them) become
    child content, every other non-None field becomes an XML attribute.
    """

    # Pydantic configuration
    model_config = ConfigDict(
        # Prevent extra fields
        extra="forbid",
        validate_assignment=True,
        # Prevent mutations to ensure immutability
        frozen=True,
    )

    # Private attributes (not included in Pydantic fields)
    # Values of src/href attributes and srcset URLs found while serializing
    _file_dependencies: Set[str] = PrivateAttr(default_factory=set)
    # Cached result of to_element(); None until the first call
    _element: ET.Element = PrivateAttr(default=None)

    @classmethod
    def element_name(cls):
        """Return the XML tag name: the lower-cased class name."""
        return cls.__name__.lower()

    def to_element(self) -> ET.Element:  # noqa: C901
        """
        Build the ElementTree element for this model, or return the cached one.

        As a side effect the file dependencies of this element and of all its
        child elements are collected into ``_file_dependencies``.

        Raises:
            ValueError: if a list field holds anything other than XMLElement
                or TextNode instances.
        """
        # Compare against None explicitly: an Element without child elements
        # is falsy, so a plain truth test would never hit the cache for leaf
        # elements (and truth-testing an Element is deprecated).
        if self._element is not None:
            return self._element

        element = ET.Element(self.element_name())

        self._file_dependencies = set()

        # Add attributes based on pydantic fields
        for field_name in self.__class__.model_fields:

            value = getattr(self, field_name)

            # Skip None values
            if value is None:
                continue

            # Treat a single child like a one-item list of children
            if isinstance(value, (XMLElement, TextNode)):
                value = [value]

            if isinstance(value, list):
                if all(isinstance(item, (XMLElement, TextNode)) for item in value):
                    for item in value:
                        if isinstance(item, XMLElement):
                            child_elements = item.to_element()
                            if not isinstance(child_elements, list):
                                child_elements = [child_elements]
                            for child_element in child_elements:
                                element.append(child_element)
                            self._file_dependencies |= item._file_dependencies
                        else:
                            # ElementTree has no text node type: text goes on
                            # the tail of the previous child, or on the
                            # element's own text when there is no child yet.
                            current_children = list(element)
                            if current_children:
                                current_children[-1].tail = (
                                    current_children[-1].tail or ""
                                ) + item.text
                            else:
                                element.text = (element.text or "") + item.text

                    continue
                raise ValueError(
                    "List types should only contain XMLElement or TextNodes"
                )

            elif isinstance(value, bool):
                # Serialized as lower-case "true" / "false"
                value = str(value).lower()

            elif isinstance(value, Enum):
                # Handle enum values
                value = value.value

            # Some attribute names are reserved Python keywords or Python builtins
            # to allow this, we allow a trailing underscore which we strip here.
            # All attributes use kebab-case, which we can't easily use as field names
            # so we encode them as snake_case and convert to kebab-case here.
            # Some attributes also include : which we encode as double underscore.
            attr_name = field_name.rstrip("_").replace("__", ":").replace("_", "-")

            # Set the attribute
            element.set(attr_name, str(value))

            if attr_name == "src" or attr_name == "href":
                self._file_dependencies.add(value)
            elif attr_name == "srcset":
                entries = re.findall(srcset_entry_pattern, value)
                for entry in entries:
                    # Each entry is a tuple of regex groups; the first is the URL
                    url = entry[0].strip()
                    self._file_dependencies.add(url)

        self._element = element

        return self._element

    def to_xml_string(self) -> str:
        """Convert to XML string"""
        element = self.to_element()
        return ET.tostring(element, encoding="unicode")

    def get_file_dependencies(self) -> List[str]:
        """Return the collected file dependencies as a list (order not defined)."""
        # Ensure the element has been processed so that the file dependencies are collected.
        self.to_element()
        return list(self._file_dependencies)
class QTIBase(XMLElement):
    """
    Base class for QTI elements: the element name is derived from the class
    name by convention, so subclasses do not have to spell it out.
    """

    @classmethod
    def element_name(cls):
        """Return the class name in kebab-case, prefixed with ``qti-``."""
        # Insert a hyphen wherever a lower-case letter meets an upper-case one
        kebab = re.sub(r"(?<=[a-z])(?=[A-Z])", "-", cls.__name__).lower()
        return f"qti-{kebab}"


def coerce_str_to_model(element_type, value: Union[str, XMLElement]) -> XMLElement:
    """Wrap a plain string in ``element_type``; pass anything else through."""
    return element_type(text=value) if isinstance(value, str) else value


def generate_coerced_string_type(element_type):
    """
    Build an annotated type for ``element_type`` whose validation first
    turns plain strings into ``element_type`` instances.
    """
    coercer = partial(coerce_str_to_model, element_type)
    return Annotated[element_type, BeforeValidator(coercer)]


# A TextNode field that also accepts a bare string
TextType = generate_coerced_string_type(TextNode)


class BaseSequence(XMLElement):
    """
    Optional attributes shared by derived elements. Names that carry a
    trailing underscore are written here as ``id_``, ``class_`` and ``dir_``.
    """

    id_: Optional[str] = None
    class_: Optional[str] = None
    lang: Optional[BCP47Language] = None
    # We explicitly do not set the deprecated language value.
    label: Optional[str] = None
    # We explicitly do not set the base value.
    dir_: Optional[Dir] = None
# Pydantic's BaseModel Metaclass is only importable from an internal module,
# so we inspect the BaseSequence class to get its metaclass.
BaseSequenceMetaclass = type(BaseSequence)


class RegistryMeta(BaseSequenceMetaclass):
    """
    Metaclass that records every class it creates in a registry keyed by the
    class's element name, so a tag name can be resolved back to its class.
    """

    def __new__(mcs, name, bases, attrs):
        """
        Create the class and register it under its element name.

        Raises:
            ValueError: if a different class is already registered under the
                same element name.
        """
        cls = super().__new__(mcs, name, bases, attrs)

        # Create the registry lazily the first time a class is built
        if not hasattr(mcs, "_registry"):
            mcs._registry = {}

        element_name = cls.element_name()
        if element_name in mcs._registry and mcs._registry[element_name] is not cls:
            raise ValueError(
                f"Element name '{element_name}' already registered in {mcs.__name__}"
            )
        mcs._registry[element_name] = cls

        return cls

    @classmethod
    def _ensure_registry_complete(cls):
        """Ensure all HTML and MathML classes are registered"""
        if not hasattr(cls, "_registry_initialized"):
            # Import modules to trigger registration; done lazily inside the
            # method rather than at module import time
            from contentcuration.utils.assessment.qti import html, mathml  # noqa: F401

            cls._registry_initialized = True

    @classmethod
    def get_class_for_tag(cls, tag_name: str) -> Optional[Type]:
        """Get the registered class for a given tag name, or None if unknown"""
        cls._ensure_registry_complete()
        return getattr(cls, "_registry", {}).get(tag_name)


class ElementTreeBase(BaseSequence, metaclass=RegistryMeta):
    """Base class for elements that can be built back from parsed markup."""

    @classmethod
    def from_element(cls, element: ET.Element) -> "ElementTreeBase":
        """
        Build a model instance from an ElementTree element, recursively.

        Raises:
            ValueError: if no class is registered for the element's tag.
        """
        # Get the appropriate class for this tag
        target_class = type(cls).get_class_for_tag(element.tag)
        if target_class is None:
            raise ValueError(f"No registered class found for tag: {element.tag}")

        # Convert attributes to field data - Pydantic will handle type coercion
        field_data = {}
        for attr_name, attr_value in element.attrib.items():
            field_name = cls._attr_name_to_field_name(attr_name)
            field_data[field_name] = attr_value

        # Convert children and text
        children = cls._extract_children(element)
        if children:
            field_data["children"] = children

        return target_class(**field_data)

    @classmethod
    def _attr_name_to_field_name(cls, attr_name: str) -> str:
        """Convert attribute name to Python field name"""
        # kebab-case -> snake_case, : -> __
        field_name = attr_name.replace(":", "__").replace("-", "_")

        # Add trailing underscore for Python keywords and builtins
        if field_name in {"class", "for", "type", "id", "dir"}:
            field_name += "_"

        return field_name

    @classmethod
    def _extract_children(
        cls, element: ET.Element
    ) -> List[Union["ElementTreeBase", TextNode]]:
        """
        Extract child elements and text nodes from XML element.

        Whitespace-only text is dropped; any other text is kept unmodified.
        """
        children = []

        # Add initial text if present
        if element.text and element.text.strip():
            children.append(TextNode(text=element.text))

        # Process child elements
        for child_elem in element:
            children.append(cls.from_element(child_elem))
            # Add tail text after child element
            if child_elem.tail and child_elem.tail.strip():
                children.append(TextNode(text=child_elem.tail))

        return children

    @classmethod
    def from_string(cls, string: str) -> List["ElementTreeBase"]:
        """
        Parse markup string and return list of ElementTreeBase instances,
        one per top-level element in the markup.

        Raises:
            ValueError: if the markup is not well formed.
        """
        try:
            # Wrap in a root element to handle multiple top-level elements.
            # Without the wrapper a single top-level element would itself be
            # treated as the root and only its children would be returned.
            wrapped_markup = f"<root>{string}</root>"
            root = ET.fromstring(wrapped_markup)
            return [cls.from_element(child) for child in root]
        except ET.ParseError as e:
            raise ValueError(f"Invalid Markup: {e}") from e
# QTI Constants for Outcome Declarations


class Cardinality(Enum):
    """Cardinality of a declared variable."""

    MULTIPLE = "multiple"
    SINGLE = "single"
    ORDERED = "ordered"
    RECORD = "record"


class BaseType(Enum):
    """Base type of a declared variable or of a single value."""

    BOOLEAN = "boolean"
    DIRECTED_PAIR = "directedPair"
    DURATION = "duration"
    FILE = "file"
    FLOAT = "float"
    IDENTIFIER = "identifier"
    INTEGER = "integer"
    PAIR = "pair"
    POINT = "point"
    STRING = "string"
    URI = "uri"


class View(Enum):
    """Allowed values of a view attribute."""

    AUTHOR = "author"
    CANDIDATE = "candidate"
    PROCTOR = "proctor"
    SCORER = "scorer"
    TEST_CONSTRUCTOR = "testConstructor"
    TUTOR = "tutor"


class ExternalScored(Enum):
    """Allowed values of an external-scored attribute."""

    EXTERNAL_MACHINE = "externalMachine"
    HUMAN = "human"


class ShowHide(Enum):
    """Allowed values of a show/hide attribute."""

    SHOW = "show"
    HIDE = "hide"


class Dir(Enum):
    """Allowed values of a dir attribute."""

    LTR = "ltr"
    RTL = "rtl"
    AUTO = "auto"


class Format(Enum):
    """Allowed values of a format attribute."""

    PLAIN = "plain"
    PREFORMATTED = "preformatted"
    XHTML = "xhtml"


class Orientation(Enum):
    """Allowed values of an orientation attribute."""

    HORIZONTAL = "horizontal"
    VERTICAL = "vertical"


class ResourceType(Enum):
    """Enumeration for QTI resource types"""

    ASSESSMENT_TEST = "imsqti_test_xmlv3p0"
    ASSESSMENT_ITEM = "imsqti_item_xmlv3p0"
    RESPONSE_TEMPLATE = "imsqti_rptemplate_xmlv3p0"


def validate_bcp47_language(value: str) -> str:
    """
    Validate and normalize BCP47 language tag.

    Returns the tag as normalized by langcodes.

    Raises:
        ValueError: if the value is not a string, is empty, or is rejected
            by langcodes with a ValueError.
    """
    if not isinstance(value, str):
        raise ValueError(f"BCP47 language tag must be a string, got {type(value)}")

    if not value:
        raise ValueError("BCP47 language tag cannot be empty")

    try:
        # Validate and normalize using langcodes
        return LangCodesLanguage.get(value).to_tag()
    except ValueError as e:
        raise ValueError("Invalid BCP47 language tag") from e


# A str field that is validated and normalized as a BCP47 language tag
BCP47Language = Annotated[str, BeforeValidator(validate_bcp47_language)]

# Unanchored data URI pattern with four groups: media type, parameters,
# the ";base64" marker and the data itself. It is left unanchored so that it
# can be embedded in larger patterns.
data_uri_pattern = r"data:(?:([-\w]+/[-+\w.]+)(?:(;[-\w]+=[-\w]+)*))?(;base64)?,(.*)"

# Anchored variant used to validate a complete data URI
data_uri_regex = re.compile(rf"^{data_uri_pattern}$")
def _require_str(value, what: str) -> None:
    """Raise ValueError unless *value* is a str, as validate_bcp47_language does."""
    if not isinstance(value, str):
        raise ValueError(f"{what} must be a string, got {type(value)}")


def validate_data_uri(value: str) -> str:
    """
    Validate data URI format according to RFC 2397.
    Format: data:[<mediatype>][;base64],<data>

    Returns the value unchanged.

    Raises:
        ValueError: if the value is not a string or does not match the
            data URI pattern.
    """
    # Reject non-strings up front so the caller sees a ValueError rather
    # than a TypeError from the regex engine.
    _require_str(value, "Data URI")

    match = data_uri_regex.match(value)
    if not match:
        raise ValueError(f"Invalid data URI format: {value}")

    return value


def validate_local_href_path(value: str) -> str:
    """
    Validate that a path is relative (no scheme) and suitable for offline bundling.
    Allows: relative/path.jpg, ../path.jpg, ./file.png, #fragment, data:...
    Rejects: http://..., https://..., ftp://..., etc.

    Returns the value unchanged.

    Raises:
        ValueError: if the value is not a string, is an absolute URL or
            absolute path, or is a malformed data URI.
    """
    # urlparse would fail with a non-ValueError exception on non-string
    # input, so check the type first.
    _require_str(value, "Local path")

    parsed = urlparse(value)
    # Allow data URLs (for embedded content)
    if parsed.scheme == "data":
        return validate_data_uri(value)

    # Reject absolute URLs, protocol-relative URLs and absolute paths
    if parsed.scheme or parsed.netloc or parsed.path.startswith("/"):
        raise ValueError(f"Absolute URLs not allowed in bundled content: {value}")

    return value


def validate_local_src_path(value: str) -> str:
    """
    Validate local src paths - stricter than href, should be actual file paths.

    On top of the href rules the value must have a non-empty path, so a bare
    fragment such as "#top" is rejected.

    Returns the value unchanged.

    Raises:
        ValueError: if the href rules are violated or the path part is empty.
    """
    value = validate_local_href_path(value)

    parsed = urlparse(value)
    if not parsed.path:
        raise ValueError(f"Invalid local src path: {value}")

    # Allow relative paths
    return value
# One srcset entry: a URL (a data URI, or any run of characters that are
# neither whitespace nor commas) followed by one or more descriptors
# (2x, 100w, etc.), each preceded by whitespace.
entry_pattern = rf"({data_uri_pattern}|[^\s,]+)(?:\s+\d*\.?\d+[xwh])+"
# A complete srcset: entries separated by commas with optional whitespace
srcset_pattern = rf"^{entry_pattern}(?:\s*,\s*{entry_pattern})*$"


def validate_local_srcset(value: str) -> str:
    """
    Validate a srcset value whose URLs must all be local src paths.

    A blank value is accepted as it is. Otherwise the stripped value has to
    match the srcset pattern and the URL of every entry has to pass
    validate_local_src_path. The value is returned unchanged.

    Raises:
        ValueError: if the format is invalid or an entry URL is not a valid
            local src path.
    """
    stripped = value.strip()
    if not stripped:
        return value

    if re.match(srcset_pattern, stripped) is None:
        raise ValueError(f"Invalid srcset format: {value}")

    # The first regex group of each entry is its URL; the descriptors were
    # already checked by the full pattern match above.
    for groups in re.findall(entry_pattern, value):
        validate_local_src_path(groups[0].strip())

    return value


# Custom types for HTML attributes
LocalHrefPath = Annotated[str, BeforeValidator(validate_local_href_path)]
LocalSrcPath = Annotated[str, BeforeValidator(validate_local_src_path)]
LocalSrcSet = Annotated[str, BeforeValidator(validate_local_srcset)]


# Identifier type: limited to 32 characters and a restricted character set
QTIIdentifier = Annotated[
    str,
    Field(
        pattern=r"^[a-zA-Z_][a-zA-Z0-9_\-]{0,31}$",
        min_length=1,
        max_length=32,
        description="QTI XML identifier: must start with letter or underscore, "
        "contain only letters, digits, underscores, and hyphens, "
        "no colons, max 32 characters",
    ),
]
contentcuration.utils.assessment.qti.html.content_types import FlowContent +from contentcuration.utils.assessment.qti.html.content_types import FlowContentList +from contentcuration.utils.assessment.qti.html.content_types import InlineContent +from contentcuration.utils.assessment.qti.html.content_types import InlineContentList +from contentcuration.utils.assessment.qti.html.content_types import InlineGroup +from contentcuration.utils.assessment.qti.html.content_types import InlineGroupList +from contentcuration.utils.assessment.qti.html.display import Details +from contentcuration.utils.assessment.qti.html.display import Figcaption +from contentcuration.utils.assessment.qti.html.display import Figure +from contentcuration.utils.assessment.qti.html.display import Label +from contentcuration.utils.assessment.qti.html.display import Summary +from contentcuration.utils.assessment.qti.html.embed import Img +from contentcuration.utils.assessment.qti.html.embed import Object +from contentcuration.utils.assessment.qti.html.embed import Param +from contentcuration.utils.assessment.qti.html.embed import Picture +from contentcuration.utils.assessment.qti.html.flow import Address +from contentcuration.utils.assessment.qti.html.flow import Article +from contentcuration.utils.assessment.qti.html.flow import Aside +from contentcuration.utils.assessment.qti.html.flow import Blockquote +from contentcuration.utils.assessment.qti.html.flow import Div +from contentcuration.utils.assessment.qti.html.flow import Footer +from contentcuration.utils.assessment.qti.html.flow import Header +from contentcuration.utils.assessment.qti.html.flow import Nav +from contentcuration.utils.assessment.qti.html.flow import Section +from contentcuration.utils.assessment.qti.html.media import Audio +from contentcuration.utils.assessment.qti.html.media import Preload +from contentcuration.utils.assessment.qti.html.media import Track +from contentcuration.utils.assessment.qti.html.media import TrackKind 
+from contentcuration.utils.assessment.qti.html.media import Video +from contentcuration.utils.assessment.qti.html.sequence import Dd +from contentcuration.utils.assessment.qti.html.sequence import Dl +from contentcuration.utils.assessment.qti.html.sequence import Dt +from contentcuration.utils.assessment.qti.html.sequence import Li +from contentcuration.utils.assessment.qti.html.sequence import Ol +from contentcuration.utils.assessment.qti.html.sequence import OlType +from contentcuration.utils.assessment.qti.html.sequence import Ul +from contentcuration.utils.assessment.qti.html.table import Caption +from contentcuration.utils.assessment.qti.html.table import Col +from contentcuration.utils.assessment.qti.html.table import Colgroup +from contentcuration.utils.assessment.qti.html.table import Table +from contentcuration.utils.assessment.qti.html.table import TBody +from contentcuration.utils.assessment.qti.html.table import Td +from contentcuration.utils.assessment.qti.html.table import TFoot +from contentcuration.utils.assessment.qti.html.table import Th +from contentcuration.utils.assessment.qti.html.table import THead +from contentcuration.utils.assessment.qti.html.table import ThScope +from contentcuration.utils.assessment.qti.html.table import Tr +from contentcuration.utils.assessment.qti.html.table import TrList +from contentcuration.utils.assessment.qti.html.text import A +from contentcuration.utils.assessment.qti.html.text import Abbr +from contentcuration.utils.assessment.qti.html.text import B +from contentcuration.utils.assessment.qti.html.text import Bdi +from contentcuration.utils.assessment.qti.html.text import Bdo +from contentcuration.utils.assessment.qti.html.text import BdoDir +from contentcuration.utils.assessment.qti.html.text import BlockHTMLText +from contentcuration.utils.assessment.qti.html.text import Cite +from contentcuration.utils.assessment.qti.html.text import Code +from contentcuration.utils.assessment.qti.html.text import Dfn +from 
contentcuration.utils.assessment.qti.html.text import Em +from contentcuration.utils.assessment.qti.html.text import H1 +from contentcuration.utils.assessment.qti.html.text import H2 +from contentcuration.utils.assessment.qti.html.text import H3 +from contentcuration.utils.assessment.qti.html.text import H4 +from contentcuration.utils.assessment.qti.html.text import H5 +from contentcuration.utils.assessment.qti.html.text import H6 +from contentcuration.utils.assessment.qti.html.text import I +from contentcuration.utils.assessment.qti.html.text import InlineHTMLText +from contentcuration.utils.assessment.qti.html.text import Kbd +from contentcuration.utils.assessment.qti.html.text import P +from contentcuration.utils.assessment.qti.html.text import Pre +from contentcuration.utils.assessment.qti.html.text import Q +from contentcuration.utils.assessment.qti.html.text import Rp +from contentcuration.utils.assessment.qti.html.text import Rt +from contentcuration.utils.assessment.qti.html.text import Ruby +from contentcuration.utils.assessment.qti.html.text import Samp +from contentcuration.utils.assessment.qti.html.text import Small +from contentcuration.utils.assessment.qti.html.text import Span +from contentcuration.utils.assessment.qti.html.text import Strong +from contentcuration.utils.assessment.qti.html.text import Sub +from contentcuration.utils.assessment.qti.html.text import Sup +from contentcuration.utils.assessment.qti.html.text import Var + +__all__ = [ + # Base classes + "HTMLElement", + "FlowContentElement", + "BlockContentElement", + "InlineHTMLText", + "BlockHTMLText", + # Content type aliases + "FlowContent", + "FlowContentList", + "InlineContent", + "InlineContentList", + "InlineGroup", + "InlineGroupList", + # Breaks + "Br", + "Hr", + # Display elements + "Details", + "Figcaption", + "Figure", + "Label", + "Summary", + # Embedded content + "Img", + "Object", + "Param", + "Picture", + "Source", + # Flow/sectioning content + "Address", + "Article", + 
"Aside", + "Blockquote", + "Div", + "Footer", + "Header", + "Nav", + "Section", + # Media elements and enums + "Audio", + "Preload", + "Track", + "TrackKind", + "Video", + # Lists and sequences + "Dd", + "Dl", + "Dt", + "Li", + "Ol", + "OlType", + "Ul", + # Tables and related types + "Caption", + "Col", + "Colgroup", + "Table", + "TBody", + "Td", + "TFoot", + "Th", + "THead", + "ThScope", + "Tr", + "TrList", + # Text content + "A", + "Abbr", + "B", + "Bdi", + "Bdo", + "BdoDir", + "Cite", + "Code", + "Dfn", + "Em", + "H1", + "H2", + "H3", + "H4", + "H5", + "H6", + "I", + "Kbd", + "P", + "Pre", + "Q", + "Rp", + "Rt", + "Ruby", + "Samp", + "Small", + "Span", + "Strong", + "Sub", + "Sup", + "Var", +] diff --git a/contentcuration/contentcuration/utils/assessment/qti/html/base.py b/contentcuration/contentcuration/utils/assessment/qti/html/base.py new file mode 100644 index 0000000000..79dba7cebf --- /dev/null +++ b/contentcuration/contentcuration/utils/assessment/qti/html/base.py @@ -0,0 +1,56 @@ +from typing import List +from typing import Optional + +from pydantic import model_validator + +from contentcuration.utils.assessment.qti.base import ElementTreeBase +from contentcuration.utils.assessment.qti.fields import LocalSrcPath +from contentcuration.utils.assessment.qti.fields import LocalSrcSet + + +class HTMLElement(ElementTreeBase): + """ + Represents an HTML element within QTI. + """ + + @classmethod + def from_html_string(cls, html_string: str) -> List["HTMLElement"]: + """Parse HTML string and return list of HTMLElement instances""" + return cls.from_string(html_string) + + +class FlowContentElement(HTMLElement): + pass + + +class InlineContentElement(FlowContentElement): + pass + + +class BlockContentElement(FlowContentElement): + pass + + +class Source(HTMLElement): + # These attributes are common to all elements in HTML5 + media: Optional[str] = None + type: Optional[str] = None + + # Required if a child of