diff --git a/contentcuration/contentcuration/models.py b/contentcuration/contentcuration/models.py
index f193921afb..4265644ab4 100644
--- a/contentcuration/contentcuration/models.py
+++ b/contentcuration/contentcuration/models.py
@@ -2250,11 +2250,13 @@ def mark_complete(self): # noqa C901
| (
# A non-blank question
~Q(question="")
- # Non-blank answers
- & ~Q(answers="[]")
- # With either an input question or one answer marked as correct
+ # Non-blank answers, unless it is a free response question
+ # (which is allowed to have no answers)
+ & (~Q(answers="[]") | Q(type=exercises.FREE_RESPONSE))
+ # With either an input or free response question or one answer marked as correct
& (
Q(type=exercises.INPUT_QUESTION)
+ | Q(type=exercises.FREE_RESPONSE)
| Q(answers__iregex=r'"correct":\s*true')
)
)
diff --git a/contentcuration/contentcuration/tests/test_contentnodes.py b/contentcuration/contentcuration/tests/test_contentnodes.py
index 420ff69b2b..0c9e444300 100644
--- a/contentcuration/contentcuration/tests/test_contentnodes.py
+++ b/contentcuration/contentcuration/tests/test_contentnodes.py
@@ -1223,6 +1223,29 @@ def test_create_exercise_invalid_assessment_item_no_answers(self):
new_obj.mark_complete()
self.assertFalse(new_obj.complete)
+ def test_create_exercise_valid_assessment_item_free_response_no_answers(self):
+ licenses = list(
+ License.objects.filter(
+ copyright_holder_required=False, is_custom=False
+ ).values_list("pk", flat=True)
+ )
+ channel = testdata.channel()
+ new_obj = ContentNode(
+ title="yes",
+ kind_id=content_kinds.EXERCISE,
+ parent=channel.main_tree,
+ license_id=licenses[0],
+ extra_fields=self.new_extra_fields,
+ )
+ new_obj.save()
+ AssessmentItem.objects.create(  # deliberately created without passing any answers
+ contentnode=new_obj,
+ question="This is a question",
+ type=exercises.FREE_RESPONSE,
+ )
+ new_obj.mark_complete()
+ self.assertTrue(new_obj.complete)  # a free response item with no answers must still count as complete
+
def test_create_exercise_invalid_assessment_item_no_correct_answers(self):
licenses = list(
License.objects.filter(
diff --git a/contentcuration/contentcuration/tests/test_exportchannel.py b/contentcuration/contentcuration/tests/test_exportchannel.py
index 57599c0942..5c850597d7 100644
--- a/contentcuration/contentcuration/tests/test_exportchannel.py
+++ b/contentcuration/contentcuration/tests/test_exportchannel.py
@@ -15,6 +15,7 @@
from kolibri_content.router import get_active_content_database
from kolibri_content.router import set_active_content_database
from le_utils.constants import exercises
+from le_utils.constants import format_presets
from le_utils.constants.labels import accessibility_categories
from le_utils.constants.labels import learning_activities
from le_utils.constants.labels import levels
@@ -33,6 +34,7 @@
from .testdata import tree
from contentcuration import models as cc
from contentcuration.models import CustomTaskMetadata
+from contentcuration.utils.assessment.qti.archive import hex_to_qti_id
from contentcuration.utils.celery.tasks import generate_task_signature
from contentcuration.utils.publish import ChannelIncompleteError
from contentcuration.utils.publish import convert_channel_thumbnail
@@ -209,6 +211,48 @@ def setUp(self):
ai.contentnode = legacy_exercise
ai.save()
+ # Add an exercise with free response question to test QTI generation
+ qti_extra_fields = {
+ "options": {
+ "completion_criteria": {
+ "model": "mastery",
+ "threshold": {
+ "m": 1,
+ "n": 2,
+ "mastery_model": exercises.M_OF_N,
+ },
+ }
+ }
+ }
+ qti_exercise = create_node(
+ {
+ "kind_id": "exercise",
+ "title": "QTI Free Response Exercise",
+ "extra_fields": qti_extra_fields,
+ }
+ )
+ qti_exercise.complete = True
+ qti_exercise.parent = current_exercise.parent
+ qti_exercise.save()
+
+ # Create a free response assessment item
+ cc.AssessmentItem.objects.create(
+ contentnode=qti_exercise,
+ assessment_id=uuid.uuid4().hex,
+ type=exercises.FREE_RESPONSE,
+ question="What is the capital of France?",
+ answers=json.dumps([{"answer": "Paris", "correct": True}]),
+ hints=json.dumps([]),
+ raw_data="{}",
+ order=4,
+ randomize=False,
+ )
+
+ for ai in current_exercise.assessment_items.all()[:2]:
+ ai.id = None
+ ai.contentnode = qti_exercise
+ ai.save()
+
first_topic = self.content_channel.main_tree.get_descendants().first()
# Add a publishable topic to ensure it does not inherit but that its children do
@@ -400,7 +444,7 @@ def test_inherited_language(self):
parent_id=first_topic_node_id
)[1:]:
if child.kind == "topic":
- self.assertIsNone(child.lang_id)
+ self.assertEqual(child.lang_id, self.content_channel.language_id)
self.assertEqual(child.children.first().lang_id, "fr")
else:
self.assertEqual(child.lang_id, "fr")
@@ -558,6 +602,46 @@ def test_publish_no_modify_legacy_exercise_extra_fields(self):
{"mastery_model": exercises.M_OF_N, "randomize": True, "m": 1, "n": 2},
)
+ def test_qti_exercise_generates_qti_archive(self):
+ """Test that the exercise with a free response question has exactly one QTI zip file attached."""
+ qti_exercise = cc.ContentNode.objects.get(title="QTI Free Response Exercise")
+
+ # Exactly one file with the QTI_ZIP preset should be attached to the exercise
+ qti_files = qti_exercise.files.filter(preset_id=format_presets.QTI_ZIP)
+ self.assertEqual(
+ qti_files.count(),
+ 1,
+ "QTI exercise should have exactly one QTI archive file",
+ )
+
+ qti_file = qti_files.first()
+ self.assertIsNotNone(
+ qti_file.file_on_disk, "QTI file should have file_on_disk content"
+ )
+ self.assertTrue(
+ qti_file.original_filename.endswith(".zip"),
+ "QTI file should be a zip archive",
+ )
+
+ def test_qti_archive_contains_manifest_and_assessment_ids(self):
+ """Test that the published assessment item IDs equal hex_to_qti_id() of each source item's assessment_id (no manifest is inspected here)."""
+ published_qti_exercise = kolibri_models.ContentNode.objects.get(
+ title="QTI Free Response Exercise"
+ )
+ assessment_ids = (
+ published_qti_exercise.assessmentmetadata.first().assessment_item_ids
+ )
+
+ # Three IDs are expected: the free response item plus the two items copied from current_exercise in setUp
+ self.assertEqual(
+ len(assessment_ids), 3, "Should have exactly three assessment IDs"
+ )
+
+ # Each published ID should equal hex_to_qti_id() of the source item at the same position when sorted by "order"
+ qti_exercise = cc.ContentNode.objects.get(title="QTI Free Response Exercise")
+ for i, ai in enumerate(qti_exercise.assessment_items.order_by("order")):
+ self.assertEqual(assessment_ids[i], hex_to_qti_id(ai.assessment_id))
+
class EmptyChannelTestCase(StudioTestCase):
@classmethod
diff --git a/contentcuration/contentcuration/tests/testdata.py b/contentcuration/contentcuration/tests/testdata.py
index e938b3b237..4a0db4fbe8 100644
--- a/contentcuration/contentcuration/tests/testdata.py
+++ b/contentcuration/contentcuration/tests/testdata.py
@@ -217,7 +217,9 @@ def tree(parent=None):
def channel(name="testchannel"):
channel_creator = user()
- channel = cc.Channel.objects.create(name=name, actor_id=channel_creator.id)
+ channel = cc.Channel.objects.create(
+ name=name, actor_id=channel_creator.id, language_id="en"
+ )
channel.save()
channel.main_tree = tree()
diff --git a/contentcuration/contentcuration/tests/utils/qti/__init__.py b/contentcuration/contentcuration/tests/utils/qti/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/contentcuration/contentcuration/tests/utils/qti/test_assessment_items.py b/contentcuration/contentcuration/tests/utils/qti/test_assessment_items.py
new file mode 100644
index 0000000000..6bf2f71e51
--- /dev/null
+++ b/contentcuration/contentcuration/tests/utils/qti/test_assessment_items.py
@@ -0,0 +1,504 @@
+import unittest
+
+from contentcuration.utils.assessment.qti.assessment_item import AssessmentItem
+from contentcuration.utils.assessment.qti.assessment_item import CorrectResponse
+from contentcuration.utils.assessment.qti.assessment_item import DefaultValue
+from contentcuration.utils.assessment.qti.assessment_item import ItemBody
+from contentcuration.utils.assessment.qti.assessment_item import MapEntry
+from contentcuration.utils.assessment.qti.assessment_item import Mapping
+from contentcuration.utils.assessment.qti.assessment_item import OutcomeDeclaration
+from contentcuration.utils.assessment.qti.assessment_item import ResponseDeclaration
+from contentcuration.utils.assessment.qti.assessment_item import ResponseProcessing
+from contentcuration.utils.assessment.qti.assessment_item import Value
+from contentcuration.utils.assessment.qti.constants import BaseType
+from contentcuration.utils.assessment.qti.constants import Cardinality
+from contentcuration.utils.assessment.qti.html import Blockquote
+from contentcuration.utils.assessment.qti.html import Br
+from contentcuration.utils.assessment.qti.html import Div
+from contentcuration.utils.assessment.qti.html import P
+from contentcuration.utils.assessment.qti.html import Strong
+from contentcuration.utils.assessment.qti.interaction_types.simple import (
+ ChoiceInteraction,
+)
+from contentcuration.utils.assessment.qti.interaction_types.simple import SimpleChoice
+from contentcuration.utils.assessment.qti.interaction_types.text_based import (
+ ExtendedTextInteraction,
+)
+from contentcuration.utils.assessment.qti.interaction_types.text_based import (
+ TextEntryInteraction,
+)
+from contentcuration.utils.assessment.qti.prompt import Prompt
+
+
+class QTIAssessmentItemTests(unittest.TestCase):
+ def test_true_false_question(self):
+ expected_xml = """
+
+
+true
+
+
+
+
+1
+
+
+
+This is a True/False question?
+
+True
+False
+
+
+
+""".replace(
+ "\n", ""
+ )
+
+ # Construct the QTI elements
+ response_declaration = ResponseDeclaration(
+ identifier="RESPONSE",
+ cardinality=Cardinality.SINGLE,
+ base_type=BaseType.IDENTIFIER,
+ correct_response=CorrectResponse(value=[Value(value="true")]),
+ )
+
+ outcome_declaration = OutcomeDeclaration(
+ identifier="SCORE",
+ cardinality=Cardinality.SINGLE,
+ base_type=BaseType.FLOAT,
+ default_value=DefaultValue(value=[Value(value="1")]),
+ )
+
+ true_choice = SimpleChoice(identifier="true", children=["True"])
+ false_choice = SimpleChoice(identifier="false", children=["False"])
+ choice_interaction = ChoiceInteraction(
+ response_identifier="RESPONSE",
+ max_choices=1,
+ answers=[true_choice, false_choice],
+ )
+
+ item_body = ItemBody(
+ children=[
+ P(children=["This is a True/False question?"]),
+ choice_interaction,
+ ]
+ )
+ response_processing = ResponseProcessing(
+ template="https://purl.imsglobal.org/spec/qti/v3p0/rptemplates/match_correct"
+ )
+
+ assessment_item = AssessmentItem(
+ identifier="beginnersguide007",
+ title="BG true false example ",
+ language="EN-US",
+ time_dependent=False,
+ item_body=item_body,
+ response_declaration=[response_declaration],
+ outcome_declaration=[outcome_declaration],
+ response_processing=response_processing,
+ )
+
+ # Generate the XML
+ generated_xml = assessment_item.to_xml_string()
+
+ # Compare the generated XML with the expected XML
+ self.assertEqual(generated_xml.strip(), expected_xml.strip())
+
+ def test_multiple_choice_question(self):
+ expected_xml = """
+
+
+A
+C
+D
+
+
+
+
+1
+
+
+
+QTI 3 is a new version released in 2022.
+
+
+Which of the following features are new to QTI 3?
+Pick 3 choices.
+
+Shared Vocabulary
+Pineapple Flavored
+Catalogs for candidate-specific content.
+Conformance features definitions
+A subset of HTML5 elements
+
+
+
+ """.replace(
+ "\n", ""
+ )
+ response_declaration = ResponseDeclaration(
+ identifier="RESPONSE",
+ cardinality=Cardinality.MULTIPLE,
+ base_type=BaseType.IDENTIFIER,
+ correct_response=CorrectResponse(
+ value=[
+ Value(value="A"),
+ Value(value="C"),
+ Value(value="D"),
+ ]
+ ),
+ )
+
+ outcome_declaration = OutcomeDeclaration(
+ identifier="SCORE",
+ cardinality=Cardinality.SINGLE,
+ base_type=BaseType.FLOAT,
+ default_value=DefaultValue(value=[Value(value="1")]),
+ )
+
+ prompt = Prompt(
+ children=[
+ P(
+ children=[
+ "Which of the following features are ",
+ Strong(children=["new"]),
+ " to QTI 3?",
+ ]
+ ),
+ P(children=["Pick 3 choices."]),
+ ]
+ )
+ choice_a = SimpleChoice(identifier="A", children=["Shared Vocabulary"])
+ choice_b = SimpleChoice(identifier="B", children=["Pineapple Flavored"])
+ choice_c = SimpleChoice(
+ identifier="C",
+ children=["Catalogs for candidate-specific content."],
+ )
+ choice_d = SimpleChoice(
+ identifier="D", children=["Conformance features definitions"]
+ )
+ choice_e = SimpleChoice(identifier="E", children=["A subset of HTML5 elements"])
+ choice_interaction = ChoiceInteraction(
+ response_identifier="RESPONSE",
+ max_choices=3,
+ answers=[choice_a, choice_b, choice_c, choice_d, choice_e],
+ prompt=prompt,
+ )
+
+ item_body = ItemBody(
+ children=[
+ P(children=["QTI 3 is a new version released in 2022."]),
+ choice_interaction,
+ ]
+ )
+ response_processing = ResponseProcessing(
+ template="https://purl.imsglobal.org/spec/qti/v3p0/rptemplates/match_correct"
+ )
+
+ assessment_item = AssessmentItem(
+ identifier="beginnersguide008",
+ title="BG Choice example",
+ language="EN-US",
+ time_dependent=False,
+ item_body=item_body,
+ response_declaration=[response_declaration],
+ outcome_declaration=[outcome_declaration],
+ response_processing=response_processing,
+ )
+
+ generated_xml = assessment_item.to_xml_string()
+ self.assertEqual(generated_xml.strip(), expected_xml.strip())
+
+ def test_long_text_question(self):
+ expected_xml = """
+
+
+
+Read this postcard from your English pen-friend, Sam.
+
+
+Here is a postcard of my town. Please send me
+a postcard from your town. What size is your Town?
+What is the nicest part of your town?
+Where do you go in the evenings?
+Sam
+
+
+
+Write Sam a postcard. Answer the questions. Write 23–30 words
+
+
+ """.replace(
+ "\n", ""
+ )
+ response_declaration = ResponseDeclaration(
+ identifier="RESPONSE",
+ cardinality=Cardinality.SINGLE,
+ base_type=BaseType.STRING,
+ )
+
+ outcome_declaration = OutcomeDeclaration(
+ identifier="SCORE",
+ cardinality=Cardinality.SINGLE,
+ base_type=BaseType.FLOAT,
+ )
+
+ prompt_text = "Write Sam a postcard. Answer the questions. Write 23–30 words"
+
+ extended_text_interaction = ExtendedTextInteraction(
+ response_identifier="RESPONSE",
+ prompt=Prompt(children=[prompt_text]),
+ )
+
+ item_body = ItemBody(
+ children=[
+ P(children=["Read this postcard from your English pen-friend, Sam."]),
+ Div(
+ children=[
+ Blockquote(
+ class_="postcard",
+ children=[
+ P(
+ children=[
+ "Here is a postcard of my town. Please send me",
+ Br(),
+ "a postcard from your town. What size is your Town?",
+ Br(),
+ "What is the nicest part of your town?",
+ Br(),
+ "Where do you go in the evenings?",
+ ]
+ ),
+ P(children=["Sam"]),
+ ],
+ )
+ ]
+ ),
+ extended_text_interaction,
+ ]
+ )
+
+ assessment_item = AssessmentItem(
+ identifier="beginnersguide009",
+ title="BG Postcard example",
+ language="en-US",
+ time_dependent=False,
+ item_body=item_body,
+ response_declaration=[response_declaration],
+ outcome_declaration=[outcome_declaration],
+ )
+
+ generated_xml = assessment_item.to_xml_string()
+ self.assertEqual(generated_xml.strip(), expected_xml.strip())
+
+ def test_missing_word_question(self):
+ expected_xml = """
+
+
+York
+
+
+
+
+
+
+
+
+Identify the missing word in this famous quote from Shakespeare's Richard III.
+
+
+Now is the winter of our discontent
+Made glorious summer by this sun of ;
+
+And all the clouds that lour'd upon our house
+In the deep bosom of the ocean buried.
+
+
+
+
+ """.replace(
+ "\n", ""
+ )
+
+ response_declaration = ResponseDeclaration(
+ identifier="RESPONSE",
+ cardinality=Cardinality.SINGLE,
+ base_type=BaseType.STRING,
+ correct_response=CorrectResponse(value=[Value(value="York")]),
+ mapping=Mapping(
+ default_value=0,
+ map_entries=[
+ MapEntry(map_key="York", mapped_value=1, case_sensitive=True),
+ MapEntry(map_key="york", mapped_value=0.5),
+ ],
+ ),
+ )
+
+ outcome_declaration = OutcomeDeclaration(
+ identifier="SCORE",
+ cardinality=Cardinality.SINGLE,
+ base_type=BaseType.FLOAT,
+ )
+
+ text_entry_interaction = TextEntryInteraction(response_identifier="RESPONSE")
+
+ item_body = ItemBody(
+ children=[
+ P(
+ children=[
+ "Identify the missing word in this famous quote from Shakespeare's Richard III."
+ ]
+ ),
+ Div(
+ children=[
+ Blockquote(
+ class_="postcard",
+ children=[
+ P(
+ children=[
+ "Now is the winter of our discontent",
+ Br(),
+ "Made glorious summer by this sun of ",
+ text_entry_interaction,
+ ";",
+ Br(),
+ "And all the clouds that lour'd upon our house",
+ Br(),
+ "In the deep bosom of the ocean buried.",
+ ]
+ ),
+ ],
+ )
+ ]
+ ),
+ ]
+ )
+
+ response_processing = ResponseProcessing(
+ template="https://purl.imsglobal.org/spec/qti/v3p0/rptemplates/map_response"
+ )
+
+ assessment_item = AssessmentItem(
+ identifier="beginnersguide010",
+ title="BG Missing Word example",
+ language="en-US",
+ time_dependent=False,
+ item_body=item_body,
+ response_declaration=[response_declaration],
+ outcome_declaration=[outcome_declaration],
+ response_processing=response_processing,
+ )
+
+ generated_xml = assessment_item.to_xml_string()
+ self.assertEqual(generated_xml.strip(), expected_xml.strip())
+
+ def test_numerical_entry_question(self):
+ expected_xml = """
+
+
+42.5
+
+
+
+
+0.0
+
+
+
+Calculate the value of x when 2x + 5 = 90:
+
+
+ """.replace(
+ "\n", ""
+ )
+
+ response_declaration = ResponseDeclaration(
+ identifier="RESPONSE",
+ cardinality=Cardinality.SINGLE,
+ base_type=BaseType.FLOAT,
+ correct_response=CorrectResponse(value=[Value(value="42.5")]),
+ )
+
+ outcome_declaration = OutcomeDeclaration(
+ identifier="SCORE",
+ cardinality=Cardinality.SINGLE,
+ base_type=BaseType.FLOAT,
+ default_value=DefaultValue(value=[Value(value="0.0")]),
+ )
+
+ text_entry_interaction = TextEntryInteraction(
+ response_identifier="RESPONSE",
+ expected_length=10,
+ pattern_mask="^[0-9]*\\.?[0-9]+$",
+ placeholder_text="Enter a number",
+ )
+
+ assessment_item = AssessmentItem(
+ identifier="numerical-entry-item",
+ title="Numerical Entry Question",
+ language="en-US",
+ time_dependent=False,
+ item_body=ItemBody(
+ children=[
+ P(children=["Calculate the value of x when 2x + 5 = 90:"]),
+ P(children=[text_entry_interaction]),
+ ]
+ ),
+ response_declaration=[response_declaration],
+ outcome_declaration=[outcome_declaration],
+ )
+
+ generated_xml = assessment_item.to_xml_string()
+ self.assertEqual(generated_xml.strip(), expected_xml.strip())
diff --git a/contentcuration/contentcuration/tests/utils/qti/test_fields.py b/contentcuration/contentcuration/tests/utils/qti/test_fields.py
new file mode 100644
index 0000000000..40e4a9c0e5
--- /dev/null
+++ b/contentcuration/contentcuration/tests/utils/qti/test_fields.py
@@ -0,0 +1,332 @@
+import unittest
+
+from contentcuration.utils.assessment.qti.fields import validate_data_uri
+from contentcuration.utils.assessment.qti.fields import validate_local_href_path
+from contentcuration.utils.assessment.qti.fields import validate_local_src_path
+from contentcuration.utils.assessment.qti.fields import validate_local_srcset
+
+
+class TestValidateDataUri(unittest.TestCase):
+ def test_valid_data_uris(self):  # every well-formed data URI must be returned unchanged
+ valid_uris = [
+ "data:text/plain;base64,SGVsbG8=",
+ "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8/5+hHgAHggJ/PchI7wAAAABJRU5ErkJggg==",
+ "data:text/plain,Hello%20World",
+ "data:,Hello",
+ "data:text/html,<h1>Hello</h1>",  # NOTE(review): markup restored after extraction stripped it and split the literal; confirm the exact tag against upstream
+ 'data:application/json,{"key":"value"}',
+ "data:text/css,body{color:red}",
+ "data:image/svg+xml, ",  # NOTE(review): payload looks stripped by extraction; confirm against upstream
+ "data:text/plain;charset=utf-8,Hello",
+ "data:text/plain;charset=utf-8;base64,SGVsbG8=",
+ ]
+
+ for uri in valid_uris:
+ with self.subTest(uri=uri):
+ result = validate_data_uri(uri)
+ self.assertEqual(result, uri, f"Should return the same URI: {uri}")
+
+ def test_invalid_data_uris(self):
+ """Test invalid data URI formats"""
+ invalid_uris = [
+ "not-a-data-uri",
+ "data:",
+ "data",
+ "http://example.com",
+ "https://example.com/image.png",
+ "ftp://example.com/file.txt",
+ "file:///path/to/file",
+ "",
+ "data:text/plain",
+ "ata:text/plain,Hello",
+ ]
+
+ for uri in invalid_uris:
+ with self.subTest(uri=uri):
+ with self.assertRaises(ValueError) as cm:
+ validate_data_uri(uri)
+ self.assertIn("Invalid data URI format", str(cm.exception))
+
+
+class TestValidateLocalHrefPath(unittest.TestCase):
+ def test_valid_relative_paths(self):
+ """Test valid relative paths"""
+ valid_paths = [
+ "relative/path.jpg",
+ "../path.jpg",
+ "./file.png",
+ "file.txt",
+ "images/photo.jpg",
+ "docs/readme.md",
+ "assets/style.css",
+ "#fragment",
+ "?query=value",
+ "#fragment?query=value",
+ "path/to/file.html#section",
+ "subdir/../file.txt",
+ ]
+
+ for path in valid_paths:
+ with self.subTest(path=path):
+ result = validate_local_href_path(path)
+ self.assertEqual(result, path, f"Should return the same path: {path}")
+
+ def test_valid_data_uris_in_href(self):
+ data_uris = [
+ "data:text/plain,Hello",
+ "data:image/png;base64,iVBORw0KGgo=",
+ ]
+
+ for uri in data_uris:
+ with self.subTest(uri=uri):
+ result = validate_local_href_path(uri)
+ self.assertEqual(result, uri)
+
+ def test_invalid_absolute_urls(self):
+ absolute_urls = [
+ "http://example.com",
+ "https://example.com/path",
+ "ftp://example.com/file",
+ "mailto:test@example.com",
+ "tel:+1234567890",
+ "//example.com/path",
+ "/absolute/path",
+ "/",
+ ]
+
+ for url in absolute_urls:
+ with self.subTest(url=url):
+ with self.assertRaises(ValueError) as cm:
+ validate_local_href_path(url)
+ self.assertIn("Absolute URLs not allowed", str(cm.exception))
+
+ def test_invalid_data_uris_in_href(self):
+ """Test that invalid data URIs are rejected"""
+ with self.assertRaises(ValueError) as cm:
+ validate_local_href_path("data:invalid")
+ self.assertIn("Invalid data URI format", str(cm.exception))
+
+
+class TestValidateLocalSrcPath(unittest.TestCase):
+ def test_valid_src_paths(self):
+ """Test valid src paths (must have actual file paths)"""
+ valid_paths = [
+ "relative/path.jpg",
+ "../path.jpg",
+ "./file.png",
+ "file.txt",
+ "images/photo.jpg",
+ "subdir/../file.txt",
+ ]
+
+ for path in valid_paths:
+ with self.subTest(path=path):
+ result = validate_local_src_path(path)
+ self.assertEqual(result, path)
+
+ def test_valid_data_uris_in_src(self):
+ data_uris = [
+ "data:text/plain,Hello",
+ "data:image/png;base64,iVBORw0KGgo=",
+ ]
+
+ for uri in data_uris:
+ with self.subTest(uri=uri):
+ result = validate_local_src_path(uri)
+ self.assertEqual(result, uri)
+
+ def test_invalid_empty_paths(self):
+ """Test rejection of values with no file path (fragment-only or query-only)"""
+ invalid_paths = ["#fragment", "?query=value", "#fragment?query=value"]
+
+ for path in invalid_paths:
+ with self.subTest(path=path):
+ with self.assertRaises(ValueError) as cm:
+ validate_local_src_path(path)
+ self.assertIn("Invalid local src path", str(cm.exception))
+
+ def test_absolute_urls_rejected(self):
+ """Test that absolute URLs are still rejected"""
+ with self.assertRaises(ValueError) as cm:
+ validate_local_src_path("http://example.com/image.jpg")
+ self.assertIn("Absolute URLs not allowed", str(cm.exception))
+
+
+class TestValidateLocalSrcset(unittest.TestCase):
+ def test_empty_srcset(self):
+ empty_values = ["", " ", "\t", "\n"]
+
+ for value in empty_values:
+ with self.subTest(value=repr(value)):
+ result = validate_local_srcset(value)
+ self.assertEqual(result, value)
+
+ def test_single_image_srcset(self):
+ valid_srcsets = [
+ "image.jpg 2x",
+ "image.jpg 1.5x",
+ "image.jpg 100w",
+ "image.jpg 50h",
+ "image.jpg 0.5x",
+ "path/to/image.png 2x",
+ "../images/photo.jpg 1x",
+ "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8/5+hHgAHggJ/PchI7wAAAABJRU5ErkJggg== 2x",
+ ]
+
+ for srcset in valid_srcsets:
+ with self.subTest(srcset=srcset):
+ result = validate_local_srcset(srcset)
+ self.assertEqual(result, srcset)
+
+ def test_data_uri_in_srcset(self):
+ valid_data_srcsets = [
+ "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8/5+hHgAHggJ/PchI7wAAAABJRU5ErkJggg== 1x",
+ "data:text/plain,Hello%20World 2x",
+ "data:image/svg+xml, 1.5x",
+ 'data:application/json,{"key":"value"} 100w',
+ ]
+
+ for srcset in valid_data_srcsets:
+ with self.subTest(srcset=srcset):
+ result = validate_local_srcset(srcset)
+ self.assertEqual(result, srcset)
+
+ def test_multiple_images_srcset(self):
+ valid_srcsets = [
+ "small.jpg 1x, large.jpg 2x",
+ "img-320.jpg 320w, img-640.jpg 640w, img-1280.jpg 1280w",
+ "portrait.jpg 480h, landscape.jpg 960h",
+ "image1.jpg 1x, image2.jpg 1.5x, image3.jpg 2x",
+ "a.jpg 1x,b.jpg 2x", # minimal spacing
+ ]
+
+ for srcset in valid_srcsets:
+ with self.subTest(srcset=srcset):
+ result = validate_local_srcset(srcset)
+ self.assertEqual(result, srcset)
+
+ def test_mixed_data_uri_and_regular_paths(self):
+ valid_mixed_srcsets = [
+ "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8/5+hHgAHggJ/PchI7wAAAABJRU5ErkJggg== 1x, large.jpg 2x",
+ "small.jpg 1x, data:image/svg+xml, 2x",
+ "icon.png 1x, data:text/plain,fallback 2x, large.png 3x",
+ ]
+
+ for srcset in valid_mixed_srcsets:
+ with self.subTest(srcset=srcset):
+ result = validate_local_srcset(srcset)
+ self.assertEqual(result, srcset)
+
+ def test_multiple_data_uris_in_srcset(self):
+ valid_multi_data_srcsets = [
+ "data:image/png;base64,ABC123 1x, data:image/png;base64,DEF456 2x",
+ "data:text/plain,Small,Image 1x, data:text/plain,Large,Image 2x",
+ "data:image/svg+xml, 1x, data:image/svg+xml, 2x, data:image/svg+xml, 3x", # noqa: E501
+ 'data:application/json,{"size":"small"} 100w, data:application/json,{"size":"large"} 200w',
+ ]
+
+ for srcset in valid_multi_data_srcsets:
+ with self.subTest(srcset=srcset):
+ result = validate_local_srcset(srcset)
+ self.assertEqual(result, srcset)
+
+ def test_complex_mixed_srcsets(self):
+ complex_srcsets = [
+ "thumb.jpg 1x, data:image/png;base64,MID123 1.5x, data:image/svg+xml, 2x, large.jpg 3x",
+ "data:text/plain,Icon,1 50w, regular-100.jpg 100w, data:text/plain,Icon,2 150w, regular-200.jpg 200w",
+ ]
+
+ for srcset in complex_srcsets:
+ with self.subTest(srcset=srcset):
+ result = validate_local_srcset(srcset)
+ self.assertEqual(result, srcset)
+
+ def test_invalid_descriptors(self):
+ """Test rejection of invalid descriptors"""
+ invalid_srcsets = [
+ "image.jpg 2", # missing unit
+ "image.jpg x", # missing number
+ "image.jpg 2z", # invalid unit
+ "image.jpg 2.x", # malformed number
+ "image.jpg .x", # malformed number
+ "image.jpg 2xx", # double unit
+ "image.jpg -2x", # negative number
+ "image.jpg 2 x", # space in descriptor
+ ]
+
+ for srcset in invalid_srcsets:
+ with self.subTest(srcset=srcset):
+ with self.assertRaises(ValueError):
+ validate_local_srcset(srcset)
+
+ def test_invalid_urls_in_srcset(self):
+ invalid_srcsets = [
+ "http://example.com/image.jpg 2x",
+ "https://cdn.example.com/img.png 1x, local.jpg 2x",
+ "/absolute/path.jpg 1x",
+ ]
+
+ for srcset in invalid_srcsets:
+ with self.subTest(srcset=srcset):
+ with self.assertRaises(ValueError):
+ validate_local_srcset(srcset)
+
+ def test_empty_srcset_entries(self):
+ invalid_srcsets = [
+ "image.jpg 2x, ,other.jpg 1x",
+ ", image.jpg 2x",
+ "image.jpg 2x,",
+ ]
+
+ for srcset in invalid_srcsets:
+ with self.subTest(srcset=srcset):
+ with self.assertRaises(ValueError):
+ validate_local_srcset(srcset)
+
+ def test_missing_path_in_srcset(self):
+ invalid_srcsets = [
+ "#fragment 2x",
+ "?query=value 1x",
+ ]
+
+ for srcset in invalid_srcsets:
+ with self.subTest(srcset=srcset):
+ with self.assertRaises(ValueError):
+ validate_local_srcset(srcset)
+
+
+class TestEdgeCases(unittest.TestCase):
+ def test_unicode_paths_href(self):
+ unicode_paths = ["café/ñ.jpg", "文件/图片.png", "файл.txt"]
+
+ for path in unicode_paths:
+ with self.subTest(path=path):
+ result = validate_local_href_path(path)
+ self.assertEqual(result, path)
+
+ def test_unicode_paths_src(self):
+ unicode_paths = ["café/ñ.jpg", "文件/图片.png", "файл.txt"]
+
+ for path in unicode_paths:
+ with self.subTest(path=path):
+ result = validate_local_src_path(path)
+ self.assertEqual(result, path)
+
+ def test_very_long_paths(self):
+ long_path = "a/" * 1000 + "file.txt"
+
+ # Should handle long paths gracefully
+ result = validate_local_href_path(long_path)
+ self.assertEqual(result, long_path)
+
+ def test_special_characters_in_data_uri(self):
+ special_data_uris = [
+ "data:text/plain,Hello%20World%21",
+ "data:text/plain,<>&\"'",
+ 'data:application/json,{"key":"value"}',
+ ]
+
+ for uri in special_data_uris:
+ with self.subTest(uri=uri):
+ result = validate_data_uri(uri)
+ self.assertEqual(result, uri)
diff --git a/contentcuration/contentcuration/tests/utils/qti/test_html.py b/contentcuration/contentcuration/tests/utils/qti/test_html.py
new file mode 100644
index 0000000000..dc5d162bc7
--- /dev/null
+++ b/contentcuration/contentcuration/tests/utils/qti/test_html.py
@@ -0,0 +1,776 @@
+import unittest
+
+from contentcuration.utils.assessment.qti.base import TextNode
+from contentcuration.utils.assessment.qti.html import A
+from contentcuration.utils.assessment.qti.html import Abbr
+from contentcuration.utils.assessment.qti.html import Address
+from contentcuration.utils.assessment.qti.html import Article
+from contentcuration.utils.assessment.qti.html import Aside
+from contentcuration.utils.assessment.qti.html import Audio
+from contentcuration.utils.assessment.qti.html import B
+from contentcuration.utils.assessment.qti.html import Bdi
+from contentcuration.utils.assessment.qti.html import Bdo
+from contentcuration.utils.assessment.qti.html import BdoDir
+from contentcuration.utils.assessment.qti.html import Blockquote
+from contentcuration.utils.assessment.qti.html import Br
+from contentcuration.utils.assessment.qti.html import Caption
+from contentcuration.utils.assessment.qti.html import Cite
+from contentcuration.utils.assessment.qti.html import Code
+from contentcuration.utils.assessment.qti.html import Col
+from contentcuration.utils.assessment.qti.html import Colgroup
+from contentcuration.utils.assessment.qti.html import Dd
+from contentcuration.utils.assessment.qti.html import Details
+from contentcuration.utils.assessment.qti.html import Dfn
+from contentcuration.utils.assessment.qti.html import Div
+from contentcuration.utils.assessment.qti.html import Dl
+from contentcuration.utils.assessment.qti.html import Dt
+from contentcuration.utils.assessment.qti.html import Em
+from contentcuration.utils.assessment.qti.html import Figcaption
+from contentcuration.utils.assessment.qti.html import Figure
+from contentcuration.utils.assessment.qti.html import Footer
+from contentcuration.utils.assessment.qti.html import H1
+from contentcuration.utils.assessment.qti.html import H2
+from contentcuration.utils.assessment.qti.html import H3
+from contentcuration.utils.assessment.qti.html import H4
+from contentcuration.utils.assessment.qti.html import H5
+from contentcuration.utils.assessment.qti.html import H6
+from contentcuration.utils.assessment.qti.html import Header
+from contentcuration.utils.assessment.qti.html import Hr
+from contentcuration.utils.assessment.qti.html import HTMLElement
+from contentcuration.utils.assessment.qti.html import I
+from contentcuration.utils.assessment.qti.html import Img
+from contentcuration.utils.assessment.qti.html import Kbd
+from contentcuration.utils.assessment.qti.html import Label
+from contentcuration.utils.assessment.qti.html import Li
+from contentcuration.utils.assessment.qti.html import Nav
+from contentcuration.utils.assessment.qti.html import Object
+from contentcuration.utils.assessment.qti.html import Ol
+from contentcuration.utils.assessment.qti.html import OlType
+from contentcuration.utils.assessment.qti.html import P
+from contentcuration.utils.assessment.qti.html import Param
+from contentcuration.utils.assessment.qti.html import Picture
+from contentcuration.utils.assessment.qti.html import Pre
+from contentcuration.utils.assessment.qti.html import Q
+from contentcuration.utils.assessment.qti.html import Rp
+from contentcuration.utils.assessment.qti.html import Rt
+from contentcuration.utils.assessment.qti.html import Ruby
+from contentcuration.utils.assessment.qti.html import Samp
+from contentcuration.utils.assessment.qti.html import Section
+from contentcuration.utils.assessment.qti.html import Small
+from contentcuration.utils.assessment.qti.html import Source
+from contentcuration.utils.assessment.qti.html import Span
+from contentcuration.utils.assessment.qti.html import Strong
+from contentcuration.utils.assessment.qti.html import Sub
+from contentcuration.utils.assessment.qti.html import Summary
+from contentcuration.utils.assessment.qti.html import Sup
+from contentcuration.utils.assessment.qti.html import Table
+from contentcuration.utils.assessment.qti.html import TBody
+from contentcuration.utils.assessment.qti.html import Td
+from contentcuration.utils.assessment.qti.html import TFoot
+from contentcuration.utils.assessment.qti.html import Th
+from contentcuration.utils.assessment.qti.html import THead
+from contentcuration.utils.assessment.qti.html import Tr
+from contentcuration.utils.assessment.qti.html import Track
+from contentcuration.utils.assessment.qti.html import TrackKind
+from contentcuration.utils.assessment.qti.html import Ul
+from contentcuration.utils.assessment.qti.html import Var
+from contentcuration.utils.assessment.qti.html import Video
+
+
+class HTMLDataClassTests(unittest.TestCase):  # Builds qti.html elements and compares to_xml_string() with literal strings. NOTE(review): many expected literals below appear to have lost their markup — confirm against the repository copy.
+ def test_break_elements(self):  # Br and Hr constructed with no arguments.
+ br_element = Br()
+ self.assertEqual(br_element.to_xml_string(), " ")
+
+ hr_element = Hr()
+ self.assertEqual(hr_element.to_xml_string(), " ")
+
+ def test_display_elements(self):  # Label (with for_), Summary and Figcaption, each holding one text child.
+ label_element = Label(children=["Test Label"], for_="test")
+ self.assertEqual(
+ label_element.to_xml_string(), 'Test Label '
+ )
+
+ summary_element = Summary(children=["Test Summary"])
+ self.assertEqual(
+ summary_element.to_xml_string(), "Test Summary "
+ )
+
+ figcaption_element = Figcaption(children=["Test Figcaption"])
+ self.assertEqual(
+ figcaption_element.to_xml_string(),
+ "Test Figcaption ",
+ )
+
+ def test_details_validation(self):  # Details is accepted only with a single Summary as its first child; the other three arrangements raise ValueError.
+ summary_element = Summary(children=["Test Summary"])
+
+ # Valid case: Summary as first child
+ valid_details = Details(children=[summary_element, "Test Content"])
+ self.assertEqual(
+ valid_details.to_xml_string(),
+ "Test Summary Test Content ",
+ )
+
+ # Invalid case: No Summary element
+ with self.assertRaises(ValueError):
+ Details(children=["Test Content"])
+
+ # Invalid case: Summary not as first child
+ with self.assertRaises(ValueError):
+ Details(children=["Test Content", summary_element])
+
+ # Invalid case: Multiple Summary elements
+ second_summary = Summary(children=["Second Summary"])
+ with self.assertRaises(ValueError):
+ Details(children=[summary_element, "Test Content", second_summary])
+
+ def test_figure_elements(self):  # Figcaption is accepted as first or last child; two captions, or a caption between other children, raise ValueError.
+ figure_element = Figure(children=["Test Figure"])
+ self.assertEqual(figure_element.to_xml_string(), "Test Figure ")
+
+ figcaption_element = Figcaption(children=["Test Caption"])
+ figure_with_caption = Figure(children=[figcaption_element, "Test Content"])
+ self.assertEqual(
+ figure_with_caption.to_xml_string(),
+ "Test Caption Test Content ",
+ )
+
+ figure_with_caption_last = Figure(children=["Test Content", figcaption_element])
+ self.assertEqual(
+ figure_with_caption_last.to_xml_string(),
+ "Test ContentTest Caption ",
+ )
+
+ with self.assertRaises(ValueError):  # two Figcaption children
+ Figure(
+ children=[figcaption_element, Figcaption(children=["Second Caption"])]
+ )
+
+ with self.assertRaises(ValueError):  # Figcaption neither first nor last
+ Figure(children=["Before", figcaption_element, "After"])
+
+ def test_embed_elements(self):  # Img, Param, Object (with params), a srcset Source, and Picture (sources as children, img passed separately).
+ img_element = Img(alt="Test Alt", src="test.jpg")
+ self.assertEqual(
+ img_element.to_xml_string(), ' '
+ )
+
+ param_element = Param(name="test_param", value="test_value")
+ self.assertEqual(
+ param_element.to_xml_string(),
+ ' ',
+ )
+
+ object_element = Object(children=["Test Object"], params=[param_element])
+ self.assertEqual(
+ object_element.to_xml_string(),
+ ' Test Object ',
+ )
+
+ picture_source_element = Source(srcset="test.jpg 2x")
+ self.assertEqual(
+ picture_source_element.to_xml_string(), ' '
+ )
+
+ picture_element = Picture(children=[picture_source_element], img=img_element)
+ self.assertEqual(
+ picture_element.to_xml_string(),
+ ' ',
+ )
+
+ def test_flow_elements(self):  # Blockquote (with cite), Div, Article, Section, Nav, Aside, Header, Footer and Address.
+ blockquote_element = Blockquote(
+ children=["Test Blockquote"], cite="http://test.com"
+ )
+ self.assertEqual(
+ blockquote_element.to_xml_string(),
+ 'Test Blockquote ',
+ )
+
+ div_element = Div(children=["Test Div"])
+ self.assertEqual(div_element.to_xml_string(), "Test Div
")
+
+ article_element = Article(children=["Test Article"])
+ self.assertEqual(
+ article_element.to_xml_string(), "Test Article "
+ )
+
+ section_element = Section(children=["Test Section"])
+ self.assertEqual(
+ section_element.to_xml_string(), ""
+ )
+
+ nav_element = Nav(children=["Test Nav"])
+ self.assertEqual(nav_element.to_xml_string(), "Test Nav ")
+
+ aside_element = Aside(children=["Test Aside"])
+ self.assertEqual(aside_element.to_xml_string(), "")
+
+ header_element = Header(children=["Test Header"])
+ self.assertEqual(header_element.to_xml_string(), "")
+
+ footer_element = Footer(children=["Test Footer"])
+ self.assertEqual(footer_element.to_xml_string(), "")
+
+ address_element = Address(children=["Test Address"])
+ self.assertEqual(
+ address_element.to_xml_string(), "Test Address "
+ )
+
+ def test_media_elements(self):  # Track (kind given as a TrackKind member), a src Source, Audio and Video.
+ track_element = Track(src="test.vtt", kind=TrackKind.SUBTITLES)
+ self.assertEqual(
+ track_element.to_xml_string(), ' '
+ )
+
+ media_source_element = Source(src="test.mp4")
+ self.assertEqual(
+ media_source_element.to_xml_string(), ' '
+ )
+
+ audio_element = Audio(children=["Test Audio"], src="test.mp3")
+ self.assertEqual(
+ audio_element.to_xml_string(),
+ 'Test Audio ',
+ )
+
+ video_element = Video(children=["Test Video"], src="test.mp4")
+ self.assertEqual(
+ video_element.to_xml_string(),
+ 'Test Video ',
+ )
+
+ def test_sequence_elements(self):  # Li, Ol (with an OlType), Ul, Dt, Dd and Dl; the same Li instance is reused in both lists.
+ li_element = Li(children=["Test Li"])
+ self.assertEqual(li_element.to_xml_string(), "Test Li ")
+
+ ol_element = Ol(children=[li_element], type=OlType.NUMBERS)
+ self.assertEqual(
+ ol_element.to_xml_string(), 'Test Li '
+ )
+
+ ul_element = Ul(children=[li_element])
+ self.assertEqual(ul_element.to_xml_string(), "")
+
+ dt_element = Dt(children=["Test Dt"])
+ self.assertEqual(dt_element.to_xml_string(), "Test Dt ")
+
+ dd_element = Dd(children=["Test Dd"])
+ self.assertEqual(dd_element.to_xml_string(), "Test Dd ")
+
+ dl_element = Dl(children=[dt_element, dd_element])
+ self.assertEqual(
+ dl_element.to_xml_string(), "Test Dt Test Dd "
+ )
+
+ def test_table_elements(self):  # Table parts are checked one by one, then assembled into a single Table.
+ caption_element = Caption(children=["Test Caption"])
+ self.assertEqual(
+ caption_element.to_xml_string(), "Test Caption "
+ )
+
+ col_element = Col()
+ self.assertEqual(col_element.to_xml_string(), ' ')
+
+ colgroup_element = Colgroup(children=[col_element])
+ self.assertEqual(
+ colgroup_element.to_xml_string(), ' '
+ )
+
+ td_element = Td(children=["Test Td"])
+ self.assertEqual(td_element.to_xml_string(), "Test Td ")
+
+ th_element = Th(children=["Test Th"])
+ self.assertEqual(th_element.to_xml_string(), "Test Th ")
+
+ tr_element = Tr(children=[th_element, td_element])
+ self.assertEqual(
+ tr_element.to_xml_string(), "Test Th Test Td "
+ )
+
+ tbody_element = TBody(children=[tr_element])
+ self.assertEqual(
+ tbody_element.to_xml_string(),
+ "Test Th Test Td ",
+ )
+
+ thead_element = THead(children=[tr_element])
+ self.assertEqual(
+ thead_element.to_xml_string(),
+ "Test Th Test Td ",
+ )
+
+ tfoot_element = TFoot(children=[tr_element])
+ self.assertEqual(
+ tfoot_element.to_xml_string(),
+ "Test Th Test Td ",
+ )
+
+ table_element = Table(  # children passed in the order caption, colgroup, thead, tbody, tfoot
+ children=[
+ caption_element,
+ colgroup_element,
+ thead_element,
+ tbody_element,
+ tfoot_element,
+ ]
+ )
+ expected_html = 'Test Caption Test Th Test Td Test Th Test Td Test Th Test Td
' # noqa: E501
+ self.assertEqual(table_element.to_xml_string(), expected_html)
+
+ def test_text_elements(self):  # One assertion per text-level element, from A through Ruby, each with a single text child (Rp takes text= instead).
+ a_element = A(children=["Test A"], href="file.html")
+ self.assertEqual(a_element.to_xml_string(), 'Test A ')
+
+ p_element = P(children=["Test P"])
+ self.assertEqual(p_element.to_xml_string(), "Test P
")
+
+ span_element = Span(children=["Test Span"])
+ self.assertEqual(span_element.to_xml_string(), "Test Span ")
+
+ h1_element = H1(children=["Test H1"])
+ self.assertEqual(h1_element.to_xml_string(), "Test H1 ")
+
+ h2_element = H2(children=["Test H2"])
+ self.assertEqual(h2_element.to_xml_string(), "Test H2 ")
+
+ h3_element = H3(children=["Test H3"])
+ self.assertEqual(h3_element.to_xml_string(), "Test H3 ")
+
+ h4_element = H4(children=["Test H4"])
+ self.assertEqual(h4_element.to_xml_string(), "Test H4 ")
+
+ h5_element = H5(children=["Test H5"])
+ self.assertEqual(h5_element.to_xml_string(), "Test H5 ")
+
+ h6_element = H6(children=["Test H6"])
+ self.assertEqual(h6_element.to_xml_string(), "Test H6 ")
+
+ pre_element = Pre(children=["Test Pre"])
+ self.assertEqual(pre_element.to_xml_string(), "Test Pre ")
+
+ em_element = Em(children=["Test Em"])
+ self.assertEqual(em_element.to_xml_string(), "Test Em ")
+
+ code_element = Code(children=["Test Code"])
+ self.assertEqual(code_element.to_xml_string(), "Test Code")
+
+ kbd_element = Kbd(children=["Test Kbd"])
+ self.assertEqual(kbd_element.to_xml_string(), "Test Kbd ")
+
+ i_element = I(children=["Test I"])
+ self.assertEqual(i_element.to_xml_string(), "Test I ")
+
+ dfn_element = Dfn(children=["Test Dfn"])
+ self.assertEqual(dfn_element.to_xml_string(), "Test Dfn ")
+
+ abbr_element = Abbr(children=["Test Abbr"])
+ self.assertEqual(abbr_element.to_xml_string(), "Test Abbr ")
+
+ strong_element = Strong(children=["Test Strong"])
+ self.assertEqual(strong_element.to_xml_string(), "Test Strong ")
+
+ sup_element = Sup(children=["Test Sup"])
+ self.assertEqual(sup_element.to_xml_string(), "Test Sup ")
+
+ sub_element = Sub(children=["Test Sub"])
+ self.assertEqual(sub_element.to_xml_string(), "Test Sub ")
+
+ var_element = Var(children=["Test Var"])
+ self.assertEqual(var_element.to_xml_string(), "Test Var ")
+
+ small_element = Small(children=["Test Small"])
+ self.assertEqual(small_element.to_xml_string(), "Test Small ")
+
+ samp_element = Samp(children=["Test Samp"])
+ self.assertEqual(samp_element.to_xml_string(), "Test Samp ")
+
+ b_element = B(children=["Test B"])
+ self.assertEqual(b_element.to_xml_string(), "Test B ")
+
+ cite_element = Cite(children=["Test Cite"])
+ self.assertEqual(cite_element.to_xml_string(), "Test Cite ")
+
+ q_element = Q(children=["Test Q"])
+ self.assertEqual(q_element.to_xml_string(), "Test Q ")
+
+ bdo_element = Bdo(dir=BdoDir.LTR, children=["Test Bdo"])
+ self.assertEqual(bdo_element.to_xml_string(), 'Test Bdo ')
+
+ bdi_element = Bdi(children=["Test Bdi"])
+ self.assertEqual(bdi_element.to_xml_string(), "Test Bdi ")
+
+ rt_element = Rt(children=["Test Rt"])
+ self.assertEqual(rt_element.to_xml_string(), "Test Rt ")
+
+ rp_element = Rp(text="(")
+ self.assertEqual(rp_element.to_xml_string(), "( ")
+
+ ruby_element = Ruby(children=["Test Ruby"])
+ self.assertEqual(ruby_element.to_xml_string(), "Test Ruby ")
+
+class TestHTMLStringIntegration(unittest.TestCase):  # Parses HTML text with HTMLElement.from_html_string and inspects the returned objects. NOTE(review): the HTML fixtures below appear to have lost their tags — confirm against the repository copy.
+ def test_complex_html_parsing(self):  # Expects four root elements (div, ul, ol, p), then walks their children and attributes.
+ complex_html = """
+
+
This is a complex paragraph with emphasis and a
+ link to example .
+
+
+
+ First bold item
+ Second item with internal link
+ Third item
+
+
+ Numbered item one
+ Numbered item two
+
+ Final paragraph with line break.
+ """
+
+ # Parse the HTML
+ elements = HTMLElement.from_html_string(complex_html)
+
+ # Should have 4 root elements: div, ul, ol, p
+ self.assertEqual(
+ len(elements), 4, f"Expected 4 root elements, got {len(elements)}"
+ )
+
+ # Test first element: div with complex content
+ div_element = elements[0]
+ self.assertIsInstance(div_element, Div)
+ self.assertEqual(div_element.class_, "container")
+ self.assertEqual(div_element.id_, "main")
+
+ # Div should have 2 children: p and img
+ self.assertEqual(len(div_element.children), 2)
+
+ # Test paragraph inside div
+ p_element = div_element.children[0]
+ self.assertIsInstance(p_element, P)
+
+ # Paragraph should have mixed content: text, strong, text, em, text, a, text
+ p_children = p_element.children
+ self.assertEqual(len(p_children), 7)
+
+ # Find and test the strong element
+ strong_element = p_children[1]
+ self.assertEqual(len(strong_element.children), 1)
+ self.assertIsInstance(strong_element.children[0], TextNode)
+ self.assertEqual(strong_element.children[0].text, "complex")
+
+ # Find and test the em element
+ em_element = p_children[3]
+ self.assertEqual(len(em_element.children), 1)
+ self.assertEqual(em_element.children[0].text, "emphasis")
+
+ # Find and test the link element
+ a_element = p_children[5]
+ self.assertEqual(str(a_element.href), "file.html#anchor")  # href is compared through str() here
+ self.assertEqual(len(a_element.children), 1)
+ self.assertEqual(a_element.children[0].text, "link to example")
+
+ # Test image element
+ img_element = div_element.children[1]
+ self.assertIsInstance(img_element, Img)
+ self.assertEqual(str(img_element.src), "image.jpg")
+ self.assertEqual(img_element.alt, "Test image")
+ self.assertEqual(img_element.width, 300)  # compared with int literals, not strings
+ self.assertEqual(img_element.height, 200)
+
+ # Test second element: unordered list
+ ul_element = elements[1]
+ self.assertIsInstance(ul_element, Ul)
+ self.assertEqual(len(ul_element.children), 3)
+
+ # Test first list item
+ li1 = ul_element.children[0]
+ self.assertIsInstance(li1, Li)
+ li1_children = li1.children
+ # Should have: TextNode("First "), Strong("bold"), TextNode(" item")
+ self.assertEqual(len(li1_children), 3)
+
+ # Find strong in first list item
+ li1_strong = li1_children[1]
+ self.assertEqual(li1_strong.children[0].text, "bold")
+
+ # Test second list item with link
+ li2 = ul_element.children[1]
+ self.assertIsInstance(li2, Li)
+ li2_link = li2.children[1]
+ self.assertEqual(li2_link.href, "page2.html")
+
+ # Test third element: ordered list
+ ol_element = elements[2]
+ self.assertIsInstance(ol_element, Ol)
+ self.assertEqual(len(ol_element.children), 2)
+
+ # Test ordered list items
+ ol_li1 = ol_element.children[0]
+ self.assertIsInstance(ol_li1, Li)
+
+ ol_li2 = ol_element.children[1]
+ self.assertIsInstance(ol_li2, Li)
+ ol_li2_em = ol_li2.children[1]
+ self.assertEqual(ol_li2_em.children[0].text, "two")
+
+ # Test fourth element: paragraph with line break
+ final_p = elements[3]
+ self.assertIsInstance(final_p, P)
+ br_element = final_p.children[1]
+ self.assertIsInstance(br_element, Br)
+
+ def test_simple_html_parsing(self):  # One root P with three children; the middle child is a Strong whose text is "world".
+ """Test parsing simple HTML elements"""
+
+ simple_html = "Hello world !
"
+ elements = HTMLElement.from_html_string(simple_html)
+
+ self.assertEqual(len(elements), 1)
+ p = elements[0]
+ self.assertIsInstance(p, P)
+ self.assertEqual(len(p.children), 3)
+
+ # Check strong element
+ strong = p.children[1]
+ self.assertIsInstance(strong, Strong)
+ self.assertEqual(strong.children[0].text, "world")
+
+ def test_empty_and_self_closing_elements(self):  # Expects four roots: an empty P, an Img, a Br, and a Div wrapping one empty Span.
+ """Test parsing empty elements and self-closing tags"""
+
+ html = """
+
+
+
+
+ """
+
+ elements = HTMLElement.from_html_string(html)
+ self.assertEqual(len(elements), 4)
+
+ # Empty paragraph
+ self.assertIsInstance(elements[0], P)
+ self.assertEqual(len(elements[0].children), 0)
+
+ # Image with attributes
+ self.assertIsInstance(elements[1], Img)
+ self.assertEqual(elements[1].src, "test.jpg")
+ self.assertEqual(elements[1].alt, "test")
+
+ # Line break
+ self.assertIsInstance(elements[2], Br)
+
+ # Div with empty span
+ self.assertIsInstance(elements[3], Div)
+ self.assertEqual(len(elements[3].children), 1)
+ self.assertIsInstance(elements[3].children[0], Span)
+ self.assertEqual(len(elements[3].children[0].children), 0)
+
+ def test_roundtrip_conversion(self):  # Parses a fragment, serializes every root with to_xml_string(), and compares with the whitespace-normalized input.
+ """Test that HTML -> Pydantic -> XML maintains structure"""
+
+ original_html = """
+ Test bold and italic text.
+
+ """
+
+ # Parse to Pydantic objects
+ elements = HTMLElement.from_html_string(original_html)
+
+ # Convert back to XML strings
+ xml_output = "".join(elem.to_xml_string() for elem in elements)
+
+ self.assertEqual(
+ "".join(m.strip() for m in original_html.split("\n")), xml_output.strip()  # expected = input with each line stripped and the lines joined without separators
+ )
+
+ def test_attribute_type_conversion(self):  # Reads attributes back from a parsed div and its two children (a link and an image).
+ """Test that attributes are properly converted to correct types"""
+
+ html = """
+
+ """
+
+ elements = HTMLElement.from_html_string(html)
+ div = elements[0]
+
+ # Test div attributes
+ self.assertEqual(div.class_, "test-class")
+ self.assertEqual(div.id_, "test-id")
+
+ # Test link attributes
+ a = div.children[0]
+ self.assertEqual(a.href, "file.html?query=test")
+
+ # Test image attributes
+ img = div.children[1]
+ self.assertEqual(img.src, "image.png")
+ self.assertEqual(img.alt, "Alt text")
+ self.assertEqual(img.width, 100)  # width/height are compared with int literals
+ self.assertEqual(img.height, 50)
+
+
+class TestFileDependencies(unittest.TestCase):
+    # Checks which local file paths get_file_dependencies() reports for
+    # elements built directly through their constructors. Where ordering is
+    # not the point of the test, results are compared as sets.
+
+    def test_img_src_dependencies(self):
+        # A bare src is the only dependency reported.
+        image = Img(alt="Test image", src="image.jpg")
+        self.assertEqual(image.get_file_dependencies(), ["image.jpg"])
+
+    def test_img_srcset_dependencies(self):
+        # src plus every width-described srcset candidate.
+        image = Img(
+            alt="Responsive image",
+            src="fallback.jpg",
+            srcset="small.jpg 480w, medium.jpg 800w, large.jpg 1200w",
+        )
+        self.assertEqual(
+            set(image.get_file_dependencies()),
+            {"fallback.jpg", "small.jpg", "medium.jpg", "large.jpg"},
+        )
+
+    def test_img_srcset_with_density_descriptors(self):
+        # Density descriptors (1x/2x/3x); "image.jpg" appears in both src
+        # and srcset.
+        image = Img(
+            alt="High DPI image",
+            src="image.jpg",
+            srcset="image.jpg 1x, image@2x.jpg 2x, image@3x.jpg 3x",
+        )
+        self.assertEqual(
+            set(image.get_file_dependencies()),
+            {"image.jpg", "image@2x.jpg", "image@3x.jpg"},
+        )
+
+    def test_a_href_dependencies(self):
+        # A local href on an anchor is reported.
+        anchor = A(children=["Download PDF"], href="document.pdf")
+        self.assertEqual(anchor.get_file_dependencies(), ["document.pdf"])
+
+    def test_audio_src_dependencies(self):
+        # Audio reports its src.
+        player = Audio(children=["Audio not supported"], src="audio.mp3")
+        self.assertEqual(player.get_file_dependencies(), ["audio.mp3"])
+
+    def test_video_src_dependencies(self):
+        # Video reports its src.
+        player = Video(children=["Video not supported"], src="video.mp4")
+        self.assertEqual(player.get_file_dependencies(), ["video.mp4"])
+
+    def test_source_src_dependencies(self):
+        # A Source carrying src reports it.
+        media_source = Source(src="video.webm")
+        self.assertEqual(media_source.get_file_dependencies(), ["video.webm"])
+
+    def test_source_srcset_dependencies(self):
+        # A Source carrying srcset reports each candidate path.
+        picture_source = Source(srcset="banner-480.jpg 480w, banner-800.jpg 800w")
+        self.assertEqual(
+            set(picture_source.get_file_dependencies()),
+            {"banner-480.jpg", "banner-800.jpg"},
+        )
+
+    def test_track_src_dependencies(self):
+        # kind is passed as a plain string here rather than an enum member.
+        text_track = Track(kind="subtitles", src="subtitles.vtt")
+        self.assertEqual(text_track.get_file_dependencies(), ["subtitles.vtt"])
+
+    def test_blockquote_cite_dependencies(self):
+        # An absolute https cite URL is external, so it is not reported as a
+        # file dependency.
+        quote = Blockquote(
+            children=["Quote text"], cite="https://example.com/source.html"
+        )
+        self.assertEqual(quote.get_file_dependencies(), [])
+
+    def test_nested_element_dependencies(self):
+        # Dependencies of child elements are collected through the parent;
+        # the plain text child contributes nothing.
+        wrapper = Div(
+            children=[
+                Img(src="nested.jpg", alt="Nested image"),
+                A(href="page.html", children=["Link text"]),
+                "Some text",
+            ]
+        )
+        self.assertEqual(
+            set(wrapper.get_file_dependencies()), {"nested.jpg", "page.html"}
+        )
+
+    def test_complex_nested_dependencies(self):
+        # Several element kinds under one root, including Sources nested
+        # inside a Video that has no src of its own.
+        first_image = Img(src="image1.jpg", alt="Image 1")
+        second_image = Img(
+            src="image2.png",
+            srcset="image2-small.png 480w, image2-large.png 1200w",
+            alt="Image 2",
+        )
+        download = A(href="document.pdf", children=["Download"])
+        soundtrack = Audio(src="background.mp3", children=["Audio"])
+        clip = Video(
+            children=[
+                Source(src="video.webm"),
+                Source(src="video.mp4"),
+                "Video not supported",
+            ]
+        )
+        root = Div(children=[first_image, second_image, download, soundtrack, clip])
+
+        self.assertEqual(
+            set(root.get_file_dependencies()),
+            {
+                "image1.jpg",
+                "image2.png",
+                "image2-small.png",
+                "image2-large.png",
+                "document.pdf",
+                "background.mp3",
+                "video.webm",
+                "video.mp4",
+            },
+        )
+
+    def test_picture_element_dependencies(self):
+        # Picture reports its Source candidates and the fallback img.
+        picture = Picture(
+            children=[
+                Source(srcset="mobile.jpg 480w, tablet.jpg 800w"),
+                Source(srcset="desktop.jpg 1200w"),
+            ],
+            img=Img(src="fallback.jpg", alt="Picture"),
+        )
+        self.assertEqual(
+            set(picture.get_file_dependencies()),
+            {"mobile.jpg", "tablet.jpg", "desktop.jpg", "fallback.jpg"},
+        )
+
+    def test_table_with_dependencies(self):
+        # Dependencies inside table cells are found through Tr and Table.
+        image_cell = Td(children=[Img(src="table-image.jpg", alt="Table image")])
+        anchor_cell = Td(children=[A(href="table-link.html", children=["Link"])])
+        grid = Table(children=[Tr(children=[image_cell, anchor_cell])])
+        self.assertEqual(
+            set(grid.get_file_dependencies()),
+            {"table-image.jpg", "table-link.html"},
+        )
+
+    def test_no_dependencies(self):
+        # Text-only content yields an empty list.
+        paragraph = P(children=["Just text content"])
+        self.assertEqual(paragraph.get_file_dependencies(), [])
+
+    def test_empty_srcset(self):
+        # An Img constructed without any srcset still reports its src.
+        image = Img(alt="Image", src="image.jpg")
+        self.assertEqual(image.get_file_dependencies(), ["image.jpg"])
+
+    def test_duplicate_dependencies_removed(self):
+        # Two images sharing one src produce a single entry; compared as a
+        # list, so the duplicate must really be dropped.
+        twins = Div(
+            children=[
+                Img(src="same.jpg", alt="Image 1"),
+                Img(src="same.jpg", alt="Image 2"),
+            ]
+        )
+        self.assertEqual(twins.get_file_dependencies(), ["same.jpg"])
+
+    def test_mixed_srcset_formats(self):
+        # One srcset mixing a width descriptor with density descriptors.
+        image = Img(
+            alt="Mixed srcset",
+            src="base.jpg",
+            srcset="small.jpg 300w, medium.jpg 1.5x, large.jpg 2x",
+        )
+        self.assertEqual(
+            set(image.get_file_dependencies()),
+            {"base.jpg", "small.jpg", "medium.jpg", "large.jpg"},
+        )
diff --git a/contentcuration/contentcuration/tests/utils/qti/test_imsmanifest.py b/contentcuration/contentcuration/tests/utils/qti/test_imsmanifest.py
new file mode 100644
index 0000000000..949b88ffdd
--- /dev/null
+++ b/contentcuration/contentcuration/tests/utils/qti/test_imsmanifest.py
@@ -0,0 +1,204 @@
+import unittest
+
+from contentcuration.utils.assessment.qti.imsmanifest import Dependency
+from contentcuration.utils.assessment.qti.imsmanifest import File
+from contentcuration.utils.assessment.qti.imsmanifest import Item
+from contentcuration.utils.assessment.qti.imsmanifest import Manifest
+from contentcuration.utils.assessment.qti.imsmanifest import Metadata
+from contentcuration.utils.assessment.qti.imsmanifest import Organization
+from contentcuration.utils.assessment.qti.imsmanifest import Organizations
+from contentcuration.utils.assessment.qti.imsmanifest import Resource
+from contentcuration.utils.assessment.qti.imsmanifest import Resources
+
+
+class TestManifestXMLOutput(unittest.TestCase):  # Compares to_xml_string() of the imsmanifest models with literal strings. NOTE(review): the expected XML literals below appear to have lost their markup — confirm against the repository copy.
+ def test_metadata_to_xml_string(self):  # Metadata with schema and schemaversion set, then with no arguments.
+ metadata = Metadata(schema="test_schema", schemaversion="1.0")
+ expected_xml = "test_schema 1.0 "
+ self.assertEqual(metadata.to_xml_string(), expected_xml)
+
+ metadata = Metadata()
+ expected_xml = " "
+ self.assertEqual(metadata.to_xml_string(), expected_xml)
+
+ def test_item_to_xml_string(self):  # Item with identifier and identifierref, then with no arguments.
+ item = Item(identifier="item1", identifierref="ref1")
+ expected_xml = ' '
+ self.assertEqual(item.to_xml_string(), expected_xml)
+
+ item = Item()
+ expected_xml = " "
+ self.assertEqual(item.to_xml_string(), expected_xml)
+
+ def test_organization_to_xml_string(self):  # Organization holding two Items plus identifier/structure/title, then with no arguments.
+ item1 = Item(identifier="item1")
+ item2 = Item(identifier="item2")
+ organization = Organization(
+ identifier="org1",
+ structure="hierarchical",
+ title="Test Org",
+ item=[item1, item2],
+ )
+ expected_xml = ' ' # noqa: E501
+ self.assertEqual(organization.to_xml_string(), expected_xml)
+
+ organization = Organization()
+ expected_xml = " "
+ self.assertEqual(organization.to_xml_string(), expected_xml)
+
+ def test_organizations_to_xml_string(self):  # Organizations wrapping two Organization entries, then with no arguments.
+ org1 = Organization(identifier="org1")
+ org2 = Organization(identifier="org2")
+ organizations = Organizations(organizations=[org1, org2])
+ expected_xml = ' '
+ self.assertEqual(organizations.to_xml_string(), expected_xml)
+ organizations = Organizations()
+ expected_xml = " "
+ self.assertEqual(organizations.to_xml_string(), expected_xml)
+
+ def test_file_to_xml_string(self):  # File with an href, then with no arguments.
+ file = File(href="test.html")
+ expected_xml = ' '
+ self.assertEqual(file.to_xml_string(), expected_xml)
+ file = File()
+ expected_xml = " "
+ self.assertEqual(file.to_xml_string(), expected_xml)
+
+ def test_resource_to_xml_string(self):  # Resource with href and two Files, then with only identifier and type_.
+ file1 = File(href="file1.html")
+ file2 = File(href="file2.html")
+ resource = Resource(
+ identifier="res1", type_="webcontent", href="res.zip", files=[file1, file2]
+ )
+ expected_xml = ' '
+ self.assertEqual(resource.to_xml_string(), expected_xml)
+
+ resource = Resource(identifier="res1", type_="webcontent")
+ expected_xml = ' '
+ self.assertEqual(resource.to_xml_string(), expected_xml)
+
+ def test_resources_to_xml_string(self):  # Resources wrapping two Resource entries, then with no arguments.
+ res1 = Resource(identifier="res1", type_="webcontent")
+ res2 = Resource(identifier="res2", type_="imscp")
+ resources = Resources(resources=[res1, res2])
+ expected_xml = ' '
+ self.assertEqual(resources.to_xml_string(), expected_xml)
+ resources = Resources()
+ expected_xml = " "
+ self.assertEqual(resources.to_xml_string(), expected_xml)
+
+ def test_imsmanifest_to_xml_string(self):  # Manifest with every part supplied, then with only an identifier.
+ metadata = Metadata(schema="test_schema", schemaversion="1.0")
+ organizations = Organizations(organizations=[Organization(identifier="org1")])
+ resources = Resources(
+ resources=[Resource(identifier="res1", type_="webcontent")]
+ )
+ manifest = Manifest(
+ identifier="manifest1",
+ version="1.0",
+ metadata=metadata,
+ organizations=organizations,
+ resources=resources,
+ )
+ expected_xml = (
+ "' # noqa: E501
+ "test_schema 1.0 "
+ ' '
+ ' '
+ " "
+ )
+ self.assertEqual(manifest.to_xml_string(), expected_xml)
+
+ manifest = Manifest(identifier="democracy_manifest")
+ expected_xml = (
+ ''
+ " "
+ " "
+ " "
+ " "
+ )
+ self.assertEqual(manifest.to_xml_string(), expected_xml)
+
+ def test_imsmanifest_full_integration(self):  # Manifest with six Resources; the first declares a Dependency on "image_resource_1", which is the second Resource.
+ manifest = Manifest(
+ identifier="level1-T1-test-entry",
+ version="1.0",
+ metadata=Metadata(schema="QTI Package", schemaversion="3.0.0"),
+ organizations=Organizations(),
+ resources=Resources(
+ resources=[
+ Resource(
+ identifier="t1-test-entry-item1",
+ type_="imsqti_item_xmlv3p0",
+ href="items/choice-single-cardinality.xml",
+ files=[File(href="items/choice-single-cardinality.xml")],
+ dependencies=[Dependency(identifierref="image_resource_1")],
+ ),
+ Resource(
+ type_="webcontent",
+ identifier="image_resource_1",
+ href="items/images/badger.svg",
+ files=[File(href="items/images/badger.svg")],
+ ),
+ Resource(
+ identifier="t1-test-entry-item2",
+ type_="imsqti_item_xmlv3p0",
+ href="items/choice-multiple-cardinality.xml",
+ files=[File(href="items/choice-multiple-cardinality.xml")],
+ ),
+ Resource(
+ identifier="t1-test-entry-item3",
+ type_="imsqti_item_xmlv3p0",
+ href="items/text-entry.xml",
+ files=[File(href="items/text-entry.xml")],
+ ),
+ Resource(
+ identifier="t1-test-entry-item4",
+ type_="imsqti_item_xmlv3p0",
+ href="items/extended-text.xml",
+ files=[File(href="items/extended-text.xml")],
+ ),
+ Resource(
+ identifier="t1-test-entry",
+ type_="imsqti_test_xmlv3p0",
+ href="assessment.xml",
+ files=[File(href="assessment.xml")],
+ ),
+ ]
+ ),
+ )
+
+ expected_xml = (
+ '' # noqa: E501
+ "QTI Package 3.0.0 "
+ " "
+ ""
+ ''
+ ' '
+ ' '
+ " "
+ ''
+ ' '
+ " "
+ ''
+ ' '
+ " "
+ ''
+ ' '
+ " "
+ ''
+ ' '
+ " "
+ " "
+ " "
+ )
+ self.assertEqual(manifest.to_xml_string(), expected_xml)
diff --git a/contentcuration/contentcuration/tests/utils/qti/test_mathml.py b/contentcuration/contentcuration/tests/utils/qti/test_mathml.py
new file mode 100644
index 0000000000..0bace05336
--- /dev/null
+++ b/contentcuration/contentcuration/tests/utils/qti/test_mathml.py
@@ -0,0 +1,1613 @@
+"""
+This test suite was initially generated using Gemini 2.5 Pro Preview.
+It was then manually refined to ensure correctness and completeness.
+This was then supplemented with additional tests to cover missing edge cases
+and validations using Claude Sonnet 4.
+
+Gemini prompt:
+Please write a comprehensive test suite for this, assuming that everything defined
+in these files can be imported from `contentcuration.utils.assessment.qti.mathml`.
+I am more concerned with integration level testing - checking that appropriately
+composed objects produce the correct MathML output when the to_xml_string method
+is invoked, and that conversely, appropriate object structures are created
+using the from_string method.
+
+
+Claude prompt:
+I have these files that define Pydantic objects for generating and validating MathML.
+Here are my current tests for this. Please tell me what the tests cover well, and what is missing.
+Formulate recommendations to supplement these tests, where testing conformance to the
+MathML Core schema is most important, and testing specific quirks of the implementation is not at all important.
+Where possible, generate a separate artifact for each separate additional set of tests,
+so that I can choose which ones I want to include more easily.
+"""
+import unittest
+
+from pydantic import ValidationError
+
+from contentcuration.utils.assessment.qti.base import TextNode
+from contentcuration.utils.assessment.qti.constants import Dir
+from contentcuration.utils.assessment.qti.mathml import Annotation
+from contentcuration.utils.assessment.qti.mathml import AnnotationXml
+from contentcuration.utils.assessment.qti.mathml import Math
+from contentcuration.utils.assessment.qti.mathml import MathMLDisplay
+from contentcuration.utils.assessment.qti.mathml import MathMLElement
+from contentcuration.utils.assessment.qti.mathml import MathMLForm
+from contentcuration.utils.assessment.qti.mathml import Mfrac
+from contentcuration.utils.assessment.qti.mathml import Mi
+from contentcuration.utils.assessment.qti.mathml import Mn
+from contentcuration.utils.assessment.qti.mathml import Mo
+from contentcuration.utils.assessment.qti.mathml import Mrow
+from contentcuration.utils.assessment.qti.mathml import Mspace
+from contentcuration.utils.assessment.qti.mathml import Msubsup
+from contentcuration.utils.assessment.qti.mathml import Mtable
+from contentcuration.utils.assessment.qti.mathml import Mtd
+from contentcuration.utils.assessment.qti.mathml import Mtr
+from contentcuration.utils.assessment.qti.mathml import Semantics
+from contentcuration.utils.assessment.qti.mathml.base import MathMLGroupingElement
+from contentcuration.utils.assessment.qti.mathml.base import MathMLLayoutElement
+from contentcuration.utils.assessment.qti.mathml.base import MathMLScriptElement
+from contentcuration.utils.assessment.qti.mathml.base import MathMLTokenElement
+from contentcuration.utils.assessment.qti.mathml.core import Merror
+from contentcuration.utils.assessment.qti.mathml.core import Mmultiscripts
+from contentcuration.utils.assessment.qti.mathml.core import Mover
+from contentcuration.utils.assessment.qti.mathml.core import Mphantom
+from contentcuration.utils.assessment.qti.mathml.core import Mprescripts
+from contentcuration.utils.assessment.qti.mathml.core import Mroot
+from contentcuration.utils.assessment.qti.mathml.core import Ms
+from contentcuration.utils.assessment.qti.mathml.core import Msqrt
+from contentcuration.utils.assessment.qti.mathml.core import Mstyle
+from contentcuration.utils.assessment.qti.mathml.core import Msub
+from contentcuration.utils.assessment.qti.mathml.core import Msup
+from contentcuration.utils.assessment.qti.mathml.core import Mtext
+from contentcuration.utils.assessment.qti.mathml.core import Munder
+from contentcuration.utils.assessment.qti.mathml.core import Munderover
+
+
+class TestFieldValidation(unittest.TestCase):
+ """Tests for field validation using the annotated types and enums."""
+
+ def test_length_percentage_valid_values(self):
+ valid_values = [
+ "0", # unitless zero
+ "10px", # pixels
+ "2em",
+ "1.5em", # em units
+ "0.5rem", # rem units
+ "2pt",
+ "12pt", # points
+ "1in",
+ "2.5in", # inches
+ "1cm",
+ "10mm", # metric
+ "50%",
+ "100%",
+ "0%",
+ "150%", # percentages
+ "+10px",
+ "-5px", # signed values
+ "0.1vh",
+ "50vw",
+ "10vmin",
+ "20vmax", # viewport units
+ "1ch",
+ "2ex", # character units
+ ]
+
+ for value in valid_values:
+ with self.subTest(value=value):
+ # Test on mathsize attribute
+ obj = Mi(mathsize=value, children=["x"])
+ self.assertEqual(obj.mathsize, value)
+
+ # Test on width attribute of Mspace
+ space_obj = Mspace(width=value)
+ self.assertEqual(space_obj.width, value)
+
+ def test_length_percentage_invalid_values(self):
+ invalid_values = [
+ "10", # number without unit (except 0)
+ "px", # unit without number
+ "10 px", # space in value
+ "10px ", # trailing space
+ " 10px", # leading space
+ "10px;", # invalid character
+ "10xyz", # invalid unit
+ "auto", # keyword values not allowed
+ "inherit", # keyword values not allowed
+ "", # empty string
+ "10px 20px", # multiple values
+ ]
+
+ for value in invalid_values:
+ with self.subTest(value=value):
+ with self.assertRaises(ValidationError):
+ Mi(mathsize=value, children=["x"])
+
+ def test_color_value_valid_values(self):
+ valid_values = [
+ "red",
+ "blue",
+ "green",
+ "black",
+ "white", # named colors
+ "transparent",
+ "currentColor", # special keywords
+ "#f00",
+ "#ff0000",
+ "#FF0000", # hex colors (3,6 chars)
+ "#ffff",
+ "#ffffffff", # hex with alpha (4,8 chars)
+ "rgb(255,0,0)",
+ "rgb(255, 0, 0)", # rgb function
+ "rgba(255,0,0,0.5)",
+ "rgba(255, 0, 0, 1)", # rgba function
+ "hsl(0,100%,50%)",
+ "hsl(0, 100%, 50%)", # hsl function
+ "hsla(0,100%,50%,0.5)", # hsla function
+ ]
+
+ for value in valid_values:
+ with self.subTest(value=value):
+ obj = Mi(mathcolor=value, children=["x"])
+ self.assertEqual(obj.mathcolor, value)
+
+ def test_color_value_invalid_values(self):
+ """
+ Note that we do not validate color names against a predefined list,
+ as this would require a comprehensive list of valid CSS color names.
+ Instead, we focus on the format of the color value.
+ We also do not validate that number values in rgb/rgba are within 0-255 range,
+ as CSS allows values outside this range (e.g., rgb(300, -50, 500)).
+ """
+ invalid_values = [
+ "#ff", # too short hex
+ "#fffffffff", # too long hex
+ "#gggggg", # invalid hex characters
+ "rgb()", # empty rgb
+ "hsl()", # empty hsl
+ "", # empty string
+ "rgb(255 0 0)", # space instead of comma (CSS4 syntax)
+ ]
+
+ for value in invalid_values:
+ with self.subTest(value=value):
+ with self.assertRaises(ValidationError):
+ Mi(mathcolor=value, children=["x"])
+
+ def test_script_level_valid_values(self):
+ valid_values = [
+ "0",
+ "1",
+ "2",
+ "-1",
+ "-2", # basic integers
+ "+1",
+ "+2",
+ "+10", # explicit positive
+ "-10",
+ "-100", # negative
+ ]
+
+ for value in valid_values:
+ with self.subTest(value=value):
+ obj = Mi(scriptlevel=value, children=["x"])
+ self.assertEqual(obj.scriptlevel, value)
+
+ def test_script_level_invalid_values(self):
+ """Test invalid ScriptLevel values."""
+ invalid_values = [
+ "1.5", # decimal not allowed
+ "one", # word not allowed
+ "", # empty string
+ " 1", # leading space
+ "1 ", # trailing space
+ "++1", # double sign
+ "+-1", # mixed signs
+ ]
+
+ for value in invalid_values:
+ with self.subTest(value=value):
+ with self.assertRaises(ValidationError):
+ Mi(scriptlevel=value, children=["x"])
+
+ def test_enum_validation(self):
+ """Test enum field validation."""
+ # Valid enum values
+ math_obj = Math(display=MathMLDisplay.BLOCK, children=[])
+ self.assertEqual(math_obj.display, MathMLDisplay.BLOCK)
+
+ mo_obj = Mo(form=MathMLForm.INFIX, children=["+"])
+ self.assertEqual(mo_obj.form, MathMLForm.INFIX)
+
+ # Invalid enum values should raise ValidationError
+ with self.assertRaises(ValidationError):
+ Math(display="invalid_display", children=[])
+
+ with self.assertRaises(ValidationError):
+ Mo(form="invalid_form", children=["+"])
+
+ def test_boolean_attribute_validation(self):
+ """Test boolean attribute handling."""
+ # Valid boolean values
+ mo_obj = Mo(fence=True, separator=False, children=["|"])
+ self.assertTrue(mo_obj.fence)
+ self.assertFalse(mo_obj.separator)
+
+ # Boolean attributes should accept actual booleans
+ mo_obj2 = Mo(stretchy=True, symmetric=False, children=["("])
+ self.assertTrue(mo_obj2.stretchy)
+ self.assertFalse(mo_obj2.symmetric)
+
+
+class TestElementConstraints(unittest.TestCase):
+ """Tests for MathML element structural constraints and children requirements."""
+
+    def test_token_elements_children_constraints(self):
+        """Token elements accept text children only; Mspace exposes no children."""
+        text_node = "content"
+        math_element = Mi(children=["x"])  # Invalid child for token elements
+
+        # Valid: token elements with TextType children
+        token_classes = [Mi, Mn, Mo, Mtext, Ms, Annotation]
+
+        for token_class in token_classes:
+            with self.subTest(element=token_class.__name__):
+                # Valid: plain strings are coerced into TextNode children
+                element = token_class(children=[text_node])
+                self.assertEqual(len(element.children), 1)
+                self.assertIsInstance(element.children[0], TextNode)
+
+                # Invalid: MathML element children should fail
+                with self.assertRaises(
+                    ValidationError,
+                    msg=f"{token_class.__name__} should reject MathML element children",
+                ):
+                    token_class(children=[math_element])
+
+        # Mspace is an empty element and must not expose a ``children`` attribute.
+        # The former ``or len(getattr(..., [])) > 0`` operand was dead code: it was
+        # only evaluated when the attribute was absent, so it always saw ``[]``.
+        mspace = Mspace()
+        self.assertFalse(hasattr(mspace, "children"))
+
+ def test_elements_with_exactly_two_children(self):
+ """Test elements that require exactly 2 children."""
+ child1 = Mi(children=["a"])
+ child2 = Mn(children=["1"])
+ child3 = Mi(children=["b"])
+
+ # These elements should accept exactly 2 children
+ two_child_classes = [
+ (Mfrac, "fraction"),
+ (Mroot, "root"),
+ (Msub, "subscript"),
+ (Msup, "superscript"),
+ (Munder, "under"),
+ (Mover, "over"),
+ ]
+
+ for element_class, description in two_child_classes:
+ with self.subTest(element=element_class.__name__):
+ # Valid: exactly 2 children
+ element = element_class(children=[child1, child2])
+ self.assertEqual(
+ len(element.children),
+ 2,
+ f"{description} element should have exactly 2 children",
+ )
+
+ # Invalid: 1 child should fail
+ with self.assertRaises(
+ ValidationError, msg=f"{description} should reject 1 child"
+ ):
+ element_class(children=[child1])
+
+ # Invalid: 3 children should fail
+ with self.assertRaises(
+ ValidationError, msg=f"{description} should reject 3 children"
+ ):
+ element_class(children=[child1, child2, child3])
+
+ def test_elements_with_exactly_three_children(self):
+ """Test elements that require exactly 3 children."""
+ child1 = Mi(children=["base"])
+ child2 = Mn(children=["sub"])
+ child3 = Mn(children=["sup"])
+ child4 = Mi(children=["extra"])
+
+ # These elements should accept exactly 3 children
+ three_child_classes = [
+ (Msubsup, "subscript-superscript"),
+ (Munderover, "under-over"),
+ ]
+
+ for element_class, description in three_child_classes:
+ with self.subTest(element=element_class.__name__):
+ # Valid: exactly 3 children
+ element = element_class(children=[child1, child2, child3])
+ self.assertEqual(
+ len(element.children),
+ 3,
+ f"{description} element should have exactly 3 children",
+ )
+
+ # Invalid: 2 children should fail
+ with self.assertRaises(
+ ValidationError, msg=f"{description} should reject 2 children"
+ ):
+ element_class(children=[child1, child2])
+
+ # Invalid: 4 children should fail
+ with self.assertRaises(
+ ValidationError, msg=f"{description} should reject 4 children"
+ ):
+ element_class(children=[child1, child2, child3, child4])
+
+ def test_table_structure_constraints(self):
+ """Test table element structural requirements."""
+ # Valid table structure
+ cell_content = Mi(children=["cell"])
+ mtd = Mtd(children=[cell_content])
+ self.assertEqual(len(mtd.children), 1)
+
+ # Mtr should contain Mtd elements
+ mtr = Mtr(children=[mtd])
+ self.assertEqual(len(mtr.children), 1)
+ self.assertIsInstance(mtr.children[0], Mtd)
+
+ # Mtable should contain Mtr elements
+ mtable = Mtable(children=[mtr])
+ self.assertEqual(len(mtable.children), 1)
+ self.assertIsInstance(mtable.children[0], Mtr)
+
+ # Invalid: Mtr with non-Mtd children should fail
+ non_mtd_element = Mi(children=["invalid"])
+ with self.assertRaises(
+ ValidationError, msg="Mtr should reject non-Mtd children"
+ ):
+ Mtr(children=[non_mtd_element])
+
+ # Invalid: Mtable with non-Mtr children should fail
+ non_mtr_element = Mtd(children=[cell_content])
+ with self.assertRaises(
+ ValidationError, msg="Mtable should reject non-Mtr children"
+ ):
+ Mtable(children=[non_mtr_element])
+
+ def test_semantics_element_constraints(self):
+ """Test Semantics element structure."""
+ # First child should be presentation content
+ presentation = Mi(children=["x"])
+ annotation = Annotation(encoding="text/plain", children=["variable x"])
+ annotation_xml = AnnotationXml(
+ encoding="application/mathml+xml", children=[presentation]
+ )
+
+ # Valid semantics structures
+ semantics1 = Semantics(children=[presentation, annotation])
+ semantics2 = Semantics(children=[presentation, annotation_xml])
+ semantics3 = Semantics(children=[presentation, annotation, annotation_xml])
+
+ self.assertEqual(len(semantics1.children), 2)
+ self.assertEqual(len(semantics2.children), 2)
+ self.assertEqual(len(semantics3.children), 3)
+
+ # Invalid: Semantics with no children should fail
+ with self.assertRaises(
+ ValidationError, msg="Semantics should require at least one child"
+ ):
+ Semantics(children=[])
+
+ # Invalid: Semantics with only annotations (no presentation content) should fail
+ with self.assertRaises(
+ ValidationError,
+ msg="Semantics should require presentation content as first child",
+ ):
+ Semantics(children=[annotation])
+
+ def test_mmultiscripts_structure(self):
+ """Test Mmultiscripts element structure constraints."""
+ base = Mi(children=["F"])
+ sub1 = Mn(children=["1"])
+ sup1 = Mn(children=["2"])
+
+ # Basic multiscripts structure
+ mmultiscripts = Mmultiscripts(children=[base, sub1, sup1])
+ self.assertEqual(len(mmultiscripts.children), 3)
+
+ # With prescripts
+ prescripts = Mprescripts()
+ pre_sub = Mn(children=["0"])
+ pre_sup = Mn(children=["3"])
+
+ mmultiscripts_with_pre = Mmultiscripts(
+ children=[base, sub1, sup1, prescripts, pre_sub, pre_sup]
+ )
+ self.assertEqual(len(mmultiscripts_with_pre.children), 6)
+
+ def test_mmultiscripts_validation(self):
+ """Test Mmultiscripts validation rules."""
+ base = Mi(children=["F"])
+ sub1 = Mn(children=["1"])
+ sup1 = Mn(children=["2"])
+ sub2 = Mn(children=["3"])
+ sup2 = Mn(children=["4"])
+ prescripts = Mprescripts()
+
+ # Test: Empty mmultiscripts should fail
+ with self.assertRaises(
+ ValidationError, msg="Empty mmultiscripts should be invalid"
+ ):
+ Mmultiscripts(children=[])
+
+ # Test: Odd number of scripts (without prescripts) should fail
+ with self.assertRaises(
+ ValidationError, msg="Odd number of scripts should be invalid"
+ ):
+ Mmultiscripts(children=[base, sub1]) # Missing superscript
+
+ # Test: Scripts must come in pairs after base
+ with self.assertRaises(ValidationError, msg="Scripts must be paired"):
+ Mmultiscripts(
+ children=[base, sub1, sup1, sub2]
+ ) # Missing final superscript
+
+ # Test: Post-scripts must be in pairs when prescripts present
+ with self.assertRaises(ValidationError, msg="Post-scripts must be paired"):
+ Mmultiscripts(
+ children=[base, sub1, prescripts, sub2, sup2]
+ ) # Odd post-scripts
+
+ # Test: Pre-scripts must be in pairs when prescripts present
+ with self.assertRaises(ValidationError, msg="Pre-scripts must be paired"):
+ Mmultiscripts(
+ children=[base, sub1, sup1, prescripts, sub2]
+ ) # Odd pre-scripts
+
+ # Test: Multiple prescripts should fail
+ with self.assertRaises(
+ ValidationError, msg="Multiple prescripts should be invalid"
+ ):
+ Mmultiscripts(children=[base, sub1, sup1, prescripts, prescripts])
+
+ # Test: Valid cases should pass
+ # Valid: Base only
+ Mmultiscripts(children=[base])
+
+ # Valid: Base with paired scripts
+ Mmultiscripts(children=[base, sub1, sup1])
+
+ # Valid: Base with multiple paired scripts
+ Mmultiscripts(children=[base, sub1, sup1, sub2, sup2])
+
+ # Valid: Base with prescripts and paired pre-scripts
+ Mmultiscripts(children=[base, prescripts, sub1, sup1])
+
+ # Valid: Base with post-scripts and pre-scripts
+ Mmultiscripts(children=[base, sub1, sup1, prescripts, sub2, sup2])
+
+ def test_empty_elements_validation(self):
+ """Test elements that can be empty vs those that cannot."""
+ # Elements that can be empty
+ empty_allowed_classes = [
+ (Mrow, "row"),
+ (Mstyle, "style"),
+ (Merror, "error"),
+ (Mphantom, "phantom"),
+ (Msqrt, "square root"),
+ (Math, "math root"),
+ ]
+
+ for element_class, description in empty_allowed_classes:
+ with self.subTest(element=element_class.__name__):
+ element = element_class(children=[])
+ self.assertEqual(
+ len(element.children),
+ 0,
+ f"{description} element should allow empty children",
+ )
+
+ # Mspace is inherently empty (no children attribute with content)
+ mspace = Mspace(width="1em", height="1em")
+ self.assertIsNotNone(mspace)
+
+ def test_mixed_content_validation(self):
+ """Test elements that accept mixed content (text + elements)."""
+ text_before = "Before "
+ element = Mi(children=["x"])
+ text_after = " after"
+
+ # These elements should accept mixed content
+ mixed_content_classes = [
+ (Mrow, "row"),
+ (Mstyle, "style"),
+ (Merror, "error"),
+ (Mphantom, "phantom"),
+ ]
+
+ for element_class, description in mixed_content_classes:
+ with self.subTest(element=element_class.__name__):
+ mixed_element = element_class(
+ children=[text_before, element, text_after]
+ )
+ self.assertEqual(
+ len(mixed_element.children),
+ 3,
+ f"{description} element should accept mixed content",
+ )
+ self.assertIsInstance(mixed_element.children[0], TextNode)
+ self.assertIsInstance(mixed_element.children[1], Mi)
+ self.assertIsInstance(mixed_element.children[2], TextNode)
+
+ def test_annotation_xml_element_name(self):
+ """Test that AnnotationXml serializes with correct element name."""
+ annotation_xml = AnnotationXml(encoding="application/mathml+xml")
+ expected_name = "annotation-xml"
+ actual_name = annotation_xml.element_name()
+ self.assertEqual(actual_name, expected_name)
+
+ def test_mtable_with_complex_structure(self):
+ """Test complex table structures."""
+ # Create a 2x2 table
+ cell1 = Mtd(children=[Mi(children=["a"])])
+ cell2 = Mtd(children=[Mn(children=["1"])])
+ cell3 = Mtd(children=[Mi(children=["b"])])
+ cell4 = Mtd(children=[Mn(children=["2"])])
+
+ row1 = Mtr(children=[cell1, cell2])
+ row2 = Mtr(children=[cell3, cell4])
+
+ table = Mtable(children=[row1, row2])
+
+ self.assertEqual(len(table.children), 2)
+ self.assertEqual(len(table.children[0].children), 2)
+ self.assertEqual(len(table.children[1].children), 2)
+
+ def test_element_inheritance_hierarchy(self):
+ """Test that elements inherit from correct base classes."""
+ inheritance_tests = [
+ (Mi(children=["x"]), MathMLTokenElement, "token"),
+ (
+ Mfrac(
+ children=[
+ Mi(children=["a"]),
+ Mn(children=["1"]),
+ ]
+ ),
+ MathMLLayoutElement,
+ "layout",
+ ),
+ (
+ Msub(
+ children=[
+ Mi(children=["x"]),
+ Mn(children=["1"]),
+ ]
+ ),
+ MathMLScriptElement,
+ "script",
+ ),
+ (Mstyle(children=[]), MathMLGroupingElement, "grouping"),
+ ]
+
+ for element, expected_base, description in inheritance_tests:
+ with self.subTest(
+ element=type(element).__name__, base=expected_base.__name__
+ ):
+ self.assertIsInstance(
+ element,
+ expected_base,
+ f"{type(element).__name__} should be a {description} element",
+ )
+
+
+class TestMathMLSerialization(unittest.TestCase):
+    """Tests for object -> to_xml_string() using direct string comparison."""
+    # NOTE(review): attribute order and boolean spelling below are assumed - confirm.
+    def test_simple_mi(self):
+        obj = Mi(children=["x"])
+        xml_str = obj.to_xml_string()
+        expected_xml_str = "<mi>x</mi>"
+        self.assertEqual(xml_str, expected_xml_str)
+
+    def test_simple_mn_with_attribute(self):
+        obj = Mn(children=["123"], dir_=Dir.RTL)
+        xml_str = obj.to_xml_string()
+        expected_xml_str = '<mn dir="rtl">123</mn>'
+        self.assertEqual(xml_str, expected_xml_str)
+
+    def test_mo_with_boolean_attribute(self):
+        obj = Mo(children=["+"], fence=True, separator=False)
+        xml_str = obj.to_xml_string()
+        expected_xml_str = '<mo fence="true" separator="false">+</mo>'
+        self.assertEqual(xml_str, expected_xml_str)
+
+    def test_mi_with_enum_attribute(self):
+        obj = Mi(children=["X"])
+        xml_str = obj.to_xml_string()
+        expected_xml_str = "<mi>X</mi>"
+        self.assertEqual(xml_str, expected_xml_str)
+
+    def test_math_element_with_attributes(self):
+        obj = Math(
+            display=MathMLDisplay.BLOCK,
+            alttext="Equation",
+            children=[Mi(children=["y"])],
+        )
+        xml_str = obj.to_xml_string()
+        expected_xml_str = '<math display="block" alttext="Equation"><mi>y</mi></math>'
+        self.assertEqual(xml_str, expected_xml_str)
+
+    def test_mrow_nested_elements(self):
+        obj = Mrow(
+            children=[
+                Mi(children=["a"]),
+                Mo(children=["+"]),
+                Mn(children=["1"]),
+            ],
+            id_="eq1",
+            class_="equation-style",
+        )
+        xml_str = obj.to_xml_string()
+        expected_xml_str = '<mrow id="eq1" class="equation-style"><mi>a</mi><mo>+</mo><mn>1</mn></mrow>'  # noqa: E501
+        self.assertEqual(xml_str, expected_xml_str)
+
+    def test_mfrac(self):
+        obj = Mfrac(
+            children=[
+                Mi(
+                    children=["numerator"],
+                ),
+                Mn(children=["denominator"]),
+            ]
+        )
+        xml_str = obj.to_xml_string()
+        expected_xml_str = "<mfrac><mi>numerator</mi><mn>denominator</mn></mfrac>"
+        self.assertEqual(xml_str, expected_xml_str)
+
+    def test_msubsup(self):
+        obj = Msubsup(
+            children=[
+                Mi(children=["X"]),
+                Mn(children=["s"]),
+                Mn(children=["p"]),
+            ]
+        )
+        xml_str = obj.to_xml_string()
+        expected_xml_str = "<msubsup><mi>X</mi><mn>s</mn><mn>p</mn></msubsup>"
+        self.assertEqual(xml_str, expected_xml_str)
+
+    def test_mtable_mtr_mtd(self):
+        obj = Mtable(
+            children=[
+                Mtr(
+                    children=[
+                        Mtd(
+                            children=[
+                                Mi(
+                                    children=["R1C1"],
+                                )
+                            ]
+                        ),
+                        Mtd(
+                            children=[
+                                Mi(
+                                    children=["R1C2"],
+                                )
+                            ]
+                        ),
+                    ]
+                ),
+                Mtr(
+                    children=[
+                        Mtd(children=[Mn(children=["1"])]),
+                        Mtd(children=[Mn(children=["2"])]),
+                    ]
+                ),
+            ]
+        )
+        xml_str = obj.to_xml_string()
+        expected_xml_str = "<mtable><mtr><mtd><mi>R1C1</mi></mtd><mtd><mi>R1C2</mi></mtd></mtr><mtr><mtd><mn>1</mn></mtd><mtd><mn>2</mn></mtd></mtr></mtable>"  # noqa: E501
+        self.assertEqual(xml_str, expected_xml_str)
+
+    def test_mixed_content_serialization(self):
+        obj = Mrow(
+            children=[
+                "TextBefore",
+                Mi(children=["x"]),
+                "TextBetween",
+                Mn(children=["123"]),
+                "TextAfter",
+            ]
+        )
+        xml_str = obj.to_xml_string()
+        expected_xml_str = (
+            "<mrow>TextBefore<mi>x</mi>TextBetween<mn>123</mn>TextAfter</mrow>"
+        )
+        self.assertEqual(xml_str, expected_xml_str)
+
+    def test_semantics_annotation(self):
+        obj = Semantics(
+            children=[
+                Mi(children=["x"]),
+                Annotation(
+                    encoding="text/plain",
+                    children=["Content of annotation"],
+                ),
+            ]
+        )
+        xml_str = obj.to_xml_string()
+        expected_xml_str = '<semantics><mi>x</mi><annotation encoding="text/plain">Content of annotation</annotation></semantics>'  # noqa: E501
+        self.assertEqual(xml_str, expected_xml_str)
+
+    def test_annotation_xml(self):
+        obj = AnnotationXml(
+            encoding="application/mathml+xml",
+            children=[
+                Mrow(
+                    children=[
+                        Mi(
+                            children=["alt"],
+                        ),
+                        Mo(children=["="]),
+                        Mn(children=["1"]),
+                    ]
+                )
+            ],
+        )
+        xml_str = obj.to_xml_string()
+        expected_xml_str = '<annotation-xml encoding="application/mathml+xml"><mrow><mi>alt</mi><mo>=</mo><mn>1</mn></mrow></annotation-xml>'  # noqa: E501
+        self.assertEqual(xml_str, expected_xml_str)
+
+
+class TestMathMLDeserialization(unittest.TestCase):
+    """Tests for from_string() -> object"""
+
+    def test_simple_mi_from_string(self):
+        xml_str = "<mi>y</mi>"
+        result = Mi.from_string(xml_str)
+        self.assertEqual(len(result), 1)
+        obj = result[0]
+        self.assertIsInstance(obj, Mi)
+        self.assertEqual(len(obj.children), 1)
+        self.assertIsInstance(obj.children[0], TextNode)
+        self.assertEqual(obj.children[0].text, "y")
+
+    def test_mo_from_string_with_attributes(self):
+        xml_str = '<mo fence="true" lspace="8px">+ </mo>'
+        result = Mo.from_string(xml_str)
+        self.assertEqual(len(result), 1)
+        obj = result[0]
+        self.assertIsInstance(obj, Mo)
+        self.assertTrue(obj.fence)
+        self.assertEqual(obj.lspace, "8px")
+        self.assertEqual(obj.children[0].text, "+ ")  # trailing space is kept
+
+    def test_mrow_nested_from_string(self):
+        xml_str = (
+            '<mrow id="r1" class="test-class"><mi>a</mi><mo>+</mo><mn>1</mn></mrow>'
+        )
+        result = Mrow.from_string(xml_str)
+        self.assertEqual(len(result), 1)
+        obj = result[0]
+        self.assertIsInstance(obj, Mrow)
+        self.assertEqual(obj.id_, "r1")
+        self.assertEqual(obj.class_, "test-class")
+
+        self.assertEqual(len(obj.children), 3)
+        self.assertIsInstance(obj.children[0], Mi)
+        self.assertEqual(obj.children[0].children[0].text, "a")
+        self.assertIsInstance(obj.children[1], Mo)
+        self.assertEqual(obj.children[1].children[0].text, "+")
+        self.assertIsInstance(obj.children[2], Mn)
+        self.assertEqual(obj.children[2].children[0].text, "1")
+
+    def test_mfrac_from_string(self):
+        xml_str = "<mfrac><mi>N</mi><mn>D</mn></mfrac>"
+        result = Mfrac.from_string(xml_str)
+        self.assertEqual(len(result), 1)
+        obj = result[0]
+        self.assertIsInstance(obj, Mfrac)
+        self.assertEqual(len(obj.children), 2)
+        self.assertIsInstance(obj.children[0], Mi)
+        self.assertEqual(obj.children[0].children[0].text, "N")
+        self.assertIsInstance(obj.children[1], Mn)
+        self.assertEqual(obj.children[1].children[0].text, "D")
+
+    def test_mixed_content_deserialization(self):
+        xml_str = "<mrow>Prefix <mi>v</mi> Infix <mn>42</mn> Suffix</mrow>"
+        result = Mrow.from_string(xml_str)
+        self.assertEqual(len(result), 1)
+        obj = result[0]
+        self.assertIsInstance(obj, Mrow)
+
+        self.assertEqual(len(obj.children), 5)
+        self.assertIsInstance(obj.children[0], TextNode)
+        self.assertEqual(obj.children[0].text, "Prefix ")
+        self.assertIsInstance(obj.children[1], Mi)
+        self.assertEqual(obj.children[1].children[0].text, "v")
+        self.assertIsInstance(obj.children[2], TextNode)
+        self.assertEqual(obj.children[2].text, " Infix ")
+        self.assertIsInstance(obj.children[3], Mn)
+        self.assertEqual(obj.children[3].children[0].text, "42")
+        self.assertIsInstance(obj.children[4], TextNode)
+        self.assertEqual(obj.children[4].text, " Suffix")
+
+    def test_semantics_annotation_from_string(self):
+        xml_str = (
+            "<semantics>"
+            "<mi>E</mi>"
+            '<annotation encoding="text/latex">E = mc^2</annotation>'
+            "</semantics>"
+        )
+        result = Semantics.from_string(xml_str)
+        self.assertEqual(len(result), 1)
+        obj = result[0]
+        self.assertIsInstance(obj, Semantics)
+        self.assertEqual(len(obj.children), 2)
+
+        self.assertIsInstance(obj.children[0], Mi)
+        self.assertEqual(obj.children[0].children[0].text, "E")
+
+        ann_obj = obj.children[1]
+        self.assertIsInstance(ann_obj, Annotation)
+        self.assertEqual(ann_obj.encoding, "text/latex")
+        self.assertEqual(len(ann_obj.children), 1)
+        self.assertIsInstance(ann_obj.children[0], TextNode)
+        self.assertEqual(ann_obj.children[0].text, "E = mc^2")
+
+    def test_annotation_xml_from_string(self):
+        xml_str = (
+            '<annotation-xml encoding="application/mathml+xml">'
+            "<mrow><mi>alt</mi><mo>=</mo><mn>0</mn></mrow>"
+            "</annotation-xml>"
+        )
+        result = AnnotationXml.from_string(xml_str)
+        self.assertEqual(len(result), 1)
+        obj = result[0]
+        self.assertIsInstance(obj, AnnotationXml)
+        self.assertEqual(obj.encoding, "application/mathml+xml")
+        self.assertEqual(len(obj.children), 1)
+        mrow_child = obj.children[0]
+        self.assertIsInstance(mrow_child, Mrow)
+        self.assertEqual(len(mrow_child.children), 3)
+        self.assertIsInstance(mrow_child.children[0], Mi)
+        self.assertEqual(mrow_child.children[0].children[0].text, "alt")
+
+    def test_from_string_multiple_root_elements(self):
+        xml_str = "<mi>a</mi><mn>1</mn>"
+        result = MathMLElement.from_string(xml_str)
+        self.assertEqual(len(result), 2)
+        self.assertIsInstance(result[0], Mi)
+        self.assertEqual(result[0].children[0].text, "a")
+        self.assertIsInstance(result[1], Mn)
+        self.assertEqual(result[1].children[0].text, "1")
+
+
+class TestErrorHandling(unittest.TestCase):
+    def test_from_string_invalid_xml(self):
+        xml_str = "<mi>x</mn>"  # closing tag deliberately mismatches the opening tag
+        with self.assertRaisesRegex(ValueError, "Invalid Markup: mismatched tag"):
+            Mi.from_string(xml_str)
+
+    def test_from_string_unregistered_tag(self):
+        xml_str = "<unregisteredtag>content</unregisteredtag>"
+
+        with self.assertRaisesRegex(
+            ValueError, "No registered class found for tag: unregisteredtag"
+        ):
+            MathMLElement.from_string(xml_str)
+
+    def test_attribute_validation_error_on_creation(self):
+        with self.assertRaises(ValidationError):  # same type the other tests expect
+            Mi(mathvariant="not-a-valid-variant", children=["x"])
+
+
+class TestComplexMathematicalExpressions(unittest.TestCase):
+ """Tests for complex, realistic mathematical expressions."""
+
+    def test_quadratic_formula(self):
+        """Test the quadratic formula: x = (-b ± √(b²-4ac)) / 2a"""
+        # Build the expression bottom-up, then check serialization and re-parsing.
+
+        # Left side: x =
+        x = Mi(children=["x"])
+        equals = Mo(children=["="])
+
+        # Right side numerator: -b ± √(b²-4ac)
+        minus_b = Mrow(
+            children=[
+                Mo(children=["-"]),
+                Mi(children=["b"]),
+            ]
+        )
+
+        plus_minus = Mo(children=["±"])
+
+        # b²-4ac inside square root
+        b_squared = Msup(
+            children=[
+                Mi(children=["b"]),
+                Mn(children=["2"]),
+            ]
+        )
+
+        four_ac = Mrow(
+            children=[
+                Mn(children=["4"]),
+                Mi(children=["a"]),
+                Mi(children=["c"]),
+            ]
+        )
+
+        discriminant = Mrow(children=[b_squared, Mo(children=["-"]), four_ac])
+
+        sqrt_discriminant = Msqrt(children=[discriminant])
+
+        numerator = Mrow(children=[minus_b, plus_minus, sqrt_discriminant])
+
+        # Denominator: 2a
+        denominator = Mrow(
+            children=[
+                Mn(children=["2"]),
+                Mi(children=["a"]),
+            ]
+        )
+
+        # Complete fraction
+        fraction = Mfrac(children=[numerator, denominator])
+
+        # Complete equation
+        equation = Mrow(children=[x, equals, fraction])
+
+        # Test serialization: each layout element used above must be emitted
+        xml_str = equation.to_xml_string()
+        self.assertIn("<mfrac>", xml_str)
+        self.assertIn("<msqrt>", xml_str)
+        self.assertIn("<msup>", xml_str)
+
+        # Test round-trip
+        result = Mrow.from_string(xml_str)
+        self.assertEqual(len(result), 1)
+        self.assertIsInstance(result[0], Mrow)
+
+    def test_integral_with_limits(self):
+        """Test definite integral: ∫₀^∞ e^(-x²) dx"""
+
+        # Integral symbol with limits
+        integral_symbol = Mo(children=["∫"])
+        lower_limit = Mn(children=["0"])
+        upper_limit = Mo(children=["∞"])
+
+        integral_with_limits = Msubsup(
+            children=[integral_symbol, lower_limit, upper_limit]
+        )
+
+        # Base of the exponential: e
+        e = Mi(children=["e"])
+
+        # -x²
+        minus = Mo(children=["-"])
+        x_squared = Msup(
+            children=[
+                Mi(children=["x"]),
+                Mn(children=["2"]),
+            ]
+        )
+        negative_x_squared = Mrow(children=[minus, x_squared])
+
+        # e^(-x²)
+        exponential = Msup(children=[e, negative_x_squared])
+
+        # dx
+        differential = Mrow(
+            children=[
+                Mi(children=["d"]),
+                Mi(children=["x"]),
+            ]
+        )
+
+        # Complete integral
+        integral = Mrow(children=[integral_with_limits, exponential, differential])
+
+        # Test structure: the limits wrapper and both symbols must be serialized
+        xml_str = integral.to_xml_string()
+        self.assertIn("<msubsup>", xml_str)
+        self.assertIn("∫", xml_str)
+        self.assertIn("∞", xml_str)
+
+ def test_matrix_expression(self):
+ """Test 2x2 matrix with expressions in cells."""
+
+ # Matrix elements
+ # Row 1: [cos θ, -sin θ]
+ cos_theta = Mrow(
+ children=[
+ Mo(children=["cos"]),
+ Mi(children=["θ"]),
+ ]
+ )
+
+ minus_sin_theta = Mrow(
+ children=[
+ Mo(children=["-"]),
+ Mo(children=["sin"]),
+ Mi(children=["θ"]),
+ ]
+ )
+
+ row1_cell1 = Mtd(children=[cos_theta])
+ row1_cell2 = Mtd(children=[minus_sin_theta])
+ row1 = Mtr(children=[row1_cell1, row1_cell2])
+
+ # Row 2: [sin θ, cos θ]
+ sin_theta = Mrow(
+ children=[
+ Mo(children=["sin"]),
+ Mi(children=["θ"]),
+ ]
+ )
+
+ row2_cell1 = Mtd(children=[sin_theta])
+ row2_cell2 = Mtd(children=[cos_theta])
+ row2 = Mtr(children=[row2_cell1, row2_cell2])
+
+ # Complete matrix
+ matrix = Mtable(children=[row1, row2])
+
+ # Test structure
+ self.assertEqual(len(matrix.children), 2)
+ self.assertEqual(len(matrix.children[0].children), 2)
+ self.assertEqual(len(matrix.children[1].children), 2)
+
+ def test_summation_with_complex_expression(self):
+ """Test summation: Σ(k=1 to n) k²/(k+1)"""
+
+ # Summation symbol
+ sigma = Mo(children=["Σ"])
+
+ # Lower limit: k=1
+ k_equals_1 = Mrow(
+ children=[
+ Mi(children=["k"]),
+ Mo(children=["="]),
+ Mn(children=["1"]),
+ ]
+ )
+
+ # Upper limit: n
+ n = Mi(children=["n"])
+
+ # Summation with limits
+ summation = Munderover(children=[sigma, k_equals_1, n])
+
+ # Expression being summed: k²/(k+1)
+ k_squared = Msup(
+ children=[
+ Mi(children=["k"]),
+ Mn(children=["2"]),
+ ]
+ )
+
+ k_plus_1 = Mrow(
+ children=[
+ Mi(children=["k"]),
+ Mo(children=["+"]),
+ Mn(children=["1"]),
+ ]
+ )
+
+ fraction = Mfrac(children=[k_squared, k_plus_1])
+
+ # Complete expression
+ complete_sum = Mrow(children=[summation, fraction])
+
+ # Test serialization
+ xml_str = complete_sum.to_xml_string()
+ self.assertIn("<munderover>", xml_str)
+ self.assertIn("Σ", xml_str)
+ self.assertIn("<mfrac>", xml_str)
+
+ def test_chemical_equation(self):
+ """Test chemical equation: H₂ + ½O₂ → H₂O"""
+
+ # H₂
+ h2 = Mrow(
+ children=[
+ Mi(children=["H"]),
+ Msub(
+ children=[
+ Mrow(children=[]), # Empty base for subscript positioning
+ Mn(children=["2"]),
+ ]
+ ),
+ ]
+ )
+
+ # Plus sign
+ plus = Mo(children=["+"])
+
+ # ½O₂
+ half = Mfrac(
+ children=[
+ Mn(children=["1"]),
+ Mn(children=["2"]),
+ ]
+ )
+
+ o2 = Mrow(
+ children=[
+ Mi(children=["O"]),
+ Msub(children=[Mrow(children=[]), Mn(children=["2"])]),
+ ]
+ )
+
+ half_o2 = Mrow(children=[half, o2])
+
+ # Arrow
+ arrow = Mo(children=["→"])
+
+ # H₂O
+ h2o = Mrow(
+ children=[
+ Mi(children=["H"]),
+ Msub(children=[Mrow(children=[]), Mn(children=["2"])]),
+ Mi(children=["O"]),
+ ]
+ )
+
+ # Complete equation
+ equation = Mrow(children=[h2, plus, half_o2, arrow, h2o])
+
+ # Test structure
+ xml_str = equation.to_xml_string()
+ self.assertIn("→", xml_str)
+ self.assertIn("<msub>", xml_str)
+ self.assertIn("<mfrac>", xml_str)
+
+ def test_nested_fractions(self):
+ """Test deeply nested fractions: (a/b) / (c/d) = ad/bc"""
+
+ # a/b
+ a_over_b = Mfrac(
+ children=[
+ Mi(children=["a"]),
+ Mi(children=["b"]),
+ ]
+ )
+
+ # c/d
+ c_over_d = Mfrac(
+ children=[
+ Mi(children=["c"]),
+ Mi(children=["d"]),
+ ]
+ )
+
+ # (a/b) / (c/d)
+ complex_fraction = Mfrac(children=[a_over_b, c_over_d])
+
+ # =
+ equals = Mo(children=["="])
+
+ # ad
+ ad = Mrow(
+ children=[
+ Mi(children=["a"]),
+ Mi(children=["d"]),
+ ]
+ )
+
+ # bc
+ bc = Mrow(
+ children=[
+ Mi(children=["b"]),
+ Mi(children=["c"]),
+ ]
+ )
+
+ # ad/bc
+ result_fraction = Mfrac(children=[ad, bc])
+
+ # Complete equation
+ equation = Mrow(children=[complex_fraction, equals, result_fraction])
+
+ # Test nesting depth
+ xml_str = equation.to_xml_string()
+ # Should have nested mfrac elements
+ frac_count = xml_str.count("<mfrac>")
+ self.assertEqual(frac_count, 4)
+
+ def test_multiscript_notation(self):
+ """Test multiscript notation: U with post-subscript 238 and pre-subscript 92."""
+
+ # Base element
+ u = Mi(children=["U"])
+
+ # Pre-subscripts and pre-superscripts
+ prescripts = Mprescripts()
+
+ # Create multiscripts element
+ # Format: base, post-sub, post-sup, prescripts, pre-sub, pre-sup
+ multiscripts = Mmultiscripts(
+ children=[
+ u, # base
+ Mn(children=["238"]), # post-subscript
+ Mrow(children=[]), # no post-superscript
+ prescripts,
+ Mn(children=["92"]), # pre-subscript (atomic number)
+ Mrow(children=[]), # no pre-superscript
+ ]
+ )
+
+ xml_str = multiscripts.to_xml_string()
+ self.assertIn("<mmultiscripts>", xml_str)
+ self.assertIn("<mprescripts />", xml_str)
+
+ def test_equation_with_semantics(self):
+ """Test equation with semantic annotations."""
+
+ # E = mc²
+ e = Mi(children=["E"])
+ equals = Mo(children=["="])
+ m = Mi(children=["m"])
+ c_squared = Msup(
+ children=[
+ Mi(children=["c"]),
+ Mn(children=["2"]),
+ ]
+ )
+
+ equation = Mrow(children=[e, equals, m, c_squared])
+
+ # Add semantic annotation
+ latex_annotation = Annotation(
+ encoding="application/x-tex", children=["E = mc^2"]
+ )
+
+ text_annotation = Annotation(
+ encoding="text/plain",
+ children=["Einstein's mass-energy equivalence"],
+ )
+
+ semantics = Semantics(children=[equation, latex_annotation, text_annotation])
+
+ # Test structure
+ self.assertEqual(len(semantics.children), 3)
+ self.assertIsInstance(semantics.children[0], Mrow)
+ self.assertIsInstance(semantics.children[1], Annotation)
+ self.assertIsInstance(semantics.children[2], Annotation)
+
+ def test_styled_expression(self):
+ """Test expression with styling applied."""
+
+ # Create expression: f(x) = x² + 1
+ f = Mi(children=["f"])
+ x_arg = Mi(children=["x"])
+ function_call = Mrow(
+ children=[
+ f,
+ Mo(children=["("]),
+ x_arg,
+ Mo(children=[")"]),
+ ]
+ )
+
+ equals = Mo(children=["="])
+
+ x_squared = Msup(
+ children=[
+ Mi(children=["x"]),
+ Mn(children=["2"]),
+ ]
+ )
+
+ plus = Mo(children=["+"])
+ one = Mn(children=["1"])
+
+ expression = Mrow(children=[x_squared, plus, one])
+
+ # Wrap in styled container
+ styled_expression = Mstyle(
+ mathcolor="blue",
+ mathsize="14pt",
+ children=[function_call, equals, expression],
+ )
+
+ # Test styling attributes
+ self.assertEqual(styled_expression.mathcolor, "blue")
+ self.assertEqual(styled_expression.mathsize, "14pt")
+
+
+class TestEdgeCasesAndCompliance(unittest.TestCase):
+ """Tests for edge cases, boundary conditions, and MathML Core compliance."""
+
+ def test_unicode_content_handling(self):
+ """Test proper handling of Unicode mathematical symbols."""
+ unicode_symbols = [
+ "α",
+ "β",
+ "γ",
+ "π",
+ "∑",
+ "∫",
+ "∞",
+ "≤",
+ "≥",
+ "≠",
+ "∂",
+ "∇",
+ "√",
+ "∈",
+ "∉",
+ "⊂",
+ "⊃",
+ "∪",
+ "∩",
+ "→",
+ ]
+
+ for symbol in unicode_symbols:
+ with self.subTest(symbol=symbol):
+ # Test in Mi element
+ mi = Mi(children=[symbol])
+ xml_str = mi.to_xml_string()
+ self.assertIn(symbol, xml_str)
+
+ # Test round-trip
+ result = Mi.from_string(xml_str)
+ self.assertEqual(result[0].children[0].text, symbol)
+
+ def test_empty_elements_compliance(self):
+ """Test MathML Core compliance for empty elements."""
+
+ # Elements that can be empty
+ empty_allowed = [
+ Math(children=[]),
+ Mrow(children=[]),
+ Msqrt(children=[]),
+ Mstyle(children=[]),
+ Merror(children=[]),
+ Mphantom(children=[]),
+ ]
+
+ for element in empty_allowed:
+ with self.subTest(element=type(element).__name__):
+ xml_str = element.to_xml_string()
+ # Should produce valid XML
+ self.assertTrue(xml_str.startswith("<"))
+ self.assertTrue(xml_str.endswith(">"))
+
+ def test_whitespace_handling(self):
+ """Test proper whitespace handling in text content."""
+
+ # Leading/trailing whitespace in text content
+ text_with_spaces = " x "
+ mi = Mi(children=[text_with_spaces])
+ xml_str = mi.to_xml_string()
+
+ # Round-trip test
+ result = Mi.from_string(xml_str)
+ self.assertEqual(result[0].children[0].text, text_with_spaces)
+
+ # Mixed whitespace in Mtext
+ text_content = "This is\tsome\ntext with\r\nvarious whitespace"
+ mtext = Mtext(children=[text_content])
+ xml_str = mtext.to_xml_string()
+
+ result = Mtext.from_string(xml_str)
+ self.assertEqual(result[0].children[0].text, text_content.replace("\r", ""))
+
+ def test_special_characters_in_content(self):
+ """Test handling of XML special characters in content."""
+
+ special_chars = ["&", "<", ">", '"', "'"]
+
+ for char in special_chars:
+ with self.subTest(char=char):
+ mtext = Mtext(children=[f"Before{char}After"])
+ xml_str = mtext.to_xml_string()
+
+ # Should not contain unescaped special characters
+ if char == "&":
+ self.assertIn("&amp;", xml_str)
+ elif char == "<":
+ self.assertIn("&lt;", xml_str)
+ elif char == ">":
+ self.assertIn("&gt;", xml_str)
+
+ # Round-trip should preserve original content
+ result = Mtext.from_string(xml_str)
+ self.assertEqual(result[0].children[0].text, f"Before{char}After")
+
+ def test_display_attribute_compliance(self):
+ """Test Math element display attribute compliance."""
+
+ # Test both valid display values
+ for display_value in [MathMLDisplay.BLOCK, MathMLDisplay.INLINE]:
+ with self.subTest(display=display_value):
+ math = Math(display=display_value, children=[])
+ xml_str = math.to_xml_string()
+ self.assertIn(f'display="{display_value.value}"', xml_str)
+
+ def test_length_percentage_edge_cases(self):
+ """Test edge cases for length-percentage values."""
+
+ # Edge cases that should be valid
+ valid_edge_cases = [
+ "0", # Unitless zero
+ "0px", # Zero with unit
+ "+0", # Explicit positive zero
+ "-0", # Negative zero
+ "0.0px", # Decimal zero
+ ".5em", # Leading decimal point
+ "100%", # Full percentage
+ "0%", # Zero percentage
+ "+50%", # Explicit positive percentage
+ ]
+
+ for value in valid_edge_cases:
+ with self.subTest(value=value):
+ try:
+ mspace = Mspace(width=value)
+ self.assertEqual(mspace.width, value)
+ except ValidationError:
+ self.fail(f"Valid edge case {value} was rejected")
+
+ def test_extremely_long_content(self):
+ """Test handling of very long text content."""
+
+ # Create very long text content
+ long_text = "x" * 10000
+ mtext = Mtext(children=[long_text])
+
+ # Should handle without issues
+ xml_str = mtext.to_xml_string()
+ self.assertIn(long_text, xml_str)
+
+ # Round-trip test
+ result = Mtext.from_string(xml_str)
+ self.assertEqual(result[0].children[0].text, long_text)
+
+ def test_deeply_nested_structures(self):
+ """Test deeply nested element structures."""
+
+ # Create deeply nested structure: ((((x))))
+ content = Mi(children=["x"])
+
+ # Nest 10 levels deep
+ for i in range(10):
+ content = Mrow(children=[content])
+
+ # Should serialize without issues
+ xml_str = content.to_xml_string()
+
+ # Count nesting depth
+ open_count = xml_str.count("<mrow>")
+ close_count = xml_str.count("</mrow>")
+ self.assertEqual(open_count, 10)
+ self.assertEqual(close_count, 10)
+
+ def test_mixed_content_edge_cases(self):
+ """Test edge cases in mixed content."""
+
+ # Empty text nodes mixed with elements
+ mrow = Mrow(
+ children=[
+ "",
+ Mi(children=["x"]),
+ "",
+ Mo(children=["+"]),
+ "",
+ Mn(children=["1"]),
+ ]
+ )
+
+ xml_str = mrow.to_xml_string()
+
+ # Should strip empty text nodes
+ result = Mrow.from_string(xml_str)
+ self.assertEqual(len(result[0].children), 3)
+
+ def test_attribute_value_edge_cases(self):
+ """Test edge cases for attribute values."""
+
+ # Very long attribute values
+ long_alttext = "A" * 1000
+ math = Math(alttext=long_alttext, children=[])
+ xml_str = math.to_xml_string()
+ self.assertIn(long_alttext, xml_str)
+
+ # Attribute values with special characters
+ special_alttext = 'Text with "quotes" and &ampersands'
+ math = Math(alttext=special_alttext, children=[])
+ xml_str = math.to_xml_string()
+
+ # Should properly escape in XML
+ result = Math.from_string(xml_str)
+ self.assertEqual(result[0].alttext, special_alttext)
+
+ def test_script_element_edge_cases(self):
+ """Test edge cases for script elements."""
+
+ # Script elements with minimal content
+ base = Mi(children=["x"])
+ empty_script = Mi(children=[""])
+
+ msub = Msub(children=[base, empty_script])
+ xml_str = msub.to_xml_string()
+
+ # Should handle empty script content
+ result = Msub.from_string(xml_str)
+ self.assertEqual(len(result[0].children), 2)
+
+ def test_namespace_compliance(self):
+ """Test MathML namespace handling if supported."""
+
+ # Basic elements should work without explicit namespace in this implementation
+ mi = Mi(children=["x"])
+ xml_str = mi.to_xml_string()
+
+ # Should produce valid MathML-compatible XML
+ self.assertTrue(xml_str.startswith("<mi>"))
+
+ def test_boolean_attribute_edge_cases(self):
+ """Test edge cases for boolean attributes."""
+
+ # Test all boolean attributes on Mo element
+ mo = Mo(
+ fence=True,
+ largeop=False,
+ movablelimits=True,
+ separator=False,
+ stretchy=True,
+ symmetric=False,
+ children=["∑"],
+ )
+
+ xml_str = mo.to_xml_string()
+
+ # All boolean values should serialize
+ self.assertIn('fence="true"', xml_str)
+ self.assertIn('largeop="false"', xml_str)
+ self.assertIn('movablelimits="true"', xml_str)
+ self.assertIn('separator="false"', xml_str)
+ self.assertIn('stretchy="true"', xml_str)
+ self.assertIn('symmetric="false"', xml_str)
+
+ def test_semantics_edge_cases(self):
+ """Test edge cases for semantic elements."""
+
+ # Semantics with presentation content and a single annotation
+ presentation = Mi(children=["E"])
+ ann1 = Annotation(encoding="text/plain", children=["First"])
+ semantics = Semantics(children=[presentation, ann1])
+
+ xml_str = semantics.to_xml_string()
+ result = Semantics.from_string(xml_str)
+ self.assertEqual(len(result[0].children), 2)
+
+ # Multiple annotations of same type
+ ann2 = Annotation(encoding="text/plain", children=["Second"])
+
+ semantics_multi = Semantics(children=[presentation, ann1, ann2])
+ xml_str = semantics_multi.to_xml_string()
+ self.assertEqual(xml_str.count("10")
+
+ value_element_with_attributes = Value(
+ value="5",
+ field_identifier="part1",
+ base_type=BaseType.INTEGER,
+ )
+ self.assertEqual(
+ value_element_with_attributes.to_xml_string(),
+ '5 ',
+ )
+
+ def test_correct_response_element(self):
+ correct_response_element = CorrectResponse(
+ value=[Value(value="A"), Value(value="B")]
+ )
+ self.assertEqual(
+ correct_response_element.to_xml_string(),
+ "<qti-correct-response><qti-value>A</qti-value><qti-value>B</qti-value></qti-correct-response>",
+ )
+
+ def test_response_declaration_element(self):
+ response_declaration_element = ResponseDeclaration(
+ identifier="RESPONSE_1",
+ cardinality=Cardinality.SINGLE,
+ base_type=BaseType.IDENTIFIER,
+ correct_response=CorrectResponse(value=[Value(value="choiceA")]),
+ )
+ expected_xml = 'choiceA ' # noqa: E501
+ self.assertEqual(response_declaration_element.to_xml_string(), expected_xml)
+
+ def test_outcome_declaration_element(self):
+ outcome_declaration_element = OutcomeDeclaration(
+ identifier="SCORE",
+ cardinality=Cardinality.SINGLE,
+ base_type=BaseType.FLOAT,
+ )
+ expected_xml = ' '
+ self.assertEqual(outcome_declaration_element.to_xml_string(), expected_xml)
+
+ def test_response_processing_element(self):
+ response_processing_element = ResponseProcessing(
+ template="https://example.com/response_processing.xml"
+ )
+ self.assertEqual(
+ response_processing_element.to_xml_string(),
+ ' ',
+ )
+
+ def test_assessment_item_element(self):
+ item_body = ItemBody(children=[P(children=["Test Item Body Content"])])
+ assessment_item_element = AssessmentItem(
+ identifier="item_1",
+ title="Test Assessment Item",
+ language="en-US",
+ item_body=item_body,
+ )
+ expected_xml = 'Test Item Body Content
' # noqa: E501
+ self.assertEqual(assessment_item_element.to_xml_string(), expected_xml)
+
+ def test_prompt_element(self):
+ prompt_element = Prompt(children=["This is the prompt text."])
+ self.assertEqual(
+ prompt_element.to_xml_string(),
+ "<qti-prompt>This is the prompt text.</qti-prompt>",
+ )
+
+ def test_simple_choice_element(self):
+ simple_choice_element = SimpleChoice(
+ identifier="choice1", children=["Choice 1"]
+ )
+ self.assertEqual(
+ simple_choice_element.to_xml_string(),
+ 'Choice 1 ',
+ )
+
+ def test_choice_interaction_element(self):
+ choice1 = SimpleChoice(identifier="choice1", children=["Choice 1"])
+ choice2 = SimpleChoice(identifier="choice2", children=["Choice 2"])
+ choice_interaction_element = ChoiceInteraction(
+ answers=[choice1, choice2],
+ response_identifier="RESPONSE",
+ prompt=Prompt(children=["Select the correct answer."]),
+ )
+ expected_xml = 'Select the correct answer. Choice 1 Choice 2 ' # noqa: E501
+ self.assertEqual(choice_interaction_element.to_xml_string(), expected_xml)
+
+ def test_text_entry_interaction_element(self):
+ text_entry_interaction = TextEntryInteraction(
+ response_identifier="textEntry1",
+ expected_length=10,
+ placeholder_text="Enter your answer",
+ )
+ expected_xml = ' '
+ self.assertEqual(text_entry_interaction.to_xml_string(), expected_xml)
+
+ def test_extended_text_interaction_element(self):
+ extended_text_interaction = ExtendedTextInteraction(
+ response_identifier="extendedText1",
+ placeholder_text="Enter your essay here.",
+ prompt=Prompt(children=["What is truth?"]),
+ )
+ expected_xml = 'What is truth? ' # noqa: E501
+ self.assertEqual(extended_text_interaction.to_xml_string(), expected_xml)
diff --git a/contentcuration/contentcuration/tests/utils/test_exercise_creation.py b/contentcuration/contentcuration/tests/utils/test_exercise_creation.py
index 37f4330a4b..deceb2d980 100644
--- a/contentcuration/contentcuration/tests/utils/test_exercise_creation.py
+++ b/contentcuration/contentcuration/tests/utils/test_exercise_creation.py
@@ -1,3 +1,7 @@
+# flake8: noqa: E501
+# Ignore line length issues in this file
+# Black will autoformat where possible, so this is not too egregious
+# but will allow our long strings where necessary.
import json
import os
import re
@@ -16,7 +20,9 @@
from contentcuration.tests.base import StudioTestCase
from contentcuration.tests.testdata import fileobj_exercise_graphie
from contentcuration.tests.testdata import fileobj_exercise_image
-from contentcuration.utils.publish import create_perseus_exercise
+from contentcuration.utils.assessment.perseus import PerseusExerciseGenerator
+from contentcuration.utils.assessment.qti.archive import hex_to_qti_id
+from contentcuration.utils.assessment.qti.archive import QTIExerciseGenerator
class TestPerseusExerciseCreation(StudioTestCase):
@@ -37,8 +43,8 @@ def setUp(self):
# Create an exercise node
self.exercise_node = ContentNode.objects.create(
title="Test Exercise",
- node_id="exercise-node-id",
- content_id="exercise-content-id",
+ node_id="1234567890abcdef1234567890abcded",
+ content_id="fedcba0987654321fedcba0987654321",
kind_id=content_kinds.EXERCISE,
parent=self.channel.main_tree,
extra_fields=json.dumps(
@@ -58,9 +64,6 @@ def setUp(self):
),
)
- # Create a kolibri node representation (only needs id for testing)
- self.kolibri_node = type("KolibriNode", (), {"id": "kolibri-node-id"})
-
def _create_assessment_item(
self, item_type, question_text, answers, hints=None, assessment_id=None
):
@@ -81,6 +84,16 @@ def _create_assessment_item(
)
return item
+ def _create_perseus_zip(self, exercise_data):
+ generator = PerseusExerciseGenerator(
+ self.exercise_node,
+ exercise_data,
+ self.channel.id,
+ "en-US",
+ user_id=self.user.id,
+ )
+ return generator.create_exercise_archive()
+
def _validate_perseus_zip(self, exercise_file):
"""Helper to validate the structure of the Perseus zip file"""
# Use Django's storage backend to read the file
@@ -145,9 +158,7 @@ def test_basic_exercise_creation(self):
}
# Call the function to create the Perseus exercise
- create_perseus_exercise(
- self.exercise_node, self.kolibri_node, exercise_data, user_id=self.user.id
- )
+ self._create_perseus_zip(exercise_data)
# Verify that a file was created for the node
exercise_file = self.exercise_node.files.get(preset_id=format_presets.EXERCISE)
@@ -179,6 +190,101 @@ def test_basic_exercise_creation(self):
# we are deliberately changing the archive generation algorithm for perseus files.
self.assertEqual(exercise_file.checksum, "0ec7e964b466ebc76e81e175570e97f1")
+ def test_multiple_images_index_mismatch_regression(self):
+ """Regression test for index mismatch bug in process_image_strings method.
+
+ When content is modified inside the re.finditer loop, subsequent matches
+ point to invalid positions due to string length changes, resulting in
+ malformed image processing.
+ """
+ # Create three image files - use mix of resized and non-resized images
+ # to trigger different replacement lengths
+ image1 = fileobj_exercise_image(size=(100, 100), color="red")
+ image2 = fileobj_exercise_image(size=(200, 200), color="blue")
+ image3 = fileobj_exercise_image(size=(300, 300), color="green")
+
+ # Create URLs for all images
+ image1_url = exercises.CONTENT_STORAGE_FORMAT.format(image1.filename())
+ image2_url = exercises.CONTENT_STORAGE_FORMAT.format(image2.filename())
+ image3_url = exercises.CONTENT_STORAGE_FORMAT.format(image3.filename())
+
+ # Create question with multiple images - mix of resized and original
+ # This should create different length replacements
+ question_text = (
+ f"First image (resized): ![img1]({image1_url} =50x50)\n"
+ f"Second image (original): ![img2]({image2_url})\n"
+ f"Third image (resized): ![img3]({image3_url} =70x70)"
+ )
+
+ item = self._create_assessment_item(
+ exercises.SINGLE_SELECTION,
+ question_text,
+ [{"answer": "Answer", "correct": True, "order": 1}],
+ )
+
+ # Associate all images with the assessment item
+ for img in [image1, image2, image3]:
+ img.assessment_item = item
+ img.save()
+
+ exercise_data = {
+ "mastery_model": exercises.M_OF_N,
+ "randomize": True,
+ "n": 1,
+ "m": 1,
+ "all_assessment_items": [item.assessment_id],
+ "assessment_mapping": {item.assessment_id: exercises.SINGLE_SELECTION},
+ }
+
+ # Create the Perseus exercise
+ self._create_perseus_zip(exercise_data)
+ exercise_file = self.exercise_node.files.get(preset_id=format_presets.EXERCISE)
+ zip_file, _ = self._validate_perseus_zip(exercise_file)
+
+ # Get the Perseus item JSON content
+ item_json = json.loads(
+ zip_file.read(f"{item.assessment_id}.json").decode("utf-8")
+ )
+ question_content = item_json["question"]["content"]
+
+ # Extract all markdown image references using the same pattern as the code
+ markdown_pattern = r"!\[([^\]]*)\]\(([^)]+)\)"
+ matches = re.findall(markdown_pattern, question_content)
+
+ # Check that we have exactly 3 well-formed image references
+ # If the bug exists, we might get malformed content due to index mismatch
+ self.assertEqual(
+ len(matches),
+ 3,
+ f"Expected 3 image references, found {len(matches)} in content: {question_content}",
+ )
+
+ # Verify each match has proper structure
+ for i, (alt_text, _) in enumerate(matches):
+ expected_alt = f"img{i+1}"
+ self.assertEqual(
+ alt_text,
+ expected_alt,
+ f"Image {i+1} alt text malformed: got '{alt_text}', expected '{expected_alt}'",
+ )
+
+ # Verify that width and height are properly included in the question images
+ question_images = item_json["question"]["images"]
+
+ self.assertEqual(
+ len(question_images),
+ 2,
+ f"Expected 2 image entries with dimensions, found {len(question_images)}: {list(question_images.keys())}",
+ )
+
+ # Verify that we have images with the expected dimensions
+ for image_name, image_data in question_images.items():
+ width, height = image_data["width"], image_data["height"]
+ if width == 50 and height != 50:
+ self.fail("Should find image with 50x50 dimensions")
+ elif width == 70 and height != 70:
+ self.fail("Should find image with 70x70 dimensions")
+
def test_exercise_with_image(self):
image_file = fileobj_exercise_image()
@@ -209,9 +315,7 @@ def test_exercise_with_image(self):
}
# Create the Perseus exercise
- create_perseus_exercise(
- self.exercise_node, self.kolibri_node, exercise_data, user_id=self.user.id
- )
+ self._create_perseus_zip(exercise_data)
# Verify that a file was created
exercise_file = self.exercise_node.files.get(preset_id=format_presets.EXERCISE)
@@ -259,9 +363,7 @@ def test_exercise_with_image_no_attached_file(self):
}
# Create the Perseus exercise
- create_perseus_exercise(
- self.exercise_node, self.kolibri_node, exercise_data, user_id=self.user.id
- )
+ self._create_perseus_zip(exercise_data)
# Verify that a file was created
exercise_file = self.exercise_node.files.get(preset_id=format_presets.EXERCISE)
@@ -311,9 +413,7 @@ def test_exercise_with_image_deleted_file_object(self):
image_file.delete()
# Create the Perseus exercise
- create_perseus_exercise(
- self.exercise_node, self.kolibri_node, exercise_data, user_id=self.user.id
- )
+ self._create_perseus_zip(exercise_data)
# Verify that a file was created
exercise_file = self.exercise_node.files.get(preset_id=format_presets.EXERCISE)
@@ -392,9 +492,7 @@ def test_exercise_with_graphie(self):
}
# Create the Perseus exercise
- create_perseus_exercise(
- self.exercise_node, self.kolibri_node, exercise_data, user_id=self.user.id
- )
+ self._create_perseus_zip(exercise_data)
# Verify that a file was created
exercise_file = self.exercise_node.files.get(preset_id=format_presets.EXERCISE)
@@ -431,7 +529,7 @@ def test_exercise_with_graphie(self):
def test_formula_processing(self):
"""Test that formulas are properly processed in exercises"""
# Create a question with LaTeX formulas
- question_text = "Solve: $\\frac{x}{2} = 3$"
+ question_text = "Solve: $$\\frac{x}{2} = 3$$"
item = self._create_assessment_item(
exercises.INPUT_QUESTION,
question_text,
@@ -449,10 +547,43 @@ def test_formula_processing(self):
}
# Create the Perseus exercise
- create_perseus_exercise(
- self.exercise_node, self.kolibri_node, exercise_data, user_id=self.user.id
+ self._create_perseus_zip(exercise_data)
+
+ # Verify that a file was created
+ exercise_file = self.exercise_node.files.get(preset_id=format_presets.EXERCISE)
+
+ # Validate the zip file
+ zip_file, _ = self._validate_perseus_zip(exercise_file)
+
+ # Check that the formula was properly processed
+ item_json = json.loads(
+ zip_file.read(f"{item.assessment_id}.json").decode("utf-8")
+ )
+ self.assertIn("$\\frac{x}{2} = 3$", item_json["question"]["content"])
+
+ def test_multiple_formula_processing(self):
+ """Test that multiple formulas in one question are all processed in exercises"""
+ # Create a question with LaTeX formulas
+ question_text = "Solve: $$\\frac{x}{2} = 3$$ or maybe $$\\frac{y}{2} = 7$$"
+ item = self._create_assessment_item(
+ exercises.INPUT_QUESTION,
+ question_text,
+ [{"answer": "6", "correct": True, "order": 1}],
)
+ # Create the exercise data
+ exercise_data = {
+ "mastery_model": exercises.M_OF_N,
+ "randomize": True,
+ "n": 1,
+ "m": 1,
+ "all_assessment_items": [item.assessment_id],
+ "assessment_mapping": {item.assessment_id: exercises.INPUT_QUESTION},
+ }
+
+ # Create the Perseus exercise
+ self._create_perseus_zip(exercise_data)
+
# Verify that a file was created
exercise_file = self.exercise_node.files.get(preset_id=format_presets.EXERCISE)
@@ -463,7 +594,10 @@ def test_formula_processing(self):
item_json = json.loads(
zip_file.read(f"{item.assessment_id}.json").decode("utf-8")
)
- self.assertIn("\\frac{x}{2} = 3", item_json["question"]["content"])
+ self.assertIn(
+ "Solve: $\\frac{x}{2} = 3$ or maybe $\\frac{y}{2} = 7$",
+ item_json["question"]["content"],
+ )
def test_multiple_question_types(self):
"""Test creating an exercise with multiple question types"""
@@ -526,9 +660,7 @@ def test_multiple_question_types(self):
}
# Create the Perseus exercise
- create_perseus_exercise(
- self.exercise_node, self.kolibri_node, exercise_data, user_id=self.user.id
- )
+ self._create_perseus_zip(exercise_data)
# Verify that a file was created
exercise_file = self.exercise_node.files.get(preset_id=format_presets.EXERCISE)
@@ -661,6 +793,7 @@ def _test_image_resizing_in_field(self, field_type):
# Create the assessment item
item_type = exercises.SINGLE_SELECTION
+
item = self._create_assessment_item(item_type, question_text, answers, hints)
# Associate the image with the assessment item
@@ -678,9 +811,7 @@ def _test_image_resizing_in_field(self, field_type):
}
# Create the Perseus exercise
- create_perseus_exercise(
- self.exercise_node, self.kolibri_node, exercise_data, user_id=self.user.id
- )
+ self._create_perseus_zip(exercise_data)
# Get the exercise file
exercise_file = self.exercise_node.files.get(preset_id=format_presets.EXERCISE)
@@ -820,9 +951,7 @@ def test_image_with_same_resize_dimensions(self):
}
# Create the Perseus exercise
- create_perseus_exercise(
- self.exercise_node, self.kolibri_node, exercise_data, user_id=self.user.id
- )
+ self._create_perseus_zip(exercise_data)
# Get the exercise file
exercise_file = self.exercise_node.files.get(preset_id=format_presets.EXERCISE)
@@ -901,9 +1030,7 @@ def test_image_with_similar_dimensions(self):
}
# Create the Perseus exercise
- create_perseus_exercise(
- self.exercise_node, self.kolibri_node, exercise_data, user_id=self.user.id
- )
+ self._create_perseus_zip(exercise_data)
# Get the exercise file
exercise_file = self.exercise_node.files.get(preset_id=format_presets.EXERCISE)
@@ -949,3 +1076,749 @@ def test_image_with_similar_dimensions(self):
third_image,
"Images with dimensions outside 1% threshold should use different files",
)
+
+ def test_image_with_zero_width(self):
+ # Builds a single-selection exercise whose question references an image and
+ # checks that the generated exercise zip contains exactly one image file.
+ # Create a base image file
+ base_image = fileobj_exercise_image(size=(400, 300), color="red")
+ base_image_url = exercises.CONTENT_STORAGE_FORMAT.format(base_image.filename())
+
+ # Question text with a "First image" and a "Second image" section.
+ # NOTE(review): the image markdown is not visible in this extract; presumably one
+ # reference requests a zero width and the other a valid size - confirm.
+ question_text = (
+ f"First image: \n"
+ f"Second image: "
+ )
+
+ # Create the assessment item
+ item = self._create_assessment_item(
+ exercises.SINGLE_SELECTION,
+ question_text,
+ [{"answer": "Answer", "correct": True, "order": 1}],
+ )
+
+ # Associate the image with the assessment item
+ base_image.assessment_item = item
+ base_image.save()
+
+ # Create exercise data
+ exercise_data = {
+ "mastery_model": exercises.M_OF_N,
+ "randomize": True,
+ "n": 1,
+ "m": 1,
+ "all_assessment_items": [item.assessment_id],
+ "assessment_mapping": {item.assessment_id: exercises.SINGLE_SELECTION},
+ }
+
+ # Create the Perseus exercise
+ self._create_perseus_zip(exercise_data)
+
+ # Get the exercise file
+ exercise_file = self.exercise_node.files.get(preset_id=format_presets.EXERCISE)
+
+ # Validate the zip file
+ zip_file, _ = self._validate_perseus_zip(exercise_file)
+
+ # Get all image files in the zip
+ image_files = [
+ name for name in zip_file.namelist() if name.startswith("images/")
+ ]
+
+ # Verify we have exactly 1 image file
+ self.assertEqual(
+ len(image_files),
+ 1,
+ f"Expected 1 resized images, found {len(image_files)}: {image_files}",
+ )
+
+
+class TestQTIExerciseCreation(StudioTestCase):
+ """
+ Tests for the QTI exercise generator which handles QTI format exercise file generation.
+
+ These tests verify that the function correctly packages assessment items
+ into a valid QTI Content Package with IMS manifest and individual item XML files.
+ """
+
+ maxDiff = None
+
+    def setUp(self):
+        """Create the exercise node that the tests in this class attach items to."""
+        self.setUpBase()
+
+        # Mastery settings are serialized to a JSON string for extra_fields
+        mastery_threshold = {
+            "mastery_model": exercises.M_OF_N,
+            "m": 3,
+            "n": 5,
+        }
+        extra_fields = {
+            "randomize": True,
+            "options": {
+                "completion_criteria": {
+                    "model": "mastery",
+                    "threshold": mastery_threshold,
+                }
+            },
+        }
+
+        self.exercise_node = ContentNode.objects.create(
+            title="Test QTI Exercise",
+            node_id="1234567890abcdef1234567890abcded",
+            content_id="fedcba0987654321fedcba0987654321",
+            kind_id=content_kinds.EXERCISE,
+            parent=self.channel.main_tree,
+            extra_fields=json.dumps(extra_fields),
+        )
+
+    def _create_assessment_item(
+        self, item_type, question_text, answers, hints=None, assessment_id=None
+    ):
+        """
+        Create and return an AssessmentItem attached to ``self.exercise_node``.
+
+        Args:
+            item_type: question type stored in the item's ``type`` field
+            question_text: text stored in the ``question`` field
+            answers: JSON-serializable answers, stored via ``json.dumps``
+            hints: JSON-serializable hints; defaults to one placeholder hint
+            assessment_id: explicit id to use; a random ``uuid4().hex`` if omitted
+        """
+        if hints is None:
+            hints = [{"hint": "This is a hint", "order": 1}]
+
+        # count() lets the database count the existing items instead of loading
+        # every row only to measure the resulting list.
+        next_order = self.exercise_node.assessment_items.count() + 1
+
+        item = AssessmentItem.objects.create(
+            contentnode=self.exercise_node,
+            assessment_id=assessment_id or uuid4().hex,
+            type=item_type,
+            question=question_text,
+            answers=json.dumps(answers),
+            hints=json.dumps(hints),
+            raw_data="{}",
+            order=next_order,
+            randomize=True,
+        )
+        return item
+
+    def _create_qti_zip(self, exercise_data):
+        """Run the QTI generator for ``self.exercise_node`` and return its result"""
+        return QTIExerciseGenerator(
+            self.exercise_node,
+            exercise_data,
+            self.channel.id,
+            "en-US",
+            user_id=self.user.id,
+        ).create_exercise_archive()
+
+    def _normalize_xml(self, xml_string):
+        """Strip every line of ``xml_string`` and join the lines without separators."""
+        stripped_lines = [line.strip() for line in xml_string.split("\n")]
+        return "".join(stripped_lines)
+
+    def _validate_qti_zip_structure(self, exercise_file):
+        """Open the stored exercise file as a zip, require an IMS manifest, return the zip"""
+        # Read through Django's storage backend rather than the local filesystem
+        storage_name = exercise_file.file_on_disk.name
+        with storage.open(storage_name, "rb") as stored_file:
+            archive_bytes = stored_file.read()
+
+        archive = zipfile.ZipFile(BytesIO(archive_bytes))
+
+        # The manifest is the one file every package checked here must contain
+        member_names = archive.namelist()
+        assert (
+            "imsmanifest.xml" in member_names
+        ), "imsmanifest.xml not found in zip file"
+
+        return archive
+
+ def test_basic_qti_exercise_creation(self):
+ """Test the basic creation of a QTI exercise with a single question"""
+ # Create a simple multiple choice question with 32-char hex ID
+ assessment_id = "1234567890abcdef1234567890abcdef"
+ item = self._create_assessment_item(
+ exercises.SINGLE_SELECTION,
+ "What is 2+2?",
+ [
+ {"answer": "4", "correct": True, "order": 1},
+ {"answer": "3", "correct": False, "order": 2},
+ {"answer": "5", "correct": False, "order": 3},
+ ],
+ assessment_id=assessment_id,
+ )
+
+ # Create the exercise data structure
+ exercise_data = {
+ "mastery_model": exercises.M_OF_N,
+ "randomize": True,
+ "n": 5,
+ "m": 3,
+ "all_assessment_items": [item.assessment_id],
+ "assessment_mapping": {item.assessment_id: exercises.SINGLE_SELECTION},
+ }
+
+ # Call the function to create the QTI exercise
+ self._create_qti_zip(exercise_data)
+
+ # Verify that a file was created for the node
+ exercise_file = self.exercise_node.files.get(preset_id=format_presets.QTI_ZIP)
+ self.assertIsNotNone(exercise_file)
+ self.assertEqual(exercise_file.file_format_id, "zip")
+
+ # Validate the contents of the zip file
+ zip_file = self._validate_qti_zip_structure(exercise_file)
+
+ # Check that the assessment item XML file exists
+ expected_item_file = "items/KEjRWeJCrze8SNFZ4kKvN7w.xml"
+ self.assertIn(expected_item_file, zip_file.namelist())
+
+ # Get the actual QTI item XML content
+ actual_item_xml = zip_file.read(expected_item_file).decode("utf-8")
+
+ # Expected QTI item XML content
+ expected_item_xml = """
+
+
+
+ choice_0
+
+
+
+
+
+
+ What is 2+2?
+
+ 4
+ 3
+ 5
+
+
+
+ """
+
+ # Compare normalized XML
+ self.assertEqual(
+ self._normalize_xml(expected_item_xml),
+ self._normalize_xml(actual_item_xml),
+ )
+
+ # Get the actual IMS manifest content
+ actual_manifest_xml = zip_file.read("imsmanifest.xml").decode("utf-8")
+
+ # Expected IMS manifest XML content
+ expected_manifest_xml = """
+
+
+ QTI Package
+ 3.0.0
+
+
+
+
+
+
+
+ """
+
+ # Compare normalized XML
+ self.assertEqual(
+ self._normalize_xml(expected_manifest_xml),
+ self._normalize_xml(actual_manifest_xml),
+ )
+
+ def test_multiple_selection_question(self):
+ """Test QTI generation for multiple selection questions"""
+ assessment_id = "abcdef1234567890abcdef1234567890"
+ item = self._create_assessment_item(
+ exercises.MULTIPLE_SELECTION,
+ "Select all prime numbers:",
+ [
+ {"answer": "2", "correct": True, "order": 1},
+ {"answer": "3", "correct": True, "order": 2},
+ {"answer": "4", "correct": False, "order": 3},
+ {"answer": "5", "correct": True, "order": 4},
+ ],
+ assessment_id=assessment_id,
+ )
+
+ exercise_data = {
+ "mastery_model": exercises.M_OF_N,
+ "randomize": True,
+ "n": 1,
+ "m": 1,
+ "all_assessment_items": [item.assessment_id],
+ "assessment_mapping": {item.assessment_id: exercises.MULTIPLE_SELECTION},
+ }
+
+ self._create_qti_zip(exercise_data)
+ exercise_file = self.exercise_node.files.get(preset_id=format_presets.QTI_ZIP)
+ zip_file = self._validate_qti_zip_structure(exercise_file)
+
+ qti_id = hex_to_qti_id(assessment_id)
+
+ # Check the QTI XML for multiple selection specifics
+ expected_item_file = f"items/{qti_id}.xml"
+ actual_item_xml = zip_file.read(expected_item_file).decode("utf-8")
+
+ # Expected QTI item XML content for multiple selection
+ expected_item_xml = """
+
+
+
+ choice_0
+ choice_1
+ choice_3
+
+
+
+
+
+
+ Select all prime numbers:
+
+ 2
+ 3
+ 4
+ 5
+
+
+
+ """
+
+ # Compare normalized XML
+ self.assertEqual(
+ self._normalize_xml(expected_item_xml),
+ self._normalize_xml(actual_item_xml),
+ )
+
+ def test_free_response_question(self):
+ assessment_id = "fedcba0987654321fedcba0987654321"
+ item = self._create_assessment_item(
+ exercises.FREE_RESPONSE,
+ "What is the capital of France?",
+ [{"answer": "Paris", "correct": True, "order": 1}],
+ assessment_id=assessment_id,
+ )
+
+ exercise_data = {
+ "mastery_model": exercises.M_OF_N,
+ "randomize": True,
+ "n": 1,
+ "m": 1,
+ "all_assessment_items": [item.assessment_id],
+ "assessment_mapping": {item.assessment_id: exercises.FREE_RESPONSE},
+ }
+
+ self._create_qti_zip(exercise_data)
+ exercise_file = self.exercise_node.files.get(preset_id=format_presets.QTI_ZIP)
+ zip_file = self._validate_qti_zip_structure(exercise_file)
+
+ # Check the QTI XML for text entry specifics
+ expected_item_file = "items/K_ty6CYdlQyH-3LoJh2VDIQ.xml"
+ actual_item_xml = zip_file.read(expected_item_file).decode("utf-8")
+
+ # Expected QTI item XML content for text entry
+ expected_item_xml = """
+
+
+
+ Paris
+
+
+
+
+
+
What is the capital of France?
+
+
+
+
+ """
+
+ # Compare normalized XML
+ self.assertEqual(
+ self._normalize_xml(expected_item_xml),
+ self._normalize_xml(actual_item_xml),
+ )
+
+ def test_free_response_question_with_maths(self):
+ assessment_id = "fedcba0987654321fedcba0987654321"
+ item = self._create_assessment_item(
+ exercises.FREE_RESPONSE,
+ "$$\\sum_n^sxa^n$$\n\n What does this even mean?",
+ [{"answer": "Nothing", "correct": True, "order": 1}],
+ assessment_id=assessment_id,
+ )
+
+ exercise_data = {
+ "mastery_model": exercises.M_OF_N,
+ "randomize": True,
+ "n": 1,
+ "m": 1,
+ "all_assessment_items": [item.assessment_id],
+ "assessment_mapping": {item.assessment_id: exercises.FREE_RESPONSE},
+ }
+
+ self._create_qti_zip(exercise_data)
+ exercise_file = self.exercise_node.files.get(preset_id=format_presets.QTI_ZIP)
+ zip_file = self._validate_qti_zip_structure(exercise_file)
+
+ # Check the QTI XML for text entry specifics
+ expected_item_file = "items/K_ty6CYdlQyH-3LoJh2VDIQ.xml"
+ actual_item_xml = zip_file.read(expected_item_file).decode("utf-8")
+
+ # Expected QTI item XML content for text entry
+ expected_item_xml = """
+
+
+
+ Nothing
+
+
+
+
+
+
+
+
+ ∑ n s
+ x
+ a n
+
+ \\sum_n^sxa^n
+
+
+
What does this even mean?
+
+
+
+
+ """
+
+ # Compare normalized XML
+ self.assertEqual(
+ self._normalize_xml(expected_item_xml),
+ self._normalize_xml(actual_item_xml),
+ )
+
+    def test_perseus_question_rejection(self):
+        """Generating a QTI archive for a Perseus item must raise a ValueError"""
+        perseus_id = "aaaa1111bbbb2222cccc3333dddd4444"
+        # Only raw_data is populated for this item
+        perseus_item = AssessmentItem.objects.create(
+            contentnode=self.exercise_node,
+            assessment_id=perseus_id,
+            type=exercises.PERSEUS_QUESTION,
+            raw_data='{"question": {"content": "Perseus content"}}',
+            order=1,
+        )
+
+        item_id = perseus_item.assessment_id
+        exercise_data = {
+            "mastery_model": exercises.M_OF_N,
+            "randomize": True,
+            "n": 1,
+            "m": 1,
+            "all_assessment_items": [item_id],
+            "assessment_mapping": {item_id: exercises.PERSEUS_QUESTION},
+        }
+
+        with self.assertRaises(ValueError) as raised:
+            self._create_qti_zip(exercise_data)
+
+        error_text = str(raised.exception)
+        self.assertIn("Perseus questions are not supported", error_text)
+
+ def test_exercise_with_image(self):
+ """Test QTI exercise generation with images"""
+ assessment_id = "1111aaaa2222bbbb3333cccc4444dddd"
+ image_file = fileobj_exercise_image()
+
+ # Create a question with image
+ image_url = exercises.CONTENT_STORAGE_FORMAT.format(f"{image_file.filename()}")
+ question_text = f"Identify the shape: "
+ item = self._create_assessment_item(
+ exercises.SINGLE_SELECTION,
+ question_text,
+ [
+ {"answer": "Circle", "correct": True, "order": 1},
+ {"answer": "Square", "correct": False, "order": 2},
+ ],
+ assessment_id=assessment_id,
+ )
+
+ # Associate the image with the assessment item
+ image_file.assessment_item = item
+ image_file.save()
+
+ exercise_data = {
+ "mastery_model": exercises.M_OF_N,
+ "randomize": True,
+ "n": 1,
+ "m": 1,
+ "all_assessment_items": [item.assessment_id],
+ "assessment_mapping": {item.assessment_id: exercises.SINGLE_SELECTION},
+ }
+
+ self._create_qti_zip(exercise_data)
+ exercise_file = self.exercise_node.files.get(preset_id=format_presets.QTI_ZIP)
+ zip_file = self._validate_qti_zip_structure(exercise_file)
+
+ # Check that the image file was included in the zip
+ image_path = f"items/images/{image_file.filename()}"
+ self.assertIn(image_path, zip_file.namelist())
+
+ # Get the actual manifest content
+ actual_manifest_xml = zip_file.read("imsmanifest.xml").decode("utf-8")
+
+ # Expected manifest should include the image file dependency
+ expected_manifest_xml = f"""
+
+
+ QTI Package
+ 3.0.0
+
+
+
+
+
+
+
+
+ """
+
+ # Compare normalized XML
+ self.assertEqual(
+ self._normalize_xml(expected_manifest_xml),
+ self._normalize_xml(actual_manifest_xml),
+ )
+
+ self.assertEqual(exercise_file.checksum, "51ba0d6e3c7f30239265c5294abe6ac5")
+
+ def test_question_with_mathematical_content(self):
+ """Test QTI generation for questions containing mathematical formulas converted to MathML"""
+ assessment_id = "dddddddddddddddddddddddddddddddd"
+ item = self._create_assessment_item(
+ exercises.SINGLE_SELECTION,
+ "Solve the equation $$\\frac{x}{2} = 3$$ for x. What is the value of x?",
+ [
+ {"answer": "6", "correct": True, "order": 1},
+ {"answer": "3", "correct": False, "order": 2},
+ {"answer": "1.5", "correct": False, "order": 3},
+ {"answer": "9", "correct": False, "order": 4},
+ ],
+ assessment_id=assessment_id,
+ )
+
+ exercise_data = {
+ "mastery_model": exercises.M_OF_N,
+ "randomize": True,
+ "n": 1,
+ "m": 1,
+ "all_assessment_items": [item.assessment_id],
+ "assessment_mapping": {item.assessment_id: exercises.SINGLE_SELECTION},
+ }
+
+ self._create_qti_zip(exercise_data)
+ exercise_file = self.exercise_node.files.get(preset_id=format_presets.QTI_ZIP)
+ zip_file = self._validate_qti_zip_structure(exercise_file)
+
+ qti_id = hex_to_qti_id(assessment_id)
+
+ # Check the QTI XML for mathematical content conversion to MathML
+ expected_item_file = f"items/{qti_id}.xml"
+ actual_item_xml = zip_file.read(expected_item_file).decode("utf-8")
+
+ # Expected QTI item XML content with MathML conversion
+ expected_item_xml = f"""
+
+
+
+ choice_0
+
+
+
+
+
+
+ Solve the equation x 2 = 3 \\frac{{x}}{{2}} = 3 for x. What is the value of x?
+
+ 6
+ 3
+ 1.5
+ 9
+
+
+
+ """
+
+ # Compare normalized XML
+ self.assertEqual(
+ self._normalize_xml(expected_item_xml),
+ self._normalize_xml(actual_item_xml),
+ )
+
+ def test_multiple_question_types_mixed(self):
+ """Test creating a QTI exercise with multiple supported question types"""
+ # Create different types of supported questions with 32-char hex IDs
+ assessment_id1 = "1111111111111111111111111111111a"
+ assessment_id2 = "2222222222222222222222222222222b"
+ assessment_id3 = "3333333333333333333333333333333c"
+
+ qti_id1 = hex_to_qti_id(assessment_id1)
+ qti_id2 = hex_to_qti_id(assessment_id2)
+ qti_id3 = hex_to_qti_id(assessment_id3)
+
+ item1 = self._create_assessment_item(
+ exercises.SINGLE_SELECTION,
+ "What is 2+2?",
+ [
+ {"answer": "4", "correct": True, "order": 1},
+ {"answer": "5", "correct": False, "order": 2},
+ ],
+ assessment_id=assessment_id1,
+ )
+
+ item2 = self._create_assessment_item(
+ exercises.MULTIPLE_SELECTION,
+ "Select all even numbers:",
+ [
+ {"answer": "2", "correct": True, "order": 1},
+ {"answer": "3", "correct": False, "order": 2},
+ {"answer": "4", "correct": True, "order": 3},
+ {"answer": "5", "correct": False, "order": 4},
+ ],
+ assessment_id=assessment_id2,
+ )
+
+ item3 = self._create_assessment_item(
+ exercises.INPUT_QUESTION,
+ "What is the capital of Spain?",
+ [{"answer": "Madrid", "correct": True, "order": 1}],
+ assessment_id=assessment_id3,
+ )
+
+ exercise_data = {
+ "mastery_model": exercises.M_OF_N,
+ "randomize": True,
+ "n": 3,
+ "m": 2,
+ "all_assessment_items": [
+ item1.assessment_id,
+ item2.assessment_id,
+ item3.assessment_id,
+ ],
+ "assessment_mapping": {
+ item1.assessment_id: exercises.SINGLE_SELECTION,
+ item2.assessment_id: exercises.MULTIPLE_SELECTION,
+ item3.assessment_id: exercises.INPUT_QUESTION,
+ },
+ }
+
+ self._create_qti_zip(exercise_data)
+ exercise_file = self.exercise_node.files.get(preset_id=format_presets.QTI_ZIP)
+ zip_file = self._validate_qti_zip_structure(exercise_file)
+
+ # Check that all question XML files are included
+ expected_files = [
+ f"items/{qti_id1}.xml",
+ f"items/{qti_id2}.xml",
+ f"items/{qti_id3}.xml",
+ ]
+
+ for expected_file in expected_files:
+ self.assertIn(expected_file, zip_file.namelist())
+
+ # Get the actual manifest content
+ actual_manifest_xml = zip_file.read("imsmanifest.xml").decode("utf-8")
+
+ # Expected manifest with all three resources
+ expected_manifest_xml = f"""
+
+
+ QTI Package
+ 3.0.0
+
+
+
+
+
+
+
+
+
+
+
+
+
+ """
+
+ # Compare normalized XML
+ self.assertEqual(
+ self._normalize_xml(expected_manifest_xml),
+ self._normalize_xml(actual_manifest_xml),
+ )
+
+ self.assertEqual(exercise_file.checksum, "8e488543ef52f0b153553eaf9fb51419")
+
+    def test_unsupported_question_type(self):
+        """An item whose type the generator does not know must raise a ValueError"""
+        unknown_type = "UNSUPPORTED_TYPE"
+        unsupported_item = AssessmentItem.objects.create(
+            contentnode=self.exercise_node,
+            assessment_id="bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb",
+            type=unknown_type,
+            question="This is an unsupported question type",
+            answers="[]",
+            hints="[]",
+            raw_data="{}",
+            order=1,
+        )
+
+        item_id = unsupported_item.assessment_id
+        exercise_data = {
+            "mastery_model": exercises.M_OF_N,
+            "randomize": True,
+            "n": 1,
+            "m": 1,
+            "all_assessment_items": [item_id],
+            "assessment_mapping": {item_id: unknown_type},
+        }
+
+        with self.assertRaises(ValueError) as raised:
+            self._create_qti_zip(exercise_data)
+
+        error_text = str(raised.exception)
+        self.assertIn("Unsupported question type", error_text)
+
+ def test_manifest_structure_single_item(self):
+ """Test that the IMS manifest has proper structure and metadata for a single item"""
+ assessment_id = "cccccccccccccccccccccccccccccccc"
+ item = self._create_assessment_item(
+ exercises.SINGLE_SELECTION,
+ "Test question",
+ [{"answer": "Test answer", "correct": True, "order": 1}],
+ assessment_id=assessment_id,
+ )
+
+ exercise_data = {
+ "mastery_model": exercises.M_OF_N,
+ "randomize": True,
+ "n": 1,
+ "m": 1,
+ "all_assessment_items": [item.assessment_id],
+ "assessment_mapping": {item.assessment_id: exercises.SINGLE_SELECTION},
+ }
+
+ self._create_qti_zip(exercise_data)
+ exercise_file = self.exercise_node.files.get(preset_id=format_presets.QTI_ZIP)
+ zip_file = self._validate_qti_zip_structure(exercise_file)
+
+ # Get the actual manifest content
+ actual_manifest_xml = zip_file.read("imsmanifest.xml").decode("utf-8")
+
+ # Expected exact manifest structure
+ expected_manifest_xml = """
+
+
+ QTI Package
+ 3.0.0
+
+
+
+
+
+
+
+ """
+
+ # Compare normalized XML
+ self.assertEqual(
+ self._normalize_xml(expected_manifest_xml),
+ self._normalize_xml(actual_manifest_xml),
+ )
diff --git a/contentcuration/contentcuration/tests/utils/test_markdown.py b/contentcuration/contentcuration/tests/utils/test_markdown.py
new file mode 100644
index 0000000000..9dc3cd41b6
--- /dev/null
+++ b/contentcuration/contentcuration/tests/utils/test_markdown.py
@@ -0,0 +1,198 @@
+import unittest
+
+from contentcuration.utils.assessment.markdown import render_markdown
+from contentcuration.utils.assessment.qti import ElementTreeBase
+
+
+class TexMathTestMixin:
+ """Mixin providing test methods for TexMath plugin tests"""
+
+ def _assert_conversion(self, markdown_text: str, expected: str):
+ """Override in subclasses to define assertion behavior"""
+ raise NotImplementedError("Subclasses must implement _assert_conversion")
+
+ def test_markdown_with_inline_math(self):
+ """Test conversion of markdown with inline math to HTML + MathML"""
+
+ markdown_text = (
+ "What is the answer to this *question*? $$x\cdot y=z^2$$" # noqa W605
+ )
+ expected = (
+ "What is the answer to this question ? "
+ ''
+ "x · y = z 2 "
+ 'x\cdot y=z^2 ' # noqa W605
+ "
\n"
+ )
+
+ self._assert_conversion(markdown_text, expected)
+
+ def test_block_math(self):
+ """Test conversion of block math"""
+
+ markdown_text = (
+ "Here's an equation:\n\n$$E = mc^2$$\n\nThat's Einstein's formula."
+ )
+ expected = (
+ "Here's an equation:
\n"
+ ''
+ "E = m c 2 "
+ 'E = mc^2 '
+ " "
+ "That's Einstein's formula.
\n"
+ )
+
+ self._assert_conversion(markdown_text, expected)
+
+ def test_multiline_block_math(self):
+ """
+ Ensure a $$ … $$ block spanning multiple lines is converted to MathML
+ and the literal $$ delimiters are removed. This currently fails with
+ the buggy BLOCK_PATTERN because it stops after the first '$'.
+ """
+ markdown_text = (
+ "$$\n"
+ "\\begin{aligned}\n"
+ "a = b + c \\\\\n"
+ "$5 = d + e\n"
+ "\\end{aligned}\n"
+ "$$"
+ )
+ expected = (
+ ''
+ "a = b + c "
+ '$ 5 = d + e '
+ '\n\\begin{aligned}\na = b + c \\\\\n$5 = d + e\n\\end{aligned}\n '
+ " "
+ )
+
+ self._assert_conversion(markdown_text, expected)
+
+ def test_inline_math_with_dollar_inside(self):
+ """
+ Ensure a $$ … $$ inline that contains an internal '$' (e.g. inside
+ \\text{}) is parsed correctly. With the old BLOCK_PATTERN the first '$'
+ prematurely terminates the match so the delimiters remain.
+ """
+ markdown_text = "Test this $$\\text{Cost = 1.00 $USD$}$$"
+ expected = (
+ "Test this "
+ ''
+ "Cost = 1.00 $USD$ "
+ '\\text{Cost = 1.00 $USD$} '
+ "
\n"
+ )
+
+ self._assert_conversion(markdown_text, expected)
+
+ def test_multiple_math_expressions(self):
+ """Test multiple math expressions in one document"""
+
+ markdown_text = "First: $$a + b$$, then $$c \\times d$$, finally $$e^f$$."
+ expected = (
+ "First: "
+ 'a + b '
+ 'a + b '
+ ", then "
+ 'c × d '
+ 'c \\times d '
+ ", finally "
+ 'e f '
+ 'e^f '
+ ".
\n"
+ )
+
+ self._assert_conversion(markdown_text, expected)
+
+ def test_mixed_inline_and_block(self):
+ """Test document with both inline and block math"""
+
+ markdown_text = (
+ "This is inline math: $$a = b$$\n\n"
+ "And this is block math:\n\n"
+ "$$\\sum_{i=1}^{n} x_i = y$$\n\n"
+ "Back to text with more inline: $$z^2$$"
+ )
+ expected = (
+ "This is inline math: "
+ 'a = b '
+ 'a = b '
+ "
\n"
+ "And this is block math:
\n"
+ ''
+ "∑ i = 1 "
+ "n x i = y "
+ '\sum_{i=1}^{n} x_i = y ' # noqa W605
+ " "
+ "Back to text with more inline: "
+ 'z 2 '
+ 'z^2 '
+ "
\n"
+ )
+
+ self._assert_conversion(markdown_text, expected)
+
+ def test_no_math_content(self):
+ """Test that regular markdown without math still works"""
+
+ markdown_text = "This is just *regular* markdown with **bold** text."
+ expected = "This is just regular markdown with bold text.
\n"
+
+ self._assert_conversion(markdown_text, expected)
+
+ def test_simple_inline_math(self):
+ """Test simple inline math expression"""
+
+ markdown_text = "The variable $$x$$ is unknown."
+ expected = (
+ "The variable "
+ 'x '
+ 'x '
+ " is unknown.
\n"
+ )
+
+ self._assert_conversion(markdown_text, expected)
+
+ def test_simple_block_math(self):
+ """Test simple block math expression"""
+
+ markdown_text = "$$y = mx + b$$"
+ expected = (
+ ''
+ "y = m x + b "
+ 'y = mx + b '
+ " "
+ )
+
+ self._assert_conversion(markdown_text, expected)
+
+
+class TestTexMathPlugin(TexMathTestMixin, unittest.TestCase):
+    """Run the mixin's cases against the direct conversion: markdown → HTML+MathML"""
+
+    def _assert_conversion(self, markdown_text: str, expected: str):
+        """Render ``markdown_text`` and require the output to equal ``expected`` exactly"""
+        rendered = render_markdown(markdown_text)
+        self.assertEqual(rendered, expected)
+
+
+class TestTexMathPluginRoundtrip(TexMathTestMixin, unittest.TestCase):
+    """Run the mixin's cases through the roundtrip: markdown → HTML+MathML → Pydantic → string"""
+
+    maxDiff = None
+
+    def _assert_conversion(self, markdown_text: str, expected: str):
+        """Render, parse, serialize again, then compare with newlines ignored"""
+        rendered = render_markdown(markdown_text)
+
+        # from_string may hand back one element or a list of elements
+        parsed = ElementTreeBase.from_string(rendered)
+        if isinstance(parsed, list):
+            serialized = "".join(
+                element.to_xml_string().strip() for element in parsed
+            )
+        else:
+            serialized = parsed.to_xml_string().strip()
+
+        def flatten(text):
+            return text.replace("\n", "").strip()
+
+        self.assertEqual(flatten(serialized), flatten(expected))
diff --git a/contentcuration/contentcuration/utils/assessment/__init__.py b/contentcuration/contentcuration/utils/assessment/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/contentcuration/contentcuration/utils/assessment/base.py b/contentcuration/contentcuration/utils/assessment/base.py
new file mode 100644
index 0000000000..0f668920a0
--- /dev/null
+++ b/contentcuration/contentcuration/utils/assessment/base.py
@@ -0,0 +1,395 @@
+import hashlib
+import json
+import logging
+import os
+import re
+import zipfile
+from abc import ABC
+from abc import abstractmethod
+from io import BytesIO
+from tempfile import NamedTemporaryFile
+from tempfile import TemporaryDirectory
+
+from django.core.files import File
+from django.core.files.storage import default_storage as storage
+from le_utils.constants import exercises
+from PIL import Image
+
+from contentcuration import models
+
+
+# Matches markdown image references into content storage, of the form
+# ![alt]($<placeholder>/<filename> =<width>x<height>) where the size suffix is optional.
+# Group 1 is the filename; groups 2 and 3 are the width and height when present.
+image_pattern = rf"!\[(?:[^\]]*)]\(\${exercises.CONTENT_STORAGE_PLACEHOLDER}/([^\s)]+)(?:\s=([0-9\.]+)x([0-9\.]+))*[^)]*\)"
+
+
+def resize_image(image_content, width, height):
+    """
+    Resize raw image bytes to ``width`` x ``height`` (each truncated with ``int``).
+
+    The result is saved in the format of the source image. Returns the resized
+    bytes, or None after logging a warning if any step raises.
+    """
+    try:
+        with Image.open(BytesIO(image_content)) as source:
+            source_format = source.format
+            target_size = (int(width), int(height))
+            resized = source.resize(target_size, Image.LANCZOS)
+            output = BytesIO()
+            resized.save(output, format=source_format)
+            return output.getvalue()
+    except Exception as error:
+        logging.warning(f"Error resizing image: {str(error)}")
+        return None
+
+
+def get_resized_image_checksum(image_content):
+    """Return the hexadecimal MD5 digest of ``image_content`` (bytes)."""
+    digest = hashlib.md5()
+    digest.update(image_content)
+    return digest.hexdigest()
+
+
+class ExerciseArchiveGenerator(ABC):
+ """
+ Abstract base class for exercise zip generators.
+ Handles common functionality for creating exercise zip files for different formats.
+ """
+
+ ZIP_DATE_TIME = (2015, 10, 21, 7, 28, 0)
+ ZIP_COMPRESS_TYPE = zipfile.ZIP_DEFLATED
+ ZIP_COMMENT = "".encode()
+
+    @property
+    @abstractmethod
+    def file_format(self):
+        """
+        Abstract property that every concrete generator must provide.
+
+        NOTE(review): presumably the file format id of the generated archive -
+        confirm against the code that saves the archive.
+        """
+
+    @property
+    @abstractmethod
+    def preset(self):
+        """
+        Abstract property that every concrete generator must provide.
+
+        NOTE(review): presumably the format preset id recorded for the generated
+        file - confirm against the code that saves the archive.
+        """
+
+    @abstractmethod
+    def get_image_file_path(self):
+        """
+        Return the path inside the exercise archive under which assessment image
+        files are stored. Must be implemented by each concrete generator.
+
+        Returns:
+            str: The archive path used for image files
+        """
+
+    def get_image_ref_prefix(self):
+        """
+        Return the text placed in front of the image file path when image
+        references are rewritten. The base implementation uses no prefix; Perseus
+        needs one so that the frontend can find all image file paths.
+        """
+        return ""
+
+    @abstractmethod
+    def create_assessment_item(self, assessment_item, processed_data):
+        """
+        Build the archive file for a single assessment item. Must be implemented
+        by each concrete generator.
+
+        Args:
+            assessment_item: The assessment item being exported
+            processed_data: The data already processed from that item
+        Returns:
+            filepath: Archive path of the file created for the item
+            file_content: Content to store at that path
+        """
+
+    def __init__(
+        self, ccnode, exercise_data, channel_id, default_language, user_id=None
+    ):
+        """
+        Store the inputs and reset the bookkeeping used while building an archive.
+
+        Args:
+            ccnode: Content node containing exercise data
+            exercise_data: Data specific to the exercise format
+            channel_id: Channel id, included in warnings logged about bad filenames
+            default_language: Stored as given (its use is not visible in this class
+                extract - presumably a fallback language code)
+            user_id: Optional user ID for tracking who created the exercise
+        """
+        # Bookkeeping that starts out empty for every generator
+        self.tempdir = None
+        self.files_to_write = []
+        self.assessment_items = []
+        self.resized_images_map = {}
+
+        # Inputs supplied by the caller
+        self.user_id = user_id
+        self.default_language = default_language
+        self.channel_id = channel_id
+        self.exercise_data = exercise_data
+        self.ccnode = ccnode
+
+    def write_to_zipfile(self, zf, filepath, content):
+        """
+        Write ``content`` to ``zf`` at ``filepath`` with fixed, neutral metadata.
+
+        Copied from ricecooker's write_file_to_zip_with_neutral_metadata. The
+        comment, date_time and compress_type come from class attributes so that
+        Perseus can override them; this can be revisited once there is a good way
+        to avoid rebuilding perseus files unless needed.
+        """
+        # Archive member names always use forward slashes
+        member_name = filepath.replace("\\", "/")
+        entry = zipfile.ZipInfo(member_name, date_time=self.ZIP_DATE_TIME)
+        entry.create_system = 0
+        entry.compress_type = self.ZIP_COMPRESS_TYPE
+        entry.comment = self.ZIP_COMMENT
+        zf.writestr(entry, content)
+
+    def add_file_to_write(self, filepath, content):
+        """
+        Write ``content`` (bytes) to ``filepath`` inside the temporary directory
+        and record the full path in ``self.files_to_write``.
+
+        A path that already exists is left untouched and not recorded again.
+
+        Raises:
+            RuntimeError: if ``self.tempdir`` has not been set yet
+        """
+        if self.tempdir is None:
+            raise RuntimeError(
+                "Cannot add files to write before creating the temporary directory."
+            )
+        destination = os.path.join(self.tempdir, filepath)
+        if not os.path.exists(destination):
+            os.makedirs(os.path.dirname(destination), exist_ok=True)
+            with open(destination, "wb") as handle:
+                handle.write(content)
+            self.files_to_write.append(destination)
+
+    def _add_original_image(self, checksum, filename, new_file_path):
+        """Read the unmodified image from storage and queue it under ``new_file_path``"""
+        storage_name = models.generate_object_storage_name(checksum, filename)
+        with storage.open(storage_name, "rb") as stored_image:
+            image_bytes = stored_image.read()
+        archive_path = os.path.join(new_file_path, filename)
+        self.add_file_to_write(archive_path, image_bytes)
+
+    def _get_similar_image(self, filename, width, height):
+        """
+        Look up an already resized version of ``filename`` that can be reused.
+
+        An exact (width, height) entry wins; otherwise the first recorded size
+        whose width and height are both within 1% of the requested ones is used.
+        Returns the recorded image reference, or None when there is none. The
+        first lookup for a filename also creates its empty entry in
+        ``self.resized_images_map``.
+        """
+        if filename not in self.resized_images_map:
+            self.resized_images_map[filename] = {}
+            return None
+
+        known_sizes = self.resized_images_map[filename]
+        requested_size = (width, height)
+        if requested_size in known_sizes:
+            return known_sizes[requested_size]
+
+        return next(
+            (
+                image_ref
+                for (known_width, known_height), image_ref in known_sizes.items()
+                if abs(known_width - width) / width < 0.01
+                and abs(known_height - height) / height < 0.01
+            ),
+            None,
+        )
+
+    def _resize_image(self, checksum, ext, filename, width, height, new_file_path):
+        """
+        Resize a stored image, queue the result for the archive and remember it.
+
+        Args:
+            checksum: checksum part of ``filename``, used to locate it in storage
+            ext: extension reused for the resized file name
+            filename: original image file name
+            width: requested width
+            height: requested height
+            new_file_path: archive directory the resized file is written under
+
+        Returns:
+            The resized file name (MD5 of the resized bytes plus ``ext``), or None
+            when resizing failed.
+        """
+        with storage.open(
+            models.generate_object_storage_name(checksum, filename),
+            "rb",
+        ) as imgfile:
+            original_content = imgfile.read()
+
+        resized_content = resize_image(original_content, width, height)
+
+        if not resized_content:
+            logging.warning(f"Failed to resize image {filename}. Using original image.")
+            return None
+        resized_checksum = get_resized_image_checksum(resized_content)
+
+        new_img_ref = f"{resized_checksum}{ext}"
+        # setdefault keeps this working even when _get_similar_image has not
+        # already created the per-file entry; a plain index would raise KeyError.
+        self.resized_images_map.setdefault(filename, {})[(width, height)] = new_img_ref
+        self.add_file_to_write(
+            os.path.join(new_file_path, new_img_ref), resized_content
+        )
+        return new_img_ref
+
+    def _process_single_image(
+        self, filename, checksum, ext, width, height, new_file_path
+    ):
+        """
+        Make sure the image for one reference is queued for the archive.
+
+        Args:
+            filename: original image file name
+            checksum: ``filename`` without its extension
+            ext: extension of ``filename``
+            width: requested width, or None when the reference has no size
+            height: requested height, or None when the reference has no size
+            new_file_path: archive directory that image files are written under
+
+        Returns:
+            The file name the reference should use: a reused or newly resized
+            image, or ``filename`` itself when the original is used.
+        """
+        if width is None or height is None:
+            # Without a complete size there is nothing to resize to
+            self._add_original_image(checksum, filename, new_file_path)
+            return filename
+
+        # Try to get similar or create resized image
+        similar_image = self._get_similar_image(filename, width, height)
+        if similar_image:
+            return similar_image
+
+        resized_image = self._resize_image(
+            checksum, ext, filename, width, height, new_file_path
+        )
+        if resized_image:
+            return resized_image
+
+        # Resizing failed, so the reference keeps pointing at the original file;
+        # queue it as well, otherwise the archive would lack the referenced image.
+        self._add_original_image(checksum, filename, new_file_path)
+        return filename
+
+    def _replace_filename_in_match(
+        self, content, img_match, old_filename, new_filename
+    ):
+        """
+        Return ``content`` with ``old_filename`` replaced by ``new_filename``, but
+        only inside the character span covered by ``img_match``.
+        """
+        span_start, span_end = img_match.span()
+        before = content[:span_start]
+        matched = content[span_start:span_end]
+        after = content[span_end:]
+        return "".join((before, matched.replace(old_filename, new_filename), after))
+
+    def _is_valid_image_filename(self, filename):
+        """
+        Check that ``filename`` has an extension and a stem that ``int(stem, 16)``
+        accepts.
+
+        Returns True for a valid name. A name without an extension is logged and
+        rejected with False. A stem that fails to parse is logged as well; the
+        ValueError is then re-raised unless the BRANCH_ENVIRONMENT environment
+        variable equals "master", in which case False is returned.
+        """
+        stem, extension = os.path.splitext(filename)
+
+        if extension == "":
+            logging.warning(
+                "While publishing channel `{}` a filename with no extension was encountered: `{}`".format(
+                    self.channel_id, filename
+                )
+            )
+            return False
+
+        try:
+            int(stem, 16)  # Validate hex checksum
+        except ValueError:
+            logging.warning(
+                "while publishing channel `{}` a filename with an improper checksum was encountered: `{}`".format(
+                    self.channel_id, filename
+                )
+            )
+            on_master = os.environ.get("BRANCH_ENVIRONMENT", "") == "master"
+            if on_master:
+                return False
+            raise
+        return True
+
+ def process_image_strings(self, content):
+ # Rewrites the image references found in `content` (via image_pattern) so that
+ # they point into the archive, queuing each image file through
+ # _process_single_image. Returns (content, image_list), where image_list holds a
+ # {"name", "width", "height"} dict for every reference with explicit dimensions.
+ new_file_path = self.get_image_file_path()
+ new_image_path = f"{self.get_image_ref_prefix()}{new_file_path}"
+ image_list = []
+ processed_files = []
+ for img_match in re.finditer(image_pattern, content):
+ # Add any image files that haven't been written to the zipfile
+ filename = img_match.group(1)
+ width = float(img_match.group(2)) if img_match.group(2) else None
+ height = float(img_match.group(3)) if img_match.group(3) else None
+ checksum, ext = os.path.splitext(filename)
+
+ # Skipped references keep their original filename; only the storage
+ # placeholder is rewritten for them at the end of this method.
+ if not self._is_valid_image_filename(filename):
+ continue
+
+ if width == 0 or height == 0:
+ # Can't resize an image to 0 width or height, so just ignore.
+ continue
+
+ processed_filename = self._process_single_image(
+ filename, checksum, ext, width, height, new_file_path
+ )
+ processed_files.append(
+ (img_match, filename, processed_filename, width, height)
+ )
+
+ # Process matches in reverse order to avoid index mismatch when modifying content
+ for img_match, filename, processed_filename, width, height in reversed(
+ processed_files
+ ):
+ content = self._replace_filename_in_match(
+ content, img_match, filename, processed_filename
+ )
+ # Because this loop runs in reverse, image_list ends up in reverse
+ # order of appearance in the content.
+ if width is not None and height is not None:
+ image_list.append(
+ {"name": processed_filename, "width": width, "height": height}
+ )
+
+ # Point every storage placeholder left in the content at the archive image path
+ content = content.replace(
+ f"${exercises.CONTENT_STORAGE_PLACEHOLDER}", new_image_path
+ )
+ return content, image_list
+
+ def _process_content(self, content):
+     """
+     Run image handling over a piece of content.
+
+     Args:
+         content: The content string to process
+
+     Returns:
+         tuple: Processed content and list of image data, as produced by
+         ``process_image_strings``
+     """
+     processed = self.process_image_strings(content)
+     return processed
+
+ def _sort_by_order(self, items, item_type):
+     """Return ``items`` sorted by each item's "order" value.
+
+     If the order values cannot be compared (TypeError), an error is logged
+     and the original ``items`` object is returned unsorted.
+     """
+     try:
+         ordered = sorted(items, key=lambda entry: entry.get("order"))
+     except TypeError:
+         logging.error(f"Unable to sort {item_type}, leaving unsorted.")
+         return items
+     return ordered
+
+ def _process_answers(self, assessment_item):
+     """Parse, filter and process the answers of an assessment item.
+
+     ``assessment_item.answers`` is decoded as JSON. Blank answers are
+     dropped; string answers are run through ``_process_content`` and get
+     an "images" entry. The surviving answers are returned sorted by
+     their "order" value.
+
+     A numeric ``0`` is a legitimate answer and is kept, even though it is
+     falsy.
+     """
+     answer_data = json.loads(assessment_item.answers)
+     processed_answers = []
+
+     for answer in answer_data:
+         value = answer["answer"]
+         # bool is excluded explicitly because ``False == 0`` in Python.
+         is_numeric_zero = (
+             isinstance(value, (int, float))
+             and not isinstance(value, bool)
+             and value == 0
+         )
+         if not value and not is_numeric_zero:
+             continue
+
+         if isinstance(value, str):
+             answer["answer"], answer["images"] = self._process_content(value)
+
+         processed_answers.append(answer)
+
+     return self._sort_by_order(processed_answers, "answers")
+
+ def _process_hints(self, assessment_item):
+     """Parse and process the hints of an assessment item.
+
+     ``assessment_item.hints`` is decoded as JSON; each hint's text is run
+     through ``_process_content`` and the resulting image list is stored
+     under "images". The hints are returned sorted by their "order" value.
+     """
+     hints = json.loads(assessment_item.hints)
+
+     for entry in hints:
+         processed_text, images = self._process_content(entry["hint"])
+         entry["hint"] = processed_text
+         entry["images"] = images
+
+     return self._sort_by_order(hints, "hints")
+
+ def process_assessment_item(self, assessment_item):
+     """Process one assessment item and queue its rendered file for writing.
+
+     The question, answers and hints are processed first; the resulting
+     context is passed to ``create_assessment_item`` and the file it returns
+     is registered through ``add_file_to_write``.
+     """
+     question, question_images = self._process_content(assessment_item.question)
+     answers = self._process_answers(assessment_item)
+     hints = self._process_hints(assessment_item)
+
+     image_dir = self.get_image_file_path()
+     raw_image_path = f"{exercises.IMG_PLACEHOLDER}/{image_dir}"
+     is_multiple_select = assessment_item.type == exercises.MULTIPLE_SELECTION
+     raw_data = assessment_item.raw_data.replace(
+         exercises.CONTENT_STORAGE_PLACEHOLDER, raw_image_path
+     )
+
+     context = {
+         "question": question,
+         "question_images": question_images,
+         "answers": answers,
+         "multiple_select": is_multiple_select,
+         "raw_data": raw_data,
+         "hints": hints,
+         "randomize": assessment_item.randomize,
+     }
+     filepath, file_content = self.create_assessment_item(assessment_item, context)
+     self.add_file_to_write(filepath, file_content)
+
+ def handle_before_assessment_items(self):
+     """Hook run before the assessment items are processed; does nothing here."""
+     return None
+
+ def handle_after_assessment_items(self):
+     """Hook run after the assessment items are processed; does nothing here."""
+     return None
+
+ def _create_zipfile(self):
+     """Zip every queued file and store the archive as a File on the node.
+
+     Each path in ``self.files_to_write`` is added to the archive under its
+     path relative to ``self.tempdir``. Existing files on the node with this
+     generator's preset are deleted before the new File object is created.
+     """
+     filename = "{0}.{ext}".format(self.ccnode.title, ext=self.file_format)
+     with NamedTemporaryFile(suffix="zip") as tempf:
+         with zipfile.ZipFile(tempf.name, "w") as zf:
+             for file_path in self.files_to_write:
+                 with open(file_path, "rb") as f:
+                     self.write_to_zipfile(
+                         zf,
+                         os.path.relpath(file_path, self.tempdir),
+                         f.read(),
+                     )
+         # The archive was written through a second handle opened by path,
+         # so tempf's own position never moves and tempf.tell() would report
+         # 0. Ask the filesystem for the real size instead.
+         file_size = os.path.getsize(tempf.name)
+
+         self.ccnode.files.filter(preset_id=self.preset).delete()
+
+         # Open the archive in a context manager so the handle is closed
+         # once the File object has been created.
+         with open(tempf.name, "rb") as archive:
+             assessment_file_obj = models.File.objects.create(
+                 file_on_disk=File(archive, name=filename),
+                 contentnode=self.ccnode,
+                 file_format_id=self.file_format,
+                 preset_id=self.preset,
+                 original_filename=filename,
+                 file_size=file_size,
+                 uploaded_by_id=self.user_id,
+             )
+         logging.debug(
+             "Created exercise for {0} with checksum {1}".format(
+                 self.ccnode.title, assessment_file_obj.checksum
+             )
+         )
+
+ def create_exercise_archive(self):
+     """Build the exercise archive for ``self.ccnode``.
+
+     Inside a temporary working directory (stored on ``self.tempdir``) this
+     runs the before-hook, processes every assessment item in "order"
+     order, runs the after-hook and finally creates the zip file.
+     """
+     with TemporaryDirectory() as workdir:
+         self.tempdir = workdir
+         self.handle_before_assessment_items()
+         items = (
+             self.ccnode.assessment_items.prefetch_related("files")
+             .all()
+             .order_by("order")
+         )
+         for item in items:
+             self.process_assessment_item(item)
+         self.handle_after_assessment_items()
+         self._create_zipfile()
diff --git a/contentcuration/contentcuration/utils/assessment/markdown.py b/contentcuration/contentcuration/utils/assessment/markdown.py
new file mode 100644
index 0000000000..c34da5dee1
--- /dev/null
+++ b/contentcuration/contentcuration/utils/assessment/markdown.py
@@ -0,0 +1,135 @@
+import re
+import xml.etree.ElementTree as ET
+
+from latex2mathml.converter import convert
+from markdown_it import MarkdownIt
+from markdown_it.renderer import RendererProtocol
+from markdown_it.rules_block import StateBlock
+from markdown_it.rules_inline import StateInline
+from markdown_it.token import Token
+from markdown_it.utils import EnvType
+from markdown_it.utils import OptionsDict
+
+from contentcuration.utils.assessment.qti.mathml.core import Annotation
+from contentcuration.utils.assessment.qti.mathml.core import Semantics
+
+
+ # Regex patterns for $$ delimited math
+ # Both are applied with .match() to a slice of the source that starts at the
+ # candidate position. The single capture group is the lazily matched text
+ # between the opening and closing $$ and may span newlines.
+ INLINE_PATTERN = re.compile(r"^\$\$([\s\S]+?)\$\$")
+ BLOCK_PATTERN = re.compile(r"^\$\$([\s\S]+?)\$\$", re.M)
+
+
+ def math_inline_func(state: StateInline, silent: bool) -> bool:
+     """Parse inline math: $$expression$$
+
+     Returns False when the text at ``state.pos`` is not a complete
+     ``$$...$$`` span. Otherwise a "math_inline" token holding the inner
+     expression is pushed (unless ``silent``), ``state.pos`` is moved past
+     the closing delimiter and True is returned.
+     """
+     start = state.pos
+     if not state.src.startswith("$$", start):
+         return False
+
+     found = INLINE_PATTERN.match(state.src[start:])
+     if found is None:
+         return False
+
+     if not silent:
+         token = state.push("math_inline", "math", 0)
+         token.content = found.group(1)
+         token.markup = "$$"
+
+     state.pos += found.end()
+     return True
+
+
+ def math_block_func(
+     state: StateBlock, begLine: int, endLine: int, silent: bool
+ ) -> bool:
+     """Parse block math: $$expression$$
+
+     Returns False when line ``begLine`` does not start with ``$$``, when no
+     closing ``$$`` follows, or when the closing ``$$`` lies outside the
+     lines ``begLine`` to ``endLine`` that this call may consume. Otherwise a
+     "math_block" token holding the inner expression is pushed (unless
+     ``silent``), ``state.line`` is set to the line after the closing
+     delimiter and True is returned.
+     """
+     begin = state.bMarks[begLine] + state.tShift[begLine]
+
+     if not state.src.startswith("$$", begin):
+         return False
+
+     match = BLOCK_PATTERN.match(state.src[begin:])
+     if not match:
+         return False
+
+     # Find the line holding the closing delimiter before emitting anything,
+     # so that this rule never reports success without advancing state.line.
+     endpos = begin + match.end() - 1
+     closing_line = next(
+         (
+             line
+             for line in range(begLine, endLine)
+             if state.bMarks[line] <= endpos <= state.eMarks[line]
+         ),
+         None,
+     )
+     if closing_line is None:
+         return False
+
+     if not silent:
+         token = state.push("math_block", "math", 0)
+         token.block = True
+         token.content = match.group(1)
+         token.markup = "$$"
+
+     # Advance to next line after the math block
+     state.line = closing_line + 1
+     return True
+
+
+ def _convert(latex, inline=True):
+     """Convert a LaTeX expression to a MathML string.
+
+     The converted children of the ``math`` element are wrapped in a
+     semantics element, followed by an annotation (encoding
+     "application/x-tex") that carries the original LaTeX source.
+
+     Args:
+         latex: The LaTeX source to convert
+         inline: When True the "inline" display mode is requested,
+             otherwise "block"
+
+     Returns:
+         str: The serialized ``math`` element
+     """
+     # Remove the namespace declaration for cleaner output
+     markup = convert(latex, display="inline" if inline else "block").replace(
+         ' xmlns="http://www.w3.org/1998/Math/MathML"', ""
+     )
+     # By default latex2mathml encodes operators that don't need to be encoded
+     # so we parse it with ElementTree and turn it back into a string here for consistency.
+     math_element = ET.fromstring(markup)
+
+     # Create LaTeX annotation
+     latex_annotation_element = Annotation(
+         encoding="application/x-tex", children=[latex]
+     ).to_element()
+
+     semantics_element = Semantics().to_element()
+     # Iterate over a snapshot: removing children while iterating the
+     # element itself skips every other child.
+     for child in list(math_element):
+         math_element.remove(child)
+         semantics_element.append(child)
+     semantics_element.append(latex_annotation_element)
+     math_element.append(semantics_element)
+
+     return ET.tostring(math_element, encoding="unicode")
+
+
+ def render_math_inline(
+     self: RendererProtocol,
+     tokens: list[Token],
+     idx: int,
+     options: OptionsDict,
+     env: EnvType,
+ ) -> str:
+     """Render the inline math token at ``idx`` to MathML"""
+     token = tokens[idx]
+     return _convert(token.content)
+
+
+ def render_math_block(
+     self: RendererProtocol,
+     tokens: list[Token],
+     idx: int,
+     options: OptionsDict,
+     env: EnvType,
+ ) -> str:
+     """Render the block math token at ``idx`` to MathML"""
+     token = tokens[idx]
+     return _convert(token.content, inline=False)
+
+
+ def texmath_to_mathml_plugin(md: MarkdownIt) -> None:
+     """Simple plugin for parsing TeX math with $$ delimiters.
+
+     Registers the inline rule before "escape" and the block rule before
+     "fence", then installs the render rules that turn the resulting
+     tokens into MathML using latex2mathml.
+     """
+     # Register parsing rules
+     inline_ruler = md.inline.ruler
+     inline_ruler.before("escape", "math_inline", math_inline_func)
+     block_ruler = md.block.ruler
+     block_ruler.before("fence", "math_block", math_block_func)
+
+     # Register renderers
+     md.add_render_rule("math_inline", render_math_inline)
+     md.add_render_rule("math_block", render_math_block)
+
+
+ # Module-level parser: the "gfm-like" preset with the "linkify" rule disabled
+ # and the $$ math plugin installed.
+ md = MarkdownIt("gfm-like").disable("linkify").use(texmath_to_mathml_plugin)
+
+
+ def render_markdown(markdown):
+     """Render ``markdown`` with the module-level parser and return the result."""
+     rendered = md.render(markdown)
+     return rendered
diff --git a/contentcuration/contentcuration/utils/assessment/perseus.py b/contentcuration/contentcuration/utils/assessment/perseus.py
new file mode 100644
index 0000000000..7ba4e1ce6f
--- /dev/null
+++ b/contentcuration/contentcuration/utils/assessment/perseus.py
@@ -0,0 +1,131 @@
+import json
+import re
+import zipfile
+
+from django.core.files.storage import default_storage as storage
+from django.template.loader import render_to_string
+from le_utils.constants import exercises
+from le_utils.constants import file_formats
+from le_utils.constants import format_presets
+
+from contentcuration import models
+from contentcuration.utils.assessment.base import ExerciseArchiveGenerator
+from contentcuration.utils.parser import extract_value
+
+
+ # Matches a $$...$$ span; the lazily captured group is the text between the
+ # delimiters and, because of DOTALL, may include newlines.
+ _DOUBLE_DOLLAR_RE = re.compile(r"\$\$(.+?)\$\$", flags=re.DOTALL)
+
+
+ class PerseusExerciseGenerator(ExerciseArchiveGenerator):
+     """
+     Builds the exercise zip for exercises published in Perseus format.
+     """
+
+     ZIP_DATE_TIME = (2013, 3, 14, 1, 59, 26)
+     ZIP_COMPRESS_TYPE = zipfile.ZIP_STORED
+     ZIP_COMMENT = "Perseus file generated during export process".encode()
+
+     file_format = file_formats.PERSEUS
+     preset = format_presets.EXERCISE
+
+     # Question type -> template rendered for that type.
+     TEMPLATE_MAP = {
+         exercises.MULTIPLE_SELECTION: "perseus/multiple_selection.json",
+         exercises.SINGLE_SELECTION: "perseus/multiple_selection.json",
+         exercises.INPUT_QUESTION: "perseus/input_question.json",
+         exercises.PERSEUS_QUESTION: "perseus/perseus_question.json",
+         "true_false": "perseus/multiple_selection.json",
+     }
+
+     def _write_raw_perseus_image_files(self, assessment_item):
+         """Queue the image and graphie files attached to a raw Perseus item.
+
+         Raises:
+             ValueError: if a graphie file does not split into exactly two
+                 parts on the graphie delimiter.
+         """
+         # For raw perseus JSON questions, the files must be
+         # specified in advance.
+
+         # Files have been prefetched when the assessment item was
+         # queried, so take advantage of that.
+         files = sorted(assessment_item.files.all(), key=lambda x: x.checksum)
+         image_files = [
+             f for f in files if f.preset_id == format_presets.EXERCISE_IMAGE
+         ]
+         graphie_files = [
+             f for f in files if f.preset_id == format_presets.EXERCISE_GRAPHIE
+         ]
+         images_path = self.get_image_file_path()
+
+         for image in image_files:
+             image_name = f"{images_path}/{image.checksum}.{image.file_format_id}"
+             storage_name = models.generate_object_storage_name(
+                 image.checksum, str(image)
+             )
+             with storage.open(storage_name, "rb") as handle:
+                 self.add_file_to_write(image_name, handle.read())
+
+         for graphie in graphie_files:
+             svg_name = f"{images_path}/{graphie.original_filename}.svg"
+             json_name = f"{images_path}/{graphie.original_filename}-data.json"
+             storage_name = models.generate_object_storage_name(
+                 graphie.checksum, str(graphie)
+             )
+             with storage.open(storage_name, "rb") as handle:
+                 payload = handle.read()
+                 # in Python 3, delimiter needs to be in bytes format
+                 parts = payload.split(exercises.GRAPHIE_DELIMITER.encode("ascii"))
+                 if len(parts) != 2:
+                     raise ValueError(
+                         f"Graphie file '{graphie.original_filename}' "
+                         f"missing delimiter {exercises.GRAPHIE_DELIMITER!r}"
+                     )
+                 svg_part, json_part = parts
+                 self.add_file_to_write(svg_name, svg_part)
+                 self.add_file_to_write(json_name, json_part)
+
+     def _process_formulas(self, content):
+         """Rewrite every ``$$...$$`` span in ``content`` as ``$...$``."""
+         return _DOUBLE_DOLLAR_RE.sub(r"$\1$", content)
+
+     def _process_content(self, content):
+         """Convert formula delimiters, then apply the base content processing."""
+         converted = self._process_formulas(content)
+         return super()._process_content(converted)
+
+     def process_assessment_item(self, assessment_item):
+         """Write raw Perseus image files first when needed, then process as usual."""
+         is_raw_perseus = assessment_item.type == exercises.PERSEUS_QUESTION
+         if is_raw_perseus:
+             self._write_raw_perseus_image_files(assessment_item)
+         return super().process_assessment_item(assessment_item)
+
+     def _process_input_answers(self, processed_data):
+         """Extract input answer processing logic
+
+         Each answer is replaced by ``extract_value`` of itself; answers
+         that come out empty are dropped, but a value equal to 0 is kept.
+         """
+         kept = []
+         for answer in processed_data["answers"]:
+             value = extract_value(answer["answer"])
+             answer["answer"] = value
+             if value or value == 0:
+                 kept.append(answer)
+
+         return {**processed_data, "answers": kept}
+
+     def create_assessment_item(self, assessment_item, processed_data):
+         """Render one assessment item to its Perseus JSON file.
+
+         Returns:
+             tuple: the file name and the UTF-8 encoded file content
+
+         Raises:
+             TypeError: if the item's type has no entry in ``TEMPLATE_MAP``
+         """
+         template = self.TEMPLATE_MAP.get(assessment_item.type)
+         if not template:
+             raise TypeError(
+                 f"Unrecognized question type on item {assessment_item.assessment_id}: {assessment_item.type}"
+             )
+
+         # Handle input question special case
+         if assessment_item.type == exercises.INPUT_QUESTION:
+             processed_data = self._process_input_answers(processed_data)
+
+         rendered = render_to_string(template, processed_data)
+         filename = f"{assessment_item.assessment_id}.json"
+         return filename, rendered.encode("utf-8", "ignore")
+
+     def get_image_file_path(self):
+         """Return the directory inside the archive where images are stored."""
+         return "images"
+
+     def get_image_ref_prefix(self):
+         """Return the prefix placed in front of image paths in rendered content."""
+         return f"${exercises.IMG_PLACEHOLDER}/"
+
+     def handle_before_assessment_items(self):
+         """Queue exercise.json, rendered from ``self.exercise_data``."""
+         serialized = json.dumps(self.exercise_data, sort_keys=True, indent=4)
+         exercise_result = render_to_string(
+             "perseus/exercise.json", {"exercise": serialized}
+         ).encode("utf-8")
+         self.add_file_to_write("exercise.json", exercise_result)
diff --git a/contentcuration/contentcuration/utils/assessment/qti/__init__.py b/contentcuration/contentcuration/utils/assessment/qti/__init__.py
new file mode 100644
index 0000000000..c8cb0afb95
--- /dev/null
+++ b/contentcuration/contentcuration/utils/assessment/qti/__init__.py
@@ -0,0 +1,6 @@
+from .base import ElementTreeBase
+
+
+ # Names exported by this package.
+ __all__ = ["ElementTreeBase"]
diff --git a/contentcuration/contentcuration/utils/assessment/qti/archive.py b/contentcuration/contentcuration/utils/assessment/qti/archive.py
new file mode 100644
index 0000000000..4a29f20c84
--- /dev/null
+++ b/contentcuration/contentcuration/utils/assessment/qti/archive.py
@@ -0,0 +1,284 @@
+import base64
+from typing import Any
+from typing import Dict
+from typing import List
+from typing import Tuple
+
+from le_utils.constants import exercises
+from le_utils.constants import format_presets
+
+from contentcuration.utils.assessment.base import ExerciseArchiveGenerator
+from contentcuration.utils.assessment.markdown import render_markdown
+from contentcuration.utils.assessment.qti.assessment_item import AssessmentItem
+from contentcuration.utils.assessment.qti.assessment_item import CorrectResponse
+from contentcuration.utils.assessment.qti.assessment_item import ItemBody
+from contentcuration.utils.assessment.qti.assessment_item import OutcomeDeclaration
+from contentcuration.utils.assessment.qti.assessment_item import ResponseDeclaration
+from contentcuration.utils.assessment.qti.assessment_item import ResponseProcessing
+from contentcuration.utils.assessment.qti.assessment_item import Value
+from contentcuration.utils.assessment.qti.base import ElementTreeBase
+from contentcuration.utils.assessment.qti.constants import BaseType
+from contentcuration.utils.assessment.qti.constants import Cardinality
+from contentcuration.utils.assessment.qti.constants import Orientation
+from contentcuration.utils.assessment.qti.constants import ResourceType
+from contentcuration.utils.assessment.qti.constants import ShowHide
+from contentcuration.utils.assessment.qti.html import Div
+from contentcuration.utils.assessment.qti.html import FlowContentList
+from contentcuration.utils.assessment.qti.html import P
+from contentcuration.utils.assessment.qti.imsmanifest import File as ManifestFile
+from contentcuration.utils.assessment.qti.imsmanifest import Manifest
+from contentcuration.utils.assessment.qti.imsmanifest import Metadata
+from contentcuration.utils.assessment.qti.imsmanifest import Resource
+from contentcuration.utils.assessment.qti.imsmanifest import Resources
+from contentcuration.utils.assessment.qti.interaction_types.simple import (
+ ChoiceInteraction,
+)
+from contentcuration.utils.assessment.qti.interaction_types.simple import SimpleChoice
+from contentcuration.utils.assessment.qti.interaction_types.text_based import (
+ TextEntryInteraction,
+)
+from contentcuration.utils.assessment.qti.prompt import Prompt
+
+
+ # Question types that are exported as a QTI choice interaction.
+ choice_interactions = {
+ exercises.MULTIPLE_SELECTION,
+ exercises.SINGLE_SELECTION,
+ "true_false",
+ }
+ # Question types that are exported as a QTI text entry interaction.
+ text_entry_interactions = {exercises.INPUT_QUESTION, exercises.FREE_RESPONSE}
+
+
+ def hex_to_qti_id(hex_string):
+     """
+     Turn a 32 digit hex string into a QTI id: the letter K followed by the
+     22 character URL-safe base64 encoding of its bytes (padding removed).
+     """
+     raw = bytes.fromhex(hex_string)
+     encoded = base64.urlsafe_b64encode(raw).decode("ascii")
+     return "K" + encoded.rstrip("=")
+
+
+ class QTIExerciseGenerator(ExerciseArchiveGenerator):
+     """
+     Exercise zip generator for QTI format exercises.
+     Creates IMS Content Package with QTI 3.0 assessment items.
+     """
+
+     # Declaration placed at the top of every XML document this generator writes.
+     XML_DECLARATION = '<?xml version="1.0" encoding="UTF-8"?>'
+
+     file_format = "zip"
+     preset = format_presets.QTI_ZIP
+
+     def __init__(self, *args, **kwargs):
+         super().__init__(*args, **kwargs)
+         # Items created so far, in creation order; used to number item
+         # titles and to build the manifest.
+         self.qti_items = []
+
+     def get_image_file_path(self) -> str:
+         """Get the file path for QTI assessment items."""
+         return "items/images"
+
+     def _create_html_content_from_text(self, text: str) -> FlowContentList:
+         """Convert text content to QTI HTML flow content.
+
+         Blank text yields an empty list; anything else is rendered from
+         markdown and parsed into elements.
+         """
+         if not text.strip():
+             return []
+         markup = render_markdown(text)
+         return ElementTreeBase.from_string(markup)
+
+     def _create_choice_interaction_and_response(
+         self, processed_data: Dict[str, Any]
+     ) -> Tuple[ChoiceInteraction, ResponseDeclaration]:
+         """Create a QTI choice interaction for multiple choice questions.
+
+         Choices are identified as ``choice_<index>`` in answer order; the
+         identifiers of answers flagged "correct" form the correct response.
+         """
+
+         prompt = Prompt(
+             children=self._create_html_content_from_text(processed_data["question"])
+         )
+
+         choices = []
+         correct_values = []
+         for i, answer in enumerate(processed_data.get("answers", [])):
+             choice_id = f"choice_{i}"
+             choice_content = self._create_html_content_from_text(
+                 answer.get("answer", "")
+             )
+
+             choice = SimpleChoice(
+                 identifier=choice_id,
+                 children=choice_content,
+                 show_hide=ShowHide.SHOW,
+                 fixed=False,
+             )
+             choices.append(choice)
+
+             if answer.get("correct", False):
+                 correct_values.append(Value(value=choice_id))
+
+         response_declaration = ResponseDeclaration(
+             identifier="RESPONSE",
+             cardinality=Cardinality.MULTIPLE
+             if processed_data["multiple_select"]
+             else Cardinality.SINGLE,
+             base_type=BaseType.IDENTIFIER,
+             correct_response=CorrectResponse(value=correct_values)
+             if correct_values
+             else None,
+         )
+
+         interaction = ChoiceInteraction(
+             response_identifier="RESPONSE",
+             prompt=prompt,
+             answers=choices,
+             shuffle=processed_data.get("randomize", False),
+             max_choices=len(choices) if processed_data["multiple_select"] else 1,
+             min_choices=0,
+             orientation=Orientation.VERTICAL,
+         )
+         return interaction, response_declaration
+
+     def _create_text_entry_interaction_and_response(
+         self, processed_data: Dict[str, Any]
+     ) -> Tuple[Div, ResponseDeclaration]:
+         """Create a text entry interaction, wrapped in a div with the prompt.
+
+         Answers flagged "correct" become the correct response values; no
+         correct response is declared when there are none.
+         """
+         prompt = self._create_html_content_from_text(processed_data["question"])
+         interaction_element = TextEntryInteraction(
+             response_identifier="RESPONSE",
+             expected_length=50,  # Default expected length
+             placeholder_text="Enter your answer here",
+         )
+         # Text entry interaction is an inline element, so wrap it in a paragraph tag.
+         interaction_element = P(children=[interaction_element])
+         # prompt is already a list of elements, so just append the interaction to it.
+         prompt.append(interaction_element)
+         interaction = Div(children=prompt)
+
+         correct_values = [
+             Value(value=answer["answer"])
+             for answer in processed_data["answers"]
+             if answer["correct"]
+         ]
+
+         response_declaration = ResponseDeclaration(
+             identifier="RESPONSE",
+             cardinality=Cardinality.MULTIPLE
+             if processed_data["multiple_select"]
+             else Cardinality.SINGLE,
+             base_type=BaseType.STRING,
+             correct_response=CorrectResponse(value=correct_values)
+             if correct_values
+             else None,
+         )
+         return interaction, response_declaration
+
+     def _qti_item_filepath(self, assessment_id):
+         """Return the archive path of the XML file for ``assessment_id``."""
+         return f"items/{assessment_id}.xml"
+
+     def create_assessment_item(
+         self, assessment_item, processed_data: Dict[str, Any]
+     ) -> tuple[str, bytes]:
+         """Create QTI assessment item XML.
+
+         Returns:
+             tuple: the archive path of the item and its UTF-8 encoded XML
+
+         Raises:
+             ValueError: for Perseus questions and for any question type
+                 that is neither a choice nor a text entry interaction.
+         """
+
+         # Skip Perseus questions as they can't be easily converted
+         if assessment_item.type == exercises.PERSEUS_QUESTION:
+             raise ValueError(
+                 f"Perseus questions are not supported in QTI format: {assessment_item.assessment_id}"
+             )
+
+         if assessment_item.type in choice_interactions:
+             (
+                 interaction,
+                 response_declaration,
+             ) = self._create_choice_interaction_and_response(processed_data)
+         elif assessment_item.type in text_entry_interactions:
+             (
+                 interaction,
+                 response_declaration,
+             ) = self._create_text_entry_interaction_and_response(processed_data)
+         else:
+             raise ValueError(f"Unsupported question type: {assessment_item.type}")
+
+         # Create item body with the interaction
+         item_body = ItemBody(children=[interaction])
+
+         # Create outcome declaration
+         outcome_declaration = OutcomeDeclaration(
+             identifier="SCORE", cardinality=Cardinality.SINGLE, base_type=BaseType.FLOAT
+         )
+
+         # Create response processing
+         response_processing = ResponseProcessing(
+             template="https://purl.imsglobal.org/spec/qti/v3p0/rptemplates/match_correct"
+         )
+
+         language = (
+             self.ccnode.language.lang_code
+             if self.ccnode.language
+             else self.default_language
+         )
+
+         qti_item_id = hex_to_qti_id(assessment_item.assessment_id)
+
+         # Create the assessment item
+         qti_item = AssessmentItem(
+             identifier=qti_item_id,
+             title=f"{self.ccnode.title} {len(self.qti_items) + 1}",
+             language=language,
+             adaptive=False,
+             time_dependent=False,
+             response_declaration=[response_declaration],
+             outcome_declaration=[outcome_declaration],
+             item_body=item_body,
+             response_processing=response_processing,
+         )
+
+         # Store for manifest creation
+         self.qti_items.append(qti_item)
+
+         # Generate XML content
+         xml_content = qti_item.to_xml_string()
+
+         # Add XML declaration and format nicely
+         full_xml = f"{self.XML_DECLARATION}\n{xml_content}"
+
+         filename = self._qti_item_filepath(qti_item_id)
+         return filename, full_xml.encode("utf-8")
+
+     def _create_manifest_resources(self) -> List[Resource]:
+         """Create manifest resources for all QTI items."""
+         resources = []
+
+         for qti_item in self.qti_items:
+             # Get file dependencies (images, etc.)
+             file_dependencies = qti_item.get_file_dependencies()
+
+             # Create file entries
+             qti_item_filepath = self._qti_item_filepath(qti_item.identifier)
+             files = [ManifestFile(href=qti_item_filepath)]
+             for dep in file_dependencies:
+                 files.append(ManifestFile(href=dep))
+
+             resource = Resource(
+                 identifier=qti_item.identifier,
+                 type_=ResourceType.ASSESSMENT_ITEM.value,
+                 href=qti_item_filepath,
+                 files=files,
+             )
+             resources.append(resource)
+
+         return resources
+
+     def _create_imsmanifest(self) -> str:
+         """Build the IMS manifest XML, including its XML declaration."""
+         # Create resources
+         resources = self._create_manifest_resources()
+
+         # Create manifest
+         manifest = Manifest(
+             identifier=hex_to_qti_id(self.ccnode.content_id),
+             version="1.0",
+             metadata=Metadata(schema="QTI Package", schemaversion="3.0.0"),
+             resources=Resources(resources=resources),
+         )
+
+         xml_content = manifest.to_xml_string()
+         return f"{self.XML_DECLARATION}\n{xml_content}"
+
+     def handle_after_assessment_items(self):
+         """Queue imsmanifest.xml and sort the queued file paths."""
+         # Create and write the IMS manifest
+         manifest_xml = self._create_imsmanifest()
+         self.add_file_to_write("imsmanifest.xml", manifest_xml.encode("utf-8"))
+         # Sort all paths to parallel the predictable zip generation logic in ricecooker
+         # and the Kolibri Studio frontend.
+         self.files_to_write = sorted(self.files_to_write)
diff --git a/contentcuration/contentcuration/utils/assessment/qti/assessment_item.py b/contentcuration/contentcuration/utils/assessment/qti/assessment_item.py
new file mode 100644
index 0000000000..830044ae79
--- /dev/null
+++ b/contentcuration/contentcuration/utils/assessment/qti/assessment_item.py
@@ -0,0 +1,237 @@
+from typing import Annotated
+from typing import List
+from typing import Optional
+from typing import Union
+
+from annotated_types import Len
+from pydantic import AnyUrl
+from pydantic import Field
+from pydantic import model_validator
+from pydantic import PositiveInt
+
+from contentcuration.utils.assessment.qti.base import BaseSequence
+from contentcuration.utils.assessment.qti.base import QTIBase
+from contentcuration.utils.assessment.qti.base import TextType
+from contentcuration.utils.assessment.qti.constants import BaseType
+from contentcuration.utils.assessment.qti.constants import Cardinality
+from contentcuration.utils.assessment.qti.constants import ExternalScored
+from contentcuration.utils.assessment.qti.constants import View
+from contentcuration.utils.assessment.qti.fields import BCP47Language
+from contentcuration.utils.assessment.qti.fields import LocalHrefPath
+from contentcuration.utils.assessment.qti.fields import QTIIdentifier
+from contentcuration.utils.assessment.qti.html import BlockContentElement
+from contentcuration.utils.assessment.qti.interaction_types.base import BlockInteraction
+
+
+ class Value(QTIBase):
+ """
+ Represents a single value within a default value, correct response,
+ or other value container.
+
+ For record values, both the field-identifier and base-type attributes
+ are required to identify which field of the record this value belongs to
+ and what type that field is.
+
+ For non-record values (single, multiple, ordered cardinality), these
+ attributes are optional and typically not needed as the base-type is
+ determined by the parent variable declaration.
+ """
+
+ # The record-only requirement is not enforced on this class itself; the
+ # declarations that hold values check it through _validate_value.
+ value: TextType # The actual value content
+ field_identifier: Optional[QTIIdentifier] = None # Required only for record values
+ base_type: Optional[BaseType] = None # Required only for record values
+
+
+ # A list of Value objects constrained to hold at least one entry.
+ ValueType = Annotated[List[Value], Len(min_length=1)]
+
+
+ class CorrectResponse(QTIBase):
+ """Defines the correct response for the interaction."""
+
+ # NOTE(review): the default is an empty list although ValueType requires at
+ # least one entry - confirm whether omitting `value` is meant to be allowed.
+ value: ValueType = Field(default_factory=list)
+
+
+ class DefaultValue(QTIBase):
+ """
+ Defines the default value for a variable. Contains one or more
+ value elements depending on the cardinality of the variable.
+ """
+
+ # NOTE(review): the default is an empty list although ValueType requires at
+ # least one entry - confirm whether omitting `value` is meant to be allowed.
+ value: ValueType = Field(default_factory=list)
+ # Human readable interpretation of the default value
+ interpretation: Optional[str] = None
+
+
+ def _validate_value(self, attribute_name="default_value"):
+     """Check a value container against the declaration's cardinality.
+
+     ``attribute_name`` names the attribute on ``self`` that holds the
+     container; nothing is checked when that attribute is None.
+
+     Raises:
+         ValueError: if a single-cardinality declaration holds more than one
+             value, or a record-cardinality value lacks a field identifier
+             or a base type.
+     """
+     container = getattr(self, attribute_name)
+     if container is None:
+         return
+
+     cardinality = self.cardinality
+     if cardinality == Cardinality.SINGLE:
+         # Single cardinality should have exactly one value
+         if len(container.value) > 1:
+             raise ValueError(
+                 f"Single cardinality cannot have multiple {attribute_name.replace('_', ' ')}s"
+             )
+         return
+
+     if cardinality != Cardinality.RECORD:
+         return
+
+     # Record cardinality requires field identifiers
+     for entry in container.value:
+         if not entry.field_identifier:
+             raise ValueError(
+                 f"Record cardinality requires field_identifier in {attribute_name.replace('_', ' ')}"
+             )
+         if not entry.base_type:
+             raise ValueError(
+                 f"Record cardinality requires base_type in {attribute_name.replace('_', ' ')}"
+             )
+
+
+ class OutcomeDeclaration(QTIBase):
+ """
+ QTI outcome declaration defines an outcome variable, which represents the
+ result of response processing. Outcomes are typically scores but can also
+ be other results such as feedback identifiers or completion status.
+ """
+
+ # Only `identifier` is required; cardinality defaults to single and every
+ # other field defaults to None.
+ identifier: QTIIdentifier
+ cardinality: Cardinality = Cardinality.SINGLE
+ base_type: Optional[BaseType] = None
+ view: Optional[View] = None
+ interpretation: Optional[AnyUrl] = None
+ long_interpretation: Optional[str] = None
+ normal_maximum: Optional[PositiveInt] = None
+ normal_minimum: Optional[float] = None
+ mastery_value: Optional[float] = None
+ external_scored: Optional[ExternalScored] = None
+ variable_identifier_ref: Optional[str] = None
+ default_value: Optional[DefaultValue] = None
+
+ # Runs after field validation and checks `default_value` against the
+ # declared cardinality.
+ @model_validator(mode="after")
+ def validate_cardinality_compatibility(self):
+ _validate_value(self)
+ return self
+
+
+ class ItemBody(QTIBase, BaseSequence):
+ """Contains the content of the assessment item"""
+
+ # Children may be block interactions or block content elements; an item
+ # body with no children is allowed (defaults to an empty list).
+ children: List[Union[BlockInteraction, BlockContentElement]] = Field(
+ default_factory=list
+ )
+
+
+ class ContextDeclaration(QTIBase):
+ """
+ QTI context declaration defines a 'contextual' variable with global scope to
+ an assessment item. Context variables provide contextual information to
+ template processing and response processing, such as candidate information,
+ test information, and environment information.
+ """
+
+ # `identifier` and `cardinality` are required; the rest default to None.
+ identifier: QTIIdentifier
+ cardinality: Cardinality
+ base_type: Optional[BaseType] = None
+ default_value: Optional[DefaultValue] = None
+
+ # Runs after field validation and checks `default_value` against the
+ # declared cardinality.
+ @model_validator(mode="after")
+ def validate_cardinality_compatibility(self):
+ _validate_value(self)
+ return self
+
+
+ class MapEntry(QTIBase):
+ """Entry in a mapping that maps a specific value to a score"""
+
+ # `map_key` and `mapped_value` are required.
+ # Key (usually an identifier)
+ map_key: str
+ # Value to map
+ mapped_value: float
+ # Whether string comparison is case sensitive
+ case_sensitive: bool = False
+
+
+ class Mapping(QTIBase):
+ """Maps response values to scores for partial credit scoring"""
+
+ # Individual value -> score entries; defaults to an empty list.
+ map_entries: List[MapEntry] = Field(default_factory=list)
+ # Score for responses not explicitly mapped
+ default_value: float = 0.0
+ # Lower bound for mapping results
+ lower_bound: Optional[float] = None
+ # Upper bound for mapping results
+ upper_bound: Optional[float] = None
+
+
+ class AreaMapEntry(QTIBase):
+ """Entry in an area mapping that maps a specific area to a score"""
+
+ # All three fields are required. `shape` and `coords` are plain strings.
+ # Shape of the area (rect, circle, poly, default)
+ shape: str
+ # Coordinates defining the area
+ coords: str
+ # Score for responses in this area
+ mapped_value: float
+
+
+ class AreaMapping(QTIBase):
+ """Maps areas to scores for graphical interactions"""
+
+ # Individual area -> score entries; defaults to an empty list.
+ area_map_entries: List[AreaMapEntry] = Field(default_factory=list)
+ # Score for responses not in any defined area
+ default_value: float = 0.0
+ # Lower bound for mapping results
+ lower_bound: Optional[float] = None
+ # Upper bound for mapping results
+ upper_bound: Optional[float] = None
+
+
+ class ResponseDeclaration(QTIBase):
+ """
+ QTI response declaration defines a response variable and optionally its
+ correct response value and/or mapping. Response variables capture candidate
+ interactions with the assessment item's interactions and are used in response
+ processing to determine outcomes.
+ """
+
+ # `identifier`, `cardinality` and `base_type` are required; the correct
+ # response and both mappings are optional.
+ identifier: QTIIdentifier
+ cardinality: Cardinality
+ base_type: BaseType
+ correct_response: Optional[CorrectResponse] = None
+ mapping: Optional[Mapping] = None
+ area_mapping: Optional[AreaMapping] = None
+
+ # Runs after field validation and checks `correct_response` (rather than a
+ # default value) against the declared cardinality.
+ @model_validator(mode="after")
+ def validate_cardinality_compatibility(self):
+ _validate_value(self, "correct_response")
+ return self
+
+
+ class ResponseProcessing(QTIBase):
+ """Represents response processing rules or template reference"""
+
+ # Both fields are optional and default to None.
+ # URI reference to a response processing template
+ template: Optional[AnyUrl] = None
+ # Optional URL that resolves to the template - we additionally enforce that this be local
+ # although this is not required by the QTI spec
+ template_location: Optional[LocalHrefPath] = None
+ # rules deliberately not implemented yet
+
+
+ class AssessmentItem(QTIBase):
+ """Represents a QTI assessment item"""
+
+ # Namespace and schema-location defaults for QTI 3.0 items.
+ # NOTE(review): presumably the double underscore in these names is
+ # serialized as a namespace prefix separator (xmlns:xsi,
+ # xsi:schemaLocation) - confirm against the base class serializer.
+ xmlns: str = "http://www.imsglobal.org/xsd/imsqtiasi_v3p0"
+ xmlns__xsi: str = "http://www.w3.org/2001/XMLSchema-instance"
+ xsi__schemaLocation: str = "http://www.imsglobal.org/xsd/imsqtiasi_v3p0 https://purl.imsglobal.org/spec/qti/v3p0/schema/xsd/imsqti_asiv3p0p1_v1p0.xsd"
+ # `identifier`, `title` and `language` are required; everything else has a default.
+ identifier: QTIIdentifier
+ title: str
+ label: Optional[str] = None
+ adaptive: bool = False
+ time_dependent: Optional[bool] = None
+ language: BCP47Language
+ tool_name: str = "kolibri"
+ tool_version: str = "0.1"
+
+ # Declarations and content of the item; the lists default to empty and the
+ # body and response processing default to None.
+ context_declaration: List[ContextDeclaration] = Field(default_factory=list)
+ response_declaration: List[ResponseDeclaration] = Field(default_factory=list)
+ outcome_declaration: List[OutcomeDeclaration] = Field(default_factory=list)
+ item_body: Optional[ItemBody] = None
+ response_processing: Optional[ResponseProcessing] = None
diff --git a/contentcuration/contentcuration/utils/assessment/qti/base.py b/contentcuration/contentcuration/utils/assessment/qti/base.py
new file mode 100644
index 0000000000..5467654a6b
--- /dev/null
+++ b/contentcuration/contentcuration/utils/assessment/qti/base.py
@@ -0,0 +1,274 @@
+import re
+import xml.etree.ElementTree as ET
+from abc import ABC
+from enum import Enum
+from functools import partial
+from typing import Annotated
+from typing import List
+from typing import Optional
+from typing import Set
+from typing import Type
+from typing import Union
+
+from pydantic import BaseModel
+from pydantic import BeforeValidator
+from pydantic import ConfigDict
+from pydantic import PrivateAttr
+
+from .constants import Dir
+from .fields import entry_pattern as srcset_entry_pattern
+from contentcuration.utils.assessment.qti.fields import BCP47Language
+
+
+class TextNode(BaseModel):
+    """Text content; XMLElement.to_element() folds it into element text or a sibling's tail."""
+
+    text: str
+
+
+class XMLElement(BaseModel, ABC):
+    """Base class for XML elements: frozen pydantic models serialized by to_element()."""
+
+    # Pydantic configuration
+    model_config = ConfigDict(
+        # Prevent extra fields
+        extra="forbid",
+        validate_assignment=True,
+        # Prevent mutations to ensure immutability
+        frozen=True,
+    )
+
+    # Private attributes (not included in Pydantic fields); both are populated by to_element()
+    _file_dependencies: Set[str] = PrivateAttr(default_factory=set)
+    _element: ET.Element = PrivateAttr(default=None)
+
+    @classmethod
+    def element_name(cls):
+        return cls.__name__.lower()
+
+    def to_element(self) -> ET.Element:  # noqa: C901
+        """Build this model's Element once (fields -> attributes/children) and cache it."""
+        # Test against None explicitly: an Element with no children is falsy, so a bare
+        # truth test never reuses cached leaf elements (and ElementTree deprecates it).
+        if self._element is not None:
+            return self._element
+
+        element = ET.Element(self.element_name())
+        self._file_dependencies = set()
+        # Fields are visited in declaration order, which fixes attribute and child order
+        for field_name in self.__class__.model_fields:
+            value = getattr(self, field_name)
+
+            # Skip None values
+            if value is None:
+                continue
+
+            if isinstance(value, (XMLElement, TextNode)):
+                value = [value]
+
+            if isinstance(value, list):
+                if all(isinstance(item, (XMLElement, TextNode)) for item in value):
+                    for item in value:
+                        if isinstance(item, XMLElement):
+                            child_elements = item.to_element()
+                            if not isinstance(child_elements, list):
+                                child_elements = [child_elements]
+                            for child_element in child_elements:
+                                element.append(child_element)
+                            self._file_dependencies |= item._file_dependencies
+                        else:
+                            current_children = list(element)
+                            if current_children:
+                                current_children[-1].tail = (
+                                    current_children[-1].tail or ""
+                                ) + item.text
+                            else:
+                                element.text = (element.text or "") + item.text
+
+                    continue
+                raise ValueError(
+                    "List types should only contain XMLElement or TextNodes"
+                )
+
+            elif isinstance(value, bool):
+                value = str(value).lower()
+
+            elif isinstance(value, Enum):
+                # Enums are written using their underlying value
+                value = value.value
+
+            # Some attribute names are reserved Python keywords or Python builtins
+            # to allow this, we allow a trailing underscore which we strip here.
+            # All attributes use kebab-case, which we can't easily use as field names
+            # so we encode them as snake_case and convert to kebab-case here.
+            # Some attributes also include : which we encode as double underscore.
+            attr_name = field_name.rstrip("_").replace("__", ":").replace("_", "-")
+
+            # Set the attribute
+            element.set(attr_name, str(value))
+
+            if attr_name == "src" or attr_name == "href":
+                self._file_dependencies.add(value)
+            elif attr_name == "srcset":
+                entries = re.findall(srcset_entry_pattern, value)
+                for entry in entries:
+                    # findall yields one tuple of groups per entry; index 0 is the URL
+                    url = entry[0].strip()
+                    self._file_dependencies.add(url)
+
+        self._element = element
+
+        return self._element
+
+    def to_xml_string(self) -> str:
+        """Serialize to_element() as a unicode XML string"""
+        element = self.to_element()
+        return ET.tostring(element, encoding="unicode")
+
+    def get_file_dependencies(self) -> List[str]:
+        # Ensure the element has been processed so that the file dependencies are collected.
+        self.to_element()
+        return list(self._file_dependencies)
+
+
+class QTIBase(XMLElement):
+    """
+    Base for QTI elements; the tag is "qti-" + the kebab-cased class name (e.g. "qti-item-body").
+    """
+
+    @classmethod
+    def element_name(cls):
+        # A dash goes at every lowercase->uppercase boundary of the PascalCase class name
+        kebab = re.sub(r"(?<=[a-z])(?=[A-Z])", "-", cls.__name__).lower()
+        return "qti-" + kebab
+
+
+def coerce_str_to_model(element_type, value: Union[str, XMLElement]) -> XMLElement:
+    """Wrap a bare string as ``element_type(text=...)``; return any other value untouched."""
+    if not isinstance(value, str):
+        return value
+    return element_type(text=value)
+
+
+def generate_coerced_string_type(element_type):
+    # Annotated alias of element_type that first wraps bare strings via coerce_str_to_model
+    coerce_before = BeforeValidator(partial(coerce_str_to_model, element_type))
+    return Annotated[element_type, coerce_before]
+
+
+TextType = generate_coerced_string_type(TextNode)  # TextNode field that also accepts a str
+
+
+class BaseSequence(XMLElement):
+    id_: Optional[str] = None  # trailing "_" is stripped on output -> "id"
+    class_: Optional[str] = None  # -> "class"
+    lang: Optional[BCP47Language] = None
+    # We explicitly do not set the deprecated language value.
+    label: Optional[str] = None
+    # We explicitly do not set the base value.
+    dir_: Optional[Dir] = None  # -> "dir"; the enum's value is what gets written
+
+
+# Pydantic's BaseModel metaclass is only importable from an internal module, so instead of
+# importing it we read it off an existing model class; RegistryMeta derives from it.
+BaseSequenceMetaclass = type(BaseSequence)
+
+
+class RegistryMeta(BaseSequenceMetaclass):
+    """Metaclass that records each class it creates in a registry keyed by element name."""
+
+    def __new__(mcs, name, bases, attrs):
+        cls = super().__new__(mcs, name, bases, attrs)
+        # Create the registry on first use. NOTE(review): hasattr() also sees a registry
+        # inherited from a parent metaclass, so sub-metaclasses would share it - confirm.
+        if not hasattr(mcs, "_registry"):
+            mcs._registry = {}
+
+        element_name = cls.element_name()
+        if element_name in mcs._registry and mcs._registry[element_name] is not cls:
+            raise ValueError(
+                f"Element name '{element_name}' already registered in {mcs.__name__}"
+            )
+        mcs._registry[element_name] = cls
+
+        return cls
+
+    @classmethod
+    def _ensure_registry_complete(cls):
+        """Import the html and mathml packages once so that their classes get registered"""
+        if not hasattr(cls, "_registry_initialized"):
+            # Imported here rather than at module level; importing triggers registration
+            from contentcuration.utils.assessment.qti import html, mathml  # noqa: F401
+
+            cls._registry_initialized = True
+
+    @classmethod
+    def get_class_for_tag(cls, tag_name: str) -> Optional[Type]:
+        """Return the class registered for tag_name, or None when nothing is registered"""
+        cls._ensure_registry_complete()
+        return getattr(cls, "_registry", {}).get(tag_name)
+
+
+class ElementTreeBase(BaseSequence, metaclass=RegistryMeta):
+    """Elements that can be rebuilt from parsed markup through the RegistryMeta tag registry."""
+
+    @classmethod
+    def from_element(cls, element: ET.Element) -> "ElementTreeBase":
+        """Instantiate the class registered for ``element.tag``; ValueError if there is none."""
+        target_class = type(cls).get_class_for_tag(element.tag)
+        if target_class is None:
+            raise ValueError(f"No registered class found for tag: {element.tag}")
+
+        # Convert attributes to field data - Pydantic will handle type coercion
+        field_data = {}
+        for attr_name, attr_value in element.attrib.items():
+            field_name = cls._attr_name_to_field_name(attr_name)
+            field_data[field_name] = attr_value
+
+        # Convert children and text; the key is omitted when there are none
+        children = cls._extract_children(element)
+        if children:
+            field_data["children"] = children
+
+        return target_class(**field_data)
+
+    @classmethod
+    def _attr_name_to_field_name(cls, attr_name: str) -> str:
+        """Convert attribute name to Python field name (kebab-case -> snake_case, ":" -> "__")."""
+        field_name = attr_name.replace(":", "__").replace("-", "_")
+        # Fields for these names carry a trailing underscore. NOTE(review): Source and Object
+        # declare a plain `type` field, so their `type` attribute maps to undeclared `type_`.
+        if field_name in {"class", "for", "type", "id", "dir"}:
+            field_name += "_"
+
+        return field_name
+
+    @classmethod
+    def _extract_children(
+        cls, element: ET.Element
+    ) -> List[Union["ElementTreeBase", TextNode]]:
+        """Extract child elements and text nodes from XML element, in document order"""
+        children = []
+
+        # Leading text: whitespace-only text is dropped, anything else is kept unstripped
+        if element.text and element.text.strip():
+            children.append(TextNode(text=element.text))
+
+        for child_elem in element:
+            children.append(cls.from_element(child_elem))
+            # Add tail text after child element (same whitespace rule as above)
+            if child_elem.tail and child_elem.tail.strip():
+                children.append(TextNode(text=child_elem.tail))
+
+        return children
+
+    @classmethod
+    def from_string(cls, string: str) -> List["ElementTreeBase"]:
+        """Parse a markup fragment and return its top-level elements; ValueError if malformed"""
+        try:
+            # Wrap in a root element to handle multiple top-level elements
+            wrapped_markup = f"<root>{string}</root>"
+            root = ET.fromstring(wrapped_markup)
+            return [cls.from_element(child) for child in root]
+        except ET.ParseError as e:
+            raise ValueError(f"Invalid Markup: {e}") from e
diff --git a/contentcuration/contentcuration/utils/assessment/qti/constants.py b/contentcuration/contentcuration/utils/assessment/qti/constants.py
new file mode 100644
index 0000000000..99ea507af3
--- /dev/null
+++ b/contentcuration/contentcuration/utils/assessment/qti/constants.py
@@ -0,0 +1,69 @@
+from enum import Enum
+
+
+# QTI Constants for Outcome Declarations
+
+
+class Cardinality(Enum):  # annotates the `cardinality` field of declaration models
+    MULTIPLE = "multiple"
+    SINGLE = "single"
+    ORDERED = "ordered"
+    RECORD = "record"
+
+
+class BaseType(Enum):  # annotates the `base_type` field; values are written as-is (camelCase)
+    BOOLEAN = "boolean"
+    DIRECTED_PAIR = "directedPair"
+    DURATION = "duration"
+    FILE = "file"
+    FLOAT = "float"
+    IDENTIFIER = "identifier"
+    INTEGER = "integer"
+    PAIR = "pair"
+    POINT = "point"
+    STRING = "string"
+    URI = "uri"
+
+
+class View(Enum):  # presumably the allowed values of a QTI `view` attribute - confirm
+    AUTHOR = "author"
+    CANDIDATE = "candidate"
+    PROCTOR = "proctor"
+    SCORER = "scorer"
+    TEST_CONSTRUCTOR = "testConstructor"
+    TUTOR = "tutor"
+
+
+class ExternalScored(Enum):  # presumably values of an `external-scored` attribute - confirm
+    EXTERNAL_MACHINE = "externalMachine"
+    HUMAN = "human"
+
+
+class ShowHide(Enum):  # two-valued show/hide switch; its consumers are outside this module
+    SHOW = "show"
+    HIDE = "hide"
+
+
+class Dir(Enum):  # type of BaseSequence.dir_, i.e. the `dir` attribute
+    LTR = "ltr"
+    RTL = "rtl"
+    AUTO = "auto"
+
+
+class Format(Enum):  # presumably the text format of a text-entry style interaction - confirm
+    PLAIN = "plain"
+    PREFORMATTED = "preformatted"
+    XHTML = "xhtml"
+
+
+class Orientation(Enum):  # layout direction values; its consumers are outside this module
+    HORIZONTAL = "horizontal"
+    VERTICAL = "vertical"
+
+
+class ResourceType(Enum):
+    """Resource type identifiers for QTI tests, items and response processing templates"""
+
+    ASSESSMENT_TEST = "imsqti_test_xmlv3p0"
+    ASSESSMENT_ITEM = "imsqti_item_xmlv3p0"
+    RESPONSE_TEMPLATE = "imsqti_rptemplate_xmlv3p0"
diff --git a/contentcuration/contentcuration/utils/assessment/qti/fields.py b/contentcuration/contentcuration/utils/assessment/qti/fields.py
new file mode 100644
index 0000000000..f90b6d30e8
--- /dev/null
+++ b/contentcuration/contentcuration/utils/assessment/qti/fields.py
@@ -0,0 +1,118 @@
+import re
+from typing import Annotated
+from urllib.parse import urlparse
+
+from langcodes import Language as LangCodesLanguage
+from pydantic import BeforeValidator
+from pydantic import Field
+
+
+def validate_bcp47_language(value: str) -> str:
+    """Return ``value`` normalized by langcodes; raise ValueError if it is not a usable tag."""
+    if not isinstance(value, str):
+        raise ValueError(f"BCP47 language tag must be a string, got {type(value)}")
+    if not value:
+        raise ValueError("BCP47 language tag cannot be empty")
+
+    # langcodes parses the tag and re-emits it in normalized form
+    try:
+        normalized_tag = LangCodesLanguage.get(value).to_tag()
+    except ValueError as e:
+        raise ValueError("Invalid BCP47 language tag") from e
+    return normalized_tag
+
+
+BCP47Language = Annotated[str, BeforeValidator(validate_bcp47_language)]
+# Groups: 1 = media type, 2 = last ";attr=value" parameter, 3 = ";base64" flag, 4 = payload
+data_uri_pattern = r"data:(?:([-\w]+/[-+\w.]+)(?:(;[-\w]+=[-\w]+)*))?(;base64)?,(.*)"
+# Anchored form used by validate_data_uri; the bare pattern is also embedded in entry_pattern
+data_uri_regex = re.compile(rf"^{data_uri_pattern}$")
+
+
+def validate_data_uri(value: str) -> str:
+    """
+    Validate data URI format according to RFC 2397.
+    Format: data:[<mediatype>][;base64],<data>
+    """
+    # data_uri_regex is anchored at both ends, so the whole value has to fit the format
+    if data_uri_regex.match(value) is None:
+        raise ValueError(f"Invalid data URI format: {value}")
+
+    # The input is handed back unchanged; validate_local_href_path returns it as its result
+    return value
+
+
+def validate_local_href_path(value: str) -> str:
+    """
+    Accept only references that can resolve inside an offline bundle.
+    Accepted: relative/path.jpg, ../path.jpg, ./file.png, #fragment, and well-formed data: URIs.
+    Rejected: anything with another scheme, a network location, or a leading "/".
+    """
+    parts = urlparse(value)
+    if parts.scheme == "data":
+        # Embedded content is fine as long as the data URI itself is well formed
+        return validate_data_uri(value)
+
+    # A scheme, a host, or a root-anchored path all point outside the bundle
+    is_absolute = bool(parts.scheme or parts.netloc) or parts.path.startswith("/")
+    if is_absolute:
+        raise ValueError(f"Absolute URLs not allowed in bundled content: {value}")
+    return value
+
+
+def validate_local_src_path(value: str) -> str:
+    """
+    Like validate_local_href_path, but the reference must also carry a non-empty path.
+    """
+    checked = validate_local_href_path(value)
+
+    # A bare "#fragment" or "?query" passes the href check but names no file
+    if not urlparse(checked).path:
+        raise ValueError(f"Invalid local src path: {checked}")
+
+    # Relative file paths and data: URIs reach this point
+    return checked
+
+
+# Regex pattern for a single srcset entry (unanchored, so re.findall can list the entries)
+# Matches: (data URI OR run of non-space, non-comma chars) + one or more descriptors (2x, 100w, etc.)
+# Group 1 is the URL part; the groups of data_uri_pattern come after it
+entry_pattern = rf"({data_uri_pattern}|[^\s,]+)(?:\s+\d*\.?\d+[xwh])+"
+# Pattern for complete srcset: one or more entries separated by commas
+srcset_pattern = rf"^{entry_pattern}(?:\s*,\s*{entry_pattern})*$"
+
+
+def validate_local_srcset(value: str) -> str:
+    """Check srcset syntax and that each candidate URL is a local src path; return value as is."""
+    trimmed = value.strip()
+    if not trimmed:
+        return value
+
+    if not re.match(srcset_pattern, trimmed):
+        raise ValueError(f"Invalid srcset format: {value}")
+
+    # entry_pattern contains groups, so findall yields tuples; index 0 is the URL part.
+    # Descriptors were already checked by srcset_pattern, so only URLs need validating.
+    for candidate in re.findall(entry_pattern, value):
+        validate_local_src_path(candidate[0].strip())
+
+    return value
+
+
+# Custom types for HTML attributes: plain str fields whose validator runs before pydantic's own
+LocalHrefPath = Annotated[str, BeforeValidator(validate_local_href_path)]
+LocalSrcPath = Annotated[str, BeforeValidator(validate_local_src_path)]
+LocalSrcSet = Annotated[str, BeforeValidator(validate_local_srcset)]
+
+
+QTIIdentifier = Annotated[
+    str,
+    Field(
+        pattern=r"^[a-zA-Z_][a-zA-Z0-9_\-]{0,31}$",  # one leading char plus at most 31 more
+        min_length=1,
+        max_length=32,
+        description="QTI XML identifier: must start with letter or underscore, "
+        "contain only letters, digits, underscores, and hyphens, "
+        "no colons, max 32 characters",
+    ),
+]
diff --git a/contentcuration/contentcuration/utils/assessment/qti/html/__init__.py b/contentcuration/contentcuration/utils/assessment/qti/html/__init__.py
new file mode 100644
index 0000000000..f28fea09f0
--- /dev/null
+++ b/contentcuration/contentcuration/utils/assessment/qti/html/__init__.py
@@ -0,0 +1,188 @@
+# __init__.py
+from contentcuration.utils.assessment.qti.html.base import BlockContentElement
+from contentcuration.utils.assessment.qti.html.base import FlowContentElement
+from contentcuration.utils.assessment.qti.html.base import HTMLElement
+from contentcuration.utils.assessment.qti.html.base import Source
+from contentcuration.utils.assessment.qti.html.breaks import Br
+from contentcuration.utils.assessment.qti.html.breaks import Hr
+from contentcuration.utils.assessment.qti.html.content_types import FlowContent
+from contentcuration.utils.assessment.qti.html.content_types import FlowContentList
+from contentcuration.utils.assessment.qti.html.content_types import InlineContent
+from contentcuration.utils.assessment.qti.html.content_types import InlineContentList
+from contentcuration.utils.assessment.qti.html.content_types import InlineGroup
+from contentcuration.utils.assessment.qti.html.content_types import InlineGroupList
+from contentcuration.utils.assessment.qti.html.display import Details
+from contentcuration.utils.assessment.qti.html.display import Figcaption
+from contentcuration.utils.assessment.qti.html.display import Figure
+from contentcuration.utils.assessment.qti.html.display import Label
+from contentcuration.utils.assessment.qti.html.display import Summary
+from contentcuration.utils.assessment.qti.html.embed import Img
+from contentcuration.utils.assessment.qti.html.embed import Object
+from contentcuration.utils.assessment.qti.html.embed import Param
+from contentcuration.utils.assessment.qti.html.embed import Picture
+from contentcuration.utils.assessment.qti.html.flow import Address
+from contentcuration.utils.assessment.qti.html.flow import Article
+from contentcuration.utils.assessment.qti.html.flow import Aside
+from contentcuration.utils.assessment.qti.html.flow import Blockquote
+from contentcuration.utils.assessment.qti.html.flow import Div
+from contentcuration.utils.assessment.qti.html.flow import Footer
+from contentcuration.utils.assessment.qti.html.flow import Header
+from contentcuration.utils.assessment.qti.html.flow import Nav
+from contentcuration.utils.assessment.qti.html.flow import Section
+from contentcuration.utils.assessment.qti.html.media import Audio
+from contentcuration.utils.assessment.qti.html.media import Preload
+from contentcuration.utils.assessment.qti.html.media import Track
+from contentcuration.utils.assessment.qti.html.media import TrackKind
+from contentcuration.utils.assessment.qti.html.media import Video
+from contentcuration.utils.assessment.qti.html.sequence import Dd
+from contentcuration.utils.assessment.qti.html.sequence import Dl
+from contentcuration.utils.assessment.qti.html.sequence import Dt
+from contentcuration.utils.assessment.qti.html.sequence import Li
+from contentcuration.utils.assessment.qti.html.sequence import Ol
+from contentcuration.utils.assessment.qti.html.sequence import OlType
+from contentcuration.utils.assessment.qti.html.sequence import Ul
+from contentcuration.utils.assessment.qti.html.table import Caption
+from contentcuration.utils.assessment.qti.html.table import Col
+from contentcuration.utils.assessment.qti.html.table import Colgroup
+from contentcuration.utils.assessment.qti.html.table import Table
+from contentcuration.utils.assessment.qti.html.table import TBody
+from contentcuration.utils.assessment.qti.html.table import Td
+from contentcuration.utils.assessment.qti.html.table import TFoot
+from contentcuration.utils.assessment.qti.html.table import Th
+from contentcuration.utils.assessment.qti.html.table import THead
+from contentcuration.utils.assessment.qti.html.table import ThScope
+from contentcuration.utils.assessment.qti.html.table import Tr
+from contentcuration.utils.assessment.qti.html.table import TrList
+from contentcuration.utils.assessment.qti.html.text import A
+from contentcuration.utils.assessment.qti.html.text import Abbr
+from contentcuration.utils.assessment.qti.html.text import B
+from contentcuration.utils.assessment.qti.html.text import Bdi
+from contentcuration.utils.assessment.qti.html.text import Bdo
+from contentcuration.utils.assessment.qti.html.text import BdoDir
+from contentcuration.utils.assessment.qti.html.text import BlockHTMLText
+from contentcuration.utils.assessment.qti.html.text import Cite
+from contentcuration.utils.assessment.qti.html.text import Code
+from contentcuration.utils.assessment.qti.html.text import Dfn
+from contentcuration.utils.assessment.qti.html.text import Em
+from contentcuration.utils.assessment.qti.html.text import H1
+from contentcuration.utils.assessment.qti.html.text import H2
+from contentcuration.utils.assessment.qti.html.text import H3
+from contentcuration.utils.assessment.qti.html.text import H4
+from contentcuration.utils.assessment.qti.html.text import H5
+from contentcuration.utils.assessment.qti.html.text import H6
+from contentcuration.utils.assessment.qti.html.text import I
+from contentcuration.utils.assessment.qti.html.text import InlineHTMLText
+from contentcuration.utils.assessment.qti.html.text import Kbd
+from contentcuration.utils.assessment.qti.html.text import P
+from contentcuration.utils.assessment.qti.html.text import Pre
+from contentcuration.utils.assessment.qti.html.text import Q
+from contentcuration.utils.assessment.qti.html.text import Rp
+from contentcuration.utils.assessment.qti.html.text import Rt
+from contentcuration.utils.assessment.qti.html.text import Ruby
+from contentcuration.utils.assessment.qti.html.text import Samp
+from contentcuration.utils.assessment.qti.html.text import Small
+from contentcuration.utils.assessment.qti.html.text import Span
+from contentcuration.utils.assessment.qti.html.text import Strong
+from contentcuration.utils.assessment.qti.html.text import Sub
+from contentcuration.utils.assessment.qti.html.text import Sup
+from contentcuration.utils.assessment.qti.html.text import Var
+
+__all__ = [
+ # Base classes
+ "HTMLElement",
+ "FlowContentElement",
+ "BlockContentElement",
+ "InlineHTMLText",
+ "BlockHTMLText",
+ # Content type aliases
+ "FlowContent",
+ "FlowContentList",
+ "InlineContent",
+ "InlineContentList",
+ "InlineGroup",
+ "InlineGroupList",
+ # Breaks
+ "Br",
+ "Hr",
+ # Display elements
+ "Details",
+ "Figcaption",
+ "Figure",
+ "Label",
+ "Summary",
+ # Embedded content
+ "Img",
+ "Object",
+ "Param",
+ "Picture",
+ "Source",
+ # Flow/sectioning content
+ "Address",
+ "Article",
+ "Aside",
+ "Blockquote",
+ "Div",
+ "Footer",
+ "Header",
+ "Nav",
+ "Section",
+ # Media elements and enums
+ "Audio",
+ "Preload",
+ "Track",
+ "TrackKind",
+ "Video",
+ # Lists and sequences
+ "Dd",
+ "Dl",
+ "Dt",
+ "Li",
+ "Ol",
+ "OlType",
+ "Ul",
+ # Tables and related types
+ "Caption",
+ "Col",
+ "Colgroup",
+ "Table",
+ "TBody",
+ "Td",
+ "TFoot",
+ "Th",
+ "THead",
+ "ThScope",
+ "Tr",
+ "TrList",
+ # Text content
+ "A",
+ "Abbr",
+ "B",
+ "Bdi",
+ "Bdo",
+ "BdoDir",
+ "Cite",
+ "Code",
+ "Dfn",
+ "Em",
+ "H1",
+ "H2",
+ "H3",
+ "H4",
+ "H5",
+ "H6",
+ "I",
+ "Kbd",
+ "P",
+ "Pre",
+ "Q",
+ "Rp",
+ "Rt",
+ "Ruby",
+ "Samp",
+ "Small",
+ "Span",
+ "Strong",
+ "Sub",
+ "Sup",
+ "Var",
+]
diff --git a/contentcuration/contentcuration/utils/assessment/qti/html/base.py b/contentcuration/contentcuration/utils/assessment/qti/html/base.py
new file mode 100644
index 0000000000..79dba7cebf
--- /dev/null
+++ b/contentcuration/contentcuration/utils/assessment/qti/html/base.py
@@ -0,0 +1,56 @@
+from typing import List
+from typing import Optional
+
+from pydantic import model_validator
+
+from contentcuration.utils.assessment.qti.base import ElementTreeBase
+from contentcuration.utils.assessment.qti.fields import LocalSrcPath
+from contentcuration.utils.assessment.qti.fields import LocalSrcSet
+
+
+class HTMLElement(ElementTreeBase):
+    """
+    Represents an HTML element within QTI; the base of every HTML element class in this package.
+    """
+
+    @classmethod
+    def from_html_string(cls, html_string: str) -> List["HTMLElement"]:
+        """Alias for from_string(): parse markup into elements; ValueError if it fails to parse"""
+        return cls.from_string(html_string)
+
+
+class FlowContentElement(HTMLElement):  # marker base; member of the FlowContent union
+    pass
+
+
+class InlineContentElement(FlowContentElement):  # marker base; member of InlineContent
+    pass
+
+
+class BlockContentElement(FlowContentElement):  # marker base for the non-inline flow elements
+    pass
+
+
+class Source(HTMLElement):
+    # These attributes are allowed on every <source> element, whatever its parent
+    media: Optional[str] = None
+    type: Optional[str] = None
+
+    # Required if a child of <audio> or <video>
+    # not allowed if a child of <picture>
+    src: Optional[LocalSrcPath] = None
+
+    # Required if a child of <picture>
+    # not allowed if a child of <audio> or <video>
+    srcset: Optional[LocalSrcSet] = None
+
+    sizes: Optional[str] = None
+    height: Optional[int] = None
+    width: Optional[int] = None
+
+    @model_validator(mode="after")
+    def _check_src_and_srcset(self):
+        # Fails when both are None or both are set; the parent element is not consulted
+        if (self.src is None) == (self.srcset is None):
+            raise ValueError("Exactly one of 'src' or 'srcset' must be specified")
+        return self
diff --git a/contentcuration/contentcuration/utils/assessment/qti/html/breaks.py b/contentcuration/contentcuration/utils/assessment/qti/html/breaks.py
new file mode 100644
index 0000000000..c7cb0051b1
--- /dev/null
+++ b/contentcuration/contentcuration/utils/assessment/qti/html/breaks.py
@@ -0,0 +1,10 @@
+from contentcuration.utils.assessment.qti.html.base import BlockContentElement
+from contentcuration.utils.assessment.qti.html.base import InlineContentElement
+
+
+class Br(InlineContentElement):  # tag "br"; no fields beyond the inherited ones
+    pass
+
+
+class Hr(BlockContentElement):  # tag "hr"; no fields beyond the inherited ones
+    pass
diff --git a/contentcuration/contentcuration/utils/assessment/qti/html/content_types.py b/contentcuration/contentcuration/utils/assessment/qti/html/content_types.py
new file mode 100644
index 0000000000..b906caabdd
--- /dev/null
+++ b/contentcuration/contentcuration/utils/assessment/qti/html/content_types.py
@@ -0,0 +1,44 @@
+from typing import List
+from typing import Union
+
+from contentcuration.utils.assessment.qti.base import TextType
+from contentcuration.utils.assessment.qti.html.base import FlowContentElement
+from contentcuration.utils.assessment.qti.html.base import InlineContentElement
+from contentcuration.utils.assessment.qti.interaction_types.base import BlockInteraction
+from contentcuration.utils.assessment.qti.interaction_types.base import (
+ InlineInteraction,
+)
+from contentcuration.utils.assessment.qti.mathml import Math
+
+
+FlowContent = Union[FlowContentElement, TextType]  # TextType also accepts a bare str
+FlowContentList = List[FlowContent]
+InlineContent = Union[InlineContentElement, TextType]
+InlineContentList = List[InlineContent]
+InlineGroup = Union[
+    InlineContentElement,
+    InlineInteraction,
+    # Not implemented yet:
+    # Hottext,
+    # PrintedVariable,
+    # Gap,
+    # FeedbackInline,
+    # TemplateInline,
+    # Covered by InlineInteraction once implemented, as these three should derive from it:
+    # InlineChoiceInteraction,
+    # EndAttemptInteraction,
+    # CustomInteraction,
+    Math,
+    # Include,
+]
+
+InlineGroupList = List[Union[InlineGroup, TextType]]
+
+FlowGroup = Union[  # flow elements plus both interaction kinds and MathML
+    FlowContentElement,
+    BlockInteraction,
+    InlineInteraction,
+    Math,
+]
+
+FlowGroupList = List[Union[FlowGroup, TextType]]
diff --git a/contentcuration/contentcuration/utils/assessment/qti/html/display.py b/contentcuration/contentcuration/utils/assessment/qti/html/display.py
new file mode 100644
index 0000000000..35dbb76e25
--- /dev/null
+++ b/contentcuration/contentcuration/utils/assessment/qti/html/display.py
@@ -0,0 +1,68 @@
+from typing import List
+from typing import Optional
+from typing import Union
+
+from pydantic import Field
+from pydantic import field_validator
+
+from contentcuration.utils.assessment.qti.base import TextType
+from contentcuration.utils.assessment.qti.html.base import FlowContentElement
+from contentcuration.utils.assessment.qti.html.base import HTMLElement
+from contentcuration.utils.assessment.qti.html.content_types import FlowContent
+from contentcuration.utils.assessment.qti.html.content_types import FlowContentList
+from contentcuration.utils.assessment.qti.html.text import BlockHTMLText
+from contentcuration.utils.assessment.qti.html.text import InlineHTMLText
+
+
+class Label(InlineHTMLText):
+    for_: Optional[str] = None  # written as the "for" attribute (trailing "_" is stripped)
+
+
+class Summary(HTMLElement):
+    # Note that this is currently unnecessarily loose - <summary> elements should only
+    # contain phrasing content and headers.
+    children: List[Union[BlockHTMLText, InlineHTMLText, TextType]] = Field(
+        default_factory=list
+    )
+
+
+class Details(FlowContentElement):
+    open: Optional[bool] = None
+    children: List[Union[FlowContent, Summary]] = Field(default_factory=list)
+
+    @field_validator("children", mode="after")
+    def validate_summary_position(cls, children):
+        """Require exactly one Summary, and require it to be the first child."""
+        if not children:
+            raise ValueError(
+                "Details element must contain at least one Summary element"
+            )
+
+        if not isinstance(children[0], Summary):
+            raise ValueError("Details element must have a Summary as the first child")
+
+        # The first child is a Summary, so any Summary among the rest is one too many
+        if any(isinstance(child, Summary) for child in children[1:]):
+            raise ValueError("Details element may contain at most one Summary element")
+        return children
+
+
+class Figcaption(HTMLElement):  # not flow content itself; Figure lists it explicitly
+    children: FlowContentList = Field(default_factory=list)
+
+
+class Figure(FlowContentElement):
+    children: List[Union[Figcaption, FlowContent]] = Field(default_factory=list)
+
+    @field_validator("children", mode="after")
+    def validate_figcaption_position(cls, children):
+        """Allow at most one Figcaption, and only as the first or the last child."""
+        caption_positions = [
+            index for index, child in enumerate(children) if isinstance(child, Figcaption)
+        ]
+        if len(caption_positions) > 1:
+            raise ValueError("Figure may contain at most one Figcaption")
+        # With a single caption, its index must be one of the two ends of the list
+        if caption_positions and caption_positions[0] not in (0, len(children) - 1):
+            raise ValueError("Figcaption must be the first or last child of Figure")
+        return children
diff --git a/contentcuration/contentcuration/utils/assessment/qti/html/embed.py b/contentcuration/contentcuration/utils/assessment/qti/html/embed.py
new file mode 100644
index 0000000000..499f68ac0e
--- /dev/null
+++ b/contentcuration/contentcuration/utils/assessment/qti/html/embed.py
@@ -0,0 +1,54 @@
+from typing import List
+from typing import Optional
+
+from pydantic import Field
+
+from contentcuration.utils.assessment.qti.fields import LocalSrcPath
+from contentcuration.utils.assessment.qti.fields import LocalSrcSet
+from contentcuration.utils.assessment.qti.html.base import HTMLElement
+from contentcuration.utils.assessment.qti.html.base import InlineContentElement
+from contentcuration.utils.assessment.qti.html.base import Source
+from contentcuration.utils.assessment.qti.html.content_types import FlowContentList
+
+
+class Img(InlineContentElement):  # Void element
+    alt: str  # required
+    src: LocalSrcPath  # required; to_element() records it as a file dependency
+    srcset: Optional[LocalSrcSet] = None  # each candidate URL is recorded as a dependency too
+    sizes: Optional[str] = None
+    crossorigin: Optional[str] = None  # "anonymous", "use-credentials"
+    usemap: Optional[str] = None
+    ismap: Optional[bool] = None
+    width: Optional[int] = None
+    height: Optional[int] = None
+    longdesc: Optional[str] = None
+
+
+class Param(HTMLElement):  # tag "param"; listed in Object.params
+    name: str
+    value: str
+
+
+class Object(InlineContentElement):
+    @classmethod
+    def element_name(cls):
+        return "object"  # pins the tag; equals the inherited default for this class
+
+    data: Optional[str] = None
+    type: Optional[str] = None  # MIME type of data
+    name: Optional[str] = None  # For form submission
+    usemap: Optional[str] = None
+    width: Optional[
+        str
+    ] = None  # String to allow percentages e.g., "100%" or pixels "300"
+    height: Optional[str] = None
+    # Children: 0+ <param> elements, then transparent content (flow or phrasing depending on context)
+    # For simplicity, allowing FlowContent here.
+    params: List[Param] = Field(default_factory=list)
+    children: FlowContentList = Field(default_factory=list)
+
+
+class Picture(InlineContentElement):  # Contains <source> and <img>
+    # Children: 0+ <source> elements, then one <img> element.
+    children: List[Source] = Field(default_factory=list)
+    img: Img
diff --git a/contentcuration/contentcuration/utils/assessment/qti/html/flow.py b/contentcuration/contentcuration/utils/assessment/qti/html/flow.py
new file mode 100644
index 0000000000..e191fa7b95
--- /dev/null
+++ b/contentcuration/contentcuration/utils/assessment/qti/html/flow.py
@@ -0,0 +1,56 @@
+from typing import Optional
+
+from pydantic import Field
+from pydantic import HttpUrl
+
+from contentcuration.utils.assessment.qti.html.base import BlockContentElement
+from contentcuration.utils.assessment.qti.html.content_types import FlowGroupList
+
+
+class HTMLFlowContainer(BlockContentElement):
+    """
+    Base class for HTML elements that can contain flow content
+    (block-level and inline elements, interactions, MathML and text - see FlowGroupList).
+    Corresponds to HTML "Flow Content" category.
+    """
+
+    children: FlowGroupList = Field(default_factory=list)
+
+
+class Blockquote(HTMLFlowContainer):
+    cite: Optional[HttpUrl] = None  # an absolute http(s) URL, unlike the local src/href fields
+
+
+class Div(HTMLFlowContainer):  # tag "div" (element_name() lowercases the class name)
+    pass
+
+
+class Article(HTMLFlowContainer):  # tag "article"
+    pass
+
+
+class Section(HTMLFlowContainer):  # tag "section"
+    pass
+
+
+class Nav(HTMLFlowContainer):  # tag "nav"
+    pass
+
+
+class Aside(HTMLFlowContainer):  # tag "aside"
+    pass
+
+
+class Header(HTMLFlowContainer):  # tag "header"
+    pass
+
+
+class Footer(HTMLFlowContainer):  # tag "footer"
+    pass
+
+
+class Address(HTMLFlowContainer):  # tag "address"
+    pass
+
+
+# SSMLGroup not implemented
diff --git a/contentcuration/contentcuration/utils/assessment/qti/html/media.py b/contentcuration/contentcuration/utils/assessment/qti/html/media.py
new file mode 100644
index 0000000000..67bb65cb89
--- /dev/null
+++ b/contentcuration/contentcuration/utils/assessment/qti/html/media.py
@@ -0,0 +1,61 @@
+from enum import Enum
+from typing import List
+from typing import Optional
+from typing import Union
+
+from pydantic import Field
+
+from contentcuration.utils.assessment.qti.fields import BCP47Language
+from contentcuration.utils.assessment.qti.fields import LocalSrcPath
+from contentcuration.utils.assessment.qti.html.base import BlockContentElement
+from contentcuration.utils.assessment.qti.html.base import HTMLElement
+from contentcuration.utils.assessment.qti.html.base import Source
+from contentcuration.utils.assessment.qti.html.content_types import FlowContent
+
+
+ class TrackKind(Enum):
+ """Allowed values for the ``kind`` field of :class:`Track`."""
+
+ SUBTITLES = "subtitles"
+ CAPTIONS = "captions"
+ DESCRIPTIONS = "descriptions"
+ CHAPTERS = "chapters"
+ METADATA = "metadata"
+
+
+ class Track(HTMLElement):
+ """HTML ``<track>`` element, usable as a child of :class:`Audio` and :class:`Video`."""
+
+ # Required; typed as LocalSrcPath (imported from qti.fields).
+ src: LocalSrcPath
+ # Falls back to subtitles when no kind is given.
+ kind: TrackKind = TrackKind.SUBTITLES
+ srclang: Optional[BCP47Language] = None
+ label: Optional[str] = None
+ default: Optional[bool] = None
+
+
+ class Preload(Enum):
+ """Allowed values for the ``preload`` field of :class:`Audio` and :class:`Video`."""
+
+ NONE = "none"
+ METADATA = "metadata"
+ AUTO = "auto"
+
+
+ class Audio(BlockContentElement):
+ """HTML ``<audio>`` element with its playback attributes and mixed children."""
+
+ src: Optional[LocalSrcPath] = None
+ controls: Optional[bool] = None
+ autoplay: Optional[bool] = None
+ loop: Optional[bool] = None
+ muted: Optional[bool] = None
+ # Unlike the other attributes, preload always has a value (defaults to "metadata").
+ preload: Preload = Preload.METADATA
+ # Children: If src attribute is NOT set, 0+ <source>, then 0+ <track>, then transparent content (fallback).
+ # If src IS set, 0+ <track>, then transparent content (fallback).
+ # For simplicity, using a broader model here. Can be refined.
+ # The ordering rules above are therefore NOT enforced by this type.
+ children: List[Union[Source, Track, FlowContent]] = Field(default_factory=list)
+
+
+ class Video(BlockContentElement): # Similar children model to Audio
+ """HTML ``<video>`` element: the Audio attribute set plus poster and dimensions."""
+
+ src: Optional[LocalSrcPath] = None
+ controls: Optional[bool] = None
+ autoplay: Optional[bool] = None
+ loop: Optional[bool] = None
+ muted: Optional[bool] = None
+ poster: Optional[str] = None # URL of an image to show before video loads
+ # Always has a value (defaults to "metadata").
+ preload: Preload = Preload.METADATA
+ width: Optional[str] = None # String for pixels or percentages
+ height: Optional[str] = None
+ # Same permissive child model as Audio: sources, tracks and fallback content in any order.
+ children: List[Union[Source, Track, FlowContent]] = Field(default_factory=list)
diff --git a/contentcuration/contentcuration/utils/assessment/qti/html/sequence.py b/contentcuration/contentcuration/utils/assessment/qti/html/sequence.py
new file mode 100644
index 0000000000..ef81c17d00
--- /dev/null
+++ b/contentcuration/contentcuration/utils/assessment/qti/html/sequence.py
@@ -0,0 +1,48 @@
+from enum import Enum
+from typing import List
+from typing import Optional
+from typing import Union
+
+from pydantic import Field
+
+from contentcuration.utils.assessment.qti.html.base import BlockContentElement
+from contentcuration.utils.assessment.qti.html.base import HTMLElement
+from contentcuration.utils.assessment.qti.html.content_types import FlowContent
+from contentcuration.utils.assessment.qti.html.content_types import FlowContentList
+
+
+ class Li(HTMLElement):
+ """HTML ``<li>`` list item; the only child type accepted by :class:`Ol` and :class:`Ul`."""
+
+ value: Optional[int] = None
+ children: FlowContentList = Field(default_factory=list)
+
+
+ class OlType(Enum):
+ """Allowed values for the ``type`` field of :class:`Ol` (list marker style)."""
+
+ NUMBERS = "1"
+ LOWERCASE_LETTERS = "a"
+ UPPERCASE_LETTERS = "A"
+ LOWERCASE_ROMAN = "i"
+ UPPERCASE_ROMAN = "I"
+
+
+ class Ol(BlockContentElement):
+ """HTML ``<ol>`` ordered list whose children are restricted to :class:`Li`."""
+
+ reversed: Optional[bool] = None
+ start: Optional[int] = None
+ # Always has a value (defaults to decimal numbers).
+ type: OlType = OlType.NUMBERS
+ children: List[Li] = Field(default_factory=list)
+
+
+ class Ul(BlockContentElement):
+ """HTML ``<ul>`` unordered list whose children are restricted to :class:`Li`."""
+
+ children: List[Li] = Field(default_factory=list)
+
+
+ class Dt(HTMLElement):
+ """HTML ``<dt>`` term inside a :class:`Dl`."""
+
+ # There are restrictions on allowed descendants
+ # (those restrictions are not expressed by the FlowContentList type used here).
+ children: FlowContentList = Field(default_factory=list)
+
+
+ class Dd(HTMLElement):
+ """HTML ``<dd>`` description inside a :class:`Dl`."""
+
+ children: FlowContentList = Field(default_factory=list)
+
+
+ class Dl(BlockContentElement):
+ """HTML ``<dl>`` description list."""
+
+ # Accepts general flow content in addition to Dt/Dd; no ordering or grouping is enforced.
+ children: List[Union[FlowContent, Dt, Dd]] = Field(default_factory=list)
diff --git a/contentcuration/contentcuration/utils/assessment/qti/html/table.py b/contentcuration/contentcuration/utils/assessment/qti/html/table.py
new file mode 100644
index 0000000000..fe5be0e584
--- /dev/null
+++ b/contentcuration/contentcuration/utils/assessment/qti/html/table.py
@@ -0,0 +1,72 @@
+from enum import Enum
+from typing import List
+from typing import Optional
+from typing import Union
+
+from pydantic import Field
+
+from contentcuration.utils.assessment.qti.html.base import BlockContentElement
+from contentcuration.utils.assessment.qti.html.base import HTMLElement
+from contentcuration.utils.assessment.qti.html.content_types import FlowContentList
+
+
+ class Caption(HTMLElement):
+ """HTML ``<caption>`` element of a :class:`Table`."""
+
+ children: FlowContentList = Field(default_factory=list)
+
+
+ class Col(HTMLElement): # Void element
+ """HTML ``<col>`` element; declares no children field."""
+
+ # Always has a value (defaults to 1).
+ span: int = 1
+
+
+ class Colgroup(HTMLElement):
+ """HTML ``<colgroup>`` element whose children are restricted to :class:`Col`."""
+
+ span: Optional[int] = None
+ children: List[Col] = Field(default_factory=list)
+
+
+ class Td(HTMLElement):
+ """HTML ``<td>`` data cell."""
+
+ colspan: Optional[int] = None
+ rowspan: Optional[int] = None
+ headers: Optional[str] = None
+ children: FlowContentList = Field(default_factory=list)
+
+
+ class ThScope(Enum):
+ """Allowed values for the ``scope`` field of :class:`Th`."""
+
+ ROW = "row"
+ COL = "col"
+ ROWGROUP = "rowgroup"
+ COLGROUP = "colgroup"
+ AUTO = "auto"
+
+
+ class Th(HTMLElement):
+ """HTML ``<th>`` header cell: the :class:`Td` fields plus ``scope`` and ``abbr``."""
+
+ colspan: Optional[int] = None
+ rowspan: Optional[int] = None
+ headers: Optional[str] = None
+ scope: Optional[ThScope] = None
+ abbr: Optional[str] = None
+ children: FlowContentList = Field(default_factory=list)
+
+
+ class Tr(HTMLElement):
+ """HTML ``<tr>`` row whose children are header or data cells."""
+
+ children: List[Union[Th, Td]] = Field(default_factory=list)
+
+
+ # Shared child type for the row-group elements below.
+ TrList = List[Tr]
+
+
+ class TBody(HTMLElement):
+ """HTML ``<tbody>`` row group."""
+
+ children: TrList = Field(default_factory=list)
+
+
+ class THead(HTMLElement):
+ """HTML ``<thead>`` row group."""
+
+ children: TrList = Field(default_factory=list)
+
+
+ class TFoot(HTMLElement):
+ """HTML ``<tfoot>`` row group."""
+
+ children: TrList = Field(default_factory=list)
+
+
+ class Table(BlockContentElement):
+ """HTML ``<table>`` element."""
+
+ # Any mix of caption, column groups, row groups and bare rows is accepted;
+ # the type does not enforce their order or multiplicity.
+ children: List[Union[Caption, Colgroup, THead, TBody, TFoot, Tr]] = Field(
+ default_factory=list
+ )
diff --git a/contentcuration/contentcuration/utils/assessment/qti/html/text.py b/contentcuration/contentcuration/utils/assessment/qti/html/text.py
new file mode 100644
index 0000000000..9e69d390bc
--- /dev/null
+++ b/contentcuration/contentcuration/utils/assessment/qti/html/text.py
@@ -0,0 +1,151 @@
+from enum import Enum
+from typing import List
+from typing import Optional
+from typing import Union
+
+from pydantic import AnyUrl
+from pydantic import Field
+
+from contentcuration.utils.assessment.qti.base import TextType
+from contentcuration.utils.assessment.qti.fields import LocalHrefPath
+from contentcuration.utils.assessment.qti.html.base import BlockContentElement
+from contentcuration.utils.assessment.qti.html.base import InlineContentElement
+from contentcuration.utils.assessment.qti.html.content_types import InlineGroupList
+
+
+ class InlineHTMLText(InlineContentElement):
+ """Base for inline elements whose children are an InlineGroupList."""
+
+ children: InlineGroupList = Field(default_factory=list)
+
+
+ class BlockHTMLText(BlockContentElement):
+ """Base for block elements whose children are an InlineGroupList."""
+
+ children: InlineGroupList = Field(default_factory=list)
+
+
+ class A(InlineHTMLText):
+ """HTML ``<a>`` element with a required ``href``."""
+
+ # Required; typed as LocalHrefPath (imported from qti.fields).
+ href: LocalHrefPath
+ # NOTE(review): the trailing underscore presumably maps to the ``type`` attribute
+ # while avoiding the builtin name - confirm against the base class serializer.
+ type_: Optional[str] = None
+
+
+ class P(BlockHTMLText):
+ """HTML ``<p>`` element; adds nothing to BlockHTMLText."""
+
+ pass
+
+
+ class Span(InlineHTMLText):
+ """HTML ``<span>`` element; adds nothing to InlineHTMLText."""
+
+ pass
+
+
+ class H1(BlockHTMLText):
+ """HTML ``<h1>`` heading; adds nothing to BlockHTMLText."""
+
+ pass
+
+
+ class H2(BlockHTMLText):
+ """HTML ``<h2>`` heading; adds nothing to BlockHTMLText."""
+
+ pass
+
+
+ class H3(BlockHTMLText):
+ """HTML ``<h3>`` heading; adds nothing to BlockHTMLText."""
+
+ pass
+
+
+ class H4(BlockHTMLText):
+ """HTML ``<h4>`` heading; adds nothing to BlockHTMLText."""
+
+ pass
+
+
+ class H5(BlockHTMLText):
+ """HTML ``<h5>`` heading; adds nothing to BlockHTMLText."""
+
+ pass
+
+
+ class H6(BlockHTMLText):
+ """HTML ``<h6>`` heading; adds nothing to BlockHTMLText."""
+
+ pass
+
+
+ class Pre(BlockHTMLText):
+ """HTML ``<pre>`` element; adds nothing to BlockHTMLText."""
+
+ pass
+
+
+ class Em(InlineHTMLText):
+ """HTML ``<em>`` element; adds nothing to InlineHTMLText."""
+
+ pass
+
+
+ class Code(InlineHTMLText):
+ """HTML ``<code>`` element; adds nothing to InlineHTMLText."""
+
+ pass
+
+
+ class Kbd(InlineHTMLText):
+ """HTML ``<kbd>`` element; adds nothing to InlineHTMLText."""
+
+ pass
+
+
+ class I(InlineHTMLText): # noqa: E742
+ """HTML ``<i>`` element; the single-letter class name is why E742 is silenced."""
+
+ pass
+
+
+ class Dfn(InlineHTMLText):
+ """HTML ``<dfn>`` element; adds nothing to InlineHTMLText."""
+
+ pass
+
+
+ class Abbr(InlineHTMLText):
+ """HTML ``<abbr>`` element; adds nothing to InlineHTMLText."""
+
+ pass
+
+
+ class Strong(InlineHTMLText):
+ """HTML ``<strong>`` element; adds nothing to InlineHTMLText."""
+
+ pass
+
+
+ class Sup(InlineHTMLText):
+ """HTML ``<sup>`` element; adds nothing to InlineHTMLText."""
+
+ pass
+
+
+ class Sub(InlineHTMLText):
+ """HTML ``<sub>`` element; adds nothing to InlineHTMLText."""
+
+ pass
+
+
+ class Var(InlineHTMLText):
+ """HTML ``<var>`` element; adds nothing to InlineHTMLText."""
+
+ pass
+
+
+ class Small(InlineHTMLText):
+ """HTML ``<small>`` element; adds nothing to InlineHTMLText."""
+
+ pass
+
+
+ class Samp(InlineHTMLText):
+ """HTML ``<samp>`` element; adds nothing to InlineHTMLText."""
+
+ pass
+
+
+ class B(InlineHTMLText):
+ """HTML ``<b>`` element; adds nothing to InlineHTMLText."""
+
+ pass
+
+
+ class Cite(InlineHTMLText):
+ """HTML ``<cite>`` element; adds nothing to InlineHTMLText."""
+
+ pass
+
+
+ class Q(InlineHTMLText):
+ """HTML ``<q>`` inline quotation with an optional ``cite`` URL."""
+
+ # Typed as AnyUrl here, whereas Blockquote.cite in html/flow.py uses HttpUrl.
+ cite: Optional[AnyUrl] = None
+
+
+ class BdoDir(Enum):
+ """Allowed values for the ``dir`` field of :class:`Bdo`."""
+
+ LTR = "ltr"
+ RTL = "rtl"
+
+
+ class Bdo(InlineHTMLText):
+ """HTML ``<bdo>`` element; ``dir`` is required."""
+
+ dir: BdoDir
+
+
+ class Bdi(InlineHTMLText):
+ """HTML ``<bdi>`` element; adds nothing to InlineHTMLText."""
+
+ pass
+
+
+ class Rt(InlineHTMLText):
+ """HTML ``<rt>`` element, accepted as a child of :class:`Ruby`."""
+
+ pass
+
+
+ class Rp(InlineContentElement):
+ """HTML ``<rp>`` element; holds a single required ``text`` value instead of children."""
+
+ text: TextType
+
+
+ class Ruby(InlineContentElement):
+ """HTML ``<ruby>`` element whose children are Rt, Rp or text."""
+
+ @classmethod
+ def element_name(cls):
+ # Explicitly pins the element name to "ruby".
+ # NOTE(review): why the inherited element_name() is not sufficient is not visible here - confirm.
+ return "ruby"
+
+ children: List[Union[Rt, Rp, TextType]] = Field(default_factory=list)
diff --git a/contentcuration/contentcuration/utils/assessment/qti/imsmanifest.py b/contentcuration/contentcuration/utils/assessment/qti/imsmanifest.py
new file mode 100644
index 0000000000..cdb19ba772
--- /dev/null
+++ b/contentcuration/contentcuration/utils/assessment/qti/imsmanifest.py
@@ -0,0 +1,177 @@
+import re
+import zipfile
+from typing import Annotated
+from typing import List
+from typing import Optional
+from xml.etree import ElementTree as ET
+
+from pydantic import Field
+
+from contentcuration.utils.assessment.qti.base import generate_coerced_string_type
+from contentcuration.utils.assessment.qti.base import TextType
+from contentcuration.utils.assessment.qti.base import XMLElement
+from contentcuration.utils.assessment.qti.constants import ResourceType
+
+
+ # Constrained string used for every identifier / identifierref field in this module.
+ # The pattern requires an ASCII letter or underscore first, followed by ASCII letters,
+ # digits, underscores, dots or hyphens (an ASCII-only subset of what NCName permits).
+ IMSCPIdentifier = Annotated[
+ str,
+ Field(
+ pattern=r"^[a-zA-Z_][a-zA-Z0-9_.-]*$",
+ min_length=1,
+ description="Resource identifier following XML NCName rules",
+ ),
+ ]
+
+
+ class Schema(XMLElement):
+ """Represents the schema element; carries a single text value."""
+
+ text: TextType
+
+
+ # Field type used by Metadata.schema.
+ # NOTE(review): presumably lets a plain string be coerced into a Schema instance -
+ # confirm against generate_coerced_string_type in qti/base.py.
+ SchemaType = generate_coerced_string_type(Schema)
+
+
+ class SchemaVersion(XMLElement):
+ """Represents the schemaversion element; carries a single text value."""
+
+ text: TextType
+
+
+ # Field type used by Metadata.schemaversion (see the note on SchemaType).
+ SchemaVersionType = generate_coerced_string_type(SchemaVersion)
+
+
+ class Metadata(XMLElement):
+ """Represents the metadata element"""
+
+ # Both entries are optional; see SchemaType / SchemaVersionType above.
+ schema: Optional[SchemaType] = None
+ schemaversion: Optional[SchemaVersionType] = None
+
+
+ class Item(XMLElement):
+ """Represents the item element"""
+
+ identifier: Optional[IMSCPIdentifier] = None
+ identifierref: Optional[IMSCPIdentifier] = None
+
+
+ class Organization(XMLElement):
+ """Represents the organization element"""
+
+ identifier: Optional[IMSCPIdentifier] = None
+ structure: Optional[str] = None
+ title: Optional[str] = None
+ item: List[Item] = Field(default_factory=list)
+
+
+ class Organizations(XMLElement):
+ """Represents the organizations element"""
+
+ organizations: List[Organization] = Field(default_factory=list)
+
+
+ class File(XMLElement):
+ """Represents the file element"""
+
+ href: Optional[str] = None
+
+
+ class Dependency(XMLElement):
+ """Represents the dependency element; ``identifierref`` is required."""
+
+ identifierref: IMSCPIdentifier
+
+
+ class Resource(XMLElement):
+ """Represents the resource element"""
+
+ # identifier and type_ are required; everything else is optional.
+ identifier: IMSCPIdentifier
+ # NOTE(review): the trailing underscore presumably maps to the ``type`` attribute - confirm in XMLElement.
+ type_: str
+ href: Optional[str] = None
+ files: List[File] = Field(default_factory=list)
+ dependencies: List[Dependency] = Field(default_factory=list)
+
+
+ class Resources(XMLElement):
+ """Represents the resources element"""
+
+ resources: List[Resource] = Field(default_factory=list)
+
+
+ class Manifest(XMLElement):
+ """Represents the imsmanifest.xml file"""
+
+ # Namespace declarations are modelled as ordinary fields with fixed defaults.
+ # NOTE(review): the double underscore presumably becomes ":" (xmlns:xsi, xsi:schemaLocation)
+ # when serialized - confirm in XMLElement.
+ xmlns: str = "http://www.imsglobal.org/xsd/qti/qtiv3p0/imscp_v1p2"
+ xmlns__xsi: str = "http://www.w3.org/2001/XMLSchema-instance"
+ xsi__schemaLocation: str = "http://www.imsglobal.org/xsd/qti/qtiv3p0/imscp_v1p2 https://purl.imsglobal.org/spec/qti/v3p0/schema/xsd/imsqtiv3p0_imscpv1p2_v1p0.xsd" # noqa: E501
+ # The only field without a default: callers must supply the manifest identifier.
+ identifier: IMSCPIdentifier
+ version: Optional[str] = None
+ metadata: Metadata = Field(default_factory=Metadata)
+ organizations: Organizations = Field(default_factory=Organizations)
+ resources: Resources = Field(default_factory=Resources)
+ # Nested manifests; the string annotation is a forward reference to this class.
+ manifests: List["Manifest"] = Field(default_factory=list)
+
+
+ def _get_item_ids_from_assessment_test(zip_file, test_href):
+     """Return the identifiers of the item references found in an assessment test.
+
+     Args:
+         zip_file: An open ``zipfile.ZipFile`` containing the package.
+         test_href: Archive path of the assessment test XML file.
+
+     Returns:
+         The non-empty ``identifier`` attributes of every
+         ``qti-assessment-item-ref`` element, in document order. An empty list
+         is returned when the member is missing from the archive or is not
+         well-formed XML.
+     """
+     qti_namespaces = {"qti": "http://www.imsglobal.org/xsd/imsqti_v3p0"}
+
+     try:
+         with zip_file.open(test_href) as handle:
+             document = ET.fromstring(handle.read())
+     except (KeyError, ET.ParseError):
+         # Best effort: an unreadable test simply contributes no identifiers.
+         return []
+
+     # Only item references are collected.
+     # TODO: Add handling for assessment sections and assessment section refs.
+     identifiers = []
+     for reference in document.findall(
+         ".//qti:qti-assessment-item-ref", qti_namespaces
+     ):
+         identifier = reference.get("identifier")
+         if identifier:
+             identifiers.append(identifier)
+     return identifiers
+
+
+ # Captures the namespace URI from an ElementTree tag of the form "{uri}localname".
+ namespace_re = re.compile(r"\{([^}]+)\}")
+
+
+ def get_assessment_ids_from_manifest(zip_file_handle):
+     """Collect assessment identifiers declared by a package's imsmanifest.xml.
+
+     Args:
+         zip_file_handle: Anything accepted by ``zipfile.ZipFile`` (a path or
+             a binary file-like object) pointing at the package archive.
+
+     Returns:
+         A list of identifiers in manifest order: the resource identifier of
+         every assessment-item resource, plus the item identifiers referenced
+         by every assessment-test resource.
+
+     Raises:
+         ValueError: If the archive is not a valid zip file, contains no
+             ``imsmanifest.xml``, or the manifest is not well-formed XML.
+     """
+     try:
+         with zipfile.ZipFile(zip_file_handle, "r") as zip_file:
+
+             # Read and parse the manifest
+             with zip_file.open("imsmanifest.xml") as manifest_file:
+                 root = ET.fromstring(manifest_file.read())
+
+             # The manifest namespace differs between IMS CP / QTI versions, so
+             # it is taken from the root tag instead of being hard-coded. A
+             # manifest without any namespace is searched with plain tag names
+             # rather than failing with an AttributeError on the missing match.
+             namespace_match = namespace_re.search(root.tag)
+             if namespace_match:
+                 resources = root.findall(
+                     ".//imscp:resource", {"imscp": namespace_match.group(1)}
+                 )
+             else:
+                 resources = root.findall(".//resource")
+
+             assessment_ids = []
+
+             for resource in resources:
+                 resource_type = resource.get("type", "")
+                 resource_identifier = resource.get("identifier")
+
+                 # Direct assessment item resources
+                 if (
+                     resource_type == ResourceType.ASSESSMENT_ITEM.value
+                     and resource_identifier
+                 ):
+                     assessment_ids.append(resource_identifier)
+
+                 # Items referenced from an assessment test; a missing href or
+                 # unreadable test file contributes nothing.
+                 if resource_type == ResourceType.ASSESSMENT_TEST.value:
+                     assessment_ids.extend(
+                         _get_item_ids_from_assessment_test(
+                             zip_file, resource.get("href")
+                         )
+                     )
+
+             return assessment_ids
+     except ET.ParseError as err:
+         raise ValueError("Invalid XML in manifest") from err
+     except zipfile.BadZipFile as err:
+         raise ValueError("File is not a valid zip archive") from err
+     except KeyError as err:
+         # ZipFile.open raises KeyError when the member does not exist.
+         raise ValueError("No IMS Manifest found in zip file") from err
diff --git a/contentcuration/contentcuration/utils/assessment/qti/interaction_types/__init__.py b/contentcuration/contentcuration/utils/assessment/qti/interaction_types/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/contentcuration/contentcuration/utils/assessment/qti/interaction_types/base.py b/contentcuration/contentcuration/utils/assessment/qti/interaction_types/base.py
new file mode 100644
index 0000000000..77a1de00f4
--- /dev/null
+++ b/contentcuration/contentcuration/utils/assessment/qti/interaction_types/base.py
@@ -0,0 +1,19 @@
+from contentcuration.utils.assessment.qti.base import BaseSequence
+from contentcuration.utils.assessment.qti.base import QTIBase
+from contentcuration.utils.assessment.qti.fields import QTIIdentifier
+
+
+ class Interaction(QTIBase, BaseSequence):
+ """
+ Abstract base class for QTI interactions.
+ """
+
+ # Required on every interaction; typed as QTIIdentifier.
+ response_identifier: QTIIdentifier
+
+
+ class BlockInteraction(Interaction):
+ """Marker base for block-level interactions (e.g. ChoiceInteraction, ExtendedTextInteraction)."""
+
+ pass
+
+
+ class InlineInteraction(Interaction):
+ """Marker base for inline interactions (e.g. TextEntryInteraction)."""
+
+ pass
diff --git a/contentcuration/contentcuration/utils/assessment/qti/interaction_types/simple.py b/contentcuration/contentcuration/utils/assessment/qti/interaction_types/simple.py
new file mode 100644
index 0000000000..a4dcec2251
--- /dev/null
+++ b/contentcuration/contentcuration/utils/assessment/qti/interaction_types/simple.py
@@ -0,0 +1,63 @@
+from typing import Annotated
+from typing import List
+from typing import Optional
+
+from annotated_types import Len
+from pydantic import Field
+from pydantic import field_validator
+from pydantic import model_validator
+from pydantic import NonNegativeInt
+
+from contentcuration.utils.assessment.qti.base import BaseSequence
+from contentcuration.utils.assessment.qti.base import QTIBase
+from contentcuration.utils.assessment.qti.constants import Orientation
+from contentcuration.utils.assessment.qti.constants import ShowHide
+from contentcuration.utils.assessment.qti.fields import QTIIdentifier
+from contentcuration.utils.assessment.qti.html import FlowContentList
+from contentcuration.utils.assessment.qti.interaction_types.base import BlockInteraction
+from contentcuration.utils.assessment.qti.prompt import Prompt
+
+
+ class SimpleChoice(QTIBase, BaseSequence):
+ """
+ Represents a choice in a QTI choice interaction.
+ Each simple choice has an identifier and can contain HTML content
+ or a mixture of HTML and QTI elements.
+ """
+
+ # Required; ChoiceInteraction additionally enforces uniqueness across its answers.
+ identifier: QTIIdentifier
+ template_identifier: Optional[str] = None
+ # Always has a value (defaults to "show").
+ show_hide: ShowHide = ShowHide.SHOW
+ fixed: bool = False
+ children: FlowContentList = Field(default_factory=list)
+
+
+ class ChoiceInteraction(BlockInteraction):
+     """For multiple choice questions
+
+     ``answers`` must contain at least one choice and all choice identifiers
+     must be unique. ``min_choices`` / ``max_choices`` are checked against each
+     other and against the number of answers. Following QTI, a ``max_choices``
+     of 0 means "no restriction"; ``None`` means the attribute is not set.
+     """
+
+     shuffle: Optional[bool] = None
+     max_choices: Optional[NonNegativeInt] = 1
+     min_choices: Optional[NonNegativeInt] = 0
+     orientation: Orientation = Orientation.VERTICAL
+     prompt: Optional[Prompt] = None
+     answers: Annotated[List[SimpleChoice], Len(min_length=1)]
+
+     @field_validator("answers")
+     @classmethod
+     def _unique_answer_identifiers(
+         cls, answers: List[SimpleChoice]
+     ) -> List[SimpleChoice]:
+         """Reject answer lists in which two choices share an identifier."""
+         identifiers = [choice.identifier for choice in answers]
+         if len(set(identifiers)) != len(identifiers):
+             raise ValueError(
+                 "Duplicate identifiers detected in ChoiceInteraction.answers; "
+                 "each SimpleChoice.identifier must be unique."
+             )
+         return answers
+
+     @model_validator(mode="after")
+     def _check_choice_bounds(self):
+         """Validate min/max choice counts against each other and the answers.
+
+         Both bounds are Optional, so each comparison is skipped when the
+         corresponding value is None instead of raising a TypeError.
+         """
+         min_choices = self.min_choices
+         max_choices = self.max_choices
+         answer_count = len(self.answers)
+
+         # QTI: max-choices == 0 means the candidate may select any number.
+         has_upper_bound = max_choices is not None and max_choices != 0
+
+         if has_upper_bound:
+             if min_choices is not None and min_choices > max_choices:
+                 raise ValueError("`min_choices` cannot exceed `max_choices`")
+             if max_choices > answer_count:
+                 raise ValueError("`max_choices` cannot exceed number of answers")
+         elif min_choices is not None and min_choices > answer_count:
+             # Without an upper bound the minimum must still be satisfiable.
+             raise ValueError("`min_choices` cannot exceed number of answers")
+         return self
diff --git a/contentcuration/contentcuration/utils/assessment/qti/interaction_types/text_based.py b/contentcuration/contentcuration/utils/assessment/qti/interaction_types/text_based.py
new file mode 100644
index 0000000000..da96e4d9a7
--- /dev/null
+++ b/contentcuration/contentcuration/utils/assessment/qti/interaction_types/text_based.py
@@ -0,0 +1,42 @@
+from typing import Optional
+
+from pydantic import NonNegativeInt
+
+from contentcuration.utils.assessment.qti.constants import Format
+from contentcuration.utils.assessment.qti.fields import QTIIdentifier
+from contentcuration.utils.assessment.qti.interaction_types.base import BlockInteraction
+from contentcuration.utils.assessment.qti.interaction_types.base import (
+ InlineInteraction,
+)
+from contentcuration.utils.assessment.qti.prompt import Prompt
+
+
+ class TextEntryInteraction(InlineInteraction):
+ """For short text entry"""
+
+ # The QTI spec specifies a default of 10 here
+ # but this is only needed if we're intending to collect a numerical result
+ # so we let it be nullable and let the rendering engine handle this.
+ base: Optional[int] = None
+ string_identifier: Optional[QTIIdentifier] = None
+ expected_length: Optional[NonNegativeInt] = None
+ pattern_mask: Optional[str] = None
+ placeholder_text: Optional[str] = None
+ # No default format here, unlike ExtendedTextInteraction which defaults to plain.
+ format_: Optional[Format] = None
+
+
+ class ExtendedTextInteraction(BlockInteraction):
+ """For longer text entry/free response"""
+
+ # The QTI spec specifies a default of 10 here
+ # but this is only needed if we're intending to collect a numerical result
+ # so we let it be nullable and let the rendering engine handle this.
+ base: Optional[int] = None
+ string_identifier: Optional[QTIIdentifier] = None
+ pattern_mask: Optional[str] = None
+ placeholder_text: Optional[str] = None
+ # NOTE(review): no validator relates min_strings to max_strings - confirm that is intended.
+ max_strings: Optional[NonNegativeInt] = None
+ min_strings: Optional[NonNegativeInt] = 0
+ expected_lines: Optional[NonNegativeInt] = None
+ format_: Optional[Format] = Format.PLAIN
+ prompt: Optional[Prompt] = None
diff --git a/contentcuration/contentcuration/utils/assessment/qti/mathml/__init__.py b/contentcuration/contentcuration/utils/assessment/qti/mathml/__init__.py
new file mode 100644
index 0000000000..cdbbf3bbe8
--- /dev/null
+++ b/contentcuration/contentcuration/utils/assessment/qti/mathml/__init__.py
@@ -0,0 +1,75 @@
+from .base import MathMLElement
+from .core import Annotation
+from .core import AnnotationXml
+from .core import Math
+from .core import Merror
+from .core import Mfrac
+from .core import Mi
+from .core import Mmultiscripts
+from .core import Mn
+from .core import Mo
+from .core import Mover
+from .core import Mpadded
+from .core import Mphantom
+from .core import Mprescripts
+from .core import Mroot
+from .core import Mrow
+from .core import Ms
+from .core import Mspace
+from .core import Msqrt
+from .core import Mstyle
+from .core import Msub
+from .core import Msubsup
+from .core import Msup
+from .core import Mtable
+from .core import Mtd
+from .core import Mtext
+from .core import Mtr
+from .core import Munder
+from .core import Munderover
+from .core import Semantics
+from .fields import MathMLDisplay
+from .fields import MathMLForm
+
+ # Public API of the mathml package: every name imported above is re-exported here,
+ # grouped by the kind of element it models.
+ __all__ = [
+ "MathMLElement",
+ # Root element
+ "Math",
+ # Token elements
+ "Mi",
+ "Mn",
+ "Mo",
+ "Mtext",
+ "Ms",
+ "Mspace",
+ # Layout elements
+ "Mrow",
+ "Mfrac",
+ "Msqrt",
+ "Mroot",
+ "Mpadded",
+ # Script elements
+ "Msub",
+ "Msup",
+ "Msubsup",
+ "Munder",
+ "Mover",
+ "Munderover",
+ "Mmultiscripts",
+ "Mprescripts",
+ # Table elements
+ "Mtd",
+ "Mtr",
+ "Mtable",
+ # Grouping elements
+ "Mstyle",
+ "Merror",
+ "Mphantom",
+ # Semantic elements
+ "Annotation",
+ "AnnotationXml",
+ "Semantics",
+ # enums
+ "MathMLForm",
+ "MathMLDisplay",
+ ]
diff --git a/contentcuration/contentcuration/utils/assessment/qti/mathml/base.py b/contentcuration/contentcuration/utils/assessment/qti/mathml/base.py
new file mode 100644
index 0000000000..1c8d3e7c1a
--- /dev/null
+++ b/contentcuration/contentcuration/utils/assessment/qti/mathml/base.py
@@ -0,0 +1,73 @@
+from typing import List
+from typing import Optional
+from typing import Union
+
+from contentcuration.utils.assessment.qti.base import ElementTreeBase
+from contentcuration.utils.assessment.qti.base import TextType
+from contentcuration.utils.assessment.qti.mathml.fields import ColorValue
+from contentcuration.utils.assessment.qti.mathml.fields import LengthPercentage
+from contentcuration.utils.assessment.qti.mathml.fields import ScriptLevel
+
+
+ class MathMLElement(ElementTreeBase):
+ """
+ Base class for all MathML elements.
+ Similar to HTMLElement but for MathML namespace.
+ """
+
+ # Attributes shared by every MathML element; all optional.
+ # Their value formats are constrained by the types in mathml/fields.py.
+ mathcolor: Optional[ColorValue] = None
+ mathbackground: Optional[ColorValue] = None
+ mathsize: Optional[LengthPercentage] = None
+ displaystyle: Optional[bool] = None
+ scriptlevel: Optional[ScriptLevel] = None
+
+ autofocus: Optional[bool] = None
+
+
+ class MathMLPresentationElement(MathMLElement):
+ """
+ Base class for all presentation elements that can appear in math content.
+ Excludes annotation elements which are semantic-only.
+ """
+
+ pass
+
+
+ class MathMLTokenElement(MathMLPresentationElement):
+ """
+ Base class for token elements (mi, mn, mo, mtext, ms, mspace).
+ These represent the atomic units of mathematical notation.
+ """
+
+ pass
+
+
+ class MathMLLayoutElement(MathMLPresentationElement):
+ """
+ Base class for general layout elements (mrow, mfrac, msqrt, mroot, etc.).
+ These control the 2D layout of mathematical expressions.
+ """
+
+ pass
+
+
+ class MathMLScriptElement(MathMLLayoutElement):
+ """
+ Base class for script elements (msub, msup, msubsup, munder, mover, etc.).
+ These attach scripts (sub/super/under/over) to base expressions.
+ """
+
+ pass
+
+
+ class MathMLGroupingElement(MathMLPresentationElement):
+ """
+ Base class for grouping/container elements that don't affect layout much
+ but provide structure (maction, semantics, mphantom, mstyle).
+ """
+
+ pass
+
+
+ # A single child in presentation context: any presentation element or raw text.
+ PresentationContent = Union[MathMLPresentationElement, TextType]
+ # Child-list type used by the container elements in mathml/core.py.
+ PresentationContentList = List[PresentationContent]
diff --git a/contentcuration/contentcuration/utils/assessment/qti/mathml/core.py b/contentcuration/contentcuration/utils/assessment/qti/mathml/core.py
new file mode 100644
index 0000000000..7a038b9174
--- /dev/null
+++ b/contentcuration/contentcuration/utils/assessment/qti/mathml/core.py
@@ -0,0 +1,245 @@
+from typing import Annotated
+from typing import List
+from typing import Optional
+from typing import Union
+
+from annotated_types import Len
+from pydantic import Field
+from pydantic import field_validator
+
+from .base import MathMLElement
+from .base import MathMLGroupingElement
+from .base import MathMLLayoutElement
+from .base import MathMLPresentationElement
+from .base import MathMLScriptElement
+from .base import MathMLTokenElement
+from .base import PresentationContent
+from .base import PresentationContentList
+from .fields import LengthPercentage
+from .fields import MathMLDisplay
+from .fields import MathMLForm
+from contentcuration.utils.assessment.qti.base import TextType
+
+
+ # Child lists constrained to exactly two / exactly three entries (Len(min_length, max_length)).
+ PresentationContentListLength2 = Annotated[PresentationContentList, Len(2, 2)]
+ PresentationContentListLength3 = Annotated[PresentationContentList, Len(3, 3)]
+
+
+ class Math(MathMLElement):
+ """Root ``<math>`` element."""
+
+ display: Optional[MathMLDisplay] = None
+ alttext: Optional[str] = None
+ children: PresentationContentList = Field(default_factory=list)
+
+
+ # Text-only child list with at least one entry (Len(1) sets only the minimum).
+ NonEmptyText = Annotated[List[TextType], Len(1)]
+
+
+ # NOTE(review): the token elements below default ``children`` to an empty list although
+ # NonEmptyText requires at least one entry; pydantic does not validate defaults unless
+ # configured to, so an instance built without children is not rejected - confirm intended.
+ class Mi(MathMLTokenElement):
+ """``<mi>`` token element (identifier) with text-only children."""
+
+ # We deliberately do not include the `mathvariant` attribute here,
+ # as the only valid value in MathMLCore is "normal", which is the default.
+ children: NonEmptyText = Field(default_factory=list)
+
+
+ class Mn(MathMLTokenElement):
+ """``<mn>`` token element (number) with text-only children."""
+
+ children: NonEmptyText = Field(default_factory=list)
+
+
+ class Mo(MathMLTokenElement):
+ """``<mo>`` token element (operator) with its optional operator attributes."""
+
+ fence: Optional[bool] = None
+ form: Optional[MathMLForm] = None
+ largeop: Optional[bool] = None
+ lspace: Optional[LengthPercentage] = None
+ maxsize: Optional[LengthPercentage] = None
+ minsize: Optional[LengthPercentage] = None
+ movablelimits: Optional[bool] = None
+ rspace: Optional[LengthPercentage] = None
+ separator: Optional[bool] = None
+ stretchy: Optional[bool] = None
+ symmetric: Optional[bool] = None
+ children: NonEmptyText = Field(default_factory=list)
+
+
+ class Mtext(MathMLTokenElement):
+ """``<mtext>`` token element with text-only children."""
+
+ children: NonEmptyText = Field(default_factory=list)
+
+
+ class Ms(MathMLTokenElement):
+ """``<ms>`` token element (string literal) with text-only children."""
+
+ children: NonEmptyText = Field(default_factory=list)
+
+
+ class Mspace(MathMLTokenElement):
+ """``<mspace>`` token element; declares no children field."""
+
+ width: Optional[LengthPercentage] = None
+ height: Optional[LengthPercentage] = None
+ depth: Optional[LengthPercentage] = None
+ # This doesn't seem to be in the MathML Core spec
+ # but is used by MathJax and latex2mathml, so we allow it.
+ linebreak: Optional[str] = None
+
+
+ # NOTE(review): elements whose children type has a fixed length (Length2 / Length3) still
+ # default to an empty list; pydantic does not validate defaults unless configured to - confirm intended.
+ class Mrow(MathMLLayoutElement):
+ """``<mrow>`` element; any number of presentation children."""
+
+ children: PresentationContentList = Field(default_factory=list)
+
+
+ class Mfrac(MathMLLayoutElement):
+ """``<mfrac>`` element; exactly two children."""
+
+ linethickness: Optional[LengthPercentage] = None
+ children: PresentationContentListLength2 = Field(default_factory=list)
+
+
+ class Msqrt(MathMLLayoutElement):
+ """``<msqrt>`` element; any number of presentation children."""
+
+ children: PresentationContentList = Field(default_factory=list)
+
+
+ class Mroot(MathMLLayoutElement):
+ """``<mroot>`` element; exactly two children."""
+
+ children: PresentationContentListLength2 = Field(default_factory=list)
+
+
+ class Mstyle(MathMLGroupingElement):
+ """``<mstyle>`` element; any number of presentation children."""
+
+ children: PresentationContentList = Field(default_factory=list)
+
+
+ class Merror(MathMLGroupingElement):
+ """``<merror>`` element; any number of presentation children."""
+
+ children: PresentationContentList = Field(default_factory=list)
+
+
+ class Mpadded(MathMLLayoutElement):
+ """``<mpadded>`` element with optional size and offset attributes."""
+
+ width: Optional[LengthPercentage] = None
+ height: Optional[LengthPercentage] = None
+ depth: Optional[LengthPercentage] = None
+ lspace: Optional[LengthPercentage] = None
+ voffset: Optional[LengthPercentage] = None
+ children: PresentationContentList = Field(default_factory=list)
+
+
+ class Mphantom(MathMLGroupingElement):
+ """``<mphantom>`` element; any number of presentation children."""
+
+ children: PresentationContentList = Field(default_factory=list)
+
+
+ class Msub(MathMLScriptElement):
+ """``<msub>`` element; exactly two children."""
+
+ children: PresentationContentListLength2 = Field(default_factory=list)
+
+
+ class Msup(MathMLScriptElement):
+ """``<msup>`` element; exactly two children."""
+
+ children: PresentationContentListLength2 = Field(default_factory=list)
+
+
+ class Msubsup(MathMLScriptElement):
+ """``<msubsup>`` element; exactly three children."""
+
+ children: PresentationContentListLength3 = Field(default_factory=list)
+
+
+ class Munder(MathMLScriptElement):
+ """``<munder>`` element; exactly two children."""
+
+ accentunder: Optional[bool] = None
+ children: PresentationContentListLength2 = Field(default_factory=list)
+
+
+ class Mover(MathMLScriptElement):
+ """``<mover>`` element; exactly two children."""
+
+ accent: Optional[bool] = None
+ children: PresentationContentListLength2 = Field(default_factory=list)
+
+
+ class Munderover(MathMLScriptElement):
+ """``<munderover>`` element; exactly three children."""
+
+ accent: Optional[bool] = None
+ accentunder: Optional[bool] = None
+ children: PresentationContentListLength3 = Field(default_factory=list)
+
+
+ class Mprescripts(MathMLElement):
+ """``<mprescripts>`` separator used inside Mmultiscripts.
+
+ Derives from MathMLElement rather than MathMLPresentationElement, so it is
+ not part of PresentationContent and is only accepted where listed explicitly.
+ """
+
+ pass
+
+
+ class Mmultiscripts(MathMLScriptElement):
+     """``<mmultiscripts>`` element: a base with paired post- and pre-scripts.
+
+     Expected child layout::
+
+         base (sub super)* [<mprescripts/> (presub presuper)*]
+
+     The validator enforces that a base exists, that at most one
+     ``<mprescripts>`` separator is present, and that the scripts on either
+     side of the separator come in (subscript, superscript) pairs.
+     """
+
+     children: List[Union[PresentationContent, Mprescripts]] = Field(
+         default_factory=list
+     )
+
+     @field_validator("children")
+     @classmethod
+     def _validate_children(cls, v):
+         """Check the base / script-pair structure described on the class.
+
+         Raises:
+             ValueError: If the child list is empty, starts with
+                 ``<mprescripts>``, contains more than one ``<mprescripts>``,
+                 or has an odd number of scripts in any group.
+         """
+         if not v:
+             raise ValueError(
+                 "<mmultiscripts> must have at least one child (base element)"
+             )
+
+         # MathML Core: at most one <mprescripts> separator
+         prescripts = [i for i, c in enumerate(v) if isinstance(c, Mprescripts)]
+         if len(prescripts) > 1:
+             raise ValueError("<mmultiscripts> may contain only one <mprescripts>")
+
+         # The first child is the base, so the separator cannot take its place.
+         # (Previously this case was only rejected by accident with a
+         # misleading "Post-scripts must come in pairs" message.)
+         if isinstance(v[0], Mprescripts):
+             raise ValueError(
+                 "First child of <mmultiscripts> must be the base element, "
+                 "not <mprescripts>"
+             )
+
+         # Scripts must come in pairs (subscript, superscript)
+         if prescripts:
+             prescripts_index = prescripts[0]
+             # Validate post-scripts (between base and mprescripts)
+             post_scripts_count = prescripts_index - 1
+             if post_scripts_count % 2 != 0:
+                 raise ValueError(
+                     "Post-scripts must come in pairs (subscript, superscript)"
+                 )
+
+             # Validate pre-scripts (after mprescripts)
+             pre_scripts_count = len(v) - prescripts_index - 1
+             if pre_scripts_count % 2 != 0:
+                 raise ValueError(
+                     "Pre-scripts must come in pairs (subscript, superscript)"
+                 )
+         else:
+             # No mprescripts, all scripts after base must be in pairs
+             scripts_count = len(v) - 1
+             if scripts_count % 2 != 0:
+                 raise ValueError("Scripts must come in pairs (subscript, superscript)")
+
+         return v
+
+
+ class Mtd(MathMLElement):
+ """``<mtd>`` table cell; the only child type accepted by Mtr."""
+
+ columnspan: Optional[int] = None
+ rowspan: Optional[int] = None
+ children: PresentationContentList = Field(default_factory=list)
+
+
+ class Mtr(MathMLElement):
+ """``<mtr>`` table row; the only child type accepted by Mtable."""
+
+ children: List[Mtd] = Field(default_factory=list)
+
+
+ class Mtable(MathMLElement):
+ """``<mtable>`` element whose children are restricted to Mtr rows."""
+
+ children: List[Mtr] = Field(default_factory=list)
+
+
+ class Annotation(MathMLElement):
+ """``<annotation>`` element with text-only children, used inside Semantics."""
+
+ encoding: Optional[str] = None
+ children: NonEmptyText = Field(default_factory=list)
+
+
+ class AnnotationXml(MathMLElement):
+ """``<annotation-xml>`` element with presentation children, used inside Semantics."""
+
+ encoding: Optional[str] = None
+ children: PresentationContentList = Field(default_factory=list)
+
+ @classmethod
+ def element_name(cls):
+ # The tag contains a hyphen, so it is spelled out explicitly here.
+ return "annotation-xml"
+
+
+ class Semantics(MathMLGroupingElement):
+     """``<semantics>`` element: presentation content followed by annotations.
+
+     The first child must be a MathML presentation element; every following
+     child must be an Annotation or AnnotationXml. The ``Len(2)`` constraint
+     on the field requires at least two children in total.
+     """
+
+     children: Annotated[
+         List[Union[Annotation, AnnotationXml, PresentationContent]], Len(2)
+     ] = Field(default_factory=list)
+
+     @field_validator("children")
+     @classmethod
+     def validate_children_structure(cls, v):
+         """Ensure the presentation-first, annotations-after ordering.
+
+         Raises:
+             ValueError: If the list is empty, the first child is not a
+                 presentation element, or a later child is not an annotation.
+         """
+         if not v:
+             raise ValueError("Semantics must have at least one child")
+
+         presentation, *annotations = v
+
+         # The leading child carries the presentation markup.
+         if not isinstance(presentation, MathMLPresentationElement):
+             raise ValueError(
+                 "First child of Semantics must be MathML presentation content"
+             )
+
+         # Everything after it has to be an annotation of either flavour.
+         for position, node in enumerate(annotations, start=1):
+             if isinstance(node, (Annotation, AnnotationXml)):
+                 continue
+             raise ValueError(
+                 f"Child at position {position} must be Annotation or AnnotationXml, got {type(node).__name__}"
+             )
+
+         return v
diff --git a/contentcuration/contentcuration/utils/assessment/qti/mathml/fields.py b/contentcuration/contentcuration/utils/assessment/qti/mathml/fields.py
new file mode 100644
index 0000000000..bc5fd42ce3
--- /dev/null
+++ b/contentcuration/contentcuration/utils/assessment/qti/mathml/fields.py
@@ -0,0 +1,66 @@
+from enum import Enum
+from typing import Annotated
+
+from pydantic import Field
+
+
+LengthPercentage = Annotated[
+ str,
+ Field(
+ # Accepts:
+ # - Length values: 10px, 2em, 1.5rem, 0.5in, 2pt, etc.
+ # - Percentage values: 50%, 100%, 0%, etc.
+ # - Zero: 0 (unitless zero is valid)
+ # Previously, different attributes allowed a range of other values,
+ # but these seem to be deprecated.
+ pattern=r"^([+-]?0|[+-]?(?:\d+\.?\d*|\d*\.\d+)(?:px|pt|pc|in|cm|mm|Q|em|ex|ch|rem|lh|rlh|vw|vh|vi|vb|vmin|vmax|mu|%))$",
+ description="CSS length-percentage value (e.g., '10px', '2em', '50%', '0')",
+ examples=["10px", "2em", "50%", "0"],
+ ),
+]
+
+# Number patterns
+number = r"\d+(?:\.\d+)?"
+percentage = rf"{number}%"
+number_or_percent = rf"{number}%?"
+
+# Color patterns (hex, named, and functional notations)
+hex_color = r"#[0-9a-fA-F]{3,8}"
+named_color = r"[a-zA-Z]+"
+rgb_pattern = rf"rgb\(\s*{number_or_percent}\s*,\s*{number_or_percent}\s*,\s*{number_or_percent}\s*\)"
+rgba_pattern = rf"rgba\(\s*{number_or_percent}\s*,\s*{number_or_percent}\s*,\s*{number_or_percent}\s*,\s*{number_or_percent}\s*\)"
+hsl_pattern = rf"hsl\(\s*{number}\s*,\s*{percentage}\s*,\s*{percentage}\s*\)"
+hsla_pattern = rf"hsla\(\s*{number}\s*,\s*{percentage}\s*,\s*{percentage}\s*,\s*{number_or_percent}\s*\)"
+
+# Final regex
+color_regex = rf"^(?:{hex_color}|{named_color}|{rgb_pattern}|{rgba_pattern}|{hsl_pattern}|{hsla_pattern})$"
+
+
+ColorValue = Annotated[
+ str,
+ Field(
+ pattern=color_regex,
+ description="CSS color value (hex, named color, rgb(), rgba(), hsl(), hsla())",
+ examples=["red", "#ff0000", "#f00", "rgb(255,0,0)", "rgba(255,0,0,0.5)"],
+ ),
+]
+
+ScriptLevel = Annotated[
+ str,
+ Field(
+ pattern=r"^[+-]?\d+$",
+ description="Script level value (integer, optionally with +/- prefix)",
+ examples=["0", "1", "+2", "-1"],
+ ),
+]
+
+
+class MathMLDisplay(Enum):
+ BLOCK = "block"
+ INLINE = "inline"
+
+
+class MathMLForm(Enum):
+ PREFIX = "prefix"
+ INFIX = "infix"
+ POSTFIX = "postfix"
diff --git a/contentcuration/contentcuration/utils/assessment/qti/prompt.py b/contentcuration/contentcuration/utils/assessment/qti/prompt.py
new file mode 100644
index 0000000000..63c2cde94c
--- /dev/null
+++ b/contentcuration/contentcuration/utils/assessment/qti/prompt.py
@@ -0,0 +1,13 @@
+from typing import List
+from typing import Union
+
+from pydantic import Field
+
+from contentcuration.utils.assessment.qti.base import QTIBase
+from contentcuration.utils.assessment.qti.html import FlowContent
+from contentcuration.utils.assessment.qti.mathml import Math
+
+
+class Prompt(QTIBase):
+
+ children: List[Union[Math, FlowContent]] = Field(default_factory=list)
diff --git a/contentcuration/contentcuration/utils/publish.py b/contentcuration/contentcuration/utils/publish.py
index 901f9aaa32..3e28f2d0e0 100644
--- a/contentcuration/contentcuration/utils/publish.py
+++ b/contentcuration/contentcuration/utils/publish.py
@@ -1,16 +1,11 @@
-import hashlib
import itertools
import json
import logging as logmodule
import os
-import re
import tempfile
import time
-import traceback
import uuid
-import zipfile
from copy import deepcopy
-from io import BytesIO
from itertools import chain
from django.conf import settings
@@ -41,7 +36,6 @@
from le_utils.constants import file_formats
from le_utils.constants import format_presets
from le_utils.constants import roles
-from PIL import Image
from search.models import ChannelFullTextSearch
from search.models import ContentNodeFullTextSearch
from search.utils import get_fts_annotated_channel_qs
@@ -49,11 +43,15 @@
from contentcuration import models as ccmodels
from contentcuration.decorators import delay_user_storage_calculation
+from contentcuration.utils.assessment.perseus import PerseusExerciseGenerator
+from contentcuration.utils.assessment.qti.archive import QTIExerciseGenerator
+from contentcuration.utils.assessment.qti.imsmanifest import (
+ get_assessment_ids_from_manifest,
+)
from contentcuration.utils.cache import delete_public_channel_cache_keys
from contentcuration.utils.files import create_thumbnail_from_base64
from contentcuration.utils.files import get_thumbnail_encoding
from contentcuration.utils.nodes import migrate_extra_fields
-from contentcuration.utils.parser import extract_value
from contentcuration.utils.parser import load_json_string
from contentcuration.utils.sentry import report_exception
@@ -151,7 +149,9 @@ def create_content_database(
"""
:type progress_tracker: contentcuration.utils.celery.ProgressTracker|None
"""
- # increment the channel version
+ if not channel.language:
+ raise ChannelIncompleteError("Channel must have a language set to be published")
+
if not use_staging_tree and not force:
raise_if_nodes_are_all_unchanged(channel)
fh, tempdb = tempfile.mkstemp(suffix=".sqlite3")
@@ -323,15 +323,43 @@ def recurse_nodes(self, node, inherited_fields): # noqa C901
)
if node.kind_id == content_kinds.EXERCISE:
- exercise_data = process_assessment_metadata(node, kolibrinode)
+ exercise_data = process_assessment_metadata(node)
+ any_free_response = any(
+ t == exercises.FREE_RESPONSE
+ for t in exercise_data["assessment_mapping"].values()
+ )
+ generator_class = (
+ QTIExerciseGenerator
+ if any_free_response
+ else PerseusExerciseGenerator
+ )
+
+ # If this exercise previously had a file generated by a different
+ # generator, make sure we clean it up here.
+ stale_presets = {
+ PerseusExerciseGenerator.preset,
+ QTIExerciseGenerator.preset,
+ } - {generator_class.preset}
+
+ # Remove archives produced by the previously-used generator
+ node.files.filter(preset_id__in=stale_presets).delete()
+
if (
self.force_exercises
or node.changed
- or not node.files.filter(preset_id=format_presets.EXERCISE).exists()
+ or not node.files.filter(preset_id=generator_class.preset).exists()
):
- create_perseus_exercise(
- node, kolibrinode, exercise_data, user_id=self.user_id
+
+ generator = generator_class(
+ node,
+ exercise_data,
+ self.channel_id,
+ self.default_language.lang_code,
+ user_id=self.user_id,
)
+ generator.create_exercise_archive()
+
+ create_kolibri_assessment_metadata(node, kolibrinode)
elif node.kind_id == content_kinds.SLIDESHOW:
create_slideshow_manifest(node, user_id=self.user_id)
elif node.kind_id == content_kinds.TOPIC:
@@ -609,38 +637,6 @@ def create_associated_file_objects(kolibrinode, ccnode):
)
-def create_perseus_exercise(ccnode, kolibrinode, exercise_data, user_id=None):
- logging.debug("Creating Perseus Exercise for Node {}".format(ccnode.title))
- filename = "{0}.{ext}".format(ccnode.title, ext=file_formats.PERSEUS)
- temppath = None
- resized_images_map = {}
- try:
- with tempfile.NamedTemporaryFile(suffix="zip", delete=False) as tempf:
- temppath = tempf.name
- create_perseus_zip(ccnode, exercise_data, tempf, resized_images_map)
- file_size = tempf.tell()
- tempf.flush()
-
- ccnode.files.filter(preset_id=format_presets.EXERCISE).delete()
-
- assessment_file_obj = ccmodels.File.objects.create(
- file_on_disk=File(open(temppath, "rb"), name=filename),
- contentnode=ccnode,
- file_format_id=file_formats.PERSEUS,
- preset_id=format_presets.EXERCISE,
- original_filename=filename,
- file_size=file_size,
- uploaded_by_id=user_id,
- )
- logging.debug(
- "Created exercise for {0} with checksum {1}".format(
- ccnode.title, assessment_file_obj.checksum
- )
- )
- finally:
- temppath and os.unlink(temppath)
-
-
def parse_assessment_metadata(ccnode):
extra_fields = ccnode.extra_fields
if isinstance(extra_fields, str):
@@ -656,11 +652,7 @@ def parse_assessment_metadata(ccnode):
)
-def process_assessment_metadata(ccnode, kolibrinode):
- # Get mastery model information, set to default if none provided
- assessment_items = ccnode.assessment_items.all().order_by("order")
- assessment_item_ids = [a.assessment_id for a in assessment_items]
-
+def _get_exercise_data_from_ccnode(ccnode, num_assessment_items):
randomize, mastery_criteria = parse_assessment_metadata(ccnode)
exercise_data = deepcopy(mastery_criteria)
@@ -669,14 +661,14 @@ def process_assessment_metadata(ccnode, kolibrinode):
mastery_model = {"type": exercise_data_type or exercises.M_OF_N}
if mastery_model["type"] == exercises.M_OF_N:
mastery_model.update(
- {"n": exercise_data.get("n") or min(5, assessment_items.count()) or 1}
+ {"n": exercise_data.get("n") or min(5, num_assessment_items) or 1}
)
mastery_model.update(
- {"m": exercise_data.get("m") or min(5, assessment_items.count()) or 1}
+ {"m": exercise_data.get("m") or min(5, num_assessment_items) or 1}
)
elif mastery_model["type"] == exercises.DO_ALL:
mastery_model.update(
- {"n": assessment_items.count() or 1, "m": assessment_items.count() or 1}
+ {"n": num_assessment_items or 1, "m": num_assessment_items or 1}
)
elif mastery_model["type"] == exercises.NUM_CORRECT_IN_A_ROW_2:
mastery_model.update({"n": 2, "m": 2})
@@ -686,6 +678,17 @@ def process_assessment_metadata(ccnode, kolibrinode):
mastery_model.update({"n": 5, "m": 5})
elif mastery_model["type"] == exercises.NUM_CORRECT_IN_A_ROW_10:
mastery_model.update({"n": 10, "m": 10})
+ return randomize, exercise_data, mastery_model
+
+
+def process_assessment_metadata(ccnode):
+ # Get mastery model information, set to default if none provided
+ assessment_items = ccnode.assessment_items.all().order_by("order")
+ assessment_item_ids = [a.assessment_id for a in assessment_items]
+
+ randomize, exercise_data, mastery_model = _get_exercise_data_from_ccnode(
+ ccnode, len(assessment_item_ids)
+ )
exercise_data.update(
{
@@ -704,315 +707,31 @@ def process_assessment_metadata(ccnode, kolibrinode):
}
)
+ return exercise_data
+
+
+def create_kolibri_assessment_metadata(ccnode, kolibrinode):
+ assessment_items = ccnode.assessment_items.all().order_by("order")
+ assessment_item_ids = [a.assessment_id for a in assessment_items]
+ randomize, _, mastery_model = _get_exercise_data_from_ccnode(
+ ccnode, len(assessment_item_ids)
+ )
+ qti_file = ccnode.files.filter(preset_id=format_presets.QTI_ZIP).first()
+ if qti_file:
+ # Open the zip file from Django storage
+ with qti_file.file_on_disk.open("rb") as file_handle:
+ assessment_item_ids = get_assessment_ids_from_manifest(file_handle)
+
kolibrimodels.AssessmentMetaData.objects.create(
id=uuid.uuid4(),
contentnode=kolibrinode,
assessment_item_ids=assessment_item_ids,
- number_of_assessments=assessment_items.count(),
+ number_of_assessments=len(assessment_item_ids),
mastery_model=mastery_model,
randomize=randomize,
is_manipulable=ccnode.kind_id == content_kinds.EXERCISE,
)
- return exercise_data
-
-
-def create_perseus_zip(ccnode, exercise_data, write_to_path, resized_images_map):
- with zipfile.ZipFile(write_to_path, "w") as zf:
- try:
- exercise_context = {
- "exercise": json.dumps(exercise_data, sort_keys=True, indent=4)
- }
- exercise_result = render_to_string(
- "perseus/exercise.json", exercise_context
- )
- write_to_zipfile("exercise.json", exercise_result, zf)
-
- channel_id = ccnode.get_channel_id()
-
- for question in (
- ccnode.assessment_items.prefetch_related("files")
- .all()
- .order_by("order")
- ):
- try:
- write_assessment_item(question, zf, channel_id, resized_images_map)
- except Exception as e:
- logging.error(
- "Error while publishing channel `{}`: {}".format(
- channel_id, str(e)
- )
- )
- logging.error(traceback.format_exc())
- # In production, these errors have historically been handled silently.
- # Retain that behavior for now, but raise an error locally so we can
- # better understand the cases in which this might happen.
- report_exception(e)
-
- # if we're in a testing or development environment, raise the error
- if os.environ.get("BRANCH_ENVIRONMENT", "") != "master":
- logging.warning(
- "NOTE: the following error would have been swallowed silently in production"
- )
- raise
- finally:
- zf.close()
-
-
-def write_to_zipfile(filename, content, zf):
- info = zipfile.ZipInfo(filename, date_time=(2013, 3, 14, 1, 59, 26))
- info.comment = "Perseus file generated during export process".encode()
- info.compress_type = zipfile.ZIP_STORED
- info.create_system = 0
- zf.writestr(info, content)
-
-
-def _write_raw_perseus_image_files_to_zip(assessment_item, zf):
- # For raw perseus JSON questions, the files must be
- # specified in advance.
-
- # Files have been prefetched when the assessment item was
- # queried, so take advantage of that.
- files = sorted(assessment_item.files.all(), key=lambda x: x.checksum)
- image_files = filter(lambda x: x.preset_id == format_presets.EXERCISE_IMAGE, files)
- graphie_files = filter(
- lambda x: x.preset_id == format_presets.EXERCISE_GRAPHIE, files
- )
- for image in image_files:
- image_name = "images/{}.{}".format(image.checksum, image.file_format_id)
- if image_name not in zf.namelist():
- with storage.open(
- ccmodels.generate_object_storage_name(image.checksum, str(image)),
- "rb",
- ) as content:
- write_to_zipfile(image_name, content.read(), zf)
-
- for image in graphie_files:
- svg_name = "images/{0}.svg".format(image.original_filename)
- json_name = "images/{0}-data.json".format(image.original_filename)
- if svg_name not in zf.namelist() or json_name not in zf.namelist():
- with storage.open(
- ccmodels.generate_object_storage_name(image.checksum, str(image)),
- "rb",
- ) as content:
- content = content.read()
- # in Python 3, delimiter needs to be in bytes format
- content = content.split(exercises.GRAPHIE_DELIMITER.encode("ascii"))
- write_to_zipfile(svg_name, content[0], zf)
- write_to_zipfile(json_name, content[1], zf)
-
-
-def write_assessment_item( # noqa C901
- assessment_item, zf, channel_id, resized_images_map
-):
- if assessment_item.type == exercises.MULTIPLE_SELECTION:
- template = "perseus/multiple_selection.json"
- elif (
- assessment_item.type == exercises.SINGLE_SELECTION
- or assessment_item.type == "true_false"
- ):
- template = "perseus/multiple_selection.json"
- elif assessment_item.type == exercises.INPUT_QUESTION:
- template = "perseus/input_question.json"
- elif assessment_item.type == exercises.PERSEUS_QUESTION:
- template = "perseus/perseus_question.json"
- _write_raw_perseus_image_files_to_zip(assessment_item, zf)
- else:
- raise TypeError(
- "Unrecognized question type on item {}".format(
- assessment_item.assessment_id
- )
- )
-
- question = process_formulas(assessment_item.question)
- question, question_images = process_image_strings(
- question, zf, channel_id, resized_images_map
- )
-
- answer_data = json.loads(assessment_item.answers)
- for answer in answer_data:
- if assessment_item.type == exercises.INPUT_QUESTION:
- answer["answer"] = extract_value(answer["answer"])
- else:
- answer["answer"] = answer["answer"].replace(
- exercises.CONTENT_STORAGE_PLACEHOLDER, PERSEUS_IMG_DIR
- )
- answer["answer"] = process_formulas(answer["answer"])
- # In case perseus doesn't support =wxh syntax, use below code
- answer["answer"], answer_images = process_image_strings(
- answer["answer"], zf, channel_id, resized_images_map
- )
- answer.update({"images": answer_images})
-
- answer_data = [
- a for a in answer_data if a["answer"] or a["answer"] == 0
- ] # Filter out empty answers, but not 0
- hint_data = json.loads(assessment_item.hints)
- for hint in hint_data:
- hint["hint"] = process_formulas(hint["hint"])
- hint["hint"], hint_images = process_image_strings(
- hint["hint"], zf, channel_id, resized_images_map
- )
- hint.update({"images": hint_images})
-
- answers_sorted = answer_data
- try:
- answers_sorted = sorted(answer_data, key=lambda x: x.get("order"))
- except TypeError:
- logging.error("Unable to sort answers, leaving unsorted.")
-
- hints_sorted = hint_data
- try:
- hints_sorted = sorted(hint_data, key=lambda x: x.get("order"))
- except TypeError:
- logging.error("Unable to sort hints, leaving unsorted.")
-
- context = {
- "question": question,
- "question_images": question_images,
- "answers": answers_sorted,
- "multiple_select": assessment_item.type == exercises.MULTIPLE_SELECTION,
- "raw_data": assessment_item.raw_data.replace(
- exercises.CONTENT_STORAGE_PLACEHOLDER, PERSEUS_IMG_DIR
- ),
- "hints": hints_sorted,
- "randomize": assessment_item.randomize,
- }
-
- result = render_to_string(template, context).encode("utf-8", "ignore")
- write_to_zipfile("{0}.json".format(assessment_item.assessment_id), result, zf)
-
-
-def process_formulas(content):
- for match in re.finditer(r"\$(\$.+\$)\$", content):
- content = content.replace(match.group(0), match.group(1))
- return content
-
-
-def resize_image(image_content, width, height):
- try:
- with Image.open(BytesIO(image_content)) as img:
- original_format = img.format
- img = img.resize((int(width), int(height)), Image.LANCZOS)
- buffered = BytesIO()
- img.save(buffered, format=original_format)
- return buffered.getvalue()
- except Exception as e:
- logging.warning(f"Error resizing image: {str(e)}")
- return None, None
-
-
-def get_resized_image_checksum(image_content):
- return hashlib.md5(image_content).hexdigest()
-
-
-def process_image_strings(content, zf, channel_id, resized_images_map): # noqa C901
- image_list = []
- content = content.replace(exercises.CONTENT_STORAGE_PLACEHOLDER, PERSEUS_IMG_DIR)
- for match in re.finditer(r"!\[(?:[^\]]*)]\(([^\)]+)\)", content):
- img_match = re.search(
- r"(.+/images/[^\s]+)(?:\s=([0-9\.]+)x([0-9\.]+))*", match.group(1)
- )
- if img_match:
- # Add any image files that haven't been written to the zipfile
- filename = img_match.group(1).split("/")[-1]
- checksum, ext = os.path.splitext(filename)
-
- if not ext:
- logging.warning(
- "While publishing channel `{}` a filename with no extension was encountered: `{}`".format(
- channel_id, filename
- )
- )
- try:
- # make sure the checksum is actually a hex string
- int(checksum, 16)
- except Exception:
- logging.warning(
- "while publishing channel `{}` a filename with an improper checksum was encountered: `{}`".format(
- channel_id, filename
- )
- )
-
- # if we're in a testing or development environment, raise the error
- if os.environ.get("BRANCH_ENVIRONMENT", "") != "master":
- logging.warning(
- "NOTE: the following error would have been swallowed silently in production"
- )
- raise
-
- original_image_name = "images/{}.{}".format(checksum, ext[1:])
- original_img_ref = match.group(1)
- if img_match.group(2) and img_match.group(3):
- width, height = float(img_match.group(2)), float(img_match.group(3))
- resized_key = (original_image_name, width, height)
-
- # Check if this resized version already exists
- new_img_ref = None
- if resized_key in resized_images_map:
- new_img_ref = resized_images_map[resized_key]
- else:
- # Check for similar resized images with the same original name
- similar_image = None
- for key, resized_image in resized_images_map.items():
- if (
- key[0] == original_image_name
- and abs(key[1] - width) / width < 0.01
- and abs(key[2] - height) / height < 0.01
- ):
- similar_image = resized_image
- break
-
- if similar_image:
- new_img_ref = similar_image
- else:
- with storage.open(
- ccmodels.generate_object_storage_name(checksum, filename),
- "rb",
- ) as imgfile:
- original_content = imgfile.read()
-
- resized_content = resize_image(original_content, width, height)
-
- if resized_content:
- resized_checksum = get_resized_image_checksum(
- resized_content
- )
- new_image_name = "images/{}.{}".format(
- resized_checksum, ext[1:]
- )
-
- if new_image_name not in zf.namelist():
- write_to_zipfile(new_image_name, resized_content, zf)
- new_img_ref = original_img_ref.replace(
- filename, f"{resized_checksum}{ext}"
- )
- resized_images_map[resized_key] = new_img_ref
- else:
- logging.warning(
- f"Failed to resize image {filename}. Using original image."
- )
- new_img_ref = img_match.group(1)
-
- new_img_match = re.search(
- r"(.+/images/[^\s]+)(?:\s=([0-9\.]+)x([0-9\.]+))*", new_img_ref
- )
- image_data = {"name": new_img_match.group(1)}
- image_data.update({"width": width})
- image_data.update({"height": height})
- image_list.append(image_data)
- content = content.replace(original_img_ref, new_img_match.group(1))
-
- else:
- if original_image_name not in zf.namelist():
- with storage.open(
- ccmodels.generate_object_storage_name(checksum, filename), "rb"
- ) as imgfile:
- original_content = imgfile.read()
- write_to_zipfile(original_image_name, original_content, zf)
- content = content.replace(match.group(1), img_match.group(1))
- return content, image_list
-
def map_prerequisites(root_node):
diff --git a/requirements-dev.in b/requirements-dev.in
index a21653e2dd..dbda3c794c 100644
--- a/requirements-dev.in
+++ b/requirements-dev.in
@@ -5,6 +5,7 @@ mixer==7.2.2
pytest
pytest-django
pytest-timeout
+pytest-subtests
pre-commit==4.2.0
nodeenv
pip-tools==7.4.1
diff --git a/requirements-dev.txt b/requirements-dev.txt
index 645051ab23..20f9baebd9 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -8,6 +8,10 @@ asgiref==3.3.4
# via
# -c requirements.txt
# django
+attrs==23.1.0
+ # via
+ # -c requirements.txt
+ # pytest-subtests
build==1.2.1
# via pip-tools
cfgv==3.3.1
@@ -77,9 +81,12 @@ pytest==8.4.1
# via
# -r requirements-dev.in
# pytest-django
+ # pytest-subtests
# pytest-timeout
pytest-django==4.11.1
# via -r requirements-dev.in
+pytest-subtests==0.14.1
+ # via -r requirements-dev.in
pytest-timeout==2.4.0
# via -r requirements-dev.in
python-dateutil==2.9.0.post0
diff --git a/requirements.in b/requirements.in
index 37d3321278..53cbbf0064 100644
--- a/requirements.in
+++ b/requirements.in
@@ -33,3 +33,7 @@ python-dateutil>=2.8.1
jsonschema>=3.2.0
django-celery-results
packaging>=21.0
+langcodes==3.5.0
+pydantic==2.11.5
+latex2mathml==3.78.0
+markdown-it-py==3.0.0
diff --git a/requirements.txt b/requirements.txt
index 88fe425d5d..eabda29c41 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -6,6 +6,8 @@
#
amqp==5.1.1
# via kombu
+annotated-types==0.7.0
+ # via pydantic
asgiref==3.3.4
# via django
async-timeout==5.0.1
@@ -154,8 +156,20 @@ jsonschema-specifications==2024.10.1
# via jsonschema
kombu==5.5.2
# via celery
+langcodes==3.5.0
+ # via -r requirements.in
+language-data==1.3.0
+ # via langcodes
+latex2mathml==3.78.0
+ # via -r requirements.in
le-utils==0.2.12
# via -r requirements.in
+marisa-trie==1.2.1
+ # via language-data
+markdown-it-py==3.0.0
+ # via -r requirements.in
+mdurl==0.1.2
+ # via markdown-it-py
packaging==25.0
# via
# -r requirements.in
@@ -188,6 +202,10 @@ pyasn1==0.4.8
# rsa
pyasn1-modules==0.2.8
# via google-auth
+pydantic==2.11.5
+ # via -r requirements.in
+pydantic-core==2.33.2
+ # via pydantic
pyparsing==2.4.7
# via httplib2
python-dateutil==2.9.0.post0
@@ -237,7 +255,13 @@ six==1.16.0
sqlparse==0.4.1
# via django
typing-extensions==4.13.0
- # via referencing
+ # via
+ # pydantic
+ # pydantic-core
+ # referencing
+ # typing-inspection
+typing-inspection==0.4.1
+ # via pydantic
tzdata==2025.2
# via kombu
urllib3==1.26.18