From 3ac83b5affa4be3dcf9773ace79a1f3ddd77312f Mon Sep 17 00:00:00 2001
From: Jake LoRocco <jake.lorocco@ibm.com>
Date: Fri, 1 May 2026 13:46:57 -0400
Subject: [PATCH] fix: default intrinsic adapter types; add tests for
 granite-switch; fix expected canned input/output with temperature

Signed-off-by: Jake LoRocco <jake.lorocco@ibm.com>
Assisted-by: CLAUDE:OPUS
---
 mellea/backends/adapters/catalog.py           |  26 +--
 test/backends/test_adapters/test_catalog.py   |  12 +-
 test/backends/test_openai_intrinsics.py       | 205 +++++++++++++++++-
 .../answerability_answerable.json             |   1 +
 .../answerability_simple.json                 |   3 +-
 .../answerability_unanswerable.json           |   1 +
 .../test_canned_input/context_relevance.json  |   3 +-
 7 files changed, 220 insertions(+), 31 deletions(-)

diff --git a/mellea/backends/adapters/catalog.py b/mellea/backends/adapters/catalog.py
index 62763a1ec..9edeea2e4 100644
--- a/mellea/backends/adapters/catalog.py
+++ b/mellea/backends/adapters/catalog.py
@@ -78,31 +78,15 @@ class IntriniscsCatalogEntry(pydantic.BaseModel):
     IntriniscsCatalogEntry(name="citations", repo_id=_RAG_REPO),
     IntriniscsCatalogEntry(name="context_relevance", repo_id=_RAG_REPO),
     IntriniscsCatalogEntry(name="hallucination_detection", repo_id=_RAG_REPO),
-    IntriniscsCatalogEntry(
-        name="query_clarification", repo_id=_RAG_REPO, adapter_types=(AdapterType.LORA,)
-    ),
+    IntriniscsCatalogEntry(name="query_clarification", repo_id=_RAG_REPO),
     IntriniscsCatalogEntry(name="query_rewrite", repo_id=_RAG_REPO),
     ############################################
     # Guardian Intrinsics
     ############################################
-    IntriniscsCatalogEntry(
-        name="policy-guardrails",
-        repo_id=_GUARDIAN_REPO,
-        adapter_types=(AdapterType.LORA,),
-    ),
-    IntriniscsCatalogEntry(
-        name="guardian-core", repo_id=_GUARDIAN_REPO, adapter_types=(AdapterType.LORA,)
-    ),
-    IntriniscsCatalogEntry(
-        name="factuality-detection",
-        repo_id=_GUARDIAN_REPO,
-        adapter_types=(AdapterType.LORA,),
-    ),
-    IntriniscsCatalogEntry(
-        name="factuality-correction",
-        repo_id=_GUARDIAN_REPO,
-        adapter_types=(AdapterType.LORA,),
-    ),
+    IntriniscsCatalogEntry(name="policy-guardrails", repo_id=_GUARDIAN_REPO),
+    IntriniscsCatalogEntry(name="guardian-core", repo_id=_GUARDIAN_REPO),
+    IntriniscsCatalogEntry(name="factuality-detection", repo_id=_GUARDIAN_REPO),
+    IntriniscsCatalogEntry(name="factuality-correction", repo_id=_GUARDIAN_REPO),
 ]
 
 _INTRINSICS_CATALOG = {e.name: e for e in _INTRINSICS_CATALOG_ENTRIES}
diff --git a/test/backends/test_adapters/test_catalog.py b/test/backends/test_adapters/test_catalog.py
index 03d7c3538..cbbe504ca 100644
--- a/test/backends/test_adapters/test_catalog.py
+++ b/test/backends/test_adapters/test_catalog.py
@@ -54,6 +54,16 @@ def test_default_adapter_types():
     assert AdapterType.ALORA in entry.adapter_types
 
 
-def test_lora_only_entry():
+def test_lora_only_entry(monkeypatch):
+    from mellea.backends.adapters import catalog
+
+    fake_entry = catalog.IntriniscsCatalogEntry(
+        name="query_clarification",
+        repo_id="ibm-granite/granitelib-rag-r1.0",
+        adapter_types=(AdapterType.LORA,),
+    )
+    monkeypatch.setattr(
+        catalog, "_INTRINSICS_CATALOG", {"query_clarification": fake_entry}
+    )
     entry = fetch_intrinsic_metadata("query_clarification")
     assert entry.adapter_types == (AdapterType.LORA,)
diff --git a/test/backends/test_openai_intrinsics.py b/test/backends/test_openai_intrinsics.py
index 0e4575add..3d5287fe0 100644
--- a/test/backends/test_openai_intrinsics.py
+++ b/test/backends/test_openai_intrinsics.py
@@ -43,7 +43,7 @@
 from mellea.stdlib import functional as mfuncs
 from mellea.stdlib.components import Intrinsic, Message
 from mellea.stdlib.components.docs.document import Document
-from mellea.stdlib.components.intrinsic import rag
+from mellea.stdlib.components.intrinsic import core as intrinsic_core, guardian, rag
 from mellea.stdlib.context import ChatContext
 from test.formatters.granite.test_intrinsics_formatters import (
     _YAML_JSON_COMBOS_WITH_MODEL,
@@ -355,13 +355,21 @@ def test_call_intrinsic_answerability(call_intrinsic_backend):
 
 
 @pytest.mark.qualitative
-def test_call_intrinsic_context_relevance(call_intrinsic_backend):
-    """call_intrinsic path: check_context_relevance returns a score between 0 and 1."""
-    context, question, documents = _read_rag_input("context_relevance.json")
-    result = rag.check_context_relevance(
-        question, documents[0], context, call_intrinsic_backend
+def test_call_intrinsic_requirement_check(call_intrinsic_backend):
+    """call_intrinsic path: requirement_check returns a score between 0 and 1."""
+    with open(_RAG_TEST_DATA / "requirement_check.json", encoding="utf-8") as f:
+        data = json.load(f)
+
+    context = ChatContext()
+    for m in data["messages"]:
+        context = context.add(Message(m["role"], m["content"]))
+
+    requirement = data["requirement"]
+    result = intrinsic_core.requirement_check(
+        context, call_intrinsic_backend, requirement=requirement
     )
-    assert result in ["relevant", "irrelevant", "partially relevant"]
+    assert isinstance(result, float)
+    assert 0.0 <= result <= 1.0
 
 
 # ---------------------------------------------------------------------------
@@ -399,3 +407,186 @@ def get_temperature(location: str) -> int:
     assert len(result.value) > 0
     parsed = json.loads(result.value)
     assert isinstance(parsed, dict)
+
+
+# ---------------------------------------------------------------------------
+# Guardian intrinsic tests — exercise the high-level convenience wrappers
+# ---------------------------------------------------------------------------
+
+_GUARDIAN_TEST_DATA = (
+    pathlib.Path(__file__).parent.parent
+    / "stdlib"
+    / "components"
+    / "intrinsic"
+    / "testdata"
+    / "input_json"
+)
+
+
+def _read_guardian_input(file_name: str) -> ChatContext:
+    """Read guardian test input and convert to a ChatContext."""
+    with open(_GUARDIAN_TEST_DATA / file_name, encoding="utf-8") as f:
+        json_data = json.load(f)
+
+    context = ChatContext()
+    for m in json_data["messages"]:
+        role = m["role"]
+        content = m["content"]
+        context = context.add(Message(role, content))
+
+    return context
+
+
+@pytest.mark.qualitative
+def test_call_intrinsic_policy_guardrails(call_intrinsic_backend):
+    """call_intrinsic path: policy_guardrails returns a compliance label."""
+    context = _read_guardian_input("policy_guardrails.json")
+
+    policy_text = (
+        "hiring managers should steer away from any questions that directly seek "
+        'information about protected classes\u2014such as "how old are you," "where are '
+        'you from," "what year did you graduate" or "what are your plans for having kids."'
+    )
+
+    result = guardian.policy_guardrails(
+        context, call_intrinsic_backend, policy_text=policy_text
+    )
+    assert result in ("Yes", "No", "Ambiguous")
+
+
+@pytest.mark.qualitative
+def test_call_intrinsic_guardian_check_harm(call_intrinsic_backend):
+    """call_intrinsic path: guardian_check detects harmful prompts."""
+    context = _read_guardian_input("guardian_core.json")
+
+    result = guardian.guardian_check(
+        context, call_intrinsic_backend, criteria="harm", target_role="user"
+    )
+    assert isinstance(result, float)
+    assert 0.0 <= result <= 1.0
+
+
+@pytest.mark.qualitative
+def test_call_intrinsic_guardian_check_groundedness(call_intrinsic_backend):
+    """call_intrinsic path: guardian_check detects ungrounded responses."""
+    document = Document(
+        text=(
+            "Eat (1964) is a 45-minute underground film created by Andy Warhol. "
+            "The film was first shown by Jonas Mekas on July 16, 1964, at the "
+            "Washington Square Gallery."
+        ),
+        doc_id="0",
+    )
+
+    context = (
+        ChatContext()
+        .add(Message("user", "When was the film Eat first shown?"))
+        .add(
+            Message(
+                "assistant",
+                "The film Eat was first shown by Jonas Mekas on December 24, "
+                "1922 at the Washington Square Gallery.",
+                documents=[document],
+            )
+        )
+    )
+
+    result = guardian.guardian_check(
+        context, call_intrinsic_backend, criteria="groundedness"
+    )
+    assert isinstance(result, float)
+    assert 0.0 <= result <= 1.0
+
+
+@pytest.mark.qualitative
+def test_call_intrinsic_guardian_check_function_call(call_intrinsic_backend):
+    """call_intrinsic path: guardian_check detects function call hallucinations."""
+    tools = [
+        {
+            "name": "comment_list",
+            "description": "Fetches a list of comments for a specified IBM video.",
+            "parameters": {
+                "aweme_id": {
+                    "description": "The ID of the IBM video.",
+                    "type": "int",
+                    "default": "7178094165614464282",
+                },
+                "cursor": {
+                    "description": "The cursor for pagination. Defaults to 0.",
+                    "type": "int, optional",
+                    "default": "0",
+                },
+                "count": {
+                    "description": "The number of comments to fetch. Maximum is 30. Defaults to 20.",
+                    "type": "int, optional",
+                    "default": "20",
+                },
+            },
+        }
+    ]
+    tools_text = "Available tools:\n" + json.dumps(tools, indent=2)
+    user_text = "Fetch the first 15 comments for the IBM video with ID 456789123."
+    # Deliberately wrong: uses "video_id" instead of "aweme_id"
+    response_text = str(
+        [{"name": "comment_list", "arguments": {"video_id": 456789123, "count": 15}}]
+    )
+
+    context = (
+        ChatContext()
+        .add(Message("user", f"{tools_text}\n\n{user_text}"))
+        .add(Message("assistant", response_text))
+    )
+
+    result = guardian.guardian_check(
+        context, call_intrinsic_backend, criteria="function_call"
+    )
+    assert isinstance(result, float)
+    assert 0.0 <= result <= 1.0
+
+
+@pytest.mark.qualitative
+def test_call_intrinsic_factuality_detection(call_intrinsic_backend):
+    """call_intrinsic path: factuality_detection returns a yes/no label."""
+    with open(_GUARDIAN_TEST_DATA / "factuality_detection.json", encoding="utf-8") as f:
+        data = json.load(f)
+
+    context = ChatContext()
+    docs = [
+        Document(text=d["text"], doc_id=d.get("doc_id"))
+        for d in data.get("extra_body", {}).get("documents", [])
+    ]
+    messages = data["messages"]
+    for i, m in enumerate(messages):
+        is_last = i == len(messages) - 1
+        if is_last and docs:
+            context = context.add(Message(m["role"], m["content"], documents=docs))
+        else:
+            context = context.add(Message(m["role"], m["content"]))
+
+    result = guardian.factuality_detection(context, call_intrinsic_backend)
+    assert result in ("yes", "no")
+
+
+@pytest.mark.qualitative
+def test_call_intrinsic_factuality_correction(call_intrinsic_backend):
+    """call_intrinsic path: factuality_correction returns corrected text or 'none'."""
+    with open(
+        _GUARDIAN_TEST_DATA / "factuality_correction.json", encoding="utf-8"
+    ) as f:
+        data = json.load(f)
+
+    context = ChatContext()
+    docs = [
+        Document(text=d["text"], doc_id=d.get("doc_id"))
+        for d in data.get("extra_body", {}).get("documents", [])
+    ]
+    messages = data["messages"]
+    for i, m in enumerate(messages):
+        is_last = i == len(messages) - 1
+        if is_last and docs:
+            context = context.add(Message(m["role"], m["content"], documents=docs))
+        else:
+            context = context.add(Message(m["role"], m["content"]))
+
+    result = guardian.factuality_correction(context, call_intrinsic_backend)
+    assert isinstance(result, str)
diff --git a/test/formatters/granite/testdata/test_canned_input/answerability_answerable.json b/test/formatters/granite/testdata/test_canned_input/answerability_answerable.json
index c8f127d67..5843e43e5 100644
--- a/test/formatters/granite/testdata/test_canned_input/answerability_answerable.json
+++ b/test/formatters/granite/testdata/test_canned_input/answerability_answerable.json
@@ -26,5 +26,6 @@
       }
     }
   },
+  "temperature": 0.0,
   "max_completion_tokens": 6
 }
\ No newline at end of file
diff --git a/test/formatters/granite/testdata/test_canned_input/answerability_simple.json b/test/formatters/granite/testdata/test_canned_input/answerability_simple.json
index 3a42fc67e..d247c43d3 100644
--- a/test/formatters/granite/testdata/test_canned_input/answerability_simple.json
+++ b/test/formatters/granite/testdata/test_canned_input/answerability_simple.json
@@ -16,5 +16,6 @@
       }
     }
   },
-  "max_completion_tokens": 6
+  "max_completion_tokens": 6,
+  "temperature": 0.0
 }
\ No newline at end of file
diff --git a/test/formatters/granite/testdata/test_canned_input/answerability_unanswerable.json b/test/formatters/granite/testdata/test_canned_input/answerability_unanswerable.json
index 8475fd979..4d390b598 100644
--- a/test/formatters/granite/testdata/test_canned_input/answerability_unanswerable.json
+++ b/test/formatters/granite/testdata/test_canned_input/answerability_unanswerable.json
@@ -30,5 +30,6 @@
       }
     }
   },
+  "temperature": 0.0,
   "max_completion_tokens": 6
 }
\ No newline at end of file
diff --git a/test/formatters/granite/testdata/test_canned_input/context_relevance.json b/test/formatters/granite/testdata/test_canned_input/context_relevance.json
index 05ecc0562..9ce036c51 100644
--- a/test/formatters/granite/testdata/test_canned_input/context_relevance.json
+++ b/test/formatters/granite/testdata/test_canned_input/context_relevance.json
@@ -30,5 +30,6 @@
         ]
       }
     }
-  }
+  },
+  "temperature": 0.0
 }
\ No newline at end of file