generative-computing · nrfulton · May 1, 2026 · Apr 30, 2026 · Apr 30, 2026 · Apr 30, 2026
@@ -178,7 +178,7 @@ Intrinsics are specialized LoRA adapters that add task-specific capabilities (RA
 | `rag` | `rewrite_question(question, context, backend)` | Rewrite question into a retrieval query |
 | `rag` | `clarify_query(question, documents, context, backend)` | Generate clarification or return "CLEAR" |
 | `rag` | `find_citations(response, documents, context, backend)` | Document sentences supporting the response |
-| `rag` | `check_context_relevance(question, document, context, backend)` | Whether a document is relevant (0–1) |
+| `rag` | `check_context_relevance(question, document, context, backend)` | Whether a document is relevant (0–1); only supported for granite-4.0, not granite-4.1 |
 | `rag` | `flag_hallucinated_content(response, documents, context, backend)` | Flag potentially hallucinated sentences |
 
 ```python
@@ -187,7 +187,7 @@ from mellea.stdlib.components import Message
 from mellea.stdlib.components.intrinsic import core
 from mellea.stdlib.context import ChatContext
 
-backend = LocalHFBackend(model_id="ibm-granite/granite-4.0-micro")
+backend = LocalHFBackend(model_id="ibm-granite/granite-4.1-3b")
 context = (
     ChatContext()
     .add(Message("user", "What is the square root of 4?"))
@@ -223,5 +223,5 @@ https://huggingface.co/ibm-granite/granitelib-rag-r1.0/blob/main/{intrinsic_name
 
 Core, Guardian, and RAG intrinsics (include model and adapter-type subfolders):
 ```
-https://huggingface.co/ibm-granite/granitelib-{core,guardian}-r1.0/blob/main/{intrinsic_name}/granite-4.0-micro/README.md
+https://huggingface.co/ibm-granite/granitelib-{core,guardian,rag}-r1.0/blob/main/{intrinsic_name}/granite-4.1-{3b,8b,30b}/{lora,alora}/README.md
 ```
@@ -6,7 +6,7 @@
 if __name__ == "__main__":
     generate_readme(
         dataset_path="stembolt_failure_dataset.jsonl",
-        base_model="granite-4.0-micro",
+        base_model="granite-4.1-3b",
         prompt_file=None,
         output_path="stembolts_model_readme.md",
         name="your-username/stembolts-alora",

@@ -12,7 +12,7 @@
 from mellea.stdlib.components.intrinsic import rag
 
 ctx, backend = start_backend(
-    "hf", model_id=model_ids.IBM_GRANITE_4_MICRO_3B, context_type="chat"
+    "hf", model_id=model_ids.IBM_GRANITE_4_1_3B, context_type="chat"
 )
 # NOTE: This example can also be run with the OpenAIBackend using a GraniteSwitch model. See docs/examples/granite-switch/.
 

@@ -15,7 +15,7 @@
 from mellea.stdlib.components.intrinsic import rag
 
 ctx, backend = start_backend(
-    "hf", model_id=model_ids.IBM_GRANITE_4_MICRO_3B, context_type="chat"
+    "hf", model_id=model_ids.IBM_GRANITE_4_1_3B, context_type="chat"
 )
 # NOTE: This example can also be run with the OpenAIBackend using a GraniteSwitch model. See docs/examples/granite-switch/.
 

@@ -19,7 +19,7 @@
 from mellea.stdlib.components.intrinsic import core
 
 ctx, backend = start_backend(
-    "hf", model_id=model_ids.IBM_GRANITE_4_MICRO_3B, context_type="chat"
+    "hf", model_id=model_ids.IBM_GRANITE_4_1_3B, context_type="chat"
 )
 # NOTE: This example can also be run with the OpenAIBackend using a GraniteSwitch model. See docs/examples/granite-switch/.
 

@@ -14,7 +14,7 @@
 ctx, backend = start_backend(
     "hf", model_id=model_ids.IBM_GRANITE_4_MICRO_3B, context_type="chat"
 )
-# NOTE: This example can also be run with the OpenAIBackend using a GraniteSwitch model. See docs/examples/granite-switch/.
+# NOTE: This example uses Granite 4.0 micro because there is no context_relevance intrinsic for Granite 4.1.
 
 question = "Who is the CEO of Microsoft?"
 document = (

@@ -82,7 +82,7 @@
 
 # Create the backend.
 ctx, backend = start_backend(
-    "hf", model_id=model_ids.IBM_GRANITE_4_MICRO_3B, context_type="chat"
+    "hf", model_id=model_ids.IBM_GRANITE_4_1_3B, context_type="chat"
 )
 # NOTE: This example can also be run with the OpenAIBackend using a GraniteSwitch model. See docs/examples/granite-switch/.
 

@@ -25,7 +25,7 @@
 
 # Create the backend.
 ctx, backend = start_backend(
-    "hf", model_id=model_ids.IBM_GRANITE_4_MICRO_3B, context_type="chat"
+    "hf", model_id=model_ids.IBM_GRANITE_4_1_3B, context_type="chat"
 )
 # NOTE: This example can also be run with the OpenAIBackend using a GraniteSwitch model. See docs/examples/granite-switch/.
 

@@ -19,7 +19,7 @@
 from mellea.stdlib.components.intrinsic import guardian
 
 ctx, backend = start_backend(
-    "hf", model_id=model_ids.IBM_GRANITE_4_MICRO_3B, context_type="chat"
+    "hf", model_id=model_ids.IBM_GRANITE_4_1_3B, context_type="chat"
 )
 # NOTE: This example can also be run with the OpenAIBackend using a GraniteSwitch model. See docs/examples/granite-switch/.
 

@@ -15,7 +15,7 @@
 from mellea.stdlib.components.intrinsic import rag
 
 ctx, backend = start_backend(
-    "hf", model_id=model_ids.IBM_GRANITE_4_MICRO_3B, context_type="chat"
+    "hf", model_id=model_ids.IBM_GRANITE_4_1_3B, context_type="chat"
 )
 # NOTE: This example can also be run with the OpenAIBackend using a GraniteSwitch model. See docs/examples/granite-switch/.
 

@@ -1,6 +1,7 @@
 # pytest: huggingface, e2e
 
 import mellea.stdlib.functional as mfuncs
+from mellea.backends import model_ids
 from mellea.backends.adapters.adapter import AdapterType, IntrinsicAdapter
 from mellea.backends.huggingface import LocalHFBackend
 from mellea.stdlib.components import Intrinsic, Message
@@ -9,7 +10,7 @@
 # This is an example of how you would directly use intrinsics. See `mellea/stdlib/intrinsics/rag.py`
 # for helper functions.
 
-backend = LocalHFBackend(model_id="ibm-granite/granite-3.3-8b-instruct")
+backend = LocalHFBackend(model_id=model_ids.IBM_GRANITE_4_1_3B)
 # --- Alternative: OpenAI backend with Granite Switch (requires vLLM server) ---
 # Requires the adapter for this intrinsic to be embedded in the Granite Switch
 # model. See docs/examples/granite-switch/ for a full runnable example.
@@ -28,7 +29,7 @@
 
 # Create the Adapter. IntrinsicAdapters default to ALORAs.
 req_adapter = IntrinsicAdapter(
-    "requirement_check", base_model_name=backend.base_model_name
+    "requirement-check", base_model_name=backend.base_model_name
 )
 
 # Add the adapter to the backend.
@@ -42,15 +43,15 @@
 # ALORA and then LORA adapters.
 out, new_ctx = mfuncs.act(
     Intrinsic(
-        "requirement_check",
+        "requirement-check",
         intrinsic_kwargs={"requirement": "The assistant is helpful."},
     ),
     ctx,
     backend,
 )
 
-# Print the output. The requirement_check adapter has a specific output format:
-print(out)  # {"requirement_likelihood": 1.0}
+# Print the output. The requirement-check adapter has a specific output format:
+print(out)  # {"requirement_check": {"score": 0.41272119992000356}}
 
 # The AloraRequirement uses this adapter. It automatically parses that output
 # when validating the output.
@@ -17,7 +17,7 @@
 
 # Create the backend.
 ctx, backend = start_backend(
-    "hf", model_id=model_ids.IBM_GRANITE_4_MICRO_3B, context_type="chat"
+    "hf", model_id=model_ids.IBM_GRANITE_4_1_3B, context_type="chat"
 )
 # NOTE: This example can also be run with the OpenAIBackend using a GraniteSwitch model. See docs/examples/granite-switch/.
 

@@ -13,7 +13,7 @@
 from mellea.stdlib.components.intrinsic import rag
 
 ctx, backend = start_backend(
-    "hf", model_id=model_ids.IBM_GRANITE_4_MICRO_3B, context_type="chat"
+    "hf", model_id=model_ids.IBM_GRANITE_4_1_3B, context_type="chat"
 )
 # NOTE: This example can also be run with the OpenAIBackend using a GraniteSwitch model. See docs/examples/granite-switch/.
 

@@ -1,23 +1,13 @@
-# pytest: huggingface, e2e
-
-"""Example usage of the query rewrite intrinsic for RAG applications.
-
-To run this script from the root of the Mellea source tree, use the command:
-```
-uv run python docs/examples/intrinsics/query_rewrite.py
-```
-"""
-
 from mellea import model_ids, start_backend
 from mellea.stdlib.components import Message
 from mellea.stdlib.components.intrinsic import rag
 
 ctx, backend = start_backend(
-    "hf", model_id=model_ids.IBM_GRANITE_4_MICRO_3B, context_type="chat"
+    "hf", model_id=model_ids.IBM_GRANITE_4_1_3B, context_type="chat"
 )
 # NOTE: This example can also be run with the OpenAIBackend using a GraniteSwitch model. See docs/examples/granite-switch/.
 
-ctx = (
+ctx_with_question = (
     ctx.add(Message("assistant", "Welcome to pet questions!"))
     .add(
         Message(
@@ -34,12 +24,10 @@
             "probably enjoys her cozy indoor life.",
         )
     )
+    .add(Message("user", "But is he more likely to get fleas because of that?"))
 )
 
-next_user_turn = "But is he more likely to get fleas because of that?"
-ctx_with_question = ctx.add(Message("user", next_user_turn))
-
-print(f"Original user question: {next_user_turn}")
+print("Original user question: 'But is he more likely to get fleas because of that?'")
 
 result = rag.rewrite_question(None, ctx_with_question, backend)
 print(f"Rewritten user question: {result}")
@@ -40,7 +40,7 @@
 requirement = "Use a professional tone."
 
 ctx, backend = start_backend(
-    "hf", model_id=model_ids.IBM_GRANITE_4_MICRO_3B, context_type="chat"
+    "hf", model_id=model_ids.IBM_GRANITE_4_1_3B, context_type="chat"
 )
 # NOTE: This example can also be run with the OpenAIBackend using a GraniteSwitch model. See docs/examples/granite-switch/.
 

@@ -16,7 +16,7 @@
 from mellea.stdlib.components.intrinsic import core
 
 ctx, backend = start_backend(
-    "hf", model_id=model_ids.IBM_GRANITE_4_MICRO_3B, context_type="chat"
+    "hf", model_id=model_ids.IBM_GRANITE_4_1_3B, context_type="chat"
 )
 # NOTE: This example can also be run with the OpenAIBackend using a GraniteSwitch model. See docs/examples/granite-switch/.
 

@@ -33,6 +33,9 @@
     "ibm-granite/granite-3.3-2b-instruct": "granite-3.3-2b-instruct",
     "openai/gpt-oss-20b": "gpt-oss-20b",
     "ibm-granite/granite-4.0-micro": "granite-4.0-micro",
+    "ibm-granite/granite-4.1-3b": "granite-4.1-3b",
+    "ibm-granite/granite-4.1-8b": "granite-4.1-8b",
+    "ibm-granite/granite-4.1-30b": "granite-4.1-30b",
     "granite4:micro": "granite4_micro",
 }
 """Base model names that we accept for LoRA/aLoRA adapters in intrinsics libraries.

@@ -254,6 +254,7 @@ def _resolve_yaml(self):
 
 # Base models that are small enough to run locally with transformers
 _LOCAL_BASE_MODELS = {
+    "ibm-granite/granite-4.1-3b",
     "ibm-granite/granite-4.0-micro",
     "ibm-granite/granite-3.3-2b-instruct",
 }

@@ -39,7 +39,7 @@
 """Location of data files for the tests in this file."""
 
 
-BASE_MODEL = "ibm-granite/granite-4.0-micro"
+BASE_MODEL = "ibm-granite/granite-4.1-3b"
 
 
 @pytest.fixture(name="backend", scope="module")