Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/examples/intrinsics/query_clarification.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# pytest: huggingface, requires_heavy_ram, llm
"""
Example usage of the query clarification intrinsic for RAG applications.

Expand Down
2 changes: 1 addition & 1 deletion docs/examples/mini_researcher/researcher.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# pytest: ollama, qualitative, llm
# pytest: ollama, qualitative, llm, slow

from collections.abc import Callable
from functools import cache
Expand Down
4 changes: 4 additions & 0 deletions mellea/backends/ollama.py
Original file line number Diff line number Diff line change
Expand Up @@ -470,6 +470,10 @@ async def generate_from_raw(
result = None
error = None
if isinstance(response, BaseException):
FancyLogger.get_logger().warning(
f"generate_from_raw: request {i} failed with "
f"{type(response).__name__}: {response}"
)
result = ModelOutputThunk(value="")
error = response
else:
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -251,7 +251,7 @@ markers = [
"requires_gpu: Tests requiring GPU",
"requires_heavy_ram: Tests requiring 48GB+ RAM",
"qualitative: Non-deterministic quality tests",
"slow: Tests taking >5 minutes (e.g., dataset loading)",
"slow: Tests taking >1 minute (e.g., multi-step pipelines like researcher)",

# Composite markers
"llm: Tests that make LLM calls (needs at least Ollama)",
Expand Down
27 changes: 17 additions & 10 deletions test/backends/test_ollama.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ class Email(pydantic.BaseModel):
output = session.instruct(
"Write a short email to Olivia, thanking her for organizing a sailing activity. Her email server is example.com. No more than two sentences. ",
format=Email,
model_options={ModelOption.MAX_NEW_TOKENS: 2**8},
model_options={ModelOption.MAX_NEW_TOKENS: 2**10},
)
print("Formatted output:")
email = Email.model_validate_json(
Expand All @@ -102,15 +102,20 @@ class Email(pydantic.BaseModel):


@pytest.mark.qualitative
@pytest.mark.timeout(150)
async def test_generate_from_raw(session) -> None:
prompts = ["what is 1+1?", "what is 2+2?", "what is 3+3?", "what is 4+4?"]

results = await session.backend.generate_from_raw(
actions=[CBlock(value=prompt) for prompt in prompts], ctx=session.ctx
actions=[CBlock(value=prompt) for prompt in prompts],
ctx=session.ctx,
model_options={ModelOption.CONTEXT_WINDOW: 2048},
)

assert len(results) == len(prompts)
assert results[0].value is not None
assert all(r.value for r in results), (
f"One or more requests returned empty (possible backend timeout): {[r.value for r in results]}"
)


@pytest.mark.xfail(reason="ollama sometimes fails generated structured outputs")
Expand All @@ -125,17 +130,19 @@ class Answer(pydantic.BaseModel):
actions=[CBlock(value=prompt) for prompt in prompts],
ctx=session.ctx,
format=Answer,
model_options={ModelOption.CONTEXT_WINDOW: 2048},
)

assert len(results) == len(prompts)
assert all(r.value for r in results), (
f"One or more requests returned empty (possible backend timeout): {[r.value for r in results]}"
)

random_result = results[0]
try:
Answer.model_validate_json(random_result.value)
except pydantic.ValidationError as e:
assert False, (
f"formatting directive failed for {random_result.value}: {e.json()}"
)
for result in results:
try:
Answer.model_validate_json(result.value)
except pydantic.ValidationError as e:
assert False, f"formatting directive failed for {result.value}: {e.json()}"


async def test_async_parallel_requests(session) -> None:
Expand Down
2 changes: 1 addition & 1 deletion test/backends/test_openai_ollama.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ class Email(pydantic.BaseModel):
output = m_session.instruct(
"Write a short email to Olivia, thanking her for organizing a sailing activity. Her email server is example.com. No more than two sentences. ",
format=Email,
model_options={ModelOption.MAX_NEW_TOKENS: 2**8},
model_options={ModelOption.MAX_NEW_TOKENS: 2**10},
)
print("Formatted output:")
email = Email.model_validate_json(
Expand Down