-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy path03_structured_extraction.py
More file actions
124 lines (105 loc) · 3.87 KB
/
03_structured_extraction.py
File metadata and controls
124 lines (105 loc) · 3.87 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
"""
Example 3: Structured Data Extraction (Chaperone Topology)
==========================================================
Demonstrates the Chaperone Protein pattern where raw LLM output
(unstructured text) is "folded" into a strict schema. If folding
fails, the output is rejected and can be retried.
This mirrors how biological chaperones force proteins to fold
correctly - misfolded proteins are tagged for degradation.
Topology:
    Raw Text --> [Chaperone] --> Valid Schema?
                      |               |
                      |              YES: Return structured data
                      |               |
                      +---- NO: Return error trace for retry
"""
from pydantic import BaseModel, Field
from operon_ai.organelles.chaperone import Chaperone
# Define the target schema - what we want to extract
class ContactInfo(BaseModel):
    """Structured contact information."""

    # Every field is required (explicit ``...`` marker, no default), so a
    # payload missing any of them fails validation.
    name: str = Field(..., description="Full name of the person")
    email: str = Field(..., description="Email address")
    phone: str = Field(..., description="Phone number")
    company: str = Field(..., description="Company or organization")
class MeetingRequest(BaseModel):
    """Structured meeting request."""

    # All four fields are required (explicit ``...`` marker, no default).
    title: str = Field(..., description="Meeting title")
    # Declared as a plain string; the YYYY-MM-DD shape is only described to
    # the caller here, not enforced by the annotation.
    date: str = Field(..., description="Date in YYYY-MM-DD format")
    duration_minutes: int = Field(..., description="Duration in minutes")
    attendees: list[str] = Field(..., description="List of attendee names")
def _report_fold(chaperone, heading, raw_text, schema):
    """Fold one raw payload into ``schema`` and print the outcome.

    Prints the section heading, folds the text, then prints the input, the
    validity flag and either the extracted structure (when valid) or the
    error trace (when not), followed by a blank line.

    Args:
        chaperone: Object exposing ``fold(text, schema)``; the returned
            result is read through its ``valid``, ``structure`` and
            ``error_trace`` attributes.
        heading: Section banner printed before folding.
        raw_text: Candidate text handed to ``fold`` and echoed as the input.
        schema: Target model class passed through to ``fold``.
    """
    print(heading)
    result = chaperone.fold(raw_text, schema)
    print(f"Input: {raw_text}")
    print(f"Valid: {result.valid}")
    # Report whichever side of the result matches the verdict, so an
    # unexpected rejection is never silent and an unexpected success does
    # not print a meaningless error line.
    if result.valid:
        print(f"Extracted: {result.structure}")
    else:
        print(f"Error: {result.error_trace}")
    print()


def main():
    """Run the Chaperone folding demo cases and print each verdict.

    Five payloads are folded in order: a valid contact, malformed JSON, a
    contact with missing fields, a valid meeting request and a meeting
    request with wrongly typed fields.
    """
    banner = "=" * 60
    print(banner)
    print("Structured Data Extraction - Chaperone Demo")
    print(banner)
    print()

    chaperone = Chaperone()

    # Test Case 1: Valid JSON that matches schema
    valid_contact = '''
{
"name": "Alice Johnson",
"email": "alice@example.com",
"phone": "+1-555-0123",
"company": "Acme Corp"
}
'''

    # Test Case 2: Invalid JSON (syntax error)
    malformed = '{"name": "Bob", email: broken}'

    # Test Case 3: Valid JSON but missing required fields
    incomplete = '{"name": "Charlie", "email": "charlie@test.com"}'

    # Test Case 4: Valid meeting request
    meeting = '''
{
"title": "Project Kickoff",
"date": "2025-01-15",
"duration_minutes": 60,
"attendees": ["Alice", "Bob", "Charlie"]
}
'''

    # Test Case 5: Wrong types
    wrong_types = '''
{
"title": "Meeting",
"date": "2025-01-15",
"duration_minutes": "one hour",
"attendees": "just me"
}
'''

    cases = [
        ("--- Test 1: Valid Contact Info ---", valid_contact, ContactInfo),
        ("--- Test 2: Malformed JSON ---", malformed, ContactInfo),
        ("--- Test 3: Missing Required Fields ---", incomplete, ContactInfo),
        ("--- Test 4: Valid Meeting Request ---", meeting, MeetingRequest),
        ("--- Test 5: Wrong Field Types ---", wrong_types, MeetingRequest),
    ]
    for heading, payload, schema in cases:
        # strip() removes the newlines that wrap the triple-quoted payloads;
        # it leaves the single-line payloads unchanged.
        _report_fold(chaperone, heading, payload.strip(), schema)

    print(banner)
    print("The Chaperone ensures only properly 'folded' data proceeds.")
    print("Misfolded outputs are caught before they corrupt downstream agents.")
    print(banner)
# Script entry point: run the demo only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()