# One API, Multiple Providers
response = await llm.generate("Explain quantum computing")
# Switch providers instantly
llm.switch_to_claude() # Anthropic Claude
llm.switch_to_openai() # OpenAI GPT models
llm.switch_to_google() # Google Gemini models
llm.switch_to_bedrock() # 80+ AWS Bedrock models
# Same interface, different capabilities
async for chunk in llm.stream_generate("Write a story"):
    print(chunk, end="")
- 🎯 Unified API - One interface for Claude, OpenAI, AWS Bedrock and other LLMs
- 🔄 Dynamic Model Switching - Change models/providers without code changes
- 🔍 Smart Model Discovery - Find the right model for your task automatically
- ⚡ Async & Streaming - Full async/await support with real-time streaming
- 💰 Cost Tracking - Monitor API costs across all providers
- 🔧 Parameter Presets - Pre-configured settings for common use cases
- 🛡️ Production Ready - Retry logic, error handling, and monitoring
- 🎨 Type Safe - Full type hints and Pydantic models
- 📚 Comprehensive Examples - 30+ real-world examples and guides
pip install multi-llm-sdk
import asyncio
from multi_llm_sdk import MultiLLM
async def main():
    # Initialize client
    llm = MultiLLM()
    # Setup your preferred provider
    llm.setup_claude("your-claude-api-key")
    # OR llm.setup_openai("your-openai-api-key")
    # OR llm.setup_google("your-google-api-key")
    # OR llm.setup_grok("your-xai-api-key")
    # OR llm.setup_bedrock(region_name="us-east-1") # Uses AWS credentials
    # Generate text with any model
    response = await llm.generate("Explain quantum computing in simple terms")
    print(response.content)
asyncio.run(main())
from multi_llm_sdk import Message
messages = [
Message(role="system", content="You are a helpful AI assistant"),
Message(role="user", content="What's the weather like?"),
Message(role="assistant", content="I don't have access to current weather data..."),
Message(role="user", content="How can I check the weather programmatically?")
]
response = await llm.chat(messages)
print(response.content)
# Stream responses in real-time
async for chunk in llm.stream_generate("Write a short story about AI"):
    print(chunk, end="", flush=True)
Full support for AWS Bedrock models with automatic region optimization:
# Setup AWS Bedrock (uses your AWS credentials)
llm.setup_bedrock(region_name="us-east-1")
# Use any Bedrock model with the same API
models = [
"anthropic.claude-sonnet-4-20250514-v1:0", # Latest Claude 4
"meta.llama3-1-405b-instruct-v1:0", # Llama 405B
"amazon.nova-premier-v1:0", # Multimodal Nova
"mistral.mistral-large-2407-v1:0", # Mistral Large
"ai21.jamba-1-5-large-v1:0", # Jamba 1.5
]
for model in models:
    response = await llm.generate("Hello!", model_override=model)
    print(f"{model}: {response.content}")
Find the perfect model for your use case:
# Discover models by capabilities
fast_models = llm.find_models(speed_tier="fast", cost_tier="low")
vision_models = llm.find_models(capabilities=["vision"])
code_models = llm.find_models(capabilities=["code_generation"])
# Get AI recommendations
best_for_code = llm.recommend_model("code", priority="quality")
best_for_chat = llm.recommend_model("chat", priority="speed")
best_for_analysis = llm.recommend_model("analysis", priority="cost")
print(f"Best coding model: {best_for_code.id}")
print(f"Best chat model: {best_for_chat.id}")
Pre-configured settings for common tasks:
from multi_llm_sdk import ParameterPresets
# Creative writing
creative_config = ParameterPresets.creative_writing(api_key="your-key")
story = await llm.generate("Write a sci-fi story", creative_config)
# Code generation
code_config = ParameterPresets.code_generation(api_key="your-key")
function = await llm.generate("Create a Python sorting function", code_config)
# Research & analysis
research_config = ParameterPresets.research_assistant(api_key="your-key")
analysis = await llm.generate("Analyze market trends in AI", research_config)
# Fast responses
fast_config = ParameterPresets.fast_response(api_key="your-key")
quick_answer = await llm.generate("What is 2+2?", fast_config)
Switch models on the fly:
# Load specific models dynamically
config = llm.load_model("claude-3-5-sonnet-20241022", "your-api-key")
detailed_response = await llm.generate("Complex analysis task", config)
# Switch between models
llm.switch_model("claude-3-5-sonnet-20241022") # Fast model
quick_response = await llm.generate("Simple question")
llm.switch_model("claude-sonnet-4-20250514") # Powerful model
complex_response = await llm.generate("Complex reasoning task")
Fine-tune model behavior:
from multi_llm_sdk import LLMConfig
config = LLMConfig(
provider="claude",
model="claude-3-5-sonnet-20241022",
api_key="your-key",
# Generation parameters
temperature=0.8, # Creativity
max_tokens=2000, # Response length
top_p=0.95, # Nucleus sampling
# Advanced features
thinking_mode="chain_of_thought",
safety_level="high",
show_reasoning=True,
# Performance
cache_enabled=True,
max_retries=3,
timeout=30.0
)
response = await llm.generate("Solve this complex problem", config)
Monitor your API usage and costs:
from multi_llm_sdk.monitoring import performance_monitor, cost_tracker
# Get performance metrics
metrics = performance_monitor.get_summary()
print(f"Total requests: {metrics['total_requests']}")
print(f"Average response time: {metrics['avg_response_time']:.2f}s")
print(f"Success rate: {metrics['success_rate']:.1%}")
# Get cost breakdown
costs = cost_tracker.get_cost_summary()
print(f"Total estimated cost: ${costs['total']['estimated_cost']:.2f}")
print(f"Cost by provider: {costs['by_provider']}")
print(f"Most expensive model: {costs['most_expensive_model']}")
Check out the examples/ directory for complete examples:
- 📚 Comprehensive Guides
  - comprehensive_claude_example.py - Complete Claude guide with 10 real-world examples
  - comprehensive_openai_example.py - Complete OpenAI guide with 11 practical examples
  - comprehensive_bedrock_example.py - Complete AWS Bedrock guide with multi-model examples
  - comprehensive_google_example.py - Complete Google Gemini guide with multimodal examples
  - comprehensive_grok_example.py - Complete xAI Grok guide with real-time information examples
git clone https://github.com/yourusername/multi-llm-sdk.git
cd multi-llm-sdk
pip install -e ".[dev]"
pip install -r requirements.txt
# Set your API keys (choose your preferred providers)
export ANTHROPIC_API_KEY="your-claude-api-key"
export OPENAI_API_KEY="your-openai-api-key"
export GOOGLE_API_KEY="your-google-api-key"
export XAI_API_KEY="your-xai-api-key"
# For AWS Bedrock (or use AWS credential chain)
export AWS_ACCESS_KEY_ID="your-access-key"
export AWS_SECRET_ACCESS_KEY="your-secret-key"
export AWS_REGION="eu-central-1"
from multi_llm_sdk import MultiLLM
# Quick test
llm = MultiLLM()
print("✅ Multi-LLM SDK installed successfully!")
# Run all tests
pytest
# Run with coverage
pytest --cov=multi_llm_sdk --cov-report=html
# Run specific test files
pytest tests/test_client.py -v
pytest tests/test_models.py -v
# Test real API integrations (requires API keys)
pytest examples/test_*.py -v
# Clone and setup development environment
git clone https://github.com/yourusername/multi-llm-sdk.git
cd multi-llm-sdk
pip install -e ".[dev]"
# Run formatting and linting
black multi_llm_sdk/ examples/ tests/
isort multi_llm_sdk/ examples/ tests/
mypy multi_llm_sdk/
# Run comprehensive tests
python examples/comprehensive_claude_example.py
python examples/comprehensive_openai_example.py
python examples/comprehensive_bedrock_example.py
python examples/comprehensive_google_example.py
python examples/comprehensive_grok_example.py
- Async/Await: Full async support for concurrent requests
- Connection Pooling: Efficient HTTP connection reuse
- Retry Logic: Automatic retries with exponential backoff
- Rate Limiting: Built-in rate limit handling
- Caching: Response caching for duplicate requests
- Monitoring: Performance and cost tracking
We welcome contributions! Please see CONTRIBUTING.md for guidelines.
# Clone and setup
git clone https://github.com/yourusername/multi-llm-sdk.git
cd multi-llm-sdk
pip install -e ".[dev]"
# Run tests
pytest
# Format code
black multi_llm_sdk/
isort multi_llm_sdk/
# Type checking
mypy multi_llm_sdk/
This project is licensed under the MIT License - see the LICENSE file for details.