PlugLLM gives you a single, clean Python interface for 13+ AI providers — OpenAI, Gemini, Groq, Claude, Mistral, Ollama & more. Built-in memory, streaming, async. Switch providers in one line.
from plugllm import ChatOpenAI, ChatGroq, LLMFactory

# ── Direct provider usage ──────────────────
llm = ChatOpenAI(api_key="sk-...", model="gpt-4o")
reply = llm.ask("What is machine learning?")
print(reply)

# ── Factory: switch providers in 1 line ───
llm = LLMFactory.create(
    "groq", api_key="gsk_...",
    model="llama-3.3-70b-versatile"
)
print(llm.generate("Explain AI in one sentence"))

# ── Built-in session memory ────────────────
# max_history bounds how many turns are kept per session
chat = ChatGroq(api_key="gsk_...", max_history=10)
chat.chat("My name is Alice", session_id="u1")
response = chat.chat("What's my name?", session_id="u1")
# → "Your name is Alice!"

# ── Real-time streaming ────────────────────
for chunk in llm.stream("Tell me a story"):
    print(chunk, end="", flush=True)
A focused, production-ready library that does one thing exceptionally well — unified LLM access.
Uniform `.generate()`, `.chat()`, `.stream()`, and `.ask()` methods across every provider — swap OpenAI for Groq in a one-character change. Set `max_history=20` and forget about context management. `for chunk in llm.stream(prompt)` just works for every provider — perfect for chat UIs. Full async API: `agenerate()`, `achat()`, `astream()`, `aask()` — compatible with asyncio, FastAPI, and any async framework.
import asyncio
from plugllm import ChatGemini

async def main():
    llm = ChatGemini()
    # One-shot async completion
    result = await llm.agenerate("Hello!")
    # Async streaming — chunks arrive as they are generated
    async for chunk in llm.astream("Tell a story"):
        print(chunk, end="", flush=True)

asyncio.run(main())
Create any provider dynamically with `LLMFactory.create("groq", ...)`. Chain configuration fluently with `.with_system()`, `.with_temperature(0.7)`, `.call()`. Typed errors — `AuthenticationError`, `RateLimitError`, `HTTPStatusError` — plus built-in retries, configurable timeouts, and a structured `ChatResponse`. From cloud giants to local models — all accessible through a single unified interface.
| Provider | Class | Links |
|---|---|---|
| OpenAI | ChatOpenAI | Get Key → |
| Google Gemini | ChatGemini | Get Key → Free |
| Groq | ChatGroq | Get Key → Free |
| Anthropic Claude | ChatClaude | Get Key → |
| Mistral AI | ChatMistral | Get Key → Free |
| xAI Grok | ChatGrok | Get Key → |
| DeepSeek | ChatDeepSeek | Get Key → |
| Ollama | ChatOllama | Install → Local |
| Cohere | ChatCohere | Get Key → |
| Alibaba Qwen | ChatQwen | Get Key → |
| Moonshot Kimi | ChatKimi | Get Key → |
| Meta Llama | ChatLlama | Flexible |
| SarvamAI | ChatSarvam | Get Key → Free |
Focused simplicity beats heavyweight orchestration for most production AI apps.
| Capability | PlugLLM |
|---|---|
| Switch provider in 1 line | ✅ Yes — change class or factory string |
| Built-in memory | ✅ Native deque, per-session |
| Streaming (sync + async) | ✅ Both sync & async, unified |
| Package footprint | ✅ <5K LoC ultra-light |
| Learning curve | ✅ Minutes |
A complete, hands-on guide covering installation through advanced async patterns.
# Install PlugLLM from PyPI
pip install plugllm
# Optional: install with dev dependencies (tests, linting)
pip install "plugllm[dev]"
# Set API keys via environment variables (only for the providers you use)
export OPENAI_API_KEY="sk-..."
export GEMINI_API_KEY="AIza..."
export GROQ_API_KEY="gsk_..."
export ANTHROPIC_API_KEY="sk-ant-..."
from plugllm import ChatOpenAI, ChatGroq, LLMFactory
# .generate() — simple one-shot text completion
llm = ChatOpenAI(model="gpt-4o")
text = llm.generate("What is the difference between AI and ML?")
# .ask() — Q&A with an optional system prompt and generation kwargs
answer = llm.ask(
"Explain transformers architecture",
system_prompt="You are a senior ML engineer. Be concise.",
max_tokens=300
)
print(answer.content) # ChatResponse object — the response text
print(answer.model) # "gpt-4o"
# LLMFactory — dynamic provider switching by name string
llm2 = LLMFactory.create("gemini", model="gemini-2.5-flash")
result = llm2.generate("Summarize the Transformer paper")
# Fluent chaining interface — configure, then call
response = (
ChatGroq()
.with_system("You are a Python expert.")
.with_temperature(0.2)
.call("Write a binary search function")
)
from plugllm import ChatOpenAI
llm = ChatOpenAI(model="gpt-4o-mini", max_history=20)
# Multi-user session isolation — each session_id gets its own history
llm.chat("My name is Alice, I live in Paris", session_id="alice")
llm.chat("My favourite language is Python", session_id="alice")
llm.chat("I'm Bob, a data scientist in Tokyo", session_id="bob")
# Each session has completely isolated memory
print(llm.chat("Where do I live?", session_id="alice"))
# → "You live in Paris."
print(llm.chat("What do I work with?", session_id="bob"))
# → e.g. "You work as a data scientist."
# Clear memory for a single session without touching the others
llm.clear_history(session_id="alice")
history = llm.get_history(session_id="bob")
from plugllm import ChatGroq

llm = ChatGroq(model="llama-3.3-70b-versatile")

# Synchronous streaming — print chunks as they arrive
for chunk in llm.stream("Write a short poem about Python"):
    print(chunk, end="", flush=True)

# Stream with a system prompt
for chunk in llm.stream(
    "Explain async/await in Python",
    system_prompt="You are a concise technical writer."
):
    print(chunk, end="", flush=True)
import asyncio
from plugllm import ChatGemini, ChatGroq, ChatOpenAI

async def parallel_summarize(text: str) -> dict:
    """Send the same summarization prompt to three providers concurrently.

    Returns a dict mapping provider label -> summary text.
    """
    providers = {
        "GPT-4o": ChatOpenAI(model="gpt-4o-mini"),
        "Gemini": ChatGemini(model="gemini-2.5-flash"),
        "Llama": ChatGroq(model="llama-3.3-70b-versatile"),
    }
    prompt = f"Summarize in 2 sentences:\n{text}"
    # Start every request before awaiting any, so they run in parallel
    tasks = {name: llm.agenerate(prompt) for name, llm in providers.items()}
    results = await asyncio.gather(*tasks.values())
    return dict(zip(tasks.keys(), results))

async def main():
    summaries = await parallel_summarize(
        "Transformer architecture uses self-attention..."
    )
    for provider, summary in summaries.items():
        print(f"\n[{provider}]\n{summary}")

asyncio.run(main())
Three complete production-ready projects you can run right now.
from plugllm import ChatOpenAI, ChatGroq, ChatGemini

def multi_summarize(text: str, max_tokens: int = 120):
    """Summarize *text* with three providers for side-by-side comparison.

    Returns a dict: provider label -> {"summary": str, "tokens": usage dict}.
    """
    providers = {
        "GPT-4o": ChatOpenAI(model="gpt-4o-mini"),
        "Groq": ChatGroq(model="llama-3.3-70b-versatile"),
        "Gemini": ChatGemini(model="gemini-2.5-flash"),
    }
    results = {}
    prompt = f"Summarize this concisely:\n\n{text}"
    for name, llm in providers.items():
        r = llm.ask(prompt, max_tokens=max_tokens)
        results[name] = {"summary": r.content, "tokens": r.usage}
    return results
from plugllm import ChatMistral

SYSTEM = """You are a helpful support agent.
Be empathetic, solution-focused, and brief."""

class SupportBot:
    """Multi-user support bot with per-user conversation memory."""

    def __init__(self):
        self.llm = ChatMistral(
            model="mistral-large-latest",
            max_history=15,  # keep only the most recent turns per session
        )

    def handle(self, user_id: str, message: str) -> str:
        """Answer *message* inside this user's isolated session."""
        return self.llm.chat(
            message, session_id=user_id,
            system_prompt=SYSTEM,
        )

    def reset(self, user_id: str):
        """Forget this user's conversation history."""
        self.llm.clear_history(session_id=user_id)
from plugllm import ChatOllama

# Requires a local model: ollama pull deepseek-coder
assistant = ChatOllama(model="deepseek-coder:6.7b")

SYSTEM = """You are an expert code reviewer.
Explain clearly, identify bugs, and suggest
improvements with specific line references."""

def explain_code(code: str) -> str:
    """Explain what *code* does and give a review, as plain text."""
    return assistant.ask(
        f"Explain and review:\n\n```python\n{code}\n```",
        system_prompt=SYSTEM,
    ).content

def review_code(code: str) -> str:
    """Surface bugs and concrete improvements for *code*, as plain text."""
    return assistant.ask(
        f"Find bugs and improvements:\n\n```python\n{code}\n```",
        system_prompt=SYSTEM,
    ).content
Every method, parameter, and return type — all in one place.
Return types: `.generate()` → str, `.ask()` → ChatResponse, `.chat()` → ChatResponse, `.stream()` → str chunks.
response = llm.ask("What is Python?")
response.content # str — the text of the reply
response.model # str — model that produced it
response.usage # dict — token counts
response.raw_response # dict — full provider API response
response.finish_reason # str — e.g. "stop", "length"
# Message factory helpers — build role-tagged messages
from plugllm.types import Message
msg = Message.user("Hello!")
msg = Message.assistant("Hi there!")
msg = Message.system("You are an expert.")
from plugllm import ChatOpenAI
from plugllm.types import (
    AuthenticationError,
    RateLimitError,
    HTTPStatusError,
    PlugLLMError,
)

llm = ChatOpenAI(max_retries=3)
try:
    response = llm.generate("Hello!")
except AuthenticationError as e:
    print(f"Invalid API key: {e}")
except RateLimitError as e:
    print(f"Rate limited: {e}")
except HTTPStatusError as e:
    print(f"HTTP {e.status_code}: {e}")
except PlugLLMError as e:
    # Catch-all base class for any other library error
    print(f"General error: {e}")
One command to install. One import to access every major LLM. Zero vendor lock-in.