Python SDK Reference
Full Python integration for Tapas — typed dataclasses, a synchronous client, async support via httpx, a LangChain wrapper, and batch query helpers. Works with Python 3.9+.
Installation
pip install tapas-ai
Install the core package. Add extras for async support or LangChain integration.
pip install tapas-ai # or with extras for async support: pip install tapas-ai[async] # or with LangChain integration: pip install tapas-ai[langchain]
Python version
3.9+
Core deps
requests, dataclasses-json
Async extra
httpx, anyio
Type Reference
All types are exported from tapas_ai and use Python dataclasses with Literal type hints for strict mode/routing discrimination.
from dataclasses import dataclass
from typing import Optional, List, Literal

# ─────────────────────────────────────────────────────────────────
# Core types exported from tapas-ai
# ─────────────────────────────────────────────────────────────────

# How a query was classified to a category (see TapasResponse.routing_method).
RoutingMethod = Literal["cosine", "smart-router", "hybrid"]
# How the answer was served: semantic-cache hit ("cache") or full inference ("llm").
ResponseMode = Literal["cache", "llm"]
@dataclass
class TapasResponse:
    """Full response returned by every Tapas query.

    Returned as a typed instance by TapasClient.ask(); ``mode``
    discriminates between a semantic-cache hit ("cache") and full LLM
    inference ("llm").
    """
    mode: ResponseMode                 # "cache" or "llm"
    lem_mode: bool                     # Whether Low Energy Mode was active
    answer: str                        # Full prose answer (LLM mode)
    bullets: Optional[List[str]]       # Bullet points (cache/LEM mode)
    category: str                      # e.g. "Quantum Computing"
    domain: str                        # e.g. "science_technology"
    confidence: float                  # Cosine similarity 0.0–1.0
    routing_method: RoutingMethod      # Classification method used
    energy_wh_used: float              # Watt-hours consumed
    energy_wh_saved: float             # Watt-hours saved vs. full inference
    response_time_ms: int              # End-to-end latency in ms
    cached_response_id: Optional[str]  # Cache entry ID (None for LLM)
@dataclass
class TapasAskOptions:
"""Options accepted by ask() and TapasClient."""
query: str # The user's question
lem_mode: bool = True # Enable Low Energy Mode
    domain: Optional[str] = None  # Hint the router to a domain slug

Field reference — TapasResponse:
- mode (Literal['cache', 'llm']): How the answer was served
- lem_mode (bool): Whether Low Energy Mode was active
- answer (str): Full prose answer (LLM mode)
- bullets (Optional[List[str]]): Bullet points (cache/LEM mode)
- category (str): Matched knowledge category name
- domain (str): Top-level domain slug
- confidence (float): Cosine similarity score 0.0–1.0
- routing_method (RoutingMethod): Classification method used
- energy_wh_used (float): Watt-hours consumed
- energy_wh_saved (float): Watt-hours saved vs. full inference
- response_time_ms (int): End-to-end latency in ms
- cached_response_id (Optional[str]): Cache entry ID (None for LLM)

TapasClient
The high-level synchronous client. Instantiate once and reuse across your application. Returns typed TapasResponse dataclass instances.
from tapas_ai import TapasClient

# Initialise once — reuse across your app
tapas = TapasClient(
    api_key="your-api-key",
    base_url="https://tapas.one",  # default
    timeout=8.0,                   # seconds
)

# ── Basic query ──────────────────────────────────────────────────
response = tapas.ask(
    query="How does quantum computing work?",
    lem_mode=True,
)

if response.mode == "cache":
    # Served from semantic cache — 0.001 Wh used
    print(f"Routing: {response.routing_method} @ {response.confidence:.2f}")
    print(f"Latency: {response.response_time_ms} ms")
    print(f"Saved: {response.energy_wh_saved:.3f} Wh")
    # bullets may be None even in cache mode — default to an empty list.
    for bullet in (response.bullets or []):
        print(f"• {bullet}")
else:
    # Full LLM inference — 3.0 Wh used
    print(f"Answer: {response.answer}")
    print(f"Category: {response.category} | {response.domain}")

Zero-dependency requests helper
No SDK required — call the Tapas REST API directly with the built-in requests library. Returns a plain dict matching the TapasResponse schema.
import requests
from dataclasses import dataclass
from typing import Optional, List
# Zero-dependency helper (no SDK required)
def ask_tapas(
    query: str,
    lem_mode: bool = True,
    base_url: str = "https://tapas.one",
) -> dict:
    """Send *query* to the Tapas tRPC endpoint and return the parsed dict.

    Zero-dependency helper — only ``requests`` is needed, no SDK. The
    returned dict follows the TapasResponse schema with camelCase keys
    (e.g. "energyWhSaved").

    Raises:
        requests.HTTPError: on a non-2xx HTTP response.
    """
    endpoint = f"{base_url}/api/trpc/query.ask"
    # tRPC wraps the input under a top-level "json" key.
    body = {"json": {"query": query, "lemMode": lem_mode}}
    reply = requests.post(endpoint, json=body, timeout=10)
    reply.raise_for_status()
    return reply.json()["result"]["data"]
# Usage
data = ask_tapas("What causes inflation?", lem_mode=True)
print(f"Mode: {data['mode']}")
print(f"Saved: {data['energyWhSaved']:.3f} Wh")
# bullets is only present/truthy for cache/LEM responses.
if data.get("bullets"):
    for b in data["bullets"]:
        print(f"• {b}")

Async support with httpx
Use httpx.AsyncClient for non-blocking queries in FastAPI, Starlette, or any async Python framework. Requires pip install tapas-ai[async].
import httpx
import asyncio
async def ask_tapas_async(
    query: str,
    lem_mode: bool = True,
    base_url: str = "https://tapas.one",
) -> dict:
    """Async twin of ask_tapas(), built on httpx.AsyncClient.

    Suitable for FastAPI / Starlette endpoints or any asyncio code; a
    fresh connection is opened and closed per call via the context
    manager.

    Raises:
        httpx.HTTPStatusError: on a non-2xx HTTP response.
    """
    endpoint = f"{base_url}/api/trpc/query.ask"
    # tRPC wraps the input under a top-level "json" key.
    body = {"json": {"query": query, "lemMode": lem_mode}}
    async with httpx.AsyncClient(timeout=10.0) as session:
        reply = await session.post(endpoint, json=body)
        reply.raise_for_status()
        return reply.json()["result"]["data"]
# Usage in an async context (e.g. FastAPI endpoint)
async def main():
    # Demo: one query, then print routing metadata and any bullets.
    data = await ask_tapas_async("How does CRISPR work?")
    print(f"Routing: {data['routingMethod']} | Saved: {data['energyWhSaved']:.3f} Wh")
    # bullets may be None/absent in LLM mode — default to an empty list.
    for b in (data.get("bullets") or []):
        print(f"• {b}")
asyncio.run(main())

LangChain integration
Drop TapasLLM into any LangChain chain or agent as a low-energy alternative to ChatOpenAI. Cached answers cost 3000× less energy than full inference. Requires pip install tapas-ai[langchain].
from langchain.llms.base import LLM
from langchain.callbacks.manager import CallbackManagerForLLMRun
from typing import Any, Optional, List
import requests
class TapasLLM(LLM):
    """LangChain-compatible LLM wrapper for Tapas.

    Drop this in anywhere you'd use ChatOpenAI or Anthropic —
    Tapas will serve cached answers at 0.001 Wh instead of 3.0 Wh.
    """

    base_url: str = "https://tapas.one"
    lem_mode: bool = True
    api_key: Optional[str] = None

    @property
    def _llm_type(self) -> str:
        # Identifier LangChain uses to label this LLM implementation.
        return "tapas"

    def _call(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> str:
        # POST the prompt to the tRPC endpoint; fail loudly on HTTP errors.
        reply = requests.post(
            f"{self.base_url}/api/trpc/query.ask",
            json={"json": {"query": prompt, "lemMode": self.lem_mode}},
            timeout=10,
        )
        reply.raise_for_status()
        payload = reply.json()["result"]["data"]
        # Cache hits carry bullet points — render them as a single string.
        bullets = payload.get("bullets")
        if payload["mode"] == "cache" and bullets:
            return "\n".join(f"• {b}" for b in bullets)
        # Otherwise return the full prose answer from LLM mode.
        return payload["answer"]
# ── Usage ────────────────────────────────────────────────────────
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate

tapas = TapasLLM(lem_mode=True)
chain = LLMChain(
    llm=tapas,
    prompt=PromptTemplate.from_template("{question}"),
)
# chain.run returns the string produced by TapasLLM._call.
result = chain.run(question="What is the Higgs boson?")
print(result)

Error handling
All SDK exceptions inherit from TapasError. Catch specific subclasses for granular retry logic.
from tapas_ai import TapasClient
from tapas_ai.exceptions import (
TapasError,
TapasRateLimitError,
TapasNetworkError,
TapasTimeoutError,
)
tapas = TapasClient(api_key="your-api-key")

try:
    response = tapas.ask(query="What is inflation?", lem_mode=True)
    print(response.bullets)
# Specific subclasses first; the TapasError base class catches the rest.
except TapasRateLimitError as e:
    # HTTP 429 — back off and retry
    print(f"Rate limited. Retry after: {e.retry_after}s")
except TapasTimeoutError:
    # Request exceeded timeout threshold
    print("Request timed out — check network or increase timeout")
except TapasNetworkError as e:
    # DNS failure, connection refused, etc.
    print(f"Network error: {e}")
except TapasError as e:
    # All other Tapas API errors (4xx, 5xx)
    print(f"Tapas error {e.status_code}: {e.message}")

Exception reference:
- TapasError — Base class for all SDK errors (any 4xx/5xx)
- TapasRateLimitError — Too many requests (HTTP 429)
- TapasNetworkError — DNS / connection failure (network)
- TapasTimeoutError — Request exceeded timeout (timeout)

Batch queries
Process multiple questions in one go. Use the async concurrent version for maximum throughput — it fires all requests simultaneously and collects results in order.
from tapas_ai import TapasClient
import asyncio
import httpx
tapas = TapasClient(api_key="your-api-key")

# ── Synchronous batch (sequential) ──────────────────────────────
questions = [
    "How does photosynthesis work?",
    "What is compound interest?",
    "Explain TCP/IP in simple terms.",
    "What causes climate change?",
]

# One blocking request per question — total latency is the sum of all calls.
results = [tapas.ask(q, lem_mode=True) for q in questions]
total_saved = sum(r.energy_wh_saved for r in results)
print(f"Batch complete. Total saved: {total_saved:.3f} Wh")
# ── Async batch (concurrent, much faster) ───────────────────────
async def batch_ask(
    questions: list[str],
    base_url: str = "https://tapas.one",
) -> list[dict]:
    """Send one Tapas query per question concurrently; return the parsed
    response dicts in the same order as *questions*.

    All requests share one httpx.AsyncClient (connection pooling), and
    asyncio.gather preserves input order in its result list.

    Args:
        questions: The queries to send.
        base_url: Tapas instance to target (defaults to the public API,
            matching ask_tapas / ask_tapas_async).

    Raises:
        httpx.HTTPStatusError: if any request returns a non-2xx status.
    """
    url = f"{base_url}/api/trpc/query.ask"
    async with httpx.AsyncClient(timeout=15.0) as client:
        tasks = [
            client.post(url, json={"json": {"query": q, "lemMode": True}})
            for q in questions
        ]
        responses = await asyncio.gather(*tasks)
    # Fix: check HTTP status before parsing. The original parsed every body
    # unchecked, so an HTTP error surfaced as an opaque JSON/KeyError instead
    # of the explicit failure the other helpers raise via raise_for_status().
    for r in responses:
        r.raise_for_status()
    return [r.json()["result"]["data"] for r in responses]
# Run the concurrent batch and report per-question mode and savings.
results = asyncio.run(batch_ask(questions))
for q, r in zip(questions, results):
    # r uses camelCase keys (raw API schema), unlike the TapasResponse dataclass.
    print(f"[{r['mode']:5s}] {q[:40]:<40} | {r['energyWhSaved']:.3f} Wh saved")