Python SDK — tapas-ai 0.1

Python SDK Reference

Full Python integration for Tapas — typed dataclasses, a synchronous client, async support via httpx, a LangChain wrapper, and batch query helpers. Works with Python 3.9+.

pip install tapas-ai

Installation

Install the core package. Add extras for async support or LangChain integration.

Terminal
bash
pip install tapas-ai
# or with extras for async support:
pip install tapas-ai[async]
# or with LangChain integration:
pip install tapas-ai[langchain]

Python version

3.9+

Core deps

requests, dataclasses-json

Async extra

httpx, anyio

Type Reference

All types are exported from tapas_ai and use Python dataclasses with Literal type hints for strict mode/routing discrimination.

tapas_ai/types.py
python
from dataclasses import dataclass
from typing import Optional, List, Literal

# ─────────────────────────────────────────────────────────────────
# Core types exported from tapas-ai
# ─────────────────────────────────────────────────────────────────

# How the router classified the query to a knowledge category.
RoutingMethod = Literal["cosine", "smart-router", "hybrid"]
# How the answer was served: semantic-cache hit or full LLM inference.
ResponseMode = Literal["cache", "llm"]

@dataclass
class TapasResponse:
    """Typed payload returned by every Tapas query.

    Depending on ``mode``, either ``answer`` (full prose, LLM mode) or
    ``bullets`` (cache / Low Energy Mode) carries the content.
    """

    # How the answer was served: "cache" or "llm".
    mode: ResponseMode
    # True when Low Energy Mode was active for this request.
    lem_mode: bool
    # Full prose answer (populated in LLM mode).
    answer: str
    # Bullet-point answer (populated in cache/LEM mode).
    bullets: Optional[List[str]]
    # Matched knowledge category, e.g. "Quantum Computing".
    category: str
    # Top-level domain slug, e.g. "science_technology".
    domain: str
    # Cosine-similarity score in the range 0.0–1.0.
    confidence: float
    # Which classification method produced the match.
    routing_method: RoutingMethod
    # Watt-hours consumed serving this query.
    energy_wh_used: float
    # Watt-hours saved versus full inference.
    energy_wh_saved: float
    # End-to-end latency in milliseconds.
    response_time_ms: int
    # ID of the cache entry that served the answer (None in LLM mode).
    cached_response_id: Optional[str]

@dataclass
class TapasAskOptions:
    """Options accepted by ask() and TapasClient."""

    # The user's question (required).
    query: str
    # Enable Low Energy Mode (serve cached bullet answers when possible).
    lem_mode: bool = True
    # Optional hint steering the router toward a specific domain slug.
    domain: Optional[str] = None
TapasResponse field reference

| Field              | Type                    | Description                         |
|--------------------|-------------------------|-------------------------------------|
| mode               | Literal['cache', 'llm'] | How the answer was served           |
| lem_mode           | bool                    | Whether Low Energy Mode was active  |
| answer             | str                     | Full prose answer (LLM mode)        |
| bullets            | Optional[List[str]]     | Bullet points (cache/LEM mode)      |
| category           | str                     | Matched knowledge category name     |
| domain             | str                     | Top-level domain slug               |
| confidence         | float                   | Cosine similarity score 0.0–1.0     |
| routing_method     | RoutingMethod           | Classification method used          |
| energy_wh_used     | float                   | Watt-hours consumed                 |
| energy_wh_saved    | float                   | Watt-hours saved vs. full inference |
| response_time_ms   | int                     | End-to-end latency in ms            |
| cached_response_id | Optional[str]           | Cache entry ID (None for LLM)       |

TapasClient

The high-level synchronous client. Instantiate once and reuse across your application. Returns typed TapasResponse dataclass instances.

client_example.py
python
from tapas_ai import TapasClient

# Initialise once — reuse across your app
tapas = TapasClient(
    api_key="your-api-key",
    base_url="https://tapas.one",  # default
    timeout=8.0,                   # seconds
)

# ── Basic query ──────────────────────────────────────────────────
# Returns a typed TapasResponse dataclass (see the Type Reference above).
response = tapas.ask(
    query="How does quantum computing work?",
    lem_mode=True,
)

if response.mode == "cache":
    # Served from semantic cache — 0.001 Wh used
    print(f"Routing: {response.routing_method} @ {response.confidence:.2f}")
    print(f"Latency: {response.response_time_ms} ms")
    print(f"Saved:   {response.energy_wh_saved:.3f} Wh")
    # bullets may be None — guard with `or []` before iterating.
    for bullet in (response.bullets or []):
        print(f"• {bullet}")
else:
    # Full LLM inference — 3.0 Wh used
    print(f"Answer:   {response.answer}")
    print(f"Category: {response.category} | {response.domain}")

Zero-dependency requests helper

No SDK required — call the Tapas REST API directly using the `requests` library (the snippet's only dependency). Returns a plain dict matching the TapasResponse schema.

requests_helper.py
python
import requests
from dataclasses import dataclass
from typing import Optional, List

# Zero-dependency helper (no SDK required)
def ask_tapas(
    query: str,
    lem_mode: bool = True,
    base_url: str = "https://tapas.one",
    timeout: float = 10.0,
) -> dict:
    """Send a query to Tapas and return the parsed response dict.

    Args:
        query: The user's question.
        lem_mode: Enable Low Energy Mode (prefer cached bullet answers).
        base_url: Tapas deployment to target.
        timeout: Request timeout in seconds (previously hard-coded to 10;
            now tunable while keeping the same default).

    Returns:
        The raw response dict — keys are camelCase (e.g. ``energyWhSaved``),
        matching the TapasResponse schema.

    Raises:
        requests.HTTPError: On any 4xx/5xx response.
    """
    url = f"{base_url}/api/trpc/query.ask"
    payload = {
        "json": {
            "query": query,
            "lemMode": lem_mode,
        }
    }
    resp = requests.post(url, json=payload, timeout=timeout)
    resp.raise_for_status()
    return resp.json()["result"]["data"]

# Usage — note the raw API dict uses camelCase keys (energyWhSaved),
# unlike the snake_case fields on the SDK's TapasResponse dataclass.
data = ask_tapas("What causes inflation?", lem_mode=True)
print(f"Mode:  {data['mode']}")
print(f"Saved: {data['energyWhSaved']:.3f} Wh")
if data.get("bullets"):
    for b in data["bullets"]:
        print(f"• {b}")

Async support with httpx

Use httpx.AsyncClient for non-blocking queries in FastAPI, Starlette, or any async Python framework. Requires pip install tapas-ai[async].

async_example.py
python
import httpx
import asyncio

async def ask_tapas_async(
    query: str,
    lem_mode: bool = True,
    base_url: str = "https://tapas.one",
    timeout: float = 10.0,
) -> dict:
    """Async version using httpx — ideal for FastAPI / async frameworks.

    Args:
        query: The user's question.
        lem_mode: Enable Low Energy Mode (prefer cached bullet answers).
        base_url: Tapas deployment to target.
        timeout: Per-request timeout in seconds (previously hard-coded to
            10.0; now tunable while keeping the same default).

    Returns:
        The raw camelCase response dict matching the TapasResponse schema.

    Raises:
        httpx.HTTPStatusError: On any 4xx/5xx response.
    """
    url = f"{base_url}/api/trpc/query.ask"
    # One short-lived client per call keeps the example self-contained;
    # a long-running service would reuse a single AsyncClient instead.
    async with httpx.AsyncClient(timeout=timeout) as client:
        resp = await client.post(
            url,
            json={"json": {"query": query, "lemMode": lem_mode}},
        )
        resp.raise_for_status()
        return resp.json()["result"]["data"]

# Usage in an async context (e.g. FastAPI endpoint)
async def main():
    """Demo: fire one async query and print routing/energy stats."""
    data = await ask_tapas_async("How does CRISPR work?")
    print(f"Routing: {data['routingMethod']} | Saved: {data['energyWhSaved']:.3f} Wh")
    # bullets may be absent or None in LLM mode — default to an empty list.
    for b in (data.get("bullets") or []):
        print(f"• {b}")

asyncio.run(main())

LangChain integration

Drop TapasLLM into any LangChain chain or agent as a low-energy alternative to ChatOpenAI. Cached answers cost 3000× less energy than full inference. Requires pip install tapas-ai[langchain].

langchain_example.py
python
from langchain.llms.base import LLM
from langchain.callbacks.manager import CallbackManagerForLLMRun
from typing import Any, Optional, List
import requests

class TapasLLM(LLM):
    """LangChain-compatible LLM wrapper for Tapas.

    Drop this in anywhere you'd use ChatOpenAI or Anthropic —
    Tapas will serve cached answers at 0.001 Wh instead of 3.0 Wh.
    """
    # NOTE(review): these look like pydantic-style declared fields (LangChain's
    # LLM base is typically a pydantic model) — confirm against the installed
    # langchain version. api_key is declared but not used by _call below.
    base_url: str = "https://tapas.one"
    lem_mode: bool = True
    api_key: Optional[str] = None

    @property
    def _llm_type(self) -> str:
        # Identifier LangChain uses to tag this LLM in callbacks/serialization.
        return "tapas"

    def _call(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> str:
        # stop / run_manager / kwargs are accepted for interface parity with
        # the LLM base class but are not forwarded to the Tapas endpoint.
        # Single blocking POST; raises requests.HTTPError on 4xx/5xx.
        resp = requests.post(
            f"{self.base_url}/api/trpc/query.ask",
            json={"json": {"query": prompt, "lemMode": self.lem_mode}},
            timeout=10,
        )
        resp.raise_for_status()
        data = resp.json()["result"]["data"]
        # Cache hits come back as bullet points — join into one string so the
        # chain still receives a plain str, as LangChain expects.
        if data["mode"] == "cache" and data.get("bullets"):
            return "\n".join(f"• {b}" for b in data["bullets"])
        return data["answer"]

# ── Usage ────────────────────────────────────────────────────────
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate

# Low Energy Mode on: prefer cached bullet answers where available.
tapas = TapasLLM(lem_mode=True)

# Minimal passthrough chain — the prompt template is just the raw question.
chain = LLMChain(
    llm=tapas,
    prompt=PromptTemplate.from_template("{question}"),
)

result = chain.run(question="What is the Higgs boson?")
print(result)

Error handling

All SDK exceptions inherit from TapasError. Catch specific subclasses for granular retry logic.

error_handling.py
python
from tapas_ai import TapasClient
from tapas_ai.exceptions import (
    TapasError,
    TapasRateLimitError,
    TapasNetworkError,
    TapasTimeoutError,
)

tapas = TapasClient(api_key="your-api-key")

# Catch the specific subclasses first — the TapasError clause at the end
# would otherwise swallow them, since they all inherit from it.
try:
    response = tapas.ask(query="What is inflation?", lem_mode=True)
    print(response.bullets)

except TapasRateLimitError as e:
    # HTTP 429 — back off and retry
    print(f"Rate limited. Retry after: {e.retry_after}s")

except TapasTimeoutError:
    # Request exceeded timeout threshold
    print("Request timed out — check network or increase timeout")

except TapasNetworkError as e:
    # DNS failure, connection refused, etc.
    print(f"Network error: {e}")

except TapasError as e:
    # All other Tapas API errors (4xx, 5xx)
    print(f"Tapas error {e.status_code}: {e.message}")
Exception hierarchy

| Exception           | Description                   | Trigger     |
|---------------------|-------------------------------|-------------|
| TapasError          | Base class for all SDK errors | Any 4xx/5xx |
| TapasRateLimitError | Too many requests             | HTTP 429    |
| TapasNetworkError   | DNS / connection failure      | Network     |
| TapasTimeoutError   | Request exceeded timeout      | Timeout     |

Batch queries

Process multiple questions in one go. Use the async concurrent version for maximum throughput — it fires all requests simultaneously and collects results in order.

batch_example.py
python
from tapas_ai import TapasClient
import asyncio
import httpx

tapas = TapasClient(api_key="your-api-key")

# ── Synchronous batch (sequential) ──────────────────────────────
# Each question is a separate blocking round-trip, answered in order.
questions = [
    "How does photosynthesis work?",
    "What is compound interest?",
    "Explain TCP/IP in simple terms.",
    "What causes climate change?",
]

results = [tapas.ask(q, lem_mode=True) for q in questions]
# Sum the per-query savings reported by the typed responses.
total_saved = sum(r.energy_wh_saved for r in results)
print(f"Batch complete. Total saved: {total_saved:.3f} Wh")

# ── Async batch (concurrent, much faster) ───────────────────────
async def batch_ask(
    questions: list[str],
    base_url: str = "https://tapas.one",
) -> list[dict]:
    """Fire all queries concurrently and return results in input order.

    Args:
        questions: Questions to ask — one request each, sent simultaneously.
        base_url: Tapas deployment to target (was hard-coded; now tunable
            with the same default).

    Returns:
        Raw camelCase response dicts, ordered to match ``questions``
        (``asyncio.gather`` preserves input order).

    Raises:
        httpx.HTTPStatusError: If any request returned 4xx/5xx.
    """
    url = f"{base_url}/api/trpc/query.ask"
    async with httpx.AsyncClient(timeout=15.0) as client:
        tasks = [
            client.post(url, json={"json": {"query": q, "lemMode": True}})
            for q in questions
        ]
        responses = await asyncio.gather(*tasks)
        # Fail fast on HTTP errors — this check was missing here, although
        # the synchronous helper already calls raise_for_status().
        for r in responses:
            r.raise_for_status()
        return [r.json()["result"]["data"] for r in responses]

# Run the concurrent batch and report per-question mode and energy savings.
results = asyncio.run(batch_ask(questions))
for q, r in zip(questions, results):
    print(f"[{r['mode']:5s}] {q[:40]:<40} | {r['energyWhSaved']:.3f} Wh saved")

Ready to integrate?

Try the live chat, explore all 160 knowledge categories, or read the full REST API reference.