Skip to main content

Custom prompt templates

Overriding the prompt template is the fastest way to change the assistant's behavior. See the Prompts guide and Custom Prompts examples for full details.
# Answering prompt with a branded persona. The engine substitutes
# {context}, {chat_history}, and {question} at runtime.
SUPPORT_PROMPT = """You are Aria, a support agent for Acme Corp.

Context:
{context}

History:
{chat_history}

Customer: {question}
Aria:"""

lc = LangChat(
    llm=OpenAI("gpt-4o-mini"),
    vector_db=Pinecone("my-index"),
    db=Supabase(),
    prompt_template=SUPPORT_PROMPT,
)

Custom reranker settings

Control how many chunks reach the LLM:
from langchat.adapters.reranker import FlashrankRerankAdapter

# Rerank retrieved chunks and forward the best five to the LLM
# (the library default is three).
reranker = FlashrankRerankAdapter(
    top_n=5,
    model_name="ms-marco-MiniLM-L-12-v2",
    cache_dir="rerank_models",
)

# Wire the custom reranker into the pipeline; everything else stays default.
lc = LangChat(
    llm=OpenAI("gpt-4o-mini"),
    db=Supabase(),
    vector_db=Pinecone("my-index"),
    reranker=reranker,
)

Custom standalone question prompt

Control how follow-up questions are reformulated:
# Prompt used to rewrite a follow-up into a self-contained question before
# retrieval; {chat_history} and {question} are substituted at runtime.
STANDALONE = """Rewrite the follow-up question as a standalone question in English.
Keep greetings unchanged. Include domain context (Acme Corp products/services).

History:
{chat_history}

Follow-up: {question}
Standalone:"""

lc = LangChat(
    llm=OpenAI("gpt-4o-mini"),
    db=Supabase(),
    vector_db=Pinecone("my-index"),
    standalone_question_prompt=STANDALONE,
)

History window

Reduce token usage by keeping fewer past exchanges in context:
# Keep only the five most recent exchanges in context to reduce token usage.
lc = LangChat(
    llm=OpenAI("gpt-4o-mini"),
    db=Supabase(),
    vector_db=Pinecone("my-index"),
    max_chat_history=5,
)

Pre-processing questions

Intercept and modify questions before they reach the engine:
async def chat_with_preprocessing(query: str, user_id: str) -> ChatResponse:
    """Normalize the incoming query, enrich it with user context, and chat.

    Passing ``standalone_question`` to the engine skips its own LLM-based
    question reformulation step.
    """
    normalized = query.strip()

    # Prepend user context so retrieval and answering can make use of it.
    contextualized = f"[User tier: premium] {normalized}"

    result = await lc.engine.chat(
        query=normalized,
        user_id=user_id,
        standalone_question=contextualized,
    )
    return ChatResponse(
        text=result["response"],
        user_id=result["user_id"],
        platform="web",
        status=result["status"],
        response_time=result["response_time"],
        timestamp=result["timestamp"],
        error=result.get("error"),
    )

Post-processing responses

Wrap chat() to modify or enrich responses:
import re

async def chat_with_postprocessing(query: str, user_id: str) -> ChatResponse:
    """Run a normal chat call, then append a disclaimer to financial answers.

    The response is rebuilt (not mutated) so the original object stays intact.
    """
    response = await lc.chat(query=query, user_id=user_id)

    if response:
        # Append disclaimer to financial answers
        if any(word in query.lower() for word in ["invest", "stock", "money"]):
            response = ChatResponse(
                text=response.text + "\n\n*This is not financial advice.*",
                user_id=response.user_id,
                platform=response.platform,
                status=response.status,
                response_time=response.response_time,
                timestamp=response.timestamp,
                # Fix: carry the error field over instead of silently
                # dropping it when the response is rebuilt.
                error=response.error,
            )

    return response

Accessing the session directly

Manipulate conversation history before a chat call:
session = lc.get_session("alice", "web")

# Inject a system context message
session.chat_history.insert(0, (
    "System: What products do we sell?",
    "We sell enterprise software for HR teams.",
))

# Now the next chat() call will include this in history
response = await lc.chat(query="Tell me more", user_id="alice", platform="web")

Custom routes in API server

Add your own endpoints alongside LangChat’s:
from fastapi import APIRouter
from langchat.api import create_app

# Routes registered here are served alongside LangChat's built-in endpoints.
router = APIRouter(prefix="/api/v1")


@router.get("/topics")
async def get_topics():
    """List the support topics this deployment handles."""
    return ["pricing", "features", "support", "billing"]


@router.post("/feedback")
async def submit_feedback(message_id: str, helpful: bool):
    """Record whether a given answer was helpful."""
    # save to your database
    return {"saved": True}


app = create_app(
    llm=OpenAI("gpt-4o-mini"),
    db=Supabase(),
    vector_db=Pinecone("my-index"),
    custom_routes=[router],
)