perplexity-reference-architecture | perplexity-pack

Stats

Actions

Tags

perplexity-reference-architecture | perplexity-pack

Perplexity Reference Architecture

Overview

Production architecture for AI-powered search with Perplexity Sonar API. Three tiers: search service (model routing + caching), citation pipeline (extract, validate, store), and research orchestrator (multi-query synthesis).

Architecture

┌─────────────────────────────────────────────┐
│              Application Layer              │
│  (Search Widget, Research Agent, Fact Check) │
└──────────────────────┬──────────────────────┘
                       │
┌──────────────────────▼──────────────────────┐
│            Search Service Layer             │
│  ┌──────────┐ ┌──────────┐ ┌─────────────┐ │
│  │  Model   │ │  Query   │ │   Response   │ │
│  │  Router  │ │  Cache   │ │   Parser     │ │
│  └──────────┘ └──────────┘ └─────────────┘ │
└──────────────────────┬──────────────────────┘
                       │
┌──────────────────────▼──────────────────────┐
│          api.perplexity.ai/chat/completions │
│  sonar | sonar-pro | sonar-reasoning-pro    │
└─────────────────────────────────────────────┘

Prerequisites

Perplexity API key with Sonar access
OpenAI-compatible client library (openai package)
Redis for production caching (LRU for development)

Instructions

Step 1: Search Service with Model Routing

// src/perplexity/search-service.ts
import OpenAI from "openai";
import { createHash } from "crypto";

type SearchDepth = "quick" | "standard" | "deep" | "reasoning";

const MODEL_MAP: Record<SearchDepth, { model: string; maxTokens: number; timeout: number }> = {
  quick:     { model: "sonar",               maxTokens: 256,  timeout: 10000 },
  standard:  { model: "sonar",               maxTokens: 1024, timeout: 15000 },
  deep:      { model: "sonar-pro",           maxTokens: 4096, timeout: 30000 },
  reasoning: { model: "sonar-reasoning-pro", maxTokens: 4096, timeout: 45000 },
};

export class SearchService {
  constructor(
    private client: OpenAI,
    private cache: Map<string, { result: any; expiry: number }> = new Map()
  ) {}

  async search(query: string, depth: SearchDepth = "standard", opts: {
    recencyFilter?: "hour" | "day" | "week" | "month";
    domainFilter?: string[];
    systemPrompt?: string;
  } = {}) {
    const config = MODEL_MAP[depth];
    const cacheKey = this.hashQuery(query, config.model, opts);

    // Check cache
    const cached = this.cache.get(cacheKey);
    if (cached && cached.expiry > Date.now()) {
      return { ...cached.result, cached: true };
    }

    const response = await this.client.chat.completions.create({
      model: config.model,
      messages: [
        ...(opts.systemPrompt ? [{ role: "system" as const, content: opts.systemPrompt }] : []),
        { role: "user" as const, content: query },
      ],
      max_tokens: config.maxTokens,
      ...(opts.recencyFilter && { search_recency_filter: opts.recencyFilter }),
      ...(opts.domainFilter && { search_domain_filter: opts.domainFilter }),
    } as any);

    const result = {
      answer: response.choices[0].message.content || "",
      citations: (response as any).citations || [],
      searchResults: (response as any).search_results || [],
      model: response.model,
      usage: response.usage,
    };

    // Cache with TTL based on query type
    const ttl = opts.recencyFilter === "hour" ? 900_000 : 3600_000;
    this.cache.set(cacheKey, { result, expiry: Date.now() + ttl });

    return { ...result, cached: false };
  }

  private hashQuery(query: string, model: string, opts: any): string {
    return createHash("sha256")
      .update(JSON.stringify({ query: query.toLowerCase().trim(), model, ...opts }))
      .digest("hex");
  }
}

Step 2: Citation Pipeline

// src/perplexity/citation-pipeline.ts
export interface Citation {
  url: string;
  domain: string;
  index: number;
}

export function extractCitations(answer: string, citationUrls: string[]): Citation[] {
  return citationUrls.map((url, i) => ({
    url,
    domain: new URL(url).hostname,
    index: i + 1,
  }));
}

export function renderCitationsAsMarkdown(answer: string, citations: Citation[]): string {
  let rendered = answer;
  for (const c of citations) {
    rendered = rendered.replaceAll(`[${c.index}]`, `[${c.index}](${c.url})`);
  }
  return rendered;
}

export function deduplicateCitations(citations: Citation[]): Citation[] {
  const seen = new Set<string>();
  return citations.filter((c) => {
    const normalized = c.url.split("?")[0].replace(/\/$/, "");
    if (seen.has(normalized)) return false;
    seen.add(normalized);
    return true;
  });
}

Step 3: Research Orchestrator

// src/perplexity/research-orchestrator.ts
export class ResearchOrchestrator {
  constructor(private searchService: SearchService) {}

  async research(topic: string): Promise<{
    sections: Array<{ question: string; answer: string; citations: string[] }>;
    bibliography: string[];
  }> {
    // Phase 1: Decompose topic (fast model)
    const overview = await this.searchService.search(
      `Break "${topic}" into 4-5 key research questions. List one per line.`,
      "quick"
    );
    const questions = overview.answer.split("\n").filter((q) => q.trim().length > 10);

    // Phase 2: Deep dive each question
    const sections = [];
    const allCitations = new Set<string>();

    for (const question of questions.slice(0, 5)) {
      const result = await this.searchService.search(question, "deep", {
        systemPrompt: `Research context: ${topic}. Provide detailed, well-cited answer.`,
      });
      sections.push({
        question: question.trim(),
        answer: result.answer,
        citations: result.citations,
      });
      result.citations.forEach((url: string) => allCitations.add(url));

      // Rate limit protection
      await new Promise((r) => setTimeout(r, 2000));
    }

    return { sections, bibliography: [...allCitations] };
  }
}

Step 4: Fact-Check Service

export async function factCheck(
  claim: string,
  searchService: SearchService
): Promise<{ verdict: string; confidence: string; sources: string[] }> {
  const result = await searchService.search(
    `Verify this claim with sources. State whether it is accurate, partially accurate, or inaccurate: "${claim}"`,
    "deep",
    { systemPrompt: "You are a fact-checker. Be precise and cite sources." }
  );

  return {
    verdict: result.answer,
    confidence: result.citations.length > 3 ? "high" : result.citations.length > 1 ? "medium" : "low",
    sources: result.citations,
  };
}

Error Handling

Issue	Cause	Solution
No citations returned	Using sonar for complex query	Upgrade to sonar-pro
Stale information	No recency filter	Add `search_recency_filter`
High cost	sonar-pro for simple queries	Route by depth
Rate limit on research	Too many sequential queries	Add 2s delay between calls

Output

Search service with model routing by query depth
Citation extraction and rendering pipeline
Multi-query research orchestrator
Fact-checking service

Resources