src/pocketflow/rag

RAG (Retrieval-Augmented Generation) utilities

Provides document chunking, embedding, and retrieval capabilities.

Types

Chunk = object
  text*: string
  metadata*: JsonNode
  embedding*: seq[float]
  index*: int
A document chunk: its text, associated metadata, embedding vector, and index.
ChunkingOptions = object
  strategy*: ChunkingStrategy
  chunkSize*: int
  chunkOverlap*: int
  preserveStructure*: bool
Options for document chunking
ChunkingStrategy = enum
  FixedSize, Sentences, Paragraphs, Semantic
Strategy for splitting documents into chunks

Procs

proc chunkByFixedSize(text: string; chunkSize: int; overlap: int): seq[Chunk] {.
    raises: [], tags: [], forbids: [].}
Chunks text into fixed-size pieces with overlap
proc chunkByParagraphs(text: string; maxParagraphs: int = 3; overlap: int = 1): seq[
    Chunk] {.raises: [], tags: [], forbids: [].}
Chunks text by paragraphs
proc chunkBySentences(text: string; maxSentences: int = 5; overlap: int = 1): seq[
    Chunk] {.raises: [], tags: [], forbids: [].}
Chunks text by sentences
proc chunkDocument(text: string; options: ChunkingOptions): seq[Chunk] {.
    raises: [], tags: [], forbids: [].}
Chunks a document according to the specified strategy
proc cosineSimilarity(a, b: seq[float]): float {.raises: [], tags: [],
    forbids: [].}
Computes cosine similarity between two vectors
proc findTopK(query: seq[float]; chunks: seq[Chunk]; k: int = 5): seq[
    tuple[chunk: Chunk, score: float]] {.raises: [], tags: [], forbids: [].}
Finds the k chunks most similar to a query embedding (k defaults to 5), returning each chunk together with its score.
proc newChunkingOptions(strategy: ChunkingStrategy = FixedSize;
                        chunkSize: int = 1000; chunkOverlap: int = 200): ChunkingOptions {.
    raises: [], tags: [], forbids: [].}
Creates chunking options; by default uses the FixedSize strategy with a chunk size of 1000 and a chunk overlap of 200.
proc rerankChunks(chunks: seq[Chunk]; query: string): seq[Chunk] {.raises: [],
    tags: [], forbids: [].}
Reranks chunks by their relevance to the query, using a simple keyword-based approach.