RAG (Retrieval-Augmented Generation) utilities
Provides document chunking, embedding, and retrieval capabilities.
Types
Chunk = object
  text*: string
  metadata*: JsonNode
  embedding*: seq[float]
  index*: int
- A document chunk with metadata
ChunkingOptions = object
  strategy*: ChunkingStrategy
  chunkSize*: int
  chunkOverlap*: int
  preserveStructure*: bool
- Options for document chunking
ChunkingStrategy = enum
  FixedSize, Sentences, Paragraphs, Semantic
- Strategy for splitting documents into chunks
Procs
proc chunkByFixedSize(text: string; chunkSize: int; overlap: int): seq[Chunk] {.raises: [], tags: [], forbids: [].}
- Chunks text into fixed-size pieces with overlap
proc chunkByParagraphs(text: string; maxParagraphs: int = 3; overlap: int = 1): seq[Chunk] {.raises: [], tags: [], forbids: [].}
- Chunks text by paragraphs
proc chunkBySentences(text: string; maxSentences: int = 5; overlap: int = 1): seq[Chunk] {.raises: [], tags: [], forbids: [].}
- Chunks text by sentences
proc chunkDocument(text: string; options: ChunkingOptions): seq[Chunk] {.raises: [], tags: [], forbids: [].}
- Chunks a document according to the specified strategy
proc cosineSimilarity(a, b: seq[float]): float {.raises: [], tags: [], forbids: [].}
- Computes cosine similarity between two vectors
proc newChunkingOptions(strategy: ChunkingStrategy = FixedSize; chunkSize: int = 1000; chunkOverlap: int = 200): ChunkingOptions {.raises: [], tags: [], forbids: [].}
- Creates default chunking options
proc rerankChunks(chunks: seq[Chunk]; query: string): seq[Chunk] {.raises: [], tags: [], forbids: [].}
- Reranks chunks by relevance to the query (simple keyword-based approach)