yuvis committed on
Commit f4c70c8 · verified · 1 Parent(s): 2a3a1de

Upload folder using huggingface_hub

Files changed (36)
  1. src/app/__pycache__/main.cpython-311.pyc +0 -0
  2. src/app/api/__pycache__/routes.cpython-311.pyc +0 -0
  3. src/app/api/routes.py +42 -0
  4. src/app/main.py +14 -0
  5. src/embeddings/__pycache__/embedder.cpython-311.pyc +0 -0
  6. src/embeddings/embedder.py +14 -0
  7. src/eval/__pycache__/hallucination.cpython-311.pyc +0 -0
  8. src/eval/__pycache__/relevancy.cpython-311.pyc +0 -0
  9. src/eval/__pycache__/retrieval_metrics.cpython-311.pyc +0 -0
  10. src/eval/hallucination.py +41 -0
  11. src/eval/relevancy.py +35 -0
  12. src/eval/retrieval_metrics.py +30 -0
  13. src/indexer/__pycache__/bm25_index.cpython-311.pyc +0 -0
  14. src/indexer/__pycache__/faiss_index.cpython-311.pyc +0 -0
  15. src/indexer/bm25_index.py +33 -0
  16. src/indexer/faiss_index.py +24 -0
  17. src/ingestion/__pycache__/chunkers.cpython-311.pyc +0 -0
  18. src/ingestion/__pycache__/cleaner.cpython-311.pyc +0 -0
  19. src/ingestion/__pycache__/readers.cpython-311.pyc +0 -0
  20. src/ingestion/chunkers.py +20 -0
  21. src/ingestion/cleaner.py +6 -0
  22. src/ingestion/ingest.py +83 -0
  23. src/ingestion/readers.py +18 -0
  24. src/llm/__pycache__/llm_client.cpython-311.pyc +0 -0
  25. src/llm/llm_client.py +56 -0
  26. src/pipeline/__pycache__/context_opt.cpython-311.pyc +0 -0
  27. src/pipeline/__pycache__/query_pipeline.cpython-311.pyc +0 -0
  28. src/pipeline/context_opt.py +55 -0
  29. src/pipeline/query_pipeline.py +97 -0
  30. src/reranker/__pycache__/cross_encoder.cpython-311.pyc +0 -0
  31. src/reranker/cross_encoder.py +19 -0
  32. src/retriever/__pycache__/hybrid_retriever.cpython-311.pyc +0 -0
  33. src/retriever/__pycache__/hyde.cpython-311.pyc +0 -0
  34. src/retriever/hybrid_retriever.py +71 -0
  35. src/retriever/hyde.py +21 -0
  36. src/ui/app.py +58 -0
src/app/__pycache__/main.cpython-311.pyc ADDED
Binary file (920 Bytes).
 
src/app/api/__pycache__/routes.cpython-311.pyc ADDED
Binary file (2.56 kB).
 
src/app/api/routes.py ADDED
@@ -0,0 +1,42 @@
+from fastapi import APIRouter, HTTPException
+from pydantic import BaseModel
+from typing import List, Optional
+from src.pipeline.query_pipeline import QueryPipeline
+
+router = APIRouter()
+_pipeline = None
+
+def get_pipeline():
+    global _pipeline
+    if _pipeline is None:
+        _pipeline = QueryPipeline()
+    return _pipeline
+
+class QueryRequest(BaseModel):
+    query: str
+    top_k_retrieval: Optional[int] = 20
+    top_k_rerank: Optional[int] = 5
+    use_hyde: Optional[bool] = False
+
+class DocResponse(BaseModel):
+    content: str
+    score: float
+
+class QueryResponse(BaseModel):
+    query: str
+    answer: str
+    context: List[tuple]
+
+@router.post("/chat", response_model=QueryResponse)
+async def chat(request: QueryRequest):
+    try:
+        pipe = get_pipeline()
+
+        result = pipe.run(
+            query=request.query,
+            top_k_retrieval=request.top_k_retrieval,
+            top_k_rerank=request.top_k_rerank
+        )
+        return result
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
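For reference, a minimal client-side sketch of calling this route (assuming the app from src/app/main.py is running locally on port 8000 with the /api/v1 prefix; the query text is only an example):

import requests

payload = {"query": "What is the refund policy?", "top_k_retrieval": 20, "top_k_rerank": 5}
resp = requests.post("http://localhost:8000/api/v1/chat", json=payload, timeout=60)
resp.raise_for_status()
print(resp.json()["answer"])
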
src/app/main.py ADDED
@@ -0,0 +1,14 @@
+from fastapi import FastAPI
+from src.app.api import routes
+import uvicorn
+
+app = FastAPI(title="Enterprise RAG Search API")
+
+app.include_router(routes.router, prefix="/api/v1")
+
+@app.get("/health")
+def health():
+    return {"status": "ok"}
+
+if __name__ == "__main__":
+    uvicorn.run("src.app.main:app", host="0.0.0.0", port=8000, reload=True)
src/embeddings/__pycache__/embedder.cpython-311.pyc ADDED
Binary file (1.3 kB).
 
src/embeddings/embedder.py ADDED
@@ -0,0 +1,14 @@
+from sentence_transformers import SentenceTransformer
+import torch
+
+class Embedder:
+    def __init__(self, model_name: str = "all-MiniLM-L6-v2", device: str = None):
+        if device is None:
+            self.device = "cuda" if torch.cuda.is_available() else "cpu"
+        else:
+            self.device = device
+
+        self.model = SentenceTransformer(model_name, device=self.device)
+
+    def embed(self, texts: list[str]):
+        return self.model.encode(texts, convert_to_numpy=True)
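A quick usage sketch, assuming sentence-transformers is installed and the default MiniLM checkpoint can be downloaded:

from src.embeddings.embedder import Embedder

embedder = Embedder()  # defaults to all-MiniLM-L6-v2, auto-selects CUDA when available
vectors = embedder.embed(["hello world", "enterprise search"])
print(vectors.shape)  # (2, 384) -- 384-dimensional embeddings for this model
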
src/eval/__pycache__/hallucination.cpython-311.pyc ADDED
Binary file (1.89 kB).
 
src/eval/__pycache__/relevancy.cpython-311.pyc ADDED
Binary file (2.32 kB).
 
src/eval/__pycache__/retrieval_metrics.cpython-311.pyc ADDED
Binary file (1.8 kB).
 
src/eval/hallucination.py ADDED
@@ -0,0 +1,41 @@
+from src.llm.llm_client import LLMClient
+
+class HallucinationGrader:
+    def __init__(self, llm_client: LLMClient):
+        self.llm = llm_client
+
+    def grade(self, context: str, answer: str) -> dict:
+        """
+        Returns hallucination score based on token overlap.
+        """
+        # 1. Check for refusal
+        if "not enough information" in answer.lower():
+            return {"score": 0.0, "grounded": True}
+
+        # 2. Key term overlap
+        # Normalize and tokenize
+        def tokenize(text):
+            import re
+            text = text.lower()
+            tokens = re.findall(r'\w+', text)
+            # Remove stopwords
+            stop_words = {'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with', 'is', 'are', 'was', 'were'}
+            return set([t for t in tokens if t not in stop_words])
+
+        answer_tokens = tokenize(answer)
+        context_tokens = tokenize(context)
+
+        if not answer_tokens:
+            return {"score": 0.1, "grounded": True}  # Default for empty answer
+
+        # Calculate overlap
+        intersection = answer_tokens.intersection(context_tokens)
+        overlap_ratio = len(intersection) / len(answer_tokens)
+
+        # Rule: if overlap < 0.25 -> 1.0 (hallucination)
+        # else -> 0.1 (grounded); 0.1 rather than 0.0 is intentional
+
+        if overlap_ratio < 0.25:
+            return {"score": 1.0, "grounded": False}
+        else:
+            return {"score": 0.1, "grounded": True}
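A small illustration of the overlap rule above (the heuristic never calls the LLM, so None is passed for the client; the example strings are made up):

from src.eval.hallucination import HallucinationGrader

grader = HallucinationGrader(llm_client=None)  # llm_client is unused by the overlap heuristic
context = "The warranty covers parts and labor for two years."
print(grader.grade(context, "The warranty covers parts for two years."))   # {'score': 0.1, 'grounded': True}
print(grader.grade(context, "Shipping is free worldwide on all orders."))  # {'score': 1.0, 'grounded': False}
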
src/eval/relevancy.py ADDED
@@ -0,0 +1,35 @@
+from src.llm.llm_client import LLMClient
+
+class RelevancyGrader:
+    def __init__(self, llm_client: LLMClient):
+        self.llm = llm_client
+
+    def grade(self, query: str, answer: str) -> dict:
+        """
+        Returns score (0-1) on whether the answer addresses the query.
+        """
+        system_prompt = "You are a grader assessing if a generated answer is relevant to the user query."
+        user_prompt = f"""
+        User Query: {query}
+        Generated Answer: {answer}
+
+        Does the answer directly address the query?
+        Give a score between 0 and 1, and a boolean label (true/false).
+        Return JSON format: {{"score": 0.9, "relevant": true}}
+        """
+
+        messages = [
+            {"role": "system", "content": system_prompt},
+            {"role": "user", "content": user_prompt}
+        ]
+
+        try:
+            response = self.llm.chat(messages, response_format={"type": "json_object"})
+            import json
+            data = json.loads(response)
+            # print(f"DEBUG_RELEVANCY_RAW: {response}")
+            return data
+        except Exception as e:
+            print(f"DEBUG_RELEVANCY_ERROR: {e}")
+            print(f"DEBUG_RELEVANCY_RESPONSE_WAS: {locals().get('response', 'Not generated')}")
+            return {"score": 0.5, "relevant": False, "error": str(e)}
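Used together with one of the clients from src/llm/llm_client.py; a rough sketch, assuming a GROQ_API_KEY is set and the model returns well-formed JSON for the json_object response format:

from src.llm.llm_client import GroqClient
from src.eval.relevancy import RelevancyGrader

grader = RelevancyGrader(GroqClient())
result = grader.grade(query="What is our refund window?", answer="Refunds are accepted within 30 days.")
print(result)  # e.g. {'score': 0.9, 'relevant': True}
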
src/eval/retrieval_metrics.py ADDED
@@ -0,0 +1,30 @@
+from typing import List
+
+def mrr_score(relevant_doc_ids: List[str], retrieved_doc_ids: List[str]) -> float:
+    """Reciprocal rank of the first relevant hit (averaging over queries gives MRR)."""
+    for i, doc_id in enumerate(retrieved_doc_ids):
+        if doc_id in relevant_doc_ids:
+            return 1.0 / (i + 1)
+    return 0.0
+
+def recall_at_k(relevant_doc_ids: List[str], retrieved_doc_ids: List[str], k: int) -> float:
+    """Calculates Recall@K"""
+    retrieved_at_k = set(retrieved_doc_ids[:k])
+    relevant_set = set(relevant_doc_ids)
+
+    if not relevant_set:
+        return 0.0
+
+    hits = len(relevant_set.intersection(retrieved_at_k))
+    return hits / len(relevant_set)
+
+def precision_at_k(relevant_doc_ids: List[str], retrieved_doc_ids: List[str], k: int) -> float:
+    """Calculates Precision@K"""
+    retrieved_at_k = set(retrieved_doc_ids[:k])
+    relevant_set = set(relevant_doc_ids)
+
+    if not retrieved_at_k:
+        return 0.0
+
+    hits = len(relevant_set.intersection(retrieved_at_k))
+    return hits / len(retrieved_at_k)
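A quick worked check of these helpers with hypothetical document IDs:

from src.eval.retrieval_metrics import mrr_score, recall_at_k, precision_at_k

relevant = ["d2", "d7"]
retrieved = ["d1", "d2", "d3", "d7", "d9"]

print(mrr_score(relevant, retrieved))          # 0.5   -- first hit is at rank 2
print(recall_at_k(relevant, retrieved, 3))     # 0.5   -- one of the two relevant docs is in the top 3
print(precision_at_k(relevant, retrieved, 3))  # 0.333... -- one hit among the three retrieved
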
src/indexer/__pycache__/bm25_index.cpython-311.pyc ADDED
Binary file (2.94 kB).
 
src/indexer/__pycache__/faiss_index.cpython-311.pyc ADDED
Binary file (2.15 kB).
 
src/indexer/bm25_index.py ADDED
@@ -0,0 +1,33 @@
+import pickle
+from rank_bm25 import BM25Okapi
+from typing import List
+import os
+
+class BM25Index:
+    def __init__(self):
+        self.bm25 = None
+        self.corpus = []
+
+    def build(self, corpus: List[str]):
+        """
+        Builds the BM25 index from a list of documents/chunks.
+        """
+        self.corpus = corpus
+        tokenized_corpus = [doc.split(" ") for doc in corpus]
+        self.bm25 = BM25Okapi(tokenized_corpus)
+
+    def search(self, query: str, top_k: int = 10):
+        if not self.bm25:
+            raise ValueError("Index not built.")
+        tokenized_query = query.split(" ")
+        scores = self.bm25.get_scores(tokenized_query)
+        top_n = self.bm25.get_top_n(tokenized_query, self.corpus, n=top_k)
+        return top_n, scores
+
+    def save(self, path: str):
+        with open(path, 'wb') as f:
+            pickle.dump((self.bm25, self.corpus), f)
+
+    def load(self, path: str):
+        with open(path, 'rb') as f:
+            self.bm25, self.corpus = pickle.load(f)
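A minimal build-and-search sketch over a toy corpus, assuming rank_bm25 is installed:

from src.indexer.bm25_index import BM25Index

index = BM25Index()
index.build(["the cat sat on the mat", "dogs chase cats", "faiss is a vector index"])
docs, scores = index.search("vector index", top_k=2)
print(docs)  # top-2 chunks ranked by BM25 score
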
src/indexer/faiss_index.py ADDED
@@ -0,0 +1,24 @@
+import faiss
+import numpy as np
+import os
+
+class FaissIndex:
+    def __init__(self, dimension: int):
+        self.dimension = dimension
+        self.index = faiss.IndexFlatL2(dimension)
+        # Store metadata/mapping if needed; for now this is just a simple flat index
+
+    def add(self, embeddings: np.ndarray):
+        if embeddings.shape[1] != self.dimension:
+            raise ValueError(f"Embedding dimension mismatch. Expected {self.dimension}, got {embeddings.shape[1]}")
+        self.index.add(embeddings)
+
+    def search(self, query_embedding: np.ndarray, top_k: int = 10):
+        distances, indices = self.index.search(query_embedding, top_k)
+        return distances, indices
+
+    def save(self, path: str):
+        faiss.write_index(self.index, path)
+
+    def load(self, path: str):
+        self.index = faiss.read_index(path)
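A small sketch of adding and querying vectors, using random 384-dimensional embeddings to match the MiniLM model used elsewhere in this commit:

import numpy as np
from src.indexer.faiss_index import FaissIndex

index = FaissIndex(dimension=384)
index.add(np.random.rand(100, 384).astype("float32"))  # faiss expects float32
distances, indices = index.search(np.random.rand(1, 384).astype("float32"), top_k=5)
print(indices[0])  # positions of the 5 nearest vectors by L2 distance
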
src/ingestion/__pycache__/chunkers.cpython-311.pyc ADDED
Binary file (1.94 kB).
 
src/ingestion/__pycache__/cleaner.cpython-311.pyc ADDED
Binary file (495 Bytes).
 
src/ingestion/__pycache__/readers.cpython-311.pyc ADDED
Binary file (1.82 kB).
 
src/ingestion/chunkers.py ADDED
@@ -0,0 +1,20 @@
+from typing import List
+
+class Chunker:
+    def chunk(self, text: str) -> List[str]:
+        raise NotImplementedError
+
+class SlidingWindowChunker(Chunker):
+    def __init__(self, chunk_size: int = 512, overlap: int = 50):
+        self.chunk_size = chunk_size
+        self.overlap = overlap
+
+    def chunk(self, text: str) -> List[str]:
+        words = text.split()
+        chunks = []
+        for i in range(0, len(words), self.chunk_size - self.overlap):
+            chunk_words = words[i : i + self.chunk_size]
+            chunks.append(" ".join(chunk_words))
+            if i + self.chunk_size >= len(words):
+                break
+        return chunks
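A quick illustration of the window/overlap behaviour with small numbers:

from src.ingestion.chunkers import SlidingWindowChunker

chunker = SlidingWindowChunker(chunk_size=5, overlap=2)
text = "one two three four five six seven eight nine ten"
for chunk in chunker.chunk(text):
    print(chunk)
# one two three four five
# four five six seven eight
# seven eight nine ten
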
src/ingestion/cleaner.py ADDED
@@ -0,0 +1,6 @@
+import re
+
+def clean_text(text: str) -> str:
+    # Remove excessive whitespace
+    text = re.sub(r'\s+', ' ', text).strip()
+    return text
src/ingestion/ingest.py ADDED
@@ -0,0 +1,83 @@
+import os
+import glob
+from pathlib import Path
+from typing import List
+import numpy as np
+from tqdm import tqdm
+
+from src.ingestion.readers import get_reader
+from src.ingestion.cleaner import clean_text
+from src.ingestion.chunkers import SlidingWindowChunker
+from src.embeddings.embedder import Embedder
+from src.indexer.bm25_index import BM25Index
+from src.indexer.faiss_index import FaissIndex
+
+DATA_DIR = "data"
+RAW_DIR = os.path.join(DATA_DIR, "raw")
+INDEX_DIR = os.path.join(DATA_DIR, "index")
+
+class IngestionPipeline:
+    def __init__(self):
+        self.chunker = SlidingWindowChunker()
+        self.embedder = Embedder(model_name="all-MiniLM-L6-v2")
+        self.bm25_index = BM25Index()
+        # Dimension for all-MiniLM-L6-v2 is 384
+        self.faiss_index = FaissIndex(dimension=384)
+
+    def run(self):
+        print("Starting ingestion...")
+        files = glob.glob(os.path.join(RAW_DIR, "*.*"))
+
+        all_chunks = []
+        doc_map = []  # To map chunk index back to metadata/content if needed
+
+        # 1. Read, Clean, Chunk
+        print("Processing files...")
+        for file_path in tqdm(files):
+            path = Path(file_path)
+            try:
+                reader = get_reader(path)
+                raw_text = reader.read(path)
+                cleaned_text = clean_text(raw_text)
+                chunks = self.chunker.chunk(cleaned_text)
+
+                for chunk in chunks:
+                    all_chunks.append(chunk)
+                    doc_map.append({"source": str(path), "content": chunk})
+            except Exception as e:
+                print(f"Error processing {path}: {e}")
+
+        print(f"Total chunks generated: {len(all_chunks)}")
+
+        # 2. Build BM25 Index
+        print("Building BM25 Index...")
+        self.bm25_index.build(all_chunks)
+        os.makedirs(INDEX_DIR, exist_ok=True)
+        self.bm25_index.save(os.path.join(INDEX_DIR, "bm25.pkl"))
+
+        # 3. Embed and Build FAISS Index
+        if not os.getenv("DISABLE_FAISS"):
+            print("Embedding chunks and building FAISS Index...")
+            batch_size = 32
+            for i in range(0, len(all_chunks), batch_size):
+                batch = all_chunks[i : i + batch_size]
+                embeddings = self.embedder.embed(batch)
+                self.faiss_index.add(embeddings)
+
+            self.faiss_index.save(os.path.join(INDEX_DIR, "faiss.index"))
+        else:
+            print("Skipping FAISS build due to DISABLE_FAISS environment variable.")
+            # Create a dummy file to satisfy file existence checks if any (though lazy loaded)
+            with open(os.path.join(INDEX_DIR, "faiss.index"), "w") as f:
+                f.write("dummy")
+
+        # Save doc_map (simple persistence for retrieval lookup)
+        import pickle
+        with open(os.path.join(INDEX_DIR, "doc_map.pkl"), "wb") as f:
+            pickle.dump(doc_map, f)
+
+        print("Ingestion complete.")
+
+if __name__ == "__main__":
+    pipeline = IngestionPipeline()
+    pipeline.run()
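A minimal way to trigger ingestion (run from the repository root so the relative data/ paths resolve; expects .txt/.md files under data/raw/):

from src.ingestion.ingest import IngestionPipeline

# Writes bm25.pkl, faiss.index and doc_map.pkl to data/index/
IngestionPipeline().run()
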
src/ingestion/readers.py ADDED
@@ -0,0 +1,18 @@
+from pathlib import Path
+from typing import List, Dict
+
+class DocumentReader:
+    def read(self, file_path: Path) -> str:
+        raise NotImplementedError
+
+class TextReader(DocumentReader):
+    def read(self, file_path: Path) -> str:
+        with open(file_path, 'r', encoding='utf-8') as f:
+            return f.read()
+
+def get_reader(file_path: Path) -> DocumentReader:
+    ext = file_path.suffix.lower()
+    if ext in ['.txt', '.md']:
+        return TextReader()
+    # Add PDF/Docx support here later
+    raise ValueError(f"Unsupported file type: {ext}")
src/llm/__pycache__/llm_client.cpython-311.pyc ADDED
Binary file (4.14 kB).
 
src/llm/llm_client.py ADDED
@@ -0,0 +1,56 @@
+import os
+import openai
+from typing import List, Dict, Any
+
+class LLMClient:
+    def chat(self, messages: List[Dict[str, str]], **kwargs) -> str:
+        raise NotImplementedError
+
+class OpenAIClient(LLMClient):
+    def __init__(self, api_key: str = None, model: str = "gpt-4o"):
+        self.client = openai.OpenAI(api_key=api_key or os.getenv("OPENAI_API_KEY"))
+        self.model = model
+
+    def chat(self, messages: List[Dict[str, str]], **kwargs) -> str:
+        response = self.client.chat.completions.create(
+            model=self.model,
+            messages=messages,
+            **kwargs
+        )
+        return response.choices[0].message.content
+
+class VLLMClient(LLMClient):
+    def __init__(self, api_url: str = None, model: str = None):
+        self.api_url = api_url or os.getenv("VLLM_API_URL", "http://localhost:8000/v1")
+        self.model = model or os.getenv("VLLM_MODEL", "mistralai/Mistral-7B-Instruct-v0.2")
+        # vLLM is OpenAI compatible
+        self.client = openai.OpenAI(
+            base_url=self.api_url,
+            api_key="EMPTY"
+        )
+
+    def chat(self, messages: List[Dict[str, str]], **kwargs) -> str:
+        response = self.client.chat.completions.create(
+            model=self.model,
+            messages=messages,
+            **kwargs
+        )
+        return response.choices[0].message.content
+
+class GroqClient(LLMClient):
+    def __init__(self, api_key: str = None, model: str = "llama-3.3-70b-versatile"):
+        self.api_key = api_key or os.getenv("GROQ_API_KEY")
+        self.model = model
+        self.client = openai.OpenAI(
+            base_url="https://api.groq.com/openai/v1",
+            api_key=self.api_key
+        )
+
+    def chat(self, messages: List[Dict[str, str]], **kwargs) -> str:
+        # Default behavior
+        response = self.client.chat.completions.create(
+            model=self.model,
+            messages=messages,
+            **kwargs
+        )
+        return response.choices[0].message.content
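All three clients share the same chat(messages, **kwargs) contract, so they are interchangeable in the pipeline. A minimal sketch, assuming the relevant API key or endpoint is configured:

from src.llm.llm_client import GroqClient

llm = GroqClient()  # or OpenAIClient() / VLLMClient(), selected via environment in QueryPipeline
messages = [
    {"role": "system", "content": "You are a concise assistant."},
    {"role": "user", "content": "Summarize what BM25 scores measure."},
]
print(llm.chat(messages, temperature=0.2))
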
src/pipeline/__pycache__/context_opt.cpython-311.pyc ADDED
Binary file (2.72 kB).
 
src/pipeline/__pycache__/query_pipeline.cpython-311.pyc ADDED
Binary file (4.48 kB).
 
src/pipeline/context_opt.py ADDED
@@ -0,0 +1,55 @@
+import numpy as np
+from sklearn.metrics.pairwise import cosine_similarity
+
+def maximal_marginal_relevance(query_embedding: np.ndarray, doc_embeddings: np.ndarray, lambda_mult: float = 0.5, top_k: int = 5):
+    """
+    Selects docs that are relevant to the query but diverse from each other.
+    """
+    if len(doc_embeddings) == 0:
+        return []
+
+    # Simple MMR implementation
+    selected_indices = []
+    candidate_indices = list(range(len(doc_embeddings)))
+
+    for _ in range(top_k):
+        best_score = -np.inf
+        best_idx = -1
+
+        for idx in candidate_indices:
+            # Relevance
+            rel_score = cosine_similarity(query_embedding.reshape(1, -1), doc_embeddings[idx].reshape(1, -1))[0][0]
+
+            # Diversity (similarity to already selected docs)
+            if not selected_indices:
+                div_score = 0
+            else:
+                sims = cosine_similarity(doc_embeddings[idx].reshape(1, -1), doc_embeddings[selected_indices])[0]
+                div_score = np.max(sims)
+
+            mmr_score = lambda_mult * rel_score - (1 - lambda_mult) * div_score
+
+            if mmr_score > best_score:
+                best_score = mmr_score
+                best_idx = idx
+
+        if best_idx != -1:
+            selected_indices.append(best_idx)
+            candidate_indices.remove(best_idx)
+
+    return selected_indices
+
+def deduplicate_docs(docs: list[dict], threshold: float = 0.95) -> list[dict]:
+    """
+    Remove near-duplicates based on content string similarity (simple),
+    or just exact match for now to be fast.
+    """
+    seen = set()
+    unique_docs = []
+    for doc in docs:
+        # Assuming doc is a string or dict with 'content'
+        content = doc if isinstance(doc, str) else doc.get('content', '')
+        if content not in seen:
+            seen.add(content)
+            unique_docs.append(doc)
+    return unique_docs
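A small sketch of MMR selection over toy embeddings (the returned indices refer to rows of doc_embeddings; the vectors are made up):

import numpy as np
from src.pipeline.context_opt import maximal_marginal_relevance, deduplicate_docs

query_emb = np.array([1.0, 0.0])
doc_embs = np.array([[2.0, 1.0],    # index 0: relevant
                     [2.0, 1.0],    # index 1: exact duplicate of index 0
                     [2.0, -1.0]])  # index 2: equally relevant but different
print(maximal_marginal_relevance(query_emb, doc_embs, lambda_mult=0.5, top_k=2))  # [0, 2] -- skips the duplicate
print(deduplicate_docs(["a", "a", "b"]))  # ['a', 'b']
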
src/pipeline/query_pipeline.py ADDED
@@ -0,0 +1,97 @@
+import os
+from typing import Optional
+from src.retriever.hybrid_retriever import HybridRetriever
+from src.retriever.hyde import HyDERetriever
+from src.reranker.cross_encoder import Reranker
+from src.llm.llm_client import OpenAIClient, VLLMClient, GroqClient
+from src.embeddings.embedder import Embedder
+from src.pipeline.context_opt import deduplicate_docs
+
+class QueryPipeline:
+    def __init__(self, use_hyde: bool = False):
+        self.embedder = Embedder()
+        self.retriever = HybridRetriever(
+            bm25_path="data/index/bm25.pkl",
+            faiss_path="data/index/faiss.index",
+            doc_map_path="data/index/doc_map.pkl",
+            embedder=self.embedder
+        )
+
+        # LLM Client Strategy
+        if os.getenv("GROQ_API_KEY"):
+            self.llm = GroqClient()
+        elif os.getenv("VLLM_API_URL"):
+            self.llm = VLLMClient()
+        else:
+            self.llm = OpenAIClient()
+
+        if use_hyde:
+            self.retriever = HyDERetriever(self.llm, self.retriever)
+
+        self.reranker = Reranker()
+
+    def run(self, query: str, top_k_retrieval: int = 20, top_k_rerank: int = 5):
+        # 1. Retrieve
+        print(f"Retrieving for query: {query}")
+        retrieved_docs = self.retriever.search(query, top_k=top_k_retrieval)
+
+        # 2. Deduplicate
+        unique_docs = deduplicate_docs(retrieved_docs)
+
+        # 3. Rerank
+        # Reranker expects strings
+        doc_contents = [d if isinstance(d, str) else d['content'] for d in unique_docs]
+        reranked = self.reranker.rerank(query, doc_contents, top_k=top_k_rerank)
+
+        # 4. Generate
+
+        # Retrieval Score Gate
+        RETRIEVAL_SCORE_THRESHOLD = -4.0
+
+        # reranked is list of (doc, score)
+        if not reranked or reranked[0][1] < RETRIEVAL_SCORE_THRESHOLD:
+            return {
+                "query": query,
+                "answer": "I do not have enough information in the provided documents to answer this question.",
+                "context": [],
+                "retrieval_score": reranked[0][1] if reranked else -99.9,
+                "hallucination_score": 0.0,
+                "groundedness": 1.0
+            }
+
+        context_text = "\n\n".join([doc for doc, score in reranked])
+
+        SYSTEM_PROMPT = """
+        You are an enterprise-grade question answering system.
+
+        Rules:
+        1. Answer strictly using ONLY the provided context.
+        2. DO NOT use prior knowledge or assumptions.
+        3. If the answer is not explicitly stated in the context, respond EXACTLY with:
+        "I do not have enough information in the provided documents to answer this question."
+        4. Do not add explanations, guesses, or external facts.
+        """
+
+        user_prompt = f"""
+        {SYSTEM_PROMPT}
+
+        Context:
+        {context_text}
+
+        Question:
+        {query}
+        """
+
+        messages = [
+            {"role": "system", "content": SYSTEM_PROMPT},
+            {"role": "user", "content": user_prompt}
+        ]
+
+        answer = self.llm.chat(messages)
+
+        return {
+            "query": query,
+            "answer": answer,
+            "context": reranked,
+            "retrieval_score": reranked[0][1]
+        }
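An end-to-end usage sketch, assuming the indexes written by src/ingestion/ingest.py exist under data/index/ and one of GROQ_API_KEY, VLLM_API_URL, or OPENAI_API_KEY is set; the query is only an example:

from src.pipeline.query_pipeline import QueryPipeline

pipeline = QueryPipeline(use_hyde=False)
result = pipeline.run("What does the onboarding document say about laptop setup?", top_k_retrieval=20, top_k_rerank=5)
print(result["answer"])
for doc, score in result["context"]:
    print(f"{score:.3f}  {doc[:80]}")
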
src/reranker/__pycache__/cross_encoder.cpython-311.pyc ADDED
Binary file (1.81 kB).
 
src/reranker/cross_encoder.py ADDED
@@ -0,0 +1,19 @@
+from sentence_transformers import CrossEncoder
+
+class Reranker:
+    def __init__(self, model_name: str = "cross-encoder/ms-marco-MiniLM-L-6-v2"):
+        self.model = CrossEncoder(model_name)
+
+    def rerank(self, query: str, docs: list[str], top_k: int = 5):
+        if not docs:
+            return []
+
+        pairs = [[query, doc] for doc in docs]
+        scores = self.model.predict(pairs).tolist()
+
+        # Combine docs with scores
+        doc_scores = list(zip(docs, scores))
+        # Sort by score descending
+        doc_scores.sort(key=lambda x: x[1], reverse=True)
+
+        return doc_scores[:top_k]
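A short reranking sketch (cross-encoder scores are unbounded logits, higher meaning more relevant, which is what the -4.0 gate in query_pipeline.py relies on; the documents are made up):

from src.reranker.cross_encoder import Reranker

reranker = Reranker()
docs = [
    "Our VPN requires multi-factor authentication.",
    "The cafeteria opens at 8am.",
    "Remote access is granted through the corporate VPN portal.",
]
for doc, score in reranker.rerank("How do I get remote access?", docs, top_k=2):
    print(f"{score:+.2f}  {doc}")
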
src/retriever/__pycache__/hybrid_retriever.cpython-311.pyc ADDED
Binary file (4.55 kB).
 
src/retriever/__pycache__/hyde.cpython-311.pyc ADDED
Binary file (1.81 kB).
 
src/retriever/hybrid_retriever.py ADDED
@@ -0,0 +1,71 @@
+from typing import List, Tuple
+import numpy as np
+from src.indexer.bm25_index import BM25Index
+from src.indexer.faiss_index import FaissIndex
+from src.embeddings.embedder import Embedder
+
+class HybridRetriever:
+    def __init__(self, bm25_path: str, faiss_path: str, doc_map_path: str, embedder: Embedder):
+        self.bm25 = BM25Index()
+        self.bm25.load(bm25_path)
+
+        self.embedder = embedder
+        self.faiss = None
+        import os
+        if not os.getenv("DISABLE_FAISS"):
+            try:
+                self.faiss = FaissIndex(dimension=384)  # adjust dimension if needed
+                self.faiss.load(faiss_path)
+                print("Successfully loaded FAISS index.")
+            except Exception as e:
+                print(f"WARNING: Could not load FAISS index ({e}). Running in BM25-only mode.")
+        else:
+            print("FAISS disabled via environment variable. Running in BM25-only mode.")
+
+        # Load doc map
+        import pickle
+        with open(doc_map_path, 'rb') as f:
+            self.doc_map = pickle.load(f)
+
+    def search(self, query: str, top_k: int = 10, alpha: float = 0.5) -> List[dict]:
+        """
+        Hybrid search using BM25 and dense embeddings.
+        alpha: weight for dense score (0 = pure BM25, 1 = pure dense); currently unused since RRF fusion is applied instead.
+        """
+        # 1. BM25 Search
+        # Scores would need normalization to be combined directly; RRF is safer when scores are not calibrated,
+        # so for simplicity we take the top N from each retriever and fuse with RRF.
+
+        top_n = top_k * 2
+
+        # BM25
+        bm25_docs, bm25_scores = self.bm25.search(query, top_k=top_n)
+
+        scores = {}
+
+        # Process BM25
+        for rank, doc in enumerate(bm25_docs):
+            key = doc
+            scores[key] = scores.get(key, 0) + (1 / (60 + rank))
+
+        # Dense (only if FAISS is loaded)
+        if self.faiss:
+            try:
+                query_emb = self.embedder.embed([query])
+                dense_dists, dense_indices = self.faiss.search(query_emb, top_k=top_n)
+
+                # Merge using Reciprocal Rank Fusion (RRF)
+                # Dense indices refer to doc_map
+                for rank, idx in enumerate(dense_indices[0]):
+                    if idx == -1: continue
+                    doc_data = self.doc_map[idx]
+                    key = doc_data['content']
+                    scores[key] = scores.get(key, 0) + (1 / (60 + rank))
+            except Exception as e:
+                print(f"Error during dense search: {e}")
+
+        # Sort by RRF score
+        sorted_docs = sorted(scores.items(), key=lambda x: x[1], reverse=True)
+
+        return [doc for doc, score in sorted_docs[:top_k]]
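The retriever can also be exercised directly, e.g. to inspect the RRF fusion; a sketch assuming the default ingestion output paths:

from src.embeddings.embedder import Embedder
from src.retriever.hybrid_retriever import HybridRetriever

retriever = HybridRetriever(
    bm25_path="data/index/bm25.pkl",
    faiss_path="data/index/faiss.index",
    doc_map_path="data/index/doc_map.pkl",
    embedder=Embedder(),
)
for chunk in retriever.search("vacation policy", top_k=5):
    print(chunk[:80])
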
src/retriever/hyde.py ADDED
@@ -0,0 +1,21 @@
+from src.llm.llm_client import LLMClient
+
+class HyDERetriever:
+    def __init__(self, llm_client: LLMClient, base_retriever):
+        self.llm = llm_client
+        self.retriever = base_retriever
+
+    def generate_hypothetical_doc(self, query: str) -> str:
+        messages = [
+            {"role": "system", "content": "You are a helpful assistant. Write a hypothetical answer to the user's question. Do not include any explanation, just the answer."},
+            {"role": "user", "content": query}
+        ]
+        return self.llm.chat(messages, temperature=0.7)
+
+    def search(self, query: str, top_k: int = 10):
+        # 1. Generate hypothetical doc
+        hypothetical_doc = self.generate_hypothetical_doc(query)
+        print(f"DEBUG: HyDE Doc: {hypothetical_doc[:100]}...")
+
+        # 2. Retrieve using the hypothetical doc as query
+        return self.retriever.search(hypothetical_doc, top_k=top_k)
src/ui/app.py ADDED
@@ -0,0 +1,58 @@
+import streamlit as st
+import requests
+import json
+print("UI Starting up...")
+
+st.set_page_config(page_title="Enterprise RAG Search", layout="wide")
+
+import os
+
+API_URL = os.getenv("API_URL", "http://localhost:8000/api/v1/chat")
+
+st.title("Enterprise RAG Search")
+
+with st.sidebar:
+    st.header("Configuration")
+    top_k_retrieval = st.slider("Retrieval Top-K", 5, 50, 20)
+    top_k_rerank = st.slider("Rerank Top-K", 1, 10, 5)
+    # use_hyde = st.checkbox("Use HyDE", value=False)
+
+query = st.chat_input("Enter your query...")
+
+if query:
+    st.session_state.messages = st.session_state.get("messages", [])
+    st.session_state.messages.append({"role": "user", "content": query})
+
+# s = requests.Session()
+
+for msg in st.session_state.get("messages", []):
+    with st.chat_message(msg["role"]):
+        st.write(msg["content"])
+
+if query:
+    with st.chat_message("assistant"):
+        with st.spinner("Searching..."):
+            try:
+                payload = {
+                    "query": query,
+                    "top_k_retrieval": top_k_retrieval,
+                    "top_k_rerank": top_k_rerank,
+                    # "use_hyde": use_hyde
+                }
+                response = requests.post(API_URL, json=payload)
+                response.raise_for_status()
+                data = response.json()
+
+                answer = data["answer"]
+                st.write(answer)
+
+                with st.expander("View Context"):
+                    for i, (doc, score) in enumerate(data["context"]):
+                        st.markdown(f"**Relevance Score:** {score:.4f}")
+                        st.text(doc)
+                        st.divider()
+
+                st.session_state.messages.append({"role": "assistant", "content": answer})
+
+            except Exception as e:
+                st.error(f"Error: {e}")