Kalpokoch committed
Commit 8fdb143
1 Parent(s): 0a6902c

25july upload

Files changed (5)
  1. Dockerfile +14 -21
  2. app/app.py +61 -48
  3. app/policy_vector_db.py +96 -83
  4. processed_chunks.json +0 -0
  5. requirements.txt +5 -1
Dockerfile CHANGED
@@ -1,39 +1,32 @@
-# Use official Python 3.11 image to match the wheel compatibility
+# Use official Python 3.11 image
 FROM python:3.11
 
-# Install system dependencies
+# Install system dependencies needed for building Python packages
 RUN apt-get update && apt-get install -y \
-    git \
-    wget \
     build-essential \
-    libopenblas-dev \
-    libcurl4-openssl-dev \
-    curl \
     && rm -rf /var/lib/apt/lists/*
 
 # Set working directory
 WORKDIR /app
 
-# Set Hugging Face and Transformers cache to avoid permission errors
+# Set Hugging Face cache directory and grant permissions
+# Models downloaded from the Hub will be stored here.
 ENV TRANSFORMERS_CACHE=/app/.cache \
     HF_HOME=/app/.cache
+RUN mkdir -p /app/.cache && chmod -R 777 /app/.cache
 
-# Create and give full access to cache and model folders
-RUN mkdir -p /app/.cache /app/models && chmod -R 777 /app/.cache /app/models
-
-# Copy all project files
-COPY . .
-
-# ✅ Download and install llama-cpp-python wheel from Hugging Face Dataset
-RUN wget https://huggingface.co/datasets/Kalpokoch/wheel-llama/resolve/main/llama_cpp_python-0.3.13-cp311-cp311-linux_x86_64.whl && \
-    pip install llama_cpp_python-0.3.13-cp311-cp311-linux_x86_64.whl && \
-    rm llama_cpp_python-0.3.13-cp311-cp311-linux_x86_64.whl
-
-# Install remaining Python dependencies
+# Copy only the requirements file to leverage Docker cache
+COPY requirements.txt .
+
+# Install Python dependencies
 RUN pip install --no-cache-dir -r requirements.txt
 
-# Expose FastAPI port
+# Copy the rest of your application code
+COPY . .
+
+# Expose the port the app runs on
 EXPOSE 7860
 
-# Start FastAPI app
+# Command to run the FastAPI application
 CMD ["uvicorn", "app.app:app", "--host", "0.0.0.0", "--port", "7860"]
+
app/app.py CHANGED
@@ -1,48 +1,44 @@
-from fastapi import FastAPI, Request
+from fastapi import FastAPI
 from fastapi.middleware.cors import CORSMiddleware
 from pydantic import BaseModel
-from huggingface_hub import hf_hub_download
-from llama_cpp import Llama
-import os
-import json
-import numpy as np
-from typing import List
-from sentence_transformers import SentenceTransformer
-from sklearn.metrics.pairwise import cosine_similarity
+import torch
+from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
+from app.policy_vector_db import PolicyVectorDB  # Import your class
 
-# Load processed chunks (RAG context source)
-with open("processed_chunks.json", "r") as f:
-    chunks = json.load(f)
+# --- 1. Initialize the Vector Database and LLM ---
 
-# Load embeddings model (use a lightweight one for Docker CPU)
-embedder = SentenceTransformer("all-MiniLM-L6-v2")
+# Load the vector database.
+# This connects to the persistent ChromaDB storage created by policy_vector_db.py
+print("Loading Vector Database...")
+db = PolicyVectorDB(persist_directory="policy_vector_db")
+print("Vector Database loaded successfully!")
 
-# Precompute embeddings
-chunk_texts = [chunk["text"] for chunk in chunks]
-chunk_embeddings = embedder.encode(chunk_texts, convert_to_tensor=False)
-
-# Download model file
-model_path = hf_hub_download(
-    repo_id="TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF",
-    filename="tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf",
-    local_dir="/app/models",
-    token=os.getenv("HF_TOKEN")
+# Load your fine-tuned model from Hugging Face Hub
+model_id = "Kalpokoch/QuntizedTinyLama"
+print(f"Loading model: {model_id}...")
+tokenizer = AutoTokenizer.from_pretrained(model_id)
+model = AutoModelForCausalLM.from_pretrained(
+    model_id,
+    torch_dtype=torch.bfloat16,
+    device_map="auto"
 )
 
-# Load TinyLlama model
-llm = Llama(
-    model_path=model_path,
-    n_ctx=2048,
-    n_threads=4  # adjust depending on CPU cores
+# Create a text-generation pipeline for the LLM
+pipe = pipeline(
+    "text-generation",
+    model=model,
+    tokenizer=tokenizer,
+    max_new_tokens=256
 )
+print("LLM and pipeline loaded successfully!")
+
 
-# FastAPI app
+# --- 2. FastAPI App Setup ---
 app = FastAPI()
 
-# Allow Netlify frontend to access the backend
 app.add_middleware(
     CORSMiddleware,
-    allow_origins=["*"],  # or specify your Netlify URL for more security
+    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
@@ -50,7 +46,7 @@ app.add_middleware(
 
 @app.get("/")
 def read_root():
-    return {"message": "RAG chatbot backend is running!"}
+    return {"message": "RAG chatbot backend is running with Kalpokoch/QuntizedTinyLama and ChromaDB!"}
 
 class ChatRequest(BaseModel):
     question: str
@@ -61,23 +57,40 @@ def chat(request: ChatRequest):
     if not question:
         return {"response": "Please ask a question."}
 
-    # Embed the user's question
-    q_embedding = embedder.encode([question])[0]
-
-    # Find top 3 most similar chunks
-    similarities = cosine_similarity([q_embedding], chunk_embeddings)[0]
-    top_indices = similarities.argsort()[-3:][::-1]
-    retrieved = "\n\n".join(chunk_texts[i] for i in top_indices)
+    # --- 3. RAG Retrieval using PolicyVectorDB ---
+    # Use the search method from your class to find relevant context
+    print(f"Searching for context for question: '{question}'")
+    search_results = db.search(query_text=question, top_k=3)
+
+    # Check if any results were found
+    if not search_results:
+        retrieved_context = "No relevant context found."
+    else:
+        # Format the retrieved documents into a single context string
+        retrieved_context = "\n\n".join([result['text'] for result in search_results])
+
+    print(f"Retrieved Context:\n{retrieved_context[:500]}...")
 
-    # Build the prompt
+    # --- 4. Prompt Engineering and Generation ---
+    # Build the prompt with the retrieved context
     prompt = (
-        f"Context:\n{retrieved}\n\n"
-        f"User: {question}\n"
-        f"Assistant:"
+        f"<|system|>\nYou are a helpful assistant for NEEPCO policies. "
+        f"Use the following context to answer the user's question. If the context doesn't contain the answer, say that.\n"
+        f"Context:\n{retrieved_context}</s>\n"
+        f"<|user|>\n{question}</s>\n"
+        f"<|assistant|>"
    )
 
-    # Generate a response from the model
-    output = llm(prompt, max_tokens=256)
-    reply = output["choices"][0]["text"].strip()
-
-    return {"response": reply}
+    # Generate a response using the pipeline
+    try:
+        outputs = pipe(prompt)
+        reply = outputs[0]['generated_text']
+
+        # Extract only the assistant's newly generated reply
+        assistant_reply = reply.split("<|assistant|>")[1].strip()
+
+        return {"response": assistant_reply}
+    except Exception as e:
+        print(f"Error during model inference: {e}")
+        return {"response": "Sorry, I encountered an error while generating a response."}
 
app/policy_vector_db.py CHANGED
@@ -1,115 +1,128 @@
 import json
+import os
+import shutil
+from typing import List, Dict
+
 import chromadb
 from sentence_transformers import SentenceTransformer
-from typing import List, Dict
 
 class PolicyVectorDB:
-    def __init__(self, db_path="./chroma_db"):
-        """Initialize vector database for policy chunks"""
-        self.client = chromadb.PersistentClient(path=db_path)
+    """Manages the creation and searching of a persistent vector database."""
+    def __init__(self, persist_directory: str = "chroma_db"):
+        self.client = chromadb.PersistentClient(path=persist_directory)
         self.collection_name = "neepco_dop_policies"
-
-        # Initialize embedding model
-        self.embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
-
-        # Create or get collection
-        try:
-            self.collection = self.client.get_collection(self.collection_name)
-            print("Loaded existing collection")
-        except:
-            self.collection = self.client.create_collection(
-                name=self.collection_name,
-                metadata={"description": "NEEPCO DOP Policy chunks"}
-            )
-            print("Created new collection")
+        self.embedding_model = SentenceTransformer('BAAI/bge-large-en-v1.5', device='cpu')
+        self.collection = self.client.get_or_create_collection(
+            name=self.collection_name,
+            metadata={"description": "NEEPCO Delegation of Powers Policy"}
+        )
+        print(f"Loaded/Created persistent collection '{self.collection_name}' at '{persist_directory}'")
 
     def _flatten_metadata(self, metadata: Dict) -> Dict:
-        """Remove nested metadata (dicts/lists) and stringify others"""
-        flat_meta = {}
-        for key, value in metadata.items():
-            if isinstance(value, (dict, list)):
-                continue  # skip nested fields
-            if isinstance(value, (str, int, float, bool)) or value is None:
-                flat_meta[key] = value
-            else:
-                flat_meta[key] = str(value)  # fallback to string
-        return flat_meta
+        """Ensures all metadata values are strings for ChromaDB compatibility."""
+        return {key: str(value) for key, value in metadata.items()}
 
     def add_chunks(self, chunks: List[Dict]):
-        """Add policy chunks to vector database"""
-        print(f"Adding {len(chunks)} chunks to database...")
+        """Encodes and adds a list of chunk dictionaries to the database."""
+        if not chunks:
+            print("No chunks provided to add.")
+            return
 
-        texts = [chunk['text'] for chunk in chunks]
-        metadatas = [self._flatten_metadata(chunk['metadata']) for chunk in chunks]
-        ids = [chunk['id'] for chunk in chunks]
-
-        # Generate embeddings
-        embeddings = self.embedding_model.encode(texts).tolist()
-
-        # Add to collection
-        self.collection.add(
-            embeddings=embeddings,
-            documents=texts,
-            metadatas=metadatas,
-            ids=ids
-        )
-
-        print("Successfully added chunks to database!")
+        existing_ids = set(self.collection.get(include=[])['ids'])
+        new_chunks = [chunk for chunk in chunks if chunk.get('id') not in existing_ids]
 
-    def search(self, query: str, top_k: int = 3) -> List[Dict]:
-        """Search for relevant policy chunks"""
-
-        # Generate query embedding
-        query_embedding = self.embedding_model.encode([query]).tolist()
+        if not new_chunks:
+            print("No new chunks to add. All provided chunks already exist in the database.")
+            return
+
+        print(f"Found {len(new_chunks)} new chunks to add.")
+        batch_size = 128
+
+        for i in range(0, len(new_chunks), batch_size):
+            batch = new_chunks[i:i + batch_size]
+            print(f"  - Processing batch {i//batch_size + 1}/{ -(-len(new_chunks) // batch_size) }...")
+
+            texts = [chunk['text'] for chunk in batch]
+            ids = [chunk['id'] for chunk in batch]
+            metadatas = [self._flatten_metadata(chunk['metadata']) for chunk in batch]
+
+            embeddings = self.embedding_model.encode(texts, show_progress_bar=False).tolist()
+            self.collection.add(ids=ids, embeddings=embeddings, documents=texts, metadatas=metadatas)
 
-        # Search in vector database
+        print(f"Successfully added {len(new_chunks)} new chunks to the database!")
+
+    def search(self, query_text: str, top_k: int = 3) -> List[Dict]:
+        """Searches the collection for a given query text."""
+        query_embedding = self.embedding_model.encode([query_text]).tolist()
         results = self.collection.query(
             query_embeddings=query_embedding,
             n_results=top_k,
             include=['documents', 'metadatas', 'distances']
         )
 
-        # Format results
         search_results = []
-        for i in range(len(results['documents'][0])):
+        if not results.get('documents'):
+            return []
+
+        for i, doc in enumerate(results['documents'][0]):
+            relevance_score = 1 - results['distances'][0][i]
             search_results.append({
-                'text': results['documents'][0][i],
+                'text': doc,
                 'metadata': results['metadatas'][0][i],
-                'relevance_score': 1 - results['distances'][0][i]  # Convert distance to similarity
+                'relevance_score': relevance_score
             })
-
         return search_results
 
-def setup_vector_database():
-    """Complete setup of vector database"""
-
-    # Load processed chunks
-    with open("processed_chunks.json", "r", encoding='utf-8') as f:
-        chunks = json.load(f)
+def main():
+    """Main function to build and verify the vector database."""
+    INPUT_CHUNKS_PATH = "processed_chunks_final.json"
+    PERSIST_DIRECTORY = "policy_vector_db"
 
-    # Initialize database
-    vector_db = PolicyVectorDB()
+    if not os.path.exists(INPUT_CHUNKS_PATH):
+        print(f"FATAL ERROR: The input chunk file was not found at '{INPUT_CHUNKS_PATH}'")
+        print("Please run 'create_chunks.py' first.")
+        return
+
+    if os.path.exists(PERSIST_DIRECTORY):
+        print(f"Removing existing database at '{PERSIST_DIRECTORY}' to ensure a clean build.")
+        shutil.rmtree(PERSIST_DIRECTORY)
+
+    print(f"Creating database directory: '{PERSIST_DIRECTORY}'")
+    os.makedirs(PERSIST_DIRECTORY, exist_ok=True)
+    os.chmod(PERSIST_DIRECTORY, 0o777)
+
+    print("\nStep 1: Loading processed chunks...")
+    with open(INPUT_CHUNKS_PATH, 'r', encoding='utf-8') as f:
+        chunks_to_add = json.load(f)
+    print(f"Loaded {len(chunks_to_add)} chunks.")
 
-    # Add chunks to database
-    vector_db.add_chunks(chunks)
+    print("\nStep 2: Setting up persistent vector database...")
+    db = PolicyVectorDB(persist_directory=PERSIST_DIRECTORY)
 
-    return vector_db
+    print("\nStep 3: Adding chunks to the database...")
+    db.add_chunks(chunks_to_add)
 
+    print(f"\n✅ Vector database setup complete. Total chunks in DB: {db.collection.count()}")
+    print(f"Database is saved in: {os.path.abspath(PERSIST_DIRECTORY)}")
 
-# Example usage
+    print("\n--- Running Verification Tests ---")
+    test_questions = [
+        "Who can approve changes to the pay structure?",
+        "What is the financial limit for a DGM for works on a limited tender basis?",
+        "What's the delegation power of an ED for single tender O&M contracts from an OEM?"
+    ]
+
+    for question in test_questions:
+        print(f"\n--- Testing Query ---")
+        print(f"Query: {question}")
+        search_results = db.search(question, top_k=2)
+        if search_results:
+            for j, result in enumerate(search_results, 1):
+                print(f"  Result {j} (Relevance: {result['relevance_score']:.4f}):")
+                print(f"  Text: {result['text'][:300]}...")
+                print(f"  Metadata: {result['metadata']}")
+        else:
+            print("  No results found.")
+
 if __name__ == "__main__":
-    # Setup database
-    db = setup_vector_database()
-
-    # Test search
-    query = "Who approves resignation for executives E-7 and above?"
-    results = db.search(query, top_k=2)
-
-    print(f"\nQuery: {query}")
-    print("Results:")
-    for i, result in enumerate(results, 1):
-        print(f"\n{i}. Relevance: {result['relevance_score']:.3f}")
-        print(f"Section: {result['metadata'].get('section', 'N/A')}")
-        print(f"Authority: {result['metadata'].get('authority', 'N/A')}")
-        print(f"Text: {result['text'][:200]}...")
+    main()
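For reference, add_chunks expects each chunk as a dict with 'id', 'text', and 'metadata' keys (that is how the method indexes its input, and 'id' drives the de-duplication check against existing collection ids). Below is a minimal sketch of that shape against a throwaway database; the directory name and sample values are invented for illustration and are not part of this commit.

    from app.policy_vector_db import PolicyVectorDB

    # Hypothetical example of the chunk shape consumed by add_chunks().
    sample_chunks = [
        {
            "id": "dop-demo-001",  # unique string id; ids already in the collection are skipped on re-runs
            "text": "Sample policy text about delegation of powers.",
            "metadata": {"section": "demo", "authority": "demo"},  # values are stringified by _flatten_metadata()
        }
    ]

    db = PolicyVectorDB(persist_directory="scratch_vector_db")  # throwaway path, not the committed policy_vector_db
    db.add_chunks(sample_chunks)
    print(db.search("delegation of powers", top_k=1))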
processed_chunks.json CHANGED
The diff for this file is too large to render. See raw diff
 
requirements.txt CHANGED
@@ -1,5 +1,9 @@
 fastapi
 uvicorn
-huggingface_hub
+pydantic
 sentence-transformers
 scikit-learn
+torch
+transformers
+accelerate
+bitsandbytes