Implemented dynamic DB build and other app/docker changes
Files changed:
- .gitattributes (+0 -35)
- Dockerfile (+7 -4)
- app/app.py (+11 -10)
- app/policy_vector_db.py (+47 -20)
.gitattributes
DELETED

```diff
@@ -1,35 +0,0 @@
-*.7z filter=lfs diff=lfs merge=lfs -text
-*.arrow filter=lfs diff=lfs merge=lfs -text
-*.bin filter=lfs diff=lfs merge=lfs -text
-*.bz2 filter=lfs diff=lfs merge=lfs -text
-*.ckpt filter=lfs diff=lfs merge=lfs -text
-*.ftz filter=lfs diff=lfs merge=lfs -text
-*.gz filter=lfs diff=lfs merge=lfs -text
-*.h5 filter=lfs diff=lfs merge=lfs -text
-*.joblib filter=lfs diff=lfs merge=lfs -text
-*.lfs.* filter=lfs diff=lfs merge=lfs -text
-*.mlmodel filter=lfs diff=lfs merge=lfs -text
-*.model filter=lfs diff=lfs merge=lfs -text
-*.msgpack filter=lfs diff=lfs merge=lfs -text
-*.npy filter=lfs diff=lfs merge=lfs -text
-*.npz filter=lfs diff=lfs merge=lfs -text
-*.onnx filter=lfs diff=lfs merge=lfs -text
-*.ot filter=lfs diff=lfs merge=lfs -text
-*.parquet filter=lfs diff=lfs merge=lfs -text
-*.pb filter=lfs diff=lfs merge=lfs -text
-*.pickle filter=lfs diff=lfs merge=lfs -text
-*.pkl filter=lfs diff=lfs merge=lfs -text
-*.pt filter=lfs diff=lfs merge=lfs -text
-*.pth filter=lfs diff=lfs merge=lfs -text
-*.rar filter=lfs diff=lfs merge=lfs -text
-*.safetensors filter=lfs diff=lfs merge=lfs -text
-saved_model/**/* filter=lfs diff=lfs merge=lfs -text
-*.tar.* filter=lfs diff=lfs merge=lfs -text
-*.tar filter=lfs diff=lfs merge=lfs -text
-*.tflite filter=lfs diff=lfs merge=lfs -text
-*.tgz filter=lfs diff=lfs merge=lfs -text
-*.wasm filter=lfs diff=lfs merge=lfs -text
-*.xz filter=lfs diff=lfs merge=lfs -text
-*.zip filter=lfs diff=lfs merge=lfs -text
-*.zst filter=lfs diff=lfs merge=lfs -text
-*tfevents* filter=lfs diff=lfs merge=lfs -text
```
Dockerfile
CHANGED

```diff
@@ -14,8 +14,11 @@ ENV TRANSFORMERS_CACHE=/app/.cache \
     HF_HOME=/app/.cache
 RUN mkdir -p /app/.cache && chmod -R 777 /app/.cache
 
-#
-
+# --- NEW: Copy the pre-built vector database ---
+# Create the directory for the DB inside the container
+RUN mkdir -p /app/vector_database && chmod -R 777 /app/vector_database
+# Copy the contents of your local 'vector_database' into the container
+COPY vector_database/ /app/vector_database/
 
 # Copy only the requirements file to leverage Docker cache
 COPY requirements.txt .
@@ -23,11 +26,11 @@ COPY requirements.txt .
 # Install Python dependencies
 RUN pip install --no-cache-dir -r requirements.txt
 
-# Copy the rest of your application code
+# Copy the rest of your application code (app/, processed_chunks.json, README.md etc.)
 COPY . .
 
 # Expose the port the app runs on
 EXPOSE 7860
 
 # Command to run the FastAPI application
-CMD ["uvicorn", "app.app:app", "--host", "0.0.0.0", "--port", "7860"]
+CMD ["uvicorn", "app.app:app", "--host", "0.0.0.0", "--port", "7860"]
```
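A quick way to sanity-check the image after this change is to confirm the copied database opens with the same Chroma client the app uses. This is a minimal sketch, not part of the commit: the `/app/vector_database` path and `neepco_dop_policies` collection name come from the diffs, while the `check_db.py` filename is hypothetical. Run it inside the built container.

```python
# check_db.py -- smoke test that the pre-built vector DB was copied into the image
import chromadb

PERSIST_DIRECTORY = "/app/vector_database"  # must match the COPY destination in the Dockerfile
COLLECTION_NAME = "neepco_dop_policies"     # matches PolicyVectorDB.collection_name

client = chromadb.PersistentClient(path=PERSIST_DIRECTORY)
collection = client.get_collection(name=COLLECTION_NAME)  # raises if the DB was not copied
print(f"Collection '{COLLECTION_NAME}' contains {collection.count()} chunks.")
```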
app/app.py
CHANGED

```diff
@@ -2,15 +2,17 @@ from fastapi import FastAPI, Request
 from pydantic import BaseModel
 from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
 import torch
-from policy_vector_db import PolicyVectorDB
-import chromadb
+from app.policy_vector_db import PolicyVectorDB
+import chromadb  # Make sure chromadb is imported if you use it directly later, though PolicyVectorDB handles it.
 
 # Create FastAPI app
 app = FastAPI()
 
-# Load the vector database from …
+# --- REVISED: Load the vector database from the path inside the Docker container ---
 print("Loading Vector Database...")
-db = PolicyVectorDB(…)
+# The path must match where you copied the DB in the Dockerfile
+DB_PERSIST_DIRECTORY = "/app/vector_database"
+db = PolicyVectorDB(persist_directory=DB_PERSIST_DIRECTORY)
 print("Vector Database loaded successfully!")
 
 # Load your quantized model from Hugging Face Hub
@@ -50,7 +52,8 @@ async def chat(query: Query):
 
     # Step 1: Vector DB search
     search_results = db.search(question)
-    context = …
+    # --- FIX: Use 'text' key as per policy_vector_db.py's search return ---
+    context = "\n".join([res["text"] for res in search_results])
 
     # Step 2: Build prompt
     prompt = f"Context:\n{context}\n\nQuestion: {question}\nAnswer:"
@@ -59,9 +62,7 @@ async def chat(query: Query):
     inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
     outputs = model.generate(**inputs, max_new_tokens=200, do_sample=True, temperature=0.7)
 
-    answer = tokenizer.decode(…)
+    # --- REVISED: Decode only the new tokens to avoid re-including prompt ---
+    answer = tokenizer.decode(outputs[0][inputs.input_ids.shape[1]:], skip_special_tokens=True).strip()
 
-
-    final_answer = answer.split("Answer:")[-1].strip()
-
-    return {"answer": final_answer}
+    return {"answer": answer}  # Return the directly decoded answer
```
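Once the container is up, the endpoint can be exercised end to end. A hedged sketch of a client call, assuming the route is `POST /chat` and the `Query` model exposes a single `question` field; both are inferences from `async def chat(query: Query)` and the use of `question` above, since the route decorator and model definition fall outside the hunks shown.

```python
# Sketch of a client call; the /chat path and "question" field are assumptions,
# not confirmed by the diff.
import requests

resp = requests.post(
    "http://localhost:7860/chat",  # port 7860 is EXPOSEd in the Dockerfile
    json={"question": "What is the delegation of powers for capital expenditure?"},
    timeout=120,  # generating 200 new tokens can be slow on CPU
)
resp.raise_for_status()
print(resp.json()["answer"])
```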
app/policy_vector_db.py
CHANGED

```diff
@@ -1,6 +1,6 @@
 import json
 import os
-import shutil
+import shutil  # Keep for potential cleanup during local testing, but not for deployment init
 from typing import List, Dict
 
 import chromadb
@@ -8,15 +8,34 @@ from sentence_transformers import SentenceTransformer
 
 class PolicyVectorDB:
     """Manages the creation and searching of a persistent vector database."""
-    def __init__(self, persist_directory: str = "/…
+    def __init__(self, persist_directory: str = "/app/policy_vector_db"):
         self.client = chromadb.PersistentClient(path=persist_directory)
         self.collection_name = "neepco_dop_policies"
-        …
-        …
-        …
-        …
-        …
-        …
+        # Using 'cuda' if available, otherwise 'cpu' for the embedding model
+        # You can keep 'cpu' if you are sure about resource allocation.
+        self.embedding_model = SentenceTransformer('BAAI/bge-large-en-v1.5', device='cuda' if torch.cuda.is_available() else 'cpu')
+
+        # When loading a pre-existing DB, use get_or_create_collection cautiously.
+        # If the collection doesn't exist at the path, it will create an empty one.
+        # If you are always pre-building, get_collection is safer as it will fail if not found.
+        # However, get_or_create_collection is more robust against initial empty state.
+        try:
+            self.collection = self.client.get_collection(name=self.collection_name)
+            print(f"Successfully loaded existing collection '{self.collection_name}' from '{persist_directory}'")
+        except Exception as e:
+            # If get_collection fails, it means the collection doesn't exist yet,
+            # which shouldn't happen if pre-built correctly.
+            # For robustness, you could add creation here if desired, but for pre-built,
+            # this indicates an issue with the pre-built DB or path.
+            print(f"Error loading collection '{self.collection_name}': {e}")
+            print("Attempting to create a new (likely empty) collection. Ensure your pre-built DB is copied correctly.")
+            self.collection = self.client.create_collection(
+                name=self.collection_name,
+                metadata={"description": "NEEPCO Delegation of Powers Policy"}
+            )
+
+        print(f"ChromaDB client initialized for collection '{self.collection_name}' at '{persist_directory}'")
+
 
     def _flatten_metadata(self, metadata: Dict) -> Dict:
         """Ensures all metadata values are strings for ChromaDB compatibility."""
@@ -24,6 +43,8 @@ class PolicyVectorDB:
 
     def add_chunks(self, chunks: List[Dict]):
         """Encodes and adds a list of chunk dictionaries to the database."""
+        # This method is primarily for initial DB building, less for runtime in a deployed RAG.
+        # However, keeping it makes the class reusable.
         if not chunks:
             print("No chunks provided to add.")
             return
@@ -40,7 +61,7 @@ class PolicyVectorDB:
 
         for i in range(0, len(new_chunks), batch_size):
             batch = new_chunks[i:i + batch_size]
-            print(f"…
+            print(f" - Processing batch {i//batch_size + 1}/{ -(-len(new_chunks) // batch_size) }...")
 
             texts = [chunk['text'] for chunk in batch]
             ids = [chunk['id'] for chunk in batch]
@@ -73,38 +94,44 @@ class PolicyVectorDB:
             })
         return search_results
 
+# --- REVISED: Remove database building logic from main for deployment ---
+# This main function is typically used for initial local building.
+# For deployment, the DB is now pre-built and copied.
 def main():
-    """Main function to build and verify the vector database."""
+    """Main function to build and verify the vector database (for local pre-building)."""
     BASE_DIR = os.path.dirname(os.path.abspath(__file__))
     INPUT_CHUNKS_PATH = os.path.join(BASE_DIR, "../processed_chunks.json")
     PERSIST_DIRECTORY = "/tmp/policy_vector_db"
 
     if not os.path.exists(INPUT_CHUNKS_PATH):
         print(f"FATAL ERROR: The input chunk file was not found at '{INPUT_CHUNKS_PATH}'")
-        print("Please …
+        print("Please ensure 'processed_chunks.json' is in the root directory.")
         return
 
+    # Remove existing local build directory to ensure clean start
     if os.path.exists(PERSIST_DIRECTORY):
-        print(f"Removing existing database at '{PERSIST_DIRECTORY}' to ensure a clean build.")
+        print(f"Removing existing local build database at '{PERSIST_DIRECTORY}' to ensure a clean build.")
         shutil.rmtree(PERSIST_DIRECTORY)
 
     print(f"Creating database directory: '{PERSIST_DIRECTORY}'")
     os.makedirs(PERSIST_DIRECTORY, exist_ok=True)
-    os.chmod(PERSIST_DIRECTORY, 0o777)
+    os.chmod(PERSIST_DIRECTORY, 0o777)  # Ensure write permissions
 
     print("\nStep 1: Loading processed chunks...")
     with open(INPUT_CHUNKS_PATH, 'r', encoding='utf-8') as f:
         chunks_to_add = json.load(f)
     print(f"Loaded {len(chunks_to_add)} chunks.")
 
-    print("\nStep 2: Setting up persistent vector database...")
-    db = PolicyVectorDB(persist_directory=PERSIST_DIRECTORY)
+    print("\nStep 2: Setting up persistent vector database (local build)...")
+    db = PolicyVectorDB(persist_directory=PERSIST_DIRECTORY)  # Pass the local build path
 
     print("\nStep 3: Adding chunks to the database...")
     db.add_chunks(chunks_to_add)
 
     print(f"\n✅ Vector database setup complete. Total chunks in DB: {db.collection.count()}")
     print(f"Database is saved in: {os.path.abspath(PERSIST_DIRECTORY)}")
+    print("\n--- Important: Copy the contents of this directory (NOT the directory itself) to your 'vector_database' folder in the project root for deployment. ---")
+
 
     print("\n--- Running Verification Tests ---")
    test_questions = [
@@ -119,11 +146,11 @@ def main():
         search_results = db.search(question, top_k=2)
         if search_results:
             for j, result in enumerate(search_results, 1):
-                print(f"…
-                print(f"…
-                print(f"…
+                print(f" Result {j} (Relevance: {result['relevance_score']:.4f}):")
+                print(f" Text: {result['text'][:300]}...")
+                print(f" Metadata: {result['metadata']}")
         else:
-            print("…
+            print(" No results found.")
 
 if __name__ == "__main__":
-    main()
+    main()
```
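One caveat with the revised `__init__`: it calls `torch.cuda.is_available()`, but the import hunk above only touches `shutil`, and lines 1-7 of this file contain no `import torch` (the `import torch` in app/app.py does not cover this module). Unless torch is imported somewhere outside the hunks shown, one should be added here. Beyond that, the local pre-build flow the comments describe amounts to running `main()` and then copying the contents of `/tmp/policy_vector_db` (not the directory itself) into `vector_database/` at the project root, so the Dockerfile's `COPY vector_database/ /app/vector_database/` picks them up. A sketch, assuming it is run from the repo root with dependencies installed; the helper filename is hypothetical:

```python
# build_and_stage_db.py -- hypothetical helper for the local pre-build flow
import shutil

from app.policy_vector_db import main

main()  # builds and verifies the DB at /tmp/policy_vector_db

# Copy the *contents* of the build directory into ./vector_database;
# copytree with dirs_exist_ok=True merges src contents into an existing dst.
shutil.copytree("/tmp/policy_vector_db", "vector_database", dirs_exist_ok=True)
print("Staged DB contents into ./vector_database for the Docker build.")
```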