Kalpokoch committed
Commit 8fdb143
1 Parent(s): 0a6902c

25july upload

Files changed (5)
  1. Dockerfile +14 -21
  2. app/app.py +61 -48
  3. app/policy_vector_db.py +96 -83
  4. processed_chunks.json +0 -0
  5. requirements.txt +5 -1
Dockerfile CHANGED
@@ -1,39 +1,32 @@
-# Use official Python 3.11 image to match the wheel compatibility
+# Use official Python 3.11 image
 FROM python:3.11
 
-# Install system dependencies
+# Install system dependencies needed for building Python packages
 RUN apt-get update && apt-get install -y \
-    git \
-    wget \
     build-essential \
-    libopenblas-dev \
-    libcurl4-openssl-dev \
-    curl \
     && rm -rf /var/lib/apt/lists/*
 
 # Set working directory
 WORKDIR /app
 
-# Set Hugging Face and Transformers cache to avoid permission errors
+# Set Hugging Face cache directory and grant permissions
+# Models downloaded from the Hub will be stored here.
 ENV TRANSFORMERS_CACHE=/app/.cache \
     HF_HOME=/app/.cache
+RUN mkdir -p /app/.cache && chmod -R 777 /app/.cache
 
-# Create and give full access to cache and model folders
-RUN mkdir -p /app/.cache /app/models && chmod -R 777 /app/.cache /app/models
-
-# Copy all project files
-COPY . .
-
-# ✅ Download and install llama-cpp-python wheel from Hugging Face Dataset
-RUN wget https://huggingface.co/datasets/Kalpokoch/wheel-llama/resolve/main/llama_cpp_python-0.3.13-cp311-cp311-linux_x86_64.whl && \
-    pip install llama_cpp_python-0.3.13-cp311-cp311-linux_x86_64.whl && \
-    rm llama_cpp_python-0.3.13-cp311-cp311-linux_x86_64.whl
-
-# Install remaining Python dependencies
+# Copy only the requirements file to leverage Docker cache
+COPY requirements.txt .
+
+# Install Python dependencies
 RUN pip install --no-cache-dir -r requirements.txt
 
-# Expose FastAPI port
+# Copy the rest of your application code
+COPY . .
+
+# Expose the port the app runs on
 EXPOSE 7860
 
-# Start FastAPI app
+# Command to run the FastAPI application
 CMD ["uvicorn", "app.app:app", "--host", "0.0.0.0", "--port", "7860"]
+
app/app.py CHANGED
@@ -1,48 +1,44 @@
-from fastapi import FastAPI, Request
+from fastapi import FastAPI
 from fastapi.middleware.cors import CORSMiddleware
 from pydantic import BaseModel
-from huggingface_hub import hf_hub_download
-from llama_cpp import Llama
-import os
-import json
-import numpy as np
-from typing import List
-from sentence_transformers import SentenceTransformer
-from sklearn.metrics.pairwise import cosine_similarity
+import torch
+from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
+from app.policy_vector_db import PolicyVectorDB  # Import your class
 
-# Load processed chunks (RAG context source)
-with open("processed_chunks.json", "r") as f:
-    chunks = json.load(f)
+# --- 1. Initialize the Vector Database and LLM ---
 
-# Load embeddings model (use a lightweight one for Docker CPU)
-embedder = SentenceTransformer("all-MiniLM-L6-v2")
+# Load the vector database.
+# This connects to the persistent ChromaDB storage created by policy_vector_db.py
+print("Loading Vector Database...")
+db = PolicyVectorDB(persist_directory="policy_vector_db")
+print("Vector Database loaded successfully!")
 
-# Precompute embeddings
-chunk_texts = [chunk["text"] for chunk in chunks]
-chunk_embeddings = embedder.encode(chunk_texts, convert_to_tensor=False)
-
-# Download model file
-model_path = hf_hub_download(
-    repo_id="TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF",
-    filename="tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf",
-    local_dir="/app/models",
-    token=os.getenv("HF_TOKEN")
+# Load your fine-tuned model from Hugging Face Hub
+model_id = "Kalpokoch/QuntizedTinyLama"
+print(f"Loading model: {model_id}...")
+tokenizer = AutoTokenizer.from_pretrained(model_id)
+model = AutoModelForCausalLM.from_pretrained(
+    model_id,
+    torch_dtype=torch.bfloat16,
+    device_map="auto"
 )
 
-# Load TinyLlama model
-llm = Llama(
-    model_path=model_path,
-    n_ctx=2048,
-    n_threads=4  # adjust depending on CPU cores
+# Create a text-generation pipeline for the LLM
+pipe = pipeline(
+    "text-generation",
+    model=model,
+    tokenizer=tokenizer,
+    max_new_tokens=256
 )
+print("LLM and pipeline loaded successfully!")
+
 
-# FastAPI app
+# --- 2. FastAPI App Setup ---
 app = FastAPI()
 
-# Allow Netlify frontend to access the backend
 app.add_middleware(
     CORSMiddleware,
-    allow_origins=["*"],  # or specify your Netlify URL for more security
+    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
@@ -50,7 +46,7 @@ app.add_middleware(
 
 @app.get("/")
 def read_root():
-    return {"message": "RAG chatbot backend is running!"}
+    return {"message": "RAG chatbot backend is running with Kalpokoch/QuntizedTinyLama and ChromaDB!"}
 
 class ChatRequest(BaseModel):
     question: str
@@ -61,23 +57,40 @@ def chat(request: ChatRequest):
     if not question:
         return {"response": "Please ask a question."}
 
-    # Embed the user's question
-    q_embedding = embedder.encode([question])[0]
-
-    # Find top 3 most similar chunks
-    similarities = cosine_similarity([q_embedding], chunk_embeddings)[0]
-    top_indices = similarities.argsort()[-3:][::-1]
-    retrieved = "\n\n".join(chunk_texts[i] for i in top_indices)
+    # --- 3. RAG Retrieval using PolicyVectorDB ---
+    # Use the search method from your class to find relevant context
+    print(f"Searching for context for question: '{question}'")
+    search_results = db.search(query_text=question, top_k=3)
+
+    # Check if any results were found
+    if not search_results:
+        retrieved_context = "No relevant context found."
+    else:
+        # Format the retrieved documents into a single context string
+        retrieved_context = "\n\n".join([result['text'] for result in search_results])
+
+    print(f"Retrieved Context:\n{retrieved_context[:500]}...")
 
-    # Build the prompt
+    # --- 4. Prompt Engineering and Generation ---
+    # Build the prompt with the retrieved context
     prompt = (
-        f"Context:\n{retrieved}\n\n"
-        f"User: {question}\n"
-        f"Assistant:"
+        f"<|system|>\nYou are a helpful assistant for NEEPCO policies. "
+        f"Use the following context to answer the user's question. If the context doesn't contain the answer, say that.\n"
+        f"Context:\n{retrieved_context}</s>\n"
+        f"<|user|>\n{question}</s>\n"
+        f"<|assistant|>"
    )
 
-    # Generate a response from the model
-    output = llm(prompt, max_tokens=256)
-    reply = output["choices"][0]["text"].strip()
-
-    return {"response": reply}
+    # Generate a response using the pipeline
+    try:
+        outputs = pipe(prompt)
+        reply = outputs[0]['generated_text']
+
+        # Extract only the assistant's newly generated reply
+        assistant_reply = reply.split("<|assistant|>")[1].strip()
+
+        return {"response": assistant_reply}
+    except Exception as e:
+        print(f"Error during model inference: {e}")
+        return {"response": "Sorry, I encountered an error while generating a response."}
 
app/policy_vector_db.py CHANGED
@@ -1,115 +1,128 @@
 import json
+import os
+import shutil
+from typing import List, Dict
+
 import chromadb
 from sentence_transformers import SentenceTransformer
-from typing import List, Dict
 
 class PolicyVectorDB:
-    def __init__(self, db_path="./chroma_db"):
-        """Initialize vector database for policy chunks"""
-        self.client = chromadb.PersistentClient(path=db_path)
+    """Manages the creation and searching of a persistent vector database."""
+    def __init__(self, persist_directory: str = "chroma_db"):
+        self.client = chromadb.PersistentClient(path=persist_directory)
         self.collection_name = "neepco_dop_policies"
-
-        # Initialize embedding model
-        self.embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
-
-        # Create or get collection
-        try:
-            self.collection = self.client.get_collection(self.collection_name)
-            print("Loaded existing collection")
-        except:
-            self.collection = self.client.create_collection(
-                name=self.collection_name,
-                metadata={"description": "NEEPCO DOP Policy chunks"}
-            )
-            print("Created new collection")
+        self.embedding_model = SentenceTransformer('BAAI/bge-large-en-v1.5', device='cpu')
+        self.collection = self.client.get_or_create_collection(
+            name=self.collection_name,
+            metadata={"description": "NEEPCO Delegation of Powers Policy"}
+        )
+        print(f"Loaded/Created persistent collection '{self.collection_name}' at '{persist_directory}'")
 
     def _flatten_metadata(self, metadata: Dict) -> Dict:
-        """Remove nested metadata (dicts/lists) and stringify others"""
-        flat_meta = {}
-        for key, value in metadata.items():
-            if isinstance(value, (dict, list)):
-                continue  # skip nested fields
-            if isinstance(value, (str, int, float, bool)) or value is None:
-                flat_meta[key] = value
-            else:
-                flat_meta[key] = str(value)  # fallback to string
-        return flat_meta
+        """Ensures all metadata values are strings for ChromaDB compatibility."""
+        return {key: str(value) for key, value in metadata.items()}
 
     def add_chunks(self, chunks: List[Dict]):
-        """Add policy chunks to vector database"""
-        print(f"Adding {len(chunks)} chunks to database...")
+        """Encodes and adds a list of chunk dictionaries to the database."""
+        if not chunks:
+            print("No chunks provided to add.")
+            return
 
-        texts = [chunk['text'] for chunk in chunks]
-        metadatas = [self._flatten_metadata(chunk['metadata']) for chunk in chunks]
-        ids = [chunk['id'] for chunk in chunks]
-
-        # Generate embeddings
-        embeddings = self.embedding_model.encode(texts).tolist()
-
-        # Add to collection
-        self.collection.add(
-            embeddings=embeddings,
-            documents=texts,
-            metadatas=metadatas,
-            ids=ids
-        )
-
-        print("Successfully added chunks to database!")
+        existing_ids = set(self.collection.get(include=[])['ids'])
+        new_chunks = [chunk for chunk in chunks if chunk.get('id') not in existing_ids]
 
-    def search(self, query: str, top_k: int = 3) -> List[Dict]:
-        """Search for relevant policy chunks"""
-
-        # Generate query embedding
-        query_embedding = self.embedding_model.encode([query]).tolist()
+        if not new_chunks:
+            print("No new chunks to add. All provided chunks already exist in the database.")
+            return
+
+        print(f"Found {len(new_chunks)} new chunks to add.")
+        batch_size = 128
+
+        for i in range(0, len(new_chunks), batch_size):
+            batch = new_chunks[i:i + batch_size]
+            print(f"  - Processing batch {i//batch_size + 1}/{ -(-len(new_chunks) // batch_size) }...")
+
+            texts = [chunk['text'] for chunk in batch]
+            ids = [chunk['id'] for chunk in batch]
+            metadatas = [self._flatten_metadata(chunk['metadata']) for chunk in batch]
+
+            embeddings = self.embedding_model.encode(texts, show_progress_bar=False).tolist()
+            self.collection.add(ids=ids, embeddings=embeddings, documents=texts, metadatas=metadatas)
 
-        # Search in vector database
+        print(f"Successfully added {len(new_chunks)} new chunks to the database!")
+
+    def search(self, query_text: str, top_k: int = 3) -> List[Dict]:
+        """Searches the collection for a given query text."""
+        query_embedding = self.embedding_model.encode([query_text]).tolist()
         results = self.collection.query(
             query_embeddings=query_embedding,
             n_results=top_k,
             include=['documents', 'metadatas', 'distances']
         )
 
-        # Format results
         search_results = []
-        for i in range(len(results['documents'][0])):
+        if not results.get('documents'):
+            return []
+
+        for i, doc in enumerate(results['documents'][0]):
+            relevance_score = 1 - results['distances'][0][i]
             search_results.append({
-                'text': results['documents'][0][i],
+                'text': doc,
                 'metadata': results['metadatas'][0][i],
-                'relevance_score': 1 - results['distances'][0][i]  # Convert distance to similarity
+                'relevance_score': relevance_score
             })
-
         return search_results
 
-def setup_vector_database():
-    """Complete setup of vector database"""
-
-    # Load processed chunks
-    with open("processed_chunks.json", "r", encoding='utf-8') as f:
-        chunks = json.load(f)
+def main():
+    """Main function to build and verify the vector database."""
+    INPUT_CHUNKS_PATH = "processed_chunks_final.json"
+    PERSIST_DIRECTORY = "policy_vector_db"
 
-    # Initialize database
-    vector_db = PolicyVectorDB()
+    if not os.path.exists(INPUT_CHUNKS_PATH):
+        print(f"FATAL ERROR: The input chunk file was not found at '{INPUT_CHUNKS_PATH}'")
+        print("Please run 'create_chunks.py' first.")
+        return
+
+    if os.path.exists(PERSIST_DIRECTORY):
+        print(f"Removing existing database at '{PERSIST_DIRECTORY}' to ensure a clean build.")
+        shutil.rmtree(PERSIST_DIRECTORY)
+
+    print(f"Creating database directory: '{PERSIST_DIRECTORY}'")
+    os.makedirs(PERSIST_DIRECTORY, exist_ok=True)
+    os.chmod(PERSIST_DIRECTORY, 0o777)
+
+    print("\nStep 1: Loading processed chunks...")
+    with open(INPUT_CHUNKS_PATH, 'r', encoding='utf-8') as f:
+        chunks_to_add = json.load(f)
+    print(f"Loaded {len(chunks_to_add)} chunks.")
 
-    # Add chunks to database
-    vector_db.add_chunks(chunks)
+    print("\nStep 2: Setting up persistent vector database...")
+    db = PolicyVectorDB(persist_directory=PERSIST_DIRECTORY)
 
-    return vector_db
+    print("\nStep 3: Adding chunks to the database...")
+    db.add_chunks(chunks_to_add)
 
+    print(f"\n✅ Vector database setup complete. Total chunks in DB: {db.collection.count()}")
+    print(f"Database is saved in: {os.path.abspath(PERSIST_DIRECTORY)}")
 
-# Example usage
+    print("\n--- Running Verification Tests ---")
+    test_questions = [
+        "Who can approve changes to the pay structure?",
+        "What is the financial limit for a DGM for works on a limited tender basis?",
+        "What's the delegation power of an ED for single tender O&M contracts from an OEM?"
+    ]
+
+    for question in test_questions:
+        print(f"\n--- Testing Query ---")
+        print(f"Query: {question}")
+        search_results = db.search(question, top_k=2)
+        if search_results:
+            for j, result in enumerate(search_results, 1):
+                print(f"  Result {j} (Relevance: {result['relevance_score']:.4f}):")
+                print(f"  Text: {result['text'][:300]}...")
+                print(f"  Metadata: {result['metadata']}")
+        else:
+            print("  No results found.")
+
 if __name__ == "__main__":
-    # Setup database
-    db = setup_vector_database()
-
-    # Test search
-    query = "Who approves resignation for executives E-7 and above?"
-    results = db.search(query, top_k=2)
-
-    print(f"\nQuery: {query}")
-    print("Results:")
-    for i, result in enumerate(results, 1):
-        print(f"\n{i}. Relevance: {result['relevance_score']:.3f}")
-        print(f"Section: {result['metadata'].get('section', 'N/A')}")
-        print(f"Authority: {result['metadata'].get('authority', 'N/A')}")
-        print(f"Text: {result['text'][:200]}...")
+    main()
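For reference, add_chunks expects each chunk as a dict with 'id', 'text', and 'metadata' keys (that is how the method indexes its input, and 'id' drives the de-duplication check against existing collection ids). Below is a minimal sketch of that shape against a throwaway database; the directory name and sample values are invented for illustration and are not part of this commit.

    from app.policy_vector_db import PolicyVectorDB

    # Hypothetical example of the chunk shape consumed by add_chunks().
    sample_chunks = [
        {
            "id": "dop-demo-001",  # unique string id; ids already in the collection are skipped on re-runs
            "text": "Sample policy text about delegation of powers.",
            "metadata": {"section": "demo", "authority": "demo"},  # values are stringified by _flatten_metadata()
        }
    ]

    db = PolicyVectorDB(persist_directory="scratch_vector_db")  # throwaway path, not the committed policy_vector_db
    db.add_chunks(sample_chunks)
    print(db.search("delegation of powers", top_k=1))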
processed_chunks.json CHANGED
The diff for this file is too large to render. See raw diff
 
requirements.txt CHANGED
@@ -1,5 +1,9 @@
 fastapi
 uvicorn
-huggingface_hub
+pydantic
 sentence-transformers
 scikit-learn
+torch
+transformers
+accelerate
+bitsandbytes