Kalpokoch committed
Commit 5a1000f · verified · 1 Parent(s): 3d9737f

Update app/app.py

Files changed (1)
  1. app/app.py +69 -11
app/app.py CHANGED
@@ -6,30 +6,64 @@ from fastapi import FastAPI, Request
 from pydantic import BaseModel
 from llama_cpp import Llama
 import os
+import logging
+from typing import Optional
 from app.policy_vector_db import PolicyVectorDB, ensure_db_populated

-# Initialize FastAPI app
+# -----------------------------
+# ✅ Logging Configuration
+# -----------------------------
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger("app")
+
+# -----------------------------
+# ✅ Initialize FastAPI App
+# -----------------------------
 app = FastAPI()

+# -----------------------------
+# ✅ Health Check Endpoint
+# -----------------------------
+@app.get("/")
+async def root():
+    return {"status": "✅ Server is running and ready."}
+
+# -----------------------------
+# ✅ Feedback Collection Endpoint
+# -----------------------------
+class Feedback(BaseModel):
+    question: str
+    answer: str
+    feedback: str
+
+@app.post("/feedback/")
+async def collect_feedback(feedback: Feedback):
+    logger.info(f"[FEEDBACK] Question: {feedback.question} | Answer: {feedback.answer} | Feedback: {feedback.feedback}")
+    return {"status": "✅ Feedback recorded. Thank you!"}
+
 # -----------------------------
 # ✅ Vector DB Configuration
 # -----------------------------
 DB_PERSIST_DIRECTORY = "/app/vector_database"
 CHUNKS_FILE_PATH = "/app/processed_chunks.json"

-print("[INFO] Initializing vector DB...")
-db = PolicyVectorDB(persist_directory=DB_PERSIST_DIRECTORY)
+logger.info("[INFO] Initializing vector DB...")
+db = PolicyVectorDB(
+    persist_directory=DB_PERSIST_DIRECTORY,
+    top_k_default=7,          # Raise top_k for broader context
+    relevance_threshold=0.60  # Lower threshold for more inclusive context
+)
 if not ensure_db_populated(db, CHUNKS_FILE_PATH):
-    print("[WARNING] DB not populated. Chunks file may be missing.")
+    logger.warning("[WARNING] DB not populated. Chunks file may be missing.")
 else:
-    print("[INFO] Vector DB ready.")
+    logger.info("[INFO] Vector DB ready.")

 # -----------------------------
 # ✅ Load GGUF Model with llama-cpp-python (model is pre-downloaded in Dockerfile)
 # -----------------------------
 MODEL_PATH = "/app/tinyllama_dop_q4_k_m.gguf"

-print(f"[INFO] Loading GGUF model from: {MODEL_PATH}")
+logger.info(f"[INFO] Loading GGUF model from: {MODEL_PATH}")
 llm = Llama(
     model_path=MODEL_PATH,
     n_ctx=1024,
@@ -38,7 +72,7 @@ llm = Llama(
     use_mlock=False,
     verbose=False
 )
-print("[INFO] Model loaded successfully.")
+logger.info("[INFO] Model loaded successfully.")

 # -----------------------------
 # ✅ Request Schema
@@ -52,12 +86,36 @@ class Query(BaseModel):
 @app.post("/chat/")
 async def chat(query: Query):
     question = query.question
+    logger.info(f"[QUERY] {question}")
+
+    # Vector DB search
     search_results = db.search(question)
-    context = "\n".join([res["text"] for res in search_results])
+    context_chunks = [res for res in search_results if res["relevance_score"] > db.relevance_threshold]
+    context = "\n".join([res["text"] for res in context_chunks])

-    prompt = f"""### Context:\n{context}\n\n### Question: {question}\n### Answer:"""
+    if not context:
+        logger.warning("[WARN] No relevant context found. Answering without it.")
+        context = "No relevant context found in policy database."

-    response = llm(prompt, max_tokens=150, stop=["###"])
+    # Prompt Template
+    prompt = f"""You are a helpful assistant trained on NEEPCO Delegation of Powers policies.
+
+    ### Relevant Context:
+    {context}
+
+    ### Question:
+    {question}
+
+    ### Answer:"""
+
+    # LLM Inference
+    response = llm(prompt, max_tokens=200, stop=["###"], temperature=0.2)
     answer = response["choices"][0]["text"].strip()

-    return {"answer": answer}
+    logger.info(f"[RESPONSE] {answer}")
+
+    return {
+        "question": question,
+        "context_used": context,
+        "answer": answer
+    }
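
The updated chat handler relies on a specific contract from app/policy_vector_db.py: the constructor accepts persist_directory, top_k_default, and relevance_threshold; search() returns dicts carrying "text" and "relevance_score"; and ensure_db_populated() reports whether the chunks file was loaded. A minimal sketch of that assumed interface follows; names and defaults are illustrative, since the real implementation is not part of this commit:

# Hypothetical sketch of the interface app.py relies on; the real class lives in
# app/policy_vector_db.py and is not shown in this diff.
from typing import Any, Dict, List, Optional

class PolicyVectorDB:
    def __init__(self, persist_directory: str,
                 top_k_default: int = 5,
                 relevance_threshold: float = 0.75):
        self.persist_directory = persist_directory
        self.top_k_default = top_k_default
        # app.py reads this attribute back when filtering search results.
        self.relevance_threshold = relevance_threshold

    def search(self, query: str, top_k: Optional[int] = None) -> List[Dict[str, Any]]:
        """Return one dict per retrieved chunk, e.g.
        {"text": "<chunk text>", "relevance_score": 0.83}."""
        raise NotImplementedError

def ensure_db_populated(db: PolicyVectorDB, chunks_file_path: str) -> bool:
    """Return True once the persisted collection contains the chunks
    from chunks_file_path (loading them if necessary)."""
    raise NotImplementedError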
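
For a quick smoke test of the endpoints this commit adds, a client can hit /, /chat/, and /feedback/ in sequence. The snippet below assumes the FastAPI app is served at http://localhost:8000; the host, port, and sample question are assumptions, while the request and response fields match the handlers above:

# Hypothetical client-side check of the new endpoints; adjust BASE_URL to the
# actual deployment.
import requests

BASE_URL = "http://localhost:8000"

# Health check (new GET / endpoint)
print(requests.get(f"{BASE_URL}/").json())

# Chat: the response now echoes the question and the context that was used.
chat = requests.post(
    f"{BASE_URL}/chat/",
    json={"question": "Who can approve purchases above the delegated limit?"},
).json()
print(chat["answer"])
print(chat["context_used"])

# Feedback: send the rating back through the new /feedback/ endpoint.
requests.post(
    f"{BASE_URL}/feedback/",
    json={
        "question": chat["question"],
        "answer": chat["answer"],
        "feedback": "Helpful, but please cite the clause number.",
    },
)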