Kalpokoch committed
Commit 5a1000f · verified · 1 Parent(s): 3d9737f

Update app/app.py

Files changed (1)
  1. app/app.py +69 -11
app/app.py CHANGED
@@ -6,30 +6,64 @@ from fastapi import FastAPI, Request
 from pydantic import BaseModel
 from llama_cpp import Llama
 import os
+import logging
+from typing import Optional
 from app.policy_vector_db import PolicyVectorDB, ensure_db_populated

-# Initialize FastAPI app
+# -----------------------------
+# ✅ Logging Configuration
+# -----------------------------
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger("app")
+
+# -----------------------------
+# ✅ Initialize FastAPI App
+# -----------------------------
 app = FastAPI()

+# -----------------------------
+# ✅ Health Check Endpoint
+# -----------------------------
+@app.get("/")
+async def root():
+    return {"status": "✅ Server is running and ready."}
+
+# -----------------------------
+# ✅ Feedback Collection Endpoint
+# -----------------------------
+class Feedback(BaseModel):
+    question: str
+    answer: str
+    feedback: str
+
+@app.post("/feedback/")
+async def collect_feedback(feedback: Feedback):
+    logger.info(f"[FEEDBACK] Question: {feedback.question} | Answer: {feedback.answer} | Feedback: {feedback.feedback}")
+    return {"status": "✅ Feedback recorded. Thank you!"}
+
 # -----------------------------
 # ✅ Vector DB Configuration
 # -----------------------------
 DB_PERSIST_DIRECTORY = "/app/vector_database"
 CHUNKS_FILE_PATH = "/app/processed_chunks.json"

-print("[INFO] Initializing vector DB...")
-db = PolicyVectorDB(persist_directory=DB_PERSIST_DIRECTORY)
+logger.info("[INFO] Initializing vector DB...")
+db = PolicyVectorDB(
+    persist_directory=DB_PERSIST_DIRECTORY,
+    top_k_default=7,          # Raise top_k for broader context
+    relevance_threshold=0.60  # Lower threshold for more inclusive context
+)
 if not ensure_db_populated(db, CHUNKS_FILE_PATH):
-    print("[WARNING] DB not populated. Chunks file may be missing.")
+    logger.warning("[WARNING] DB not populated. Chunks file may be missing.")
 else:
-    print("[INFO] Vector DB ready.")
+    logger.info("[INFO] Vector DB ready.")

 # -----------------------------
 # ✅ Load GGUF Model with llama-cpp-python (model is pre-downloaded in Dockerfile)
 # -----------------------------
 MODEL_PATH = "/app/tinyllama_dop_q4_k_m.gguf"

-print(f"[INFO] Loading GGUF model from: {MODEL_PATH}")
+logger.info(f"[INFO] Loading GGUF model from: {MODEL_PATH}")
 llm = Llama(
     model_path=MODEL_PATH,
     n_ctx=1024,
@@ -38,7 +72,7 @@ llm = Llama(
     use_mlock=False,
     verbose=False
 )
-print("[INFO] Model loaded successfully.")
+logger.info("[INFO] Model loaded successfully.")

 # -----------------------------
 # ✅ Request Schema
@@ -52,12 +86,36 @@ class Query(BaseModel):
 @app.post("/chat/")
 async def chat(query: Query):
     question = query.question
+    logger.info(f"[QUERY] {question}")
+
+    # Vector DB search
     search_results = db.search(question)
-    context = "\n".join([res["text"] for res in search_results])
+    context_chunks = [res for res in search_results if res["relevance_score"] > db.relevance_threshold]
+    context = "\n".join([res["text"] for res in context_chunks])

-    prompt = f"""### Context:\n{context}\n\n### Question: {question}\n### Answer:"""
+    if not context:
+        logger.warning("[WARN] No relevant context found. Answering without it.")
+        context = "No relevant context found in policy database."

-    response = llm(prompt, max_tokens=150, stop=["###"])
+    # Prompt Template
+    prompt = f"""You are a helpful assistant trained on NEEPCO Delegation of Powers policies.
+
+    ### Relevant Context:
+    {context}
+
+    ### Question:
+    {question}
+
+    ### Answer:"""
+
+    # LLM Inference
+    response = llm(prompt, max_tokens=200, stop=["###"], temperature=0.2)
     answer = response["choices"][0]["text"].strip()

-    return {"answer": answer}
+    logger.info(f"[RESPONSE] {answer}")
+
+    return {
+        "question": question,
+        "context_used": context,
+        "answer": answer
+    }
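
The updated chat handler relies on a specific contract from app/policy_vector_db.py: the constructor accepts persist_directory, top_k_default, and relevance_threshold; search() returns dicts carrying "text" and "relevance_score"; and ensure_db_populated() reports whether the chunks file was loaded. A minimal sketch of that assumed interface follows; names and defaults are illustrative, since the real implementation is not part of this commit:

# Hypothetical sketch of the interface app.py relies on; the real class lives in
# app/policy_vector_db.py and is not shown in this diff.
from typing import Any, Dict, List, Optional

class PolicyVectorDB:
    def __init__(self, persist_directory: str,
                 top_k_default: int = 5,
                 relevance_threshold: float = 0.75):
        self.persist_directory = persist_directory
        self.top_k_default = top_k_default
        # app.py reads this attribute back when filtering search results.
        self.relevance_threshold = relevance_threshold

    def search(self, query: str, top_k: Optional[int] = None) -> List[Dict[str, Any]]:
        """Return one dict per retrieved chunk, e.g.
        {"text": "<chunk text>", "relevance_score": 0.83}."""
        raise NotImplementedError

def ensure_db_populated(db: PolicyVectorDB, chunks_file_path: str) -> bool:
    """Return True once the persisted collection contains the chunks
    from chunks_file_path (loading them if necessary)."""
    raise NotImplementedError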
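
For a quick smoke test of the endpoints this commit adds, a client can hit /, /chat/, and /feedback/ in sequence. The snippet below assumes the FastAPI app is served at http://localhost:8000; the host, port, and sample question are assumptions, while the request and response fields match the handlers above:

# Hypothetical client-side check of the new endpoints; adjust BASE_URL to the
# actual deployment.
import requests

BASE_URL = "http://localhost:8000"

# Health check (new GET / endpoint)
print(requests.get(f"{BASE_URL}/").json())

# Chat: the response now echoes the question and the context that was used.
chat = requests.post(
    f"{BASE_URL}/chat/",
    json={"question": "Who can approve purchases above the delegated limit?"},
).json()
print(chat["answer"])
print(chat["context_used"])

# Feedback: send the rating back through the new /feedback/ endpoint.
requests.post(
    f"{BASE_URL}/feedback/",
    json={
        "question": chat["question"],
        "answer": chat["answer"],
        "feedback": "Helpful, but please cite the clause number.",
    },
)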