Cleo committed on
Commit
e422038
·
1 Parent(s): d1ccee8

upload supporting files

Browse files
Files changed (6) hide show
  1. agent/__init__.py +1 -0
  2. agent/agent.py +985 -0
  3. agent/prompts.py +156 -0
  4. agent/utils.py +70 -0
  5. app.py +3 -2
  6. assets/custom.css +59 -0
agent/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ """Agent package for Vibe Reader"""
agent/agent.py ADDED
@@ -0,0 +1,985 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ LangGraph Agent for Vibe Reader
3
+ Implements the agentic workflow for book recommendation based on visual vibes
4
+ """
5
+
6
+ import os
7
+ import json
8
+ from typing import TypedDict, List, Dict, Any, Literal, Annotated
9
+ from operator import add
10
+ from openai import OpenAI
11
+ from langchain_core.messages import HumanMessage, AIMessage, SystemMessage
12
+ from langgraph.graph import StateGraph, END
13
+ from langgraph.types import interrupt
14
+ from dotenv import load_dotenv
15
+
16
+ load_dotenv()
17
+
18
+ # ============================================================================
19
+ # CONFIGURATION
20
+ # ============================================================================
21
+
22
+ NEBIUS_API_KEY = os.getenv("NEBIUS_API_KEY")
23
+ NEBIUS_BASE_URL = "https://api.tokenfactory.nebius.com/v1/"
24
+ VLM_MODEL = "google/gemma-3-27b-it-fast"
25
+ REASONING_MODEL = "Qwen/Qwen3-30B-A3B-Thinking-2507"
26
+ FAST_MODEL = "moonshotai/Kimi-K2-Instruct" # Non-thinking model for simple tasks
27
+
28
+ MODAL_VECTOR_STORE_URL = os.getenv("MODAL_VECTOR_STORE_URL", "https://placeholder-modal-url.modal.run/search")
29
+ GOOGLE_BOOKS_MCP_URL = os.getenv("GOOGLE_BOOKS_MCP_URL", "https://mcp-1st-birthday-google-books-mcp.hf.space")
30
+
31
+ NUM_BOOKS_TO_RETRIEVE = 10 # Target number of books with valid descriptions
32
+ NUM_BOOKS_TO_FETCH = 15 # Fetch extra to account for books without descriptions
33
+ NUM_FINAL_BOOKS = 3
34
+
35
+ # ============================================================================
36
+ # STATE DEFINITION
37
+ # ============================================================================
38
+
39
class AgentState(TypedDict):
    """State maintained throughout the agent workflow.

    This TypedDict is the LangGraph state schema: every key a node writes must
    be declared here, otherwise the graph will not carry it between nodes.
    (Fix: `narrowing_questions_asked` was written by the generate_question_*
    nodes and read by check_narrowing_complete but was missing from the schema.)
    """
    # User inputs
    images: List[str]  # List of image URLs or base64 encoded images

    # Conversation history (no reducer - we manage the list directly)
    messages: List[Dict[str, str]]

    # Vibe components (from JSON extraction)
    aesthetic_genre_keywords: List[str]  # Genre/aesthetic keywords
    mood_atmosphere: List[str]  # Mood descriptors
    core_themes: List[str]  # Core themes
    tropes: List[str]  # Story tropes
    feels_like: str  # User-facing "feels like" description (what gets refined)
    vibe_refinement_count: int  # Number of refinement iterations

    # Book retrieval
    retrieved_books: List[Dict[str, str]]  # List of {title, author} dicts
    books_with_metadata: List[Dict[str, Any]]  # Enriched with Google Books data

    # Narrowing process
    narrowing_questions_asked: int  # Count of narrowing questions asked so far (set to 1/2 by generate_question_* nodes)
    q1_question: str  # First narrowing question (stored for resume)
    q2_question: str  # Second narrowing question (stored for resume)
    user_preferences: Dict[str, Any]  # Accumulated user preferences from Q&A (question + answer pairs)
    final_books: List[Dict[str, Any]]  # Final 3 books

    # Final outputs
    soundtrack_url: str  # ElevenLabs generated soundtrack

    # Debug/reasoning (no reducer - we manage the list directly)
    reasoning: List[str]
70
+
71
+
72
+ # ============================================================================
73
+ # HELPER FUNCTIONS
74
+ # ============================================================================
75
+
76
def create_openai_client() -> OpenAI:
    """Build an OpenAI SDK client pointed at the Nebius inference endpoint."""
    client = OpenAI(
        api_key=NEBIUS_API_KEY,
        base_url=NEBIUS_BASE_URL,
    )
    return client
79
+
80
+
81
def call_llm(messages: List[Dict[str, Any]], temperature: float = 0.7, model: str = REASONING_MODEL, include_reasoning: bool = False, max_tokens: int = 2500):
    """Generic LLM call for reasoning and decision-making using Nebius API.

    Args:
        messages: Conversation messages
        temperature: Sampling temperature
        model: Model to use
        include_reasoning: If True, returns tuple of (content, reasoning_text)
        max_tokens: Maximum tokens for response (default 2500)

    Returns:
        str or tuple: Response content, or (content, reasoning) if include_reasoning=True
    """
    import re

    client = create_openai_client()  # Uses Nebius
    response = client.chat.completions.create(
        model=model,
        messages=messages,
        temperature=temperature,
        max_tokens=max_tokens
    )

    message = response.choices[0].message
    content = message.content or ""

    # Plain call: just hand back the content.
    if not include_reasoning:
        return content

    # Nebius API returns reasoning in a separate field for Thinking models.
    reasoning = getattr(message, 'reasoning_content', None) or ""

    if reasoning:
        # If content is empty, log a warning but don't try to extract from reasoning
        # (the last line of reasoning is usually garbage, not the answer).
        if not content.strip():
            print(f"[DEBUG AGENT] Warning: LLM returned empty content with reasoning. This may indicate an issue.")
        return content, reasoning

    # Fallback: try parsing <think>...</think> tags embedded in the content.
    think_match = re.match(r'<think>(.*?)</think>(.*)', content, re.DOTALL)
    if think_match:
        reasoning = think_match.group(1).strip()
        final_content = think_match.group(2).strip()
        return final_content, reasoning

    # No reasoning found anywhere.
    return content, "No reasoning trace found"
128
+
129
+
130
+ # ============================================================================
131
+ # NODES
132
+ # ============================================================================
133
+
134
def generate_initial_vibe(state: AgentState) -> AgentState:
    """Node: Generate initial vibe description from uploaded images using VLM.

    Sends every image in state["images"] to the vision model together with the
    VIBE_EXTRACTION system prompt, parses the JSON reply into the vibe component
    fields, shows only the "feels like" text to the user, then interrupts the
    graph to wait for their feedback. Resumes with the user's reply appended to
    the message history.
    """
    from .prompts import VIBE_EXTRACTION
    from .utils import parse_json_response, extract_vibe_components

    client = create_openai_client()

    # Construct message with images
    content = [{"type": "text", "text": "Analyze these images and extract the vibe:"}]
    for img in state["images"]:
        # Convert local file paths to base64 data URLs if needed
        if img.startswith(('http://', 'https://', 'data:')):
            # Already a valid URL
            image_url = img
        else:
            # Local file path - convert to base64
            import base64
            from pathlib import Path

            img_path = Path(img)
            if img_path.exists():
                with open(img_path, 'rb') as f:
                    img_data = base64.b64encode(f.read()).decode('utf-8')
                # Determine MIME type from extension
                ext = img_path.suffix.lower()
                mime_types = {'.jpg': 'jpeg', '.jpeg': 'jpeg', '.png': 'png', '.gif': 'gif', '.webp': 'webp'}
                mime = mime_types.get(ext, 'jpeg')  # default to jpeg for unknown extensions
                image_url = f"data:image/{mime};base64,{img_data}"
            else:
                # Missing file: record a warning and skip this image entirely
                state["reasoning"].append(f"⚠️ Warning: Image file not found: {img}")
                continue

        content.append({
            "type": "image_url",
            "image_url": {"url": image_url}
        })

    response = client.chat.completions.create(
        model=VLM_MODEL,
        messages=[
            {"role": "system", "content": VIBE_EXTRACTION},
            {"role": "user", "content": content}
        ],
        temperature=0.7,
        max_tokens=2000
    )

    vibe_json_str = response.choices[0].message.content

    # Parse JSON response
    vibe_json = parse_json_response(vibe_json_str)
    if not vibe_json:
        state["reasoning"].append(f"❌ Failed to parse vibe JSON. Raw response: {vibe_json_str[:200]}")
        # Fallback to simple extraction: treat the raw reply as the feels_like text
        state["feels_like"] = vibe_json_str
        state["aesthetic_genre_keywords"] = []
        state["mood_atmosphere"] = []
        state["core_themes"] = []
        state["tropes"] = []
    else:
        # Extract components
        components = extract_vibe_components(vibe_json)
        state["aesthetic_genre_keywords"] = components["aesthetic_genre_keywords"]
        state["mood_atmosphere"] = components["mood_atmosphere"]
        state["core_themes"] = components["core_themes"]
        state["tropes"] = components["tropes"]
        state["feels_like"] = components["feels_like"]

        state["reasoning"].append(f"✅ Extracted vibe components:\n"
            f" - Aesthetics: {', '.join(state['aesthetic_genre_keywords'])}\n"
            f" - Mood: {', '.join(state['mood_atmosphere'])}\n"
            f" - Themes: {', '.join(state['core_themes'])}\n"
            f" - Tropes: {', '.join(state['tropes'])}")

    state["vibe_refinement_count"] = 0

    # Only show feels_like to user
    assistant_message = f"Here's the vibe I'm getting from your images:\n\n{state['feels_like']}\n\nDoes this capture what you're looking for, or would you like me to adjust it?"
    state["messages"].append({
        "role": "assistant",
        "content": assistant_message
    })

    # Wait for user feedback; when resumed, user_response will contain their reply
    user_response = interrupt(assistant_message)
    if user_response:
        state["messages"].append({"role": "user", "content": user_response})

    return state
223
+
224
+
225
def refine_vibe(state: AgentState) -> AgentState:
    """Node: Refine vibe based on user feedback - only refines feels_like portion.

    Takes the most recent user message as feedback, asks the reasoning model to
    rewrite the "feels like" description, and leaves the other vibe components
    (aesthetics, mood, themes, tropes) untouched. Interrupts again afterwards to
    collect the user's reaction to the refined text.
    """
    from .prompts import VIBE_REFINEMENT
    from .utils import strip_thinking_tags

    print("[DEBUG AGENT] refine_vibe node started")

    # Get the latest user message (feedback)
    user_messages = [m for m in state["messages"] if m.get("role") == "user"]
    print(f"[DEBUG AGENT] Found {len(user_messages)} user messages")
    if not user_messages:
        # Nothing to refine against; leave the vibe untouched
        state["reasoning"].append("⚠️ No user feedback found for refinement; skipping refine_vibe step")
        return state

    user_feedback = user_messages[-1]["content"]
    print(f"[DEBUG AGENT] user_feedback: {user_feedback[:50] if user_feedback else 'None'}...")

    # Use LLM to refine only the feels_like description
    # Keep other vibe components (aesthetics, themes, tropes) unchanged
    messages = [
        {"role": "system", "content": VIBE_REFINEMENT},
        {"role": "user", "content": f"Current 'feels like' description: {state['feels_like']}\n\nUser feedback: {user_feedback}\n\nProvide the refined 'feels like' description (4-5 sentences):"}
    ]

    print(f"[DEBUG AGENT] Calling LLM for refinement...")
    refined_feels_like, reasoning = call_llm(messages, temperature=0.7, include_reasoning=True)
    print(f"[DEBUG AGENT] LLM returned content: {refined_feels_like[:200] if refined_feels_like else 'None'}...")
    print(f"[DEBUG AGENT] LLM reasoning: {reasoning[:200] if reasoning else 'None'}...")

    # Ensure no thinking tags leak into the feels_like
    refined_feels_like = strip_thinking_tags(refined_feels_like)

    # Update only the feels_like portion
    state["feels_like"] = refined_feels_like
    state["vibe_refinement_count"] += 1

    assistant_message = f"I've refined the vibe:\n\n{refined_feels_like}\n\nIs this better, or would you like further adjustments?"
    print(f"[DEBUG AGENT] Adding assistant message to state, current msg count: {len(state['messages'])}")
    state["messages"].append({
        "role": "assistant",
        "content": assistant_message
    })
    state["reasoning"].append(f"🧠 REASONING (Vibe Refinement #{state['vibe_refinement_count']}):\n{reasoning}\n")
    print(f"[DEBUG AGENT] After append, msg count: {len(state['messages'])}")

    # Wait for user feedback on the refined vibe
    print(f"[DEBUG AGENT] About to call interrupt()")
    user_response = interrupt(assistant_message)
    print(f"[DEBUG AGENT] interrupt() returned: {user_response}")
    if user_response:
        state["messages"].append({"role": "user", "content": user_response})

    return state
278
+
279
+
280
def check_vibe_satisfaction(state: AgentState) -> Literal["refine", "retrieve"]:
    """Conditional edge: Check if user is satisfied with vibe description.

    Classifies the latest user message with an LLM and routes to "retrieve"
    (move on to book search) or "refine" (another refinement round). Defaults
    to "retrieve" when there is no user feedback at all.

    NOTE(review): this function also appends to state["reasoning"]; in LangGraph,
    state mutations made inside a conditional-edge function may not be persisted
    to the graph state — confirm against the graph wiring.
    """
    from .prompts import VIBE_SATISFACTION_CHECKER

    # Get the last user message
    user_messages = [m for m in state["messages"] if m.get("role") == "user"]
    if not user_messages:
        # No explicit feedback; default to moving forward
        return "retrieve"

    raw_content = user_messages[-1]["content"]

    # Content may occasionally be a non-string (e.g., list from upstream tools);
    # normalize to text before passing into the LLM.
    if isinstance(raw_content, str):
        last_user_msg = raw_content
    elif isinstance(raw_content, list):
        # Join any text-like chunks into a single string representation
        last_user_msg = " ".join(str(x) for x in raw_content)
    else:
        last_user_msg = str(raw_content)

    # Use LLM to determine satisfaction
    messages = [
        {"role": "system", "content": VIBE_SATISFACTION_CHECKER},
        {"role": "user", "content": f"User's response: {last_user_msg}"}
    ]

    # temperature=0.0 for a deterministic yes/no-style classification
    decision, reasoning = call_llm(messages, temperature=0.0, include_reasoning=True)
    decision = decision.strip().lower() if decision else ""

    print(f"[DEBUG] check_vibe_satisfaction - user said: '{last_user_msg}'")
    print(f"[DEBUG] check_vibe_satisfaction - LLM decision: '{decision}'")

    state["reasoning"].append(f"🧠 REASONING (Satisfaction Check):\n{reasoning}\n\n→ Decision: {decision}")

    # "not_satisfied" contains the substring "satisfied", hence the double check
    if "satisfied" in decision and "not_satisfied" not in decision:
        print(f"[DEBUG] check_vibe_satisfaction -> RETRIEVE (user satisfied)")
        return "retrieve"
    else:
        print(f"[DEBUG] check_vibe_satisfaction -> REFINE (user not satisfied)")
        return "refine"
322
+
323
+
324
def retrieve_books(state: AgentState) -> AgentState:
    """Node: Retrieve books from Modal vector store.

    Builds a combined text query from all vibe components, POSTs it to the
    Modal search endpoint, and collects up to NUM_BOOKS_TO_FETCH deduplicated
    {title, author, vibe_score} entries with diversity across vibes. On any
    failure (HTTP error, timeout, bad JSON) it falls back to a hard-coded mock
    list so development can continue offline.
    """
    import requests

    # Construct full vibe query from all components
    vibe_query = f"{state['feels_like']}\n\nGenres/Aesthetics: {', '.join(state['aesthetic_genre_keywords'])}\nMood: {', '.join(state['mood_atmosphere'])}\nThemes: {', '.join(state['core_themes'])}\nTropes: {', '.join(state['tropes'])}"

    try:
        # Call Modal vector store endpoint
        print(f"DEBUG: Calling Modal URL: {MODAL_VECTOR_STORE_URL}")
        state["reasoning"].append(f"📚 Calling Modal vector store with full vibe profile")
        state["reasoning"].append(f"URL: {MODAL_VECTOR_STORE_URL}")

        response = requests.post(
            MODAL_VECTOR_STORE_URL,
            json={
                "query": vibe_query,
                "top_k": NUM_BOOKS_TO_RETRIEVE,
                "min_books_per_vibe": 1
            },
            timeout=180  # Long timeout for cold start
        )

        print(f"DEBUG: Response status: {response.status_code}")
        print(f"DEBUG: Response text: {response.text[:500] if response.text else 'empty'}")

        if response.status_code == 200:
            data = response.json()

            # Extract books from search results with diversity across vibes
            # Modal returns: {"results": [{"books": [...], "vibe_data": {...}, "score": ...}], ...}
            # Strategy: Take up to MAX_BOOKS_PER_VIBE from each vibe to ensure diversity
            MAX_BOOKS_PER_VIBE = 5

            books = []
            seen = set()  # Track seen books for deduplication

            for result in data.get("results", []):
                vibe_score = result.get("score", 0)
                vibe_books = result.get("books", [])
                books_from_this_vibe = 0

                for book in vibe_books:
                    if books_from_this_vibe >= MAX_BOOKS_PER_VIBE:
                        break

                    title = book.get("title", "")
                    author = book.get("author", "")
                    # Case-insensitive (title, author) pair as dedup key
                    key = (title.lower(), author.lower())

                    # Skip duplicates
                    if key in seen:
                        continue

                    seen.add(key)
                    books.append({
                        "title": title,
                        "author": author,
                        "vibe_score": vibe_score  # Track which vibe it came from
                    })
                    books_from_this_vibe += 1

            # Fetch extra books to account for filtering (books without descriptions)
            books = books[:NUM_BOOKS_TO_FETCH]

            state["reasoning"].append(f"Retrieved {len(books)} books from {len(data.get('results', []))} vibes (max {MAX_BOOKS_PER_VIBE} per vibe)")

        else:
            # Non-200: raise so the except block falls back to mock data
            raise Exception(f"HTTP {response.status_code}: {response.text[:200]}")

    except Exception as e:
        # Fallback to mock data for development
        print(f"DEBUG ERROR: Vector store call failed: {e}")
        import traceback
        traceback.print_exc()
        state["reasoning"].append(f"Vector store call failed: {e}. Using mock data.")
        books = [
            {"title": "The Night Circus", "author": "Erin Morgenstern"},
            {"title": "The Ocean at the End of the Lane", "author": "Neil Gaiman"},
            {"title": "The Starless Sea", "author": "Erin Morgenstern"},
            {"title": "Piranesi", "author": "Susanna Clarke"},
            {"title": "The House in the Cerulean Sea", "author": "TJ Klune"},
            {"title": "Howl's Moving Castle", "author": "Diana Wynne Jones"},
            {"title": "Circe", "author": "Madeline Miller"},
            {"title": "The Invisible Life of Addie LaRue", "author": "V.E. Schwab"},
            {"title": "Mexican Gothic", "author": "Silvia Moreno-Garcia"},
            {"title": "The Ten Thousand Doors of January", "author": "Alix E. Harrow"},
            {"title": "The Goblin Emperor", "author": "Katherine Addison"},
            {"title": "The Priory of the Orange Tree", "author": "Samantha Shannon"},
            {"title": "Uprooted", "author": "Naomi Novik"},
            {"title": "The Bear and the Nightingale", "author": "Katherine Arden"},
            {"title": "The City of Brass", "author": "S.A. Chakraborty"}
        ]

    state["retrieved_books"] = books
    state["reasoning"].append(f"Total books in state: {len(books)}")

    return state
422
+
423
+
424
def call_google_books_mcp(title: str, author: str = "") -> Dict[str, Any]:
    """
    Call the Google Books MCP server via Gradio MCP endpoint.

    Issues a JSON-RPC "tools/call" request and parses the server-sent-events
    (SSE) response line by line, handling both a direct {success, book} payload
    and the MCP content-array shape whose text items embed JSON.

    Args:
        title: Book title
        author: Book author (optional)

    Returns:
        Book metadata dict or None if not found
        (NOTE: despite the annotation, None is returned on any failure)
    """
    import requests

    try:
        # Gradio MCP endpoint (Streamable HTTP transport)
        mcp_url = f"{GOOGLE_BOOKS_MCP_URL}/gradio_api/mcp/"

        # MCP uses JSON-RPC style calls
        payload = {
            "jsonrpc": "2.0",
            "method": "tools/call",
            "params": {
                "name": "google_books_mcp_search_book_by_title_author",
                "arguments": {
                    "title": title,
                    "author": author
                }
            },
            "id": 1
        }

        response = requests.post(
            mcp_url,
            json=payload,
            headers={
                "Content-Type": "application/json",
                "Accept": "application/json, text/event-stream"
            },
            timeout=30
        )

        if response.status_code != 200:
            print(f"[DEBUG] Google Books MCP failed: {response.status_code} - {response.text[:200]}")
            return None

        # Parse SSE response: each data line carries one JSON-RPC message
        for line in response.text.split('\n'):
            if line.startswith('data: '):
                try:
                    data = json.loads(line[6:])
                    if "result" in data:
                        result = data["result"]
                        if isinstance(result, dict):
                            # Check if it's a direct book response
                            if "success" in result and "book" in result:
                                if result.get("success") and result.get("book"):
                                    return result["book"]
                            # Check if it's a content array response
                            elif "content" in result:
                                for content_item in result["content"]:
                                    if content_item.get("type") == "text":
                                        text_content = content_item.get("text", "")
                                        if text_content.strip():
                                            try:
                                                book_data = json.loads(text_content)
                                                if book_data.get("success") and book_data.get("found"):
                                                    return book_data.get("book")
                                            except json.JSONDecodeError:
                                                # Text item wasn't JSON; try the next one
                                                continue
                            # Fallback: return the raw result dict as-is
                            return result
                except json.JSONDecodeError:
                    # Malformed SSE data line; skip it
                    continue

        return None

    except Exception as e:
        # Network/parse failure: treat as "book not found"
        print(f"[DEBUG] Google Books MCP error: {e}")
        return None
502
+
503
+
504
+
505
+
506
def fetch_book_metadata(state: AgentState) -> AgentState:
    """Node: Fetch metadata for retrieved books via Google Books API.

    Walks state["retrieved_books"], enriching each entry through the MCP server
    until NUM_BOOKS_TO_RETRIEVE books with a meaningful description (>= 50
    chars) are collected. Books without results or descriptions are skipped;
    per-book errors never abort the loop.
    """
    print(f"[DEBUG AGENT] fetch_book_metadata node started with {len(state.get('retrieved_books', []))} books")

    enriched_books = []
    skipped_books = []
    state["reasoning"].append(f"📖 Fetching metadata from Google Books (need {NUM_BOOKS_TO_RETRIEVE} with descriptions)...")

    for book in state["retrieved_books"]:
        # Stop once we have enough books with valid descriptions
        if len(enriched_books) >= NUM_BOOKS_TO_RETRIEVE:
            print(f"[DEBUG] Reached target of {NUM_BOOKS_TO_RETRIEVE} books, stopping")
            break

        try:
            # Use Google Books MCP server
            metadata = call_google_books_mcp(book['title'], book['author'])

            if metadata and metadata.get("title"):
                description = metadata.get("description", "")

                # FILTER: Skip books without meaningful descriptions
                if not description or len(description.strip()) < 50:
                    skipped_books.append(book['title'])
                    print(f"[DEBUG] Skipping '{book['title']}' - no/short description ({len(description.strip()) if description else 0} chars)")
                    continue

                # Format authors as string
                authors = metadata.get("authors", [])
                author_str = ", ".join(authors) if isinstance(authors, list) else authors or book["author"]

                enriched_books.append({
                    "title": metadata.get("title", book["title"]),
                    "author": author_str,
                    "description": description,
                    "cover_url": metadata.get("thumbnail"),
                    "isbn": metadata.get("isbn"),
                    # First 4 chars of the date string — assumes "YYYY..." format; TODO confirm
                    "published_year": metadata.get("published_date", "")[:4] if metadata.get("published_date") else None,
                    "page_count": metadata.get("page_count"),
                    "categories": metadata.get("categories", []),
                    "preview_link": metadata.get("preview_link"),
                    "info_link": metadata.get("info_link")
                })
                print(f"[DEBUG] Found metadata for: {book['title']} ({len(description)} chars) [{len(enriched_books)}/{NUM_BOOKS_TO_RETRIEVE}]")
            else:
                # No results found - skip
                skipped_books.append(book['title'])
                print(f"[DEBUG] Skipping '{book['title']}' - no Google Books results")

        except Exception as e:
            # On any error, skip the book
            skipped_books.append(book['title'])
            state["reasoning"].append(f"Error fetching metadata for '{book['title']}': {str(e)}")

    state["books_with_metadata"] = enriched_books

    if skipped_books:
        state["reasoning"].append(f"⚠️ Skipped {len(skipped_books)} books without descriptions")
    state["reasoning"].append(f"✅ Found {len(enriched_books)}/{NUM_BOOKS_TO_RETRIEVE} books with full metadata")

    return state
567
+
568
+
569
def _generate_narrowing_question(state: AgentState, question_num: int) -> tuple:
    """Helper: assemble the narrowing-question prompt and ask the fast LLM.

    Returns the (question, reasoning) tuple produced by call_llm.
    """
    from .prompts import NARROWING_QUESTION_GENERATOR

    # One summary entry per candidate book (1-indexed for the prompt).
    summary_lines = []
    for idx, entry in enumerate(state["books_with_metadata"], 1):
        blurb = entry.get('description', 'No description')
        labels = ', '.join(entry.get('categories', [])) if entry.get('categories') else 'Uncategorized'
        summary_lines.append(f"Book {idx}: {entry['title']} by {entry['author']}\n Categories: {labels}\n Description: {blurb}")
    books_summary = "\n\n".join(summary_lines)

    vibe_context = f"Feels like: {state['feels_like']}\nAesthetics: {', '.join(state['aesthetic_genre_keywords'])}\nMood: {', '.join(state['mood_atmosphere'])}\nThemes: {', '.join(state['core_themes'])}"

    # The second question is the final one; nudge the model to use it well.
    if question_num >= 2:
        question_context = f"This is question {question_num} of 2." + " THIS IS THE LAST QUESTION - make it count!"
    else:
        question_context = f"This is question {question_num} of 2."

    user_prompt = f"Books to narrow down:\n{books_summary}\n\nVibe:\n{vibe_context}\n\nPrevious preferences: {json.dumps(state.get('user_preferences', {}), indent=2)}\n\n{question_context}\n\nGenerate an either/or question:"

    chat = [
        {"role": "system", "content": NARROWING_QUESTION_GENERATOR},
        {"role": "user", "content": user_prompt},
    ]

    return call_llm(chat, temperature=0.8, model=FAST_MODEL, include_reasoning=True)
593
+
594
+
595
def generate_question_1(state: AgentState) -> AgentState:
    """Node: create the first narrowing question and append it to the chat."""
    print(f"[DEBUG AGENT] generate_question_1")

    q1_text, q1_reasoning = _generate_narrowing_question(state, 1)

    state["narrowing_questions_asked"] = 1
    state["q1_question"] = q1_text
    state["reasoning"].append(
        f"🧠 REASONING (Narrowing Question #1):\n{q1_reasoning}\n\n→ Question: {q1_text}"
    )

    prompt_for_user = f"To help me find the perfect match:\n\n{q1_text}"
    print(f"[DEBUG AGENT] Q1: {q1_text[:60]}...")

    state["messages"].append({"role": "assistant", "content": prompt_for_user})
    return state
610
+
611
+
612
def wait_for_answer_1(state: AgentState) -> AgentState:
    """Node: pause the graph until the user answers question 1, then record it."""
    print(f"[DEBUG AGENT] wait_for_answer_1")

    reply = interrupt("Waiting for Q1 answer")
    if reply:
        # Store the reply both in the chat log and as a structured preference.
        state["messages"].append({"role": "user", "content": reply})
        state["user_preferences"]["q1"] = {
            "question": state.get("q1_question", ""),
            "answer": reply,
        }
        print(f"[DEBUG AGENT] Q1 answered: {reply}")
    return state
625
+
626
+
627
def generate_question_2(state: AgentState) -> AgentState:
    """Node: create the second (final) narrowing question and append it to the chat."""
    print(f"[DEBUG AGENT] generate_question_2")

    q2_text, q2_reasoning = _generate_narrowing_question(state, 2)

    state["narrowing_questions_asked"] = 2
    state["q2_question"] = q2_text
    state["reasoning"].append(
        f"🧠 REASONING (Narrowing Question #2):\n{q2_reasoning}\n\n→ Question: {q2_text}"
    )

    prompt_for_user = f"To help me find the perfect match:\n\n{q2_text}"
    print(f"[DEBUG AGENT] Q2: {q2_text[:60]}...")

    state["messages"].append({"role": "assistant", "content": prompt_for_user})
    return state
642
+
643
+
644
def wait_for_answer_2(state: AgentState) -> AgentState:
    """Node: pause the graph until the user answers question 2, then record it."""
    print(f"[DEBUG AGENT] wait_for_answer_2")

    reply = interrupt("Waiting for Q2 answer")
    if reply:
        # Store the reply both in the chat log and as a structured preference.
        state["messages"].append({"role": "user", "content": reply})
        state["user_preferences"]["q2"] = {
            "question": state.get("q2_question", ""),
            "answer": reply,
        }
        print(f"[DEBUG AGENT] Q2 answered: {reply}")
    return state
657
+
658
+
659
def check_narrowing_complete(state: AgentState) -> Literal["ask_more", "finalize"]:
    """Conditional edge: route to finalization once both narrowing questions are done."""
    asked_so_far = state.get("narrowing_questions_asked", 0)
    return "finalize" if asked_so_far >= 2 else "ask_more"
665
+
666
+
667
+
668
+
669
def finalize_books(state: AgentState) -> AgentState:
    """Node: Use reasoning to select final 3 books based on vibe and preferences.

    Sends the full candidate list (with complete descriptions), the vibe
    profile, and the Q&A preferences to the reasoning model, expecting a JSON
    array of 1-based book indices. Falls back to the first NUM_FINAL_BOOKS
    candidates when no parsable array is found.
    """
    print(f"[DEBUG AGENT] finalize_books node started")
    print(f"[DEBUG AGENT] books_with_metadata count: {len(state.get('books_with_metadata', []))}")
    from .prompts import get_book_finalizer_prompt

    # Build detailed book summary with full descriptions - no truncation
    books_summary_parts = []
    for i, b in enumerate(state["books_with_metadata"]):
        desc = b.get('description', 'No description available')
        cats = ', '.join(b.get('categories', [])) if b.get('categories') else 'Uncategorized'
        books_summary_parts.append(f"{i+1}. {b['title']} by {b['author']}\n Categories: {cats}\n Description: {desc}")
    books_summary = "\n\n".join(books_summary_parts)

    prefs_summary = json.dumps(state.get("user_preferences", {}), indent=2)
    vibe_context = f"Feels like: {state['feels_like']}\nAesthetics: {', '.join(state['aesthetic_genre_keywords'])}\nMood: {', '.join(state['mood_atmosphere'])}\nThemes: {', '.join(state['core_themes'])}\nTropes: {', '.join(state['tropes'])}"

    user_prompt = f"Vibe:\n{vibe_context}\n\nCandidate Books:\n{books_summary}\n\nUser Preferences (from Q&A):\n{prefs_summary}\n\nSelect the {NUM_FINAL_BOOKS} best matches (return only JSON array):"

    messages = [
        {"role": "system", "content": get_book_finalizer_prompt(NUM_FINAL_BOOKS)},
        {"role": "user", "content": user_prompt}
    ]

    print(f"[DEBUG AGENT] finalize_books user_prompt:\n{user_prompt}")

    # Use reasoning model for book selection - this is a complex decision
    # Increase max_tokens since we're sending full book descriptions
    selection_response, reasoning = call_llm(messages, temperature=0.3, model=REASONING_MODEL, include_reasoning=True, max_tokens=4000)

    # Log reasoning even if empty
    state["reasoning"].append(f"🧠 REASONING (Book Selection):\n{reasoning or 'No reasoning provided'}")

    # Parse the JSON response - check both content and reasoning for the array
    try:
        import re
        # First try to find JSON array in the response content
        # (pattern matches arrays of digits/commas/whitespace, e.g. "[1, 4, 7]")
        json_match = re.search(r'\[([\d,\s]+)\]', selection_response)

        # If not found in content, try to find it in reasoning (some models put answer there)
        if not json_match and reasoning:
            json_match = re.search(r'\[([\d,\s]+)\]', reasoning)
            if json_match:
                print(f"[DEBUG AGENT] Found JSON in reasoning instead of content")

        if json_match:
            # Indices are 1-based; out-of-range indices are silently dropped
            indices = json.loads(json_match.group(0))
            selected_books = [state["books_with_metadata"][i-1] for i in indices if 0 < i <= len(state["books_with_metadata"])][:NUM_FINAL_BOOKS]
        else:
            # Fallback to first 3 books
            print(f"[DEBUG AGENT] No JSON array found, using first 3 books")
            selected_books = state["books_with_metadata"][:NUM_FINAL_BOOKS]
    except Exception as e:
        state["reasoning"].append(f"❌ Failed to parse book selection: {e}. Using first 3 books.")
        selected_books = state["books_with_metadata"][:NUM_FINAL_BOOKS]

    state["final_books"] = selected_books
    # NOTE(review): this appends a second "REASONING (Book Selection)" entry on
    # top of the one logged above — likely redundant; confirm before removing.
    state["reasoning"].append(f"🧠 REASONING (Book Selection):\n{reasoning}\n\n→ Selected: {[b['title'] for b in selected_books]}")

    return state
729
+
730
+
731
def generate_soundtrack(state: AgentState) -> AgentState:
    """Node: Generate an ambient soundtrack using the ElevenLabs Music API.

    Turns the extracted vibe into a music-generation prompt via the fast LLM,
    then requests a ~90s instrumental track from ElevenLabs. On success the
    MP3 bytes are written to a temp file and its path is stored in
    state["soundtrack_url"]. Every failure path (missing API key, empty LLM
    output, HTTP error, exception) degrades gracefully: soundtrack_url is set
    to "" and the graph continues.
    """
    print(f"[DEBUG AGENT] generate_soundtrack node started")

    import requests
    import tempfile

    ELEVENLABS_API_KEY = os.getenv("ELEVENLABS_API_KEY")
    print(f"[DEBUG AGENT] ELEVENLABS_API_KEY present: {bool(ELEVENLABS_API_KEY)}")

    if not ELEVENLABS_API_KEY:
        print(f"[DEBUG AGENT] No ELEVENLABS_API_KEY - skipping")
        state["reasoning"].append("⚠️ ELEVENLABS_API_KEY not set - skipping soundtrack generation")
        state["soundtrack_url"] = ""
        return state

    try:
        # Build vibe context for music prompt generation
        vibe_context = {
            "feels_like": state["feels_like"],
            "mood_atmosphere": state["mood_atmosphere"],
            "aesthetic_genre_keywords": state["aesthetic_genre_keywords"],
            "core_themes": state["core_themes"],
            "tropes": state["tropes"]
        }
        print(f"[DEBUG AGENT] vibe_context built: {list(vibe_context.keys())}")

        # Use LLM to generate music prompt from vibe context
        from .prompts import MUSIC_PROMPT_GENERATION

        messages = [
            {"role": "system", "content": MUSIC_PROMPT_GENERATION},
            {"role": "user", "content": f"Generate a music prompt based on this vibe:\n{json.dumps(vibe_context, indent=2)}"}
        ]

        print(f"[DEBUG AGENT] Calling LLM for music prompt...")
        music_prompt, reasoning = call_llm(messages, temperature=0.7, model=FAST_MODEL, include_reasoning=True)
        print(f"[DEBUG AGENT] Music prompt generated: {music_prompt[:100] if music_prompt else 'None'}...")

        # Robustness fix: the LLM can return an empty/None prompt, which would
        # previously be posted to ElevenLabs unchecked. Fall back to a prompt
        # assembled directly from the extracted vibe keywords.
        if not music_prompt or not music_prompt.strip():
            music_prompt = "Instrumental ambient music. Mood: " + ", ".join(
                state["mood_atmosphere"] + state["aesthetic_genre_keywords"]
            )
            print(f"[DEBUG AGENT] Empty music prompt from LLM - using keyword fallback")

        state["reasoning"].append(f"🎵 Music prompt: {music_prompt}")

        # Call ElevenLabs Music API directly
        print(f"[DEBUG AGENT] Calling ElevenLabs Music API...")
        state["reasoning"].append(f"🎵 Calling ElevenLabs Music API...")

        response = requests.post(
            "https://api.elevenlabs.io/v1/music",
            headers={
                "xi-api-key": ELEVENLABS_API_KEY,
                "Content-Type": "application/json"
            },
            json={
                "prompt": music_prompt,
                "music_length_ms": 90000,  # 90 seconds of audio
                "force_instrumental": True  # No vocals, just ambient music
            },
            timeout=120  # Music generation can take a while
        )

        print(f"[DEBUG AGENT] ElevenLabs response status: {response.status_code}")

        if response.status_code == 200:
            print(f"[DEBUG AGENT] Success! Response size: {len(response.content)} bytes")
            # Persist audio to a temp file; delete=False because the path is
            # handed to the UI layer, which reads it after this node returns.
            temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.mp3')
            temp_file.write(response.content)
            temp_file.close()
            print(f"[DEBUG AGENT] Saved to temp file: {temp_file.name}")

            state["soundtrack_url"] = temp_file.name
            state["reasoning"].append(f"✅ Generated soundtrack successfully ({len(response.content)} bytes)")
        else:
            print(f"[DEBUG AGENT] ElevenLabs API error: {response.status_code} - {response.text[:500]}")
            state["reasoning"].append(f"❌ ElevenLabs API error: {response.status_code} - {response.text[:200]}")
            state["soundtrack_url"] = ""

    except Exception as e:
        import traceback
        print(f"[DEBUG AGENT] Exception in generate_soundtrack: {e}")
        traceback.print_exc()
        state["reasoning"].append(f"❌ Failed to generate soundtrack: {e}")
        state["soundtrack_url"] = ""

    print(f"[DEBUG AGENT] generate_soundtrack finished, soundtrack_url: {state.get('soundtrack_url', 'not set')}")
    return state
815
+
816
+
817
def present_final_results(state: AgentState) -> AgentState:
    """Node: Assemble the final recommendation message and post it to the chat.

    Formats the selected books as a numbered markdown list, appends a closing
    note about the soundtrack, and records the step in the reasoning log. The
    state dict is mutated in place and returned for the next graph node.
    """
    header = "Here are your personalized book recommendations:\n\n"
    book_lines = [
        f"{idx}. **{book['title']}** by {book['author']}\n"
        for idx, book in enumerate(state["final_books"], start=1)
    ]
    outro = "\n\nI'm also generating a soundtrack that matches your vibe! Scroll down for all the goodies ⬇️"

    state["messages"].append({
        "role": "assistant",
        "content": header + "".join(book_lines) + outro,
    })
    state["reasoning"].append("Presented final results to user")

    return state
833
+
834
+
835
+ # ============================================================================
836
+ # GRAPH CONSTRUCTION
837
+ # ============================================================================
838
+
839
def create_agent_graph():
    """Build and compile the LangGraph workflow with interrupts for user input.

    The graph loops through vibe refinement until the user approves, then runs
    a linear pipeline: book retrieval -> metadata -> two Q&A rounds -> final
    selection -> soundtrack -> presentation. A MemorySaver checkpointer makes
    the interrupt/resume human-in-the-loop flow possible.
    """
    from langgraph.checkpoint.memory import MemorySaver

    workflow = StateGraph(AgentState)

    # Register every node (name -> callable). Insertion order matches the
    # conceptual flow of the pipeline.
    node_registry = {
        "generate_initial_vibe": generate_initial_vibe,
        "refine_vibe": refine_vibe,
        "retrieve_books": retrieve_books,
        "fetch_metadata": fetch_book_metadata,
        "generate_q1": generate_question_1,
        "wait_a1": wait_for_answer_1,
        "generate_q2": generate_question_2,
        "wait_a2": wait_for_answer_2,
        "finalize_books": finalize_books,
        "generate_soundtrack": generate_soundtrack,
        "present_results": present_final_results,
    }
    for node_name, node_fn in node_registry.items():
        workflow.add_node(node_name, node_fn)

    workflow.set_entry_point("generate_initial_vibe")

    # Both the initial vibe and each refinement pass go through the same
    # satisfaction check: keep refining until the user approves, then retrieve.
    for vibe_source in ("generate_initial_vibe", "refine_vibe"):
        workflow.add_conditional_edges(
            vibe_source,
            check_vibe_satisfaction,
            {
                "refine": "refine_vibe",
                "retrieve": "retrieve_books",
            },
        )

    # Linear pipeline: retrieve -> fetch -> Q1 -> A1 -> Q2 -> A2 -> finalize
    # -> soundtrack -> present -> END.
    sequential_edges = [
        ("retrieve_books", "fetch_metadata"),
        ("fetch_metadata", "generate_q1"),
        ("generate_q1", "wait_a1"),
        ("wait_a1", "generate_q2"),
        ("generate_q2", "wait_a2"),
        ("wait_a2", "finalize_books"),
        ("finalize_books", "generate_soundtrack"),
        ("generate_soundtrack", "present_results"),
        ("present_results", END),
    ]
    for src, dst in sequential_edges:
        workflow.add_edge(src, dst)

    # Compile with a checkpointer so state persists across interrupts.
    return workflow.compile(checkpointer=MemorySaver())
898
+
899
+
900
+ # ============================================================================
901
+ # MAIN INTERFACE
902
+ # ============================================================================
903
+
904
+ # Global graph instance with persistent checkpointer
905
+ _GRAPH_INSTANCE = None
906
+
907
def get_graph():
    """Return the process-wide compiled graph, creating it on first use.

    Keeping one singleton preserves the MemorySaver checkpointer — and with it
    every thread's in-flight conversation state — across calls.
    """
    global _GRAPH_INSTANCE
    if _GRAPH_INSTANCE is not None:
        print(f"[DEBUG AGENT] Reusing existing graph instance")
        return _GRAPH_INSTANCE
    print(f"[DEBUG AGENT] Creating NEW graph instance!")
    _GRAPH_INSTANCE = create_agent_graph()
    return _GRAPH_INSTANCE
916
+
917
def reset_agent():
    """Drop the cached graph singleton so the next get_graph() rebuilds it.

    This discards the checkpointer and therefore wipes all per-thread
    conversation state.
    """
    global _GRAPH_INSTANCE
    _GRAPH_INSTANCE = None
921
+
922
def run_agent(images: List[str], user_message: str = None, thread_id: str = "main"):
    """
    Main interface to run the agent with interrupt-based human-in-the-loop.

    Args:
        images: List of image URLs/base64. A non-empty list starts a brand-new
            conversation on the given thread.
        user_message: User's message (for resuming after an interrupt).
        thread_id: Unique identifier for the user session (required for
            multi-user support; each thread has its own checkpointed state).

    Returns:
        Updated state dict with the agent's response, or None when called with
        neither images nor a user message.
    """
    from langgraph.types import Command

    graph = get_graph()
    thread_config = {"configurable": {"thread_id": thread_id}}

    def _strip_interrupt(result):
        # A pending interrupt surfaces as a special "__interrupt__" key in the
        # invoke result; hide it so callers always see a uniform state shape.
        # (Bug fix: previously only the resume path stripped it, but the
        # initial invoke also stops at an interrupt.)
        if isinstance(result, dict) and "__interrupt__" in result:
            return {k: v for k, v in result.items() if k != "__interrupt__"}
        return result

    # Initialize state if new conversation (images provided)
    if images:
        initial_state = AgentState(
            images=images,
            messages=[],
            aesthetic_genre_keywords=[],
            mood_atmosphere=[],
            core_themes=[],
            tropes=[],
            feels_like="",
            vibe_refinement_count=0,
            retrieved_books=[],
            books_with_metadata=[],
            q1_question="",
            q2_question="",
            user_preferences={},
            final_books=[],
            soundtrack_url="",
            reasoning=[]
        )
        # Start the graph - it will stop at the first interrupt()
        return _strip_interrupt(graph.invoke(initial_state, thread_config))

    # Resume with user's message
    if user_message:
        # Check current state before resuming (debug visibility only)
        current_state = graph.get_state(thread_config)
        print(f"[DEBUG AGENT] State BEFORE resume:")
        print(f"[DEBUG AGENT] messages count: {len(current_state.values.get('messages', []))}")
        for i, m in enumerate(current_state.values.get('messages', [])):
            print(f"[DEBUG AGENT] msg[{i}]: {m.get('role')} - {m.get('content', '')[:60]}...")
        print(f"[DEBUG AGENT] q1_question: '{current_state.values.get('q1_question', '')[:50] if current_state.values.get('q1_question') else 'EMPTY'}'")

        # Resume from the last interrupt; the value passed to Command(resume=...)
        # is what the corresponding interrupt(...) call will return inside the node.
        print(f"[DEBUG AGENT] Resuming graph with user_message: {user_message[:50]}...")
        result = graph.invoke(Command(resume=user_message), thread_config)
        print(f"[DEBUG AGENT] graph.invoke returned: {type(result)}, keys: {list(result.keys()) if hasattr(result, 'keys') else 'N/A'}")
        print(f"[DEBUG AGENT] result has {len(result.get('messages', []))} messages")
        return _strip_interrupt(result)

    # Neither a new conversation nor a resume message was supplied.
    return None
agent/prompts.py ADDED
@@ -0,0 +1,156 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Centralized Prompts Configuration
3
+ Store all system prompts and templates used across the vibe-reader application
4
+ """
5
+
6
+ # ============================================================================
7
+ # VIBE EXTRACTION PROMPTS
8
+ # ============================================================================
9
+
10
+ VIBE_EXTRACTION = """You are an expert at capturing the emotional essence and atmosphere of visual content. Your task is to analyze one or more images and translate their collective 'vibe' into a detailed description that would help someone find fiction books with a similar feeling and atmosphere.
11
+
12
+ **Context:** This analysis will be used to recommend books based on visual mood boards, similar to how users on r/Booksthatfeellikethis share images to convey the type of story atmosphere they're seeking.
13
+
14
+ **Key Instructions:**
15
+ - Focus on the emotional atmosphere and feelings the images evoke, NOT literal descriptions of what's shown
16
+ - Think like a reader who wants to be immersed in a world that feels like these images
17
+ - When analyzing multiple images, treat them as a cohesive mood board that defines one unified vibe
18
+ - Consider what it would feel like to live in or experience a story set in this atmosphere
19
+ - Use natural, conversational language - be evocative but avoid overly poetic or academic terminology
20
+ - If images seem to have conflicting vibes, find the common emotional thread that unifies them
21
+ - Only reference specific time periods or cultures if the images clearly and obviously point to them
22
+ - Avoid describing graphic violence even if present in the images
23
+
24
+ **Target Output:** Your description should help match these vibes to fiction books across all genres.
25
+
26
+ **Required Format:**
27
+
28
+ You must output a valid JSON object with the following structure:
29
+
30
+ {
31
+ "aesthetic_genre_keywords": ["keyword1", "keyword2", "keyword3"],
32
+ "mood_atmosphere": ["mood1", "mood2", "mood3"],
33
+ "core_themes": ["theme1", "theme2", "theme3"],
34
+ "tropes": ["trope1", "trope2", "trope3"],
35
+ "feels_like": "4-5 sentences that synthesize the overall emotional essence..."
36
+ }
37
+
38
+ Guidelines for each field:
39
+ - **aesthetic_genre_keywords**: Style and genre descriptors like Gothic, Dark Academia, Cyberpunk, Cottagecore, Film Noir, Solarpunk, etc.
40
+ - **mood_atmosphere**: Emotional tone - words like Melancholic, Nostalgic, Tense, Cozy, Dreamlike, Whimsical, Foreboding, etc.
41
+ - **core_themes**: Broad underlying themes such as Isolation, Mystery, Self-discovery, Loss, Wonder, Coming-of-age, Redemption, Power and corruption, etc.
42
+ - **tropes**: Specific narrative tropes and patterns like Enemies-to-lovers, Found family, Chosen one, Unreliable narrator, Slow burn romance, Morally gray protagonist, etc.
43
+ - **feels_like**: Write 4-5 sentences that synthesize the overall emotional essence. Focus 60% on potential story atmosphere and subtle plot elements, 40% on pure mood. Describe what it would feel like to be immersed in a book with this atmosphere.
44
+
45
+ IMPORTANT: Return ONLY the JSON object. Do not include markdown code blocks, backticks, or any text outside the JSON.
46
+ """
47
+
48
+
49
+ # ============================================================================
50
+ # VIBE REFINEMENT PROMPTS
51
+ # ============================================================================
52
+
53
+ VIBE_REFINEMENT = """You are helping refine a vibe description based on user feedback.
54
+
55
+ IMPORTANT:
56
+ - If the user approves/accepts the vibe (says "yes", "perfect", "good", "love it", etc.), return the EXACT same description unchanged.
57
+ - Only modify the description if the user explicitly asks for changes or suggests specific adjustments.
58
+
59
+ When changes are requested, adjust the description to incorporate their suggestions while maintaining a natural, evocative tone."""
60
+
61
+
62
+ # ============================================================================
63
+ # BOOK SELECTION & NARROWING PROMPTS
64
+ # ============================================================================
65
+
66
+ NARROWING_QUESTION_GENERATOR = """You are helping narrow down book recommendations by finding the KEY DIFFERENCE between the candidate books.
67
+
68
+ YOUR PROCESS:
69
+ 1. READ the book descriptions and categories carefully
70
+ 2. IDENTIFY a concrete differentiating factor that actually appears in the books (not abstract vibes)
71
+ 3. FORMULATE a question where Option A matches some books and Option B matches others
72
+
73
+ GROUNDING RULES:
74
+ - Your question MUST be based on ACTUAL content from the book descriptions/categories provided
75
+ - Look for concrete differences: time period, setting type, protagonist type, plot focus, tone, narrative style
76
+ - Do NOT invent abstract aesthetic questions that aren't grounded in the books
77
+ - NEVER mention specific book titles or authors
78
+
79
+ FORMAT RULES:
80
+ - Use EXACTLY this format: "Do you prefer **A)** [option] or **B)** [option]?"
81
+ - Keep options SHORT (under 10 words each)
82
+ - The user should be able to answer with just "A" or "B"
83
+
84
+ If previous preferences exist, your question must be COMPATIBLE with what the user already chose.
85
+
86
+ Provide ONLY the question, no explanation."""
87
+
88
+
89
+ BOOK_FINALIZER = """You are selecting the {num_books} best books from a list based on vibe and user preferences.
90
+
91
+ You will receive:
92
+ 1. The vibe profile (aesthetics, mood, themes, tropes, feels_like)
93
+ 2. A list of candidate books with descriptions and categories
94
+ 3. User preferences from Q&A (question + answer pairs)
95
+
96
+ Your task:
97
+ - Analyze each book's description and categories against the vibe
98
+ - Apply the user's stated preferences as HARD FILTERS — if they said they prefer X over Y, prioritize books matching X
99
+ - Select the {num_books} books that best match BOTH the vibe AND the user's preferences
100
+
101
+ Respond with ONLY a JSON array of book indices (1-indexed), like: [3, 7, 12]"""
102
+
103
+
104
+ # ============================================================================
105
+ # MUSIC GENERATION PROMPTS
106
+ # ============================================================================
107
+
108
+ MUSIC_PROMPT_GENERATION = """You are creating a music generation prompt for ElevenLabs based on a book vibe analysis.
109
+ Your task is to translate the literary atmosphere and emotional elements into a descriptive music prompt that will generate an appropriate instrumental soundtrack.
110
+
111
+ Key Instructions:
112
+ - Create instrumental ambient music that captures the emotional essence of the vibe
113
+ - Focus on atmosphere, mood, and emotional tone - NOT specific story elements
114
+ - Use descriptive musical terms (tempo, instrumentation, style, mood)
115
+ - Consider how the music would feel as background for reading or immersing in this type of story
116
+ - Aim for 30-60 second ambient pieces that set a mood
117
+ - Avoid mentioning specific characters, plots, or narrative events
118
+ - DO NOT reference specific artists or copyrighted works
119
+ - Keep prompts concise but evocative (50-150 words)
120
+
121
+ Musical Elements to Consider:
122
+ - Tempo: slow, moderate, energetic
123
+ - Instrumentation: piano, strings, electronic, ambient textures, orchestral
124
+ - Style: ambient, classical, electronic, folk, cinematic
125
+ - Mood: mysterious, peaceful, tense, whimsical, melancholic, etc.
126
+
127
+ Output: A single descriptive prompt for ElevenLabs music generation."""
128
+
129
+
130
+ # ============================================================================
131
+ # USER SATISFACTION PROMPTS
132
+ # ============================================================================
133
+
134
+ VIBE_SATISFACTION_CHECKER = """Does the user want to change the vibe description? Reply with ONLY 'satisfied' or 'not_satisfied'.
135
+
136
+ 'satisfied' responses include: yes, yeah, perfect, good, love it, great, ok, okay, sure, sounds good, that works, etc.
137
+ 'not_satisfied' responses include: no, change it, more X, less Y, add Z, I want, make it, etc.
138
+
139
+ Default to 'satisfied' unless the user EXPLICITLY requests changes."""
140
+
141
+
142
+ # ============================================================================
143
+ # HELPER FUNCTIONS
144
+ # ============================================================================
145
+
146
def get_book_finalizer_prompt(num_books: int = 3) -> str:
    """Return the BOOK_FINALIZER system prompt formatted for *num_books*.

    Args:
        num_books: Number of books the model should select (default: 3).

    Returns:
        The fully formatted system prompt string.
    """
    prompt = BOOK_FINALIZER.format(num_books=num_books)
    return prompt
agent/utils.py ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Utility functions for the vibe-reader application
3
+ """
4
+
5
+ import json
6
+ import re
7
+ from typing import Dict, Any, Optional
8
+
9
+
10
def parse_json_response(response: str) -> Optional[Dict[str, Any]]:
    """
    Parse a JSON object from an LLM response, handling various formats.

    Handles raw JSON, JSON wrapped in markdown code fences, and a JSON object
    embedded in surrounding prose.

    Args:
        response: Raw LLM response that may contain JSON

    Returns:
        Parsed JSON dict, or None if parsing fails
    """
    # Remove markdown code blocks if present
    cleaned = re.sub(r'```json\s*|\s*```', '', response, flags=re.IGNORECASE)
    cleaned = cleaned.strip()

    # Fast path: the whole cleaned response is a JSON object. This also fixes
    # the case where the greedy {.*} fallback below would grab a span from the
    # first "{" to the last "}" across unrelated braces and fail to parse.
    try:
        parsed = json.loads(cleaned)
        if isinstance(parsed, dict):
            return parsed
    except json.JSONDecodeError:
        pass

    # Fallback: extract the outermost {...} span from surrounding prose
    json_match = re.search(r'\{.*\}', cleaned, re.DOTALL)
    if json_match:
        try:
            return json.loads(json_match.group(0))
        except json.JSONDecodeError as e:
            print(f"JSON parsing error: {e}")
            return None

    return None
34
+
35
+
36
def extract_vibe_components(vibe_json: Dict[str, Any]) -> Dict[str, Any]:
    """
    Pull the expected vibe fields out of parsed JSON, with safe defaults.

    Args:
        vibe_json: Parsed JSON from vibe extraction

    Returns:
        Dict containing only the known vibe fields; missing list fields
        default to [] and a missing feels_like defaults to "".
    """
    list_fields = ("aesthetic_genre_keywords", "mood_atmosphere", "core_themes", "tropes")
    components: Dict[str, Any] = {field: vibe_json.get(field, []) for field in list_fields}
    components["feels_like"] = vibe_json.get("feels_like", "")
    return components
53
+
54
+
55
def strip_thinking_tags(text: str) -> str:
    """
    Remove <think>...</think> reasoning blocks from model output.

    Qwen3-style models emit their chain-of-thought in standard XML format
    (<think>...</think>); this strips the blocks and any stray orphan tags.

    Args:
        text: Text that may contain thinking tags

    Returns:
        Clean text without thinking tags, stripped of surrounding whitespace
    """
    without_blocks = re.sub(r'<think>.*?</think>', '', text, flags=re.DOTALL | re.IGNORECASE)
    without_stray_tags = re.sub(r'</?think>', '', without_blocks, flags=re.IGNORECASE)
    return without_stray_tags.strip()
app.py CHANGED
@@ -5,7 +5,7 @@ import os
5
  import sys
6
  import traceback
7
 
8
- from app.agent import run_agent
9
 
10
 
11
 
@@ -299,4 +299,5 @@ with gr.Blocks() as demo:
299
  )
300
 
301
  if __name__ == "__main__":
302
- demo.queue().launch(theme=gr.themes.Monochrome(), css_paths='app/custom.css')
 
 
5
  import sys
6
  import traceback
7
 
8
+ from agent.agent import run_agent
9
 
10
 
11
 
 
299
  )
300
 
301
  if __name__ == "__main__":
302
+ # Note: css_paths removed as custom.css location may vary
303
+ demo.queue().launch(theme=gr.themes.Monochrome())
assets/custom.css ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ @import url('https://fonts.googleapis.com/css2?family=Pixelify+Sans:wght@400;500;600;700&display=swap');
2
+
3
+ body {
4
+ background-image: url('https://64.media.tumblr.com/677c7a2824b4c515b4c96b0cccb44740/tumblr_ney3botpbL1snc5kxo2_250.png');
5
+ background-repeat: repeat;
6
+ background-size: auto;
7
+ }
8
+
9
+ .gradio-container {
10
+ background-image: url('https://64.media.tumblr.com/677c7a2824b4c515b4c96b0cccb44740/tumblr_ney3botpbL1snc5kxo2_250.png');
11
+ background-repeat: repeat;
12
+ background-size: auto;
13
+ }
14
+
15
+ #main-title {
16
+ text-align: center;
17
+ background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
18
+ -webkit-background-clip: text;
19
+ -webkit-text-fill-color: transparent;
20
+ background-clip: text;
21
+ font-weight: bold;
22
+ margin-bottom: 8px;
23
+ font-family: 'Pixelify Sans', sans-serif;
24
+ }
25
+
26
+ #subtitle {
27
+ text-align: center;
28
+ color: #1b0925;
29
+ margin-bottom: 30px;
30
+ }
31
+
32
+ .vibe-container {
33
+ background: linear-gradient(135deg, #f5f7fa 0%, #c3cfe2 100%);
34
+ border-radius: 15px;
35
+ padding: 20px;
36
+ }
37
+
38
+ .recommendation-section {
39
+ margin-top: 30px;
40
+ }
41
+
42
+ #status-display textarea {
43
+ font-size: 1.4em !important;
44
+ font-weight: 500;
45
+ background: transparent !important;
46
+ }
47
+
48
+ /* Chatbot message text */
49
+ .chatbot .message-wrap {
50
+ font-size: 1.3em !important;
51
+ }
52
+
53
+ footer {
54
+ text-align: center;
55
+ margin-top: 50px;
56
+ padding: 20px;
57
+ color: #999;
58
+ font-size: 0.9em;
59
+ }