akshit4857 commited on
Commit
4f47bb5
·
verified ·
1 Parent(s): 67ede07

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +678 -933
src/streamlit_app.py CHANGED
@@ -1,1162 +1,907 @@
1
  """
2
- Review Validator - Advanced Edition
3
- With explainability graphs + PDF report download + Google Reverse Image Search
4
  """
5
 
6
  import os
7
  import io
8
- import time
9
- import base64
 
10
  import numpy as np
11
  import streamlit as st
12
  from transformers import pipeline, logging as hf_logging
13
  from PIL import Image
14
  import matplotlib
15
- matplotlib.use('Agg') # use non-GUI backend for Streamlit
16
  import matplotlib.pyplot as plt
17
  import requests
18
- import math
19
- import warnings
20
- import re
21
- from collections import Counter
22
- from datetime import datetime
23
- import textwrap
24
-
25
- # Try to import ReportLab for PDF generation
26
- try:
27
- from reportlab.lib.pagesizes import A4
28
- from reportlab.pdfgen import canvas
29
- HAVE_REPORTLAB = True
30
- except ImportError:
31
- HAVE_REPORTLAB = False
32
-
33
- # --- Setup: Silence the technical noise ---
34
  warnings.filterwarnings("ignore")
35
  hf_logging.set_verbosity_error()
36
  os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
 
37
 
38
  st.set_page_config(
39
  page_title="Review Validator",
40
  page_icon="🛡️",
41
  layout="wide",
42
- initial_sidebar_state="collapsed"
43
  )
44
 
45
- # ==========================================
46
- # 🧠 MODELS (Better public detectors)
47
- # ==========================================
48
-
49
- # 1. Text AI Detector: ModernBERT-based detector (0 = human, 1 = AI)
50
- MODEL_FAKE = "AICodexLab/answerdotai-ModernBERT-base-ai-detector"
51
-
52
- # 2. Mood Scanner: Sentiment model
53
  MODEL_MOOD = "cardiffnlp/twitter-roberta-base-sentiment-latest"
54
-
55
- # 3. Grammar Checker: Acceptability (CoLA)
56
  MODEL_GRAMMAR = "textattack/roberta-base-CoLA"
57
-
58
- # 4. Image Detector: Modern real vs fake classifier
59
- MODEL_IMG_MAIN = "prithivMLmods/Mirage-Photo-Classifier"
60
-
61
- # 5. Image Captioner (Optional): Describes the image content
62
  MODEL_CAPTION = "Salesforce/blip-image-captioning-base"
63
 
64
- # ==========================================
65
 
66
- # --- Robust Secrets Management (NON-CRASHING) ---
67
- def get_token():
68
- """
69
- Safely retrieves HF_TOKEN.
70
- Priority 1: Env var (Spaces)
71
- Priority 2: Streamlit Secrets (Local)
72
- Optional – app still runs if missing.
73
- """
74
  token = os.environ.get("HF_TOKEN")
75
  if token:
76
  return token
77
-
78
  try:
79
  if hasattr(st, "secrets") and "HF_TOKEN" in st.secrets:
80
  return st.secrets["HF_TOKEN"]
81
  except Exception:
82
  pass
83
-
84
  return None
85
 
 
86
  def get_serpapi_key():
87
- """
88
- Safely retrieves SERPAPI_KEY.
89
- Priority 1: Env var
90
- Priority 2: Streamlit Secrets
91
- """
92
  key = os.environ.get("SERPAPI_KEY")
93
  if key:
94
  return key
95
-
96
  try:
97
  if hasattr(st, "secrets") and "SERPAPI_KEY" in st.secrets:
98
  return st.secrets["SERPAPI_KEY"]
99
  except Exception:
100
  pass
101
-
102
  return None
103
 
104
- HF_TOKEN = get_token()
105
- SERPAPI_KEY = get_serpapi_key()
106
 
107
- # --- Custom CSS ---
 
 
 
108
  def inject_custom_css():
109
- st.markdown("""
 
110
  <style>
111
- .stApp {
112
- background-color: #FFFFFF;
113
- color: #333333;
114
- font-family: 'Helvetica Neue', sans-serif;
115
  }
116
-
117
- h1, h2, h3 { color: #2C3E50; }
118
- h1 { font-weight: 800; }
119
- h2 { font-weight: 600; }
120
-
121
  .hero-box {
122
- padding: 40px;
123
- background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
124
- border-radius: 20px;
125
- color: white;
126
- text-align: center;
127
- margin-bottom: 30px;
128
  }
129
- .hero-title { font-size: 3rem; font-weight: bold; margin-bottom: 10px; }
130
- .hero-subtitle { font-size: 1.2rem; opacity: 0.9; }
131
-
132
- .feature-card {
133
- background: #F8F9FA;
134
- padding: 20px;
135
- border-radius: 15px;
136
- border: 1px solid #EEEEEE;
137
- text-align: center;
138
- transition: transform 0.2s, box-shadow 0.2s;
139
  }
140
- .feature-card:hover {
141
- transform: translateY(-5px);
142
- border-color: #764ba2;
143
- box-shadow: 0 4px 12px rgba(0,0,0,0.06);
 
 
 
 
 
 
144
  }
145
- .emoji-icon { font-size: 3rem; margin-bottom: 10px; display: block; }
146
-
147
- .stat-box {
148
- text-align: center;
149
- padding: 15px;
150
- border-radius: 12px;
151
- background: white;
152
- box-shadow: 0 4px 6px rgba(0,0,0,0.05);
153
- border: 1px solid #EEE;
154
  }
155
- .stat-num { font-size: 24px; font-weight: 900; color: #333; }
156
- .stat-txt { font-size: 12px; text-transform: uppercase; color: #777; letter-spacing: 1px; }
157
-
158
- .analysis-box {
159
- background: #f0f7ff;
160
- border-left: 5px solid #4285F4;
161
- padding: 15px;
162
- border-radius: 5px;
163
- margin-top: 15px;
164
- }
165
-
166
- .warning-box {
167
- background: #fff6e5;
168
- border-left: 5px solid #ffb74d;
169
- padding: 10px 15px;
170
- border-radius: 5px;
171
- font-size: 0.85rem;
172
- margin-top: 8px;
173
- }
174
-
175
- .reverse-search-box {
176
- background: #f0fff4;
177
- border-left: 5px solid #48bb78;
178
- padding: 15px;
179
- border-radius: 5px;
180
- margin-top: 15px;
181
- }
182
-
183
- .result-item {
184
- background: white;
185
- padding: 12px;
186
- border-radius: 8px;
187
- margin: 8px 0;
188
- border: 1px solid #e2e8f0;
189
- transition: box-shadow 0.2s;
190
- }
191
- .result-item:hover {
192
- box-shadow: 0 2px 8px rgba(0,0,0,0.1);
193
- }
194
-
195
- .stButton>button {
196
- border-radius: 30px;
197
- font-weight: bold;
198
- border: none;
199
- padding: 0.6rem 2.2rem;
200
- transition: all 0.3s;
201
- }
202
- .stButton>button:hover {
203
- transform: translateY(-1px);
204
- box-shadow: 0 3px 8px rgba(0,0,0,0.15);
205
  }
206
  </style>
207
- """, unsafe_allow_html=True)
 
 
208
 
209
- # --- Load Models (Safe Mode, No Hard Crash) ---
 
210
  @st.cache_resource(show_spinner=False)
211
  def load_ai_squad():
212
- """
213
- Load all models. Never hard-crash the app.
214
- If some models fail, we still return partial squad.
215
- """
216
  squad = {}
217
- errors = []
 
218
 
219
- token_arg = {"token": HF_TOKEN} if HF_TOKEN else {}
220
-
221
- # TEXT MODELS
222
  try:
223
- squad['fake'] = pipeline(
224
- "text-classification",
225
- model=MODEL_FAKE,
226
- **token_arg
227
- )
228
- except Exception as e:
229
- errors.append(f"Fake detector: {e}")
230
 
231
- try:
232
- squad['mood'] = pipeline(
233
- "sentiment-analysis",
234
- model=MODEL_MOOD,
235
- tokenizer=MODEL_MOOD,
236
- **token_arg
237
- )
238
- except Exception as e:
239
- errors.append(f"Mood model: {e}")
240
 
241
- try:
242
- squad['grammar'] = pipeline(
243
- "text-classification",
244
- model=MODEL_GRAMMAR,
245
- **token_arg
246
- )
247
- except Exception as e:
248
- errors.append(f"Grammar model: {e}")
249
 
250
- # IMAGE MODELS
251
- try:
252
- squad['img_main'] = pipeline(
253
- "image-classification",
254
- model=MODEL_IMG_MAIN,
255
- **token_arg
256
- )
257
- except Exception as e:
258
- errors.append(f"Image main model: {e}")
 
 
 
259
 
260
- try:
261
- squad['caption'] = pipeline(
262
- "image-to-text",
263
- model=MODEL_CAPTION,
264
- **token_arg
265
- )
266
  except Exception as e:
267
- errors.append(f"Caption model: {e}")
268
-
269
- if not squad:
270
- return None, "No models could be loaded. Check internet / HF token / requirements."
271
-
272
- err_msg = "\n".join(errors) if errors else None
273
- return squad, err_msg
274
-
275
- # --- Utility: Basic text stats for explainability ---
276
- STOPWORDS = set([
277
- "the","a","an","is","are","am","and","or","in","on","at","of","to","for",
278
- "this","that","it","was","with","as","by","be","from","has","have","had",
279
- "i","you","we","they","he","she","my","our","their","your"
280
- ])
281
-
282
- def split_sentences(text: str):
283
- # simple sentence splitter
284
- parts = re.split(r'[.!?]+', text)
285
- return [s.strip() for s in parts if s.strip()]
286
-
287
- def tokenize_words(text: str):
288
- tokens = re.findall(r"[A-Za-z']+", text.lower())
289
- return tokens
290
-
291
- def analyze_text_structure(text: str):
292
- sentences = split_sentences(text)
293
- words = tokenize_words(text)
294
-
295
- num_sentences = max(len(sentences), 1)
296
- num_words = len(words)
297
-
298
- sent_lengths = [len(tokenize_words(s)) for s in sentences] or [0]
299
- avg_sent_len = sum(sent_lengths) / len(sent_lengths)
300
- var_sent_len = float(np.var(sent_lengths)) if len(sent_lengths) > 1 else 0.0
301
-
302
- # vocabulary diversity
303
- vocab = set(w for w in words if w not in STOPWORDS)
304
  vocab_size = len(vocab)
305
- ttr = (vocab_size / num_words) if num_words > 0 else 0.0 # type-token ratio
306
-
307
- # top words
308
- filtered = [w for w in words if w not in STOPWORDS]
309
- counter = Counter(filtered)
310
- top_words = counter.most_common(10)
311
-
312
  return {
313
- "num_sentences": num_sentences,
314
- "num_words": num_words,
315
- "avg_sentence_len": avg_sent_len,
316
- "var_sentence_len": var_sent_len,
317
- "ttr": ttr,
318
- "top_words": top_words,
319
  "sentence_lengths": sent_lengths,
 
320
  }
321
 
322
- def explain_text(res, stats, strict_mode: bool):
323
- """
324
- Heuristic explanation based on AI score + grammar + structure.
325
- Returns list of bullet strings.
326
- """
327
  bot = res["bot_score"]
328
  gram = res["grammar_score"]
329
  mood = res["mood_label"]
330
- avg_len = stats["avg_sentence_len"]
331
- var_len = stats["var_sentence_len"]
332
- ttr = stats["ttr"]
333
-
334
- reasons = []
335
 
336
- # AI-likeness
337
- if bot >= 85:
338
- reasons.append("High AI-likeness score model strongly associates this style with AI text.")
339
- elif bot >= 65:
340
- reasons.append("Moderate AI-likeness score – some patterns resemble AI-generated writing.")
 
 
 
341
  else:
342
- reasons.append("Low AI-likeness score – style leans closer to typical human-written reviews.")
343
-
344
- # Grammar
345
- if gram >= 85 and bot >= 70:
346
- reasons.append("Grammar is near-perfect and very consistent, which is common in AI text.")
347
- elif gram >= 85 and bot < 50:
348
- reasons.append("Grammar is very clean but the AI score is low, could be a careful human reviewer.")
349
- elif gram < 60:
350
- reasons.append("Grammar has noticeable imperfections, more typical of casual human writing.")
351
-
352
- # Sentence structure
353
- if var_len < 5 and avg_len > 12 and bot >= 70:
354
- reasons.append("Sentence length is very uniform and long, which often appears in AI outputs.")
355
- elif var_len > 15:
356
- reasons.append("Sentence length varies a lot, which is more natural for human writing.")
357
-
358
- # Vocabulary diversity
359
- if ttr < 0.3 and bot >= 70:
360
- reasons.append("Vocabulary diversity is low despite longer text, hinting at templated or generated style.")
361
- elif ttr > 0.45:
362
- reasons.append("Vocabulary diversity is relatively high, which often indicates a human author.")
363
 
364
- # Mood-based explanation
365
- reasons.append(f"Overall sentiment detected: **{mood}**.")
 
 
 
 
 
 
 
 
 
 
366
 
367
- if strict_mode:
368
- reasons.append("Strict mode: thresholds are higher, so AI flags are more conservative but precise.")
 
 
 
 
 
 
 
 
369
 
370
- return reasons
371
 
372
- # --- Logic: Analyze Text ---
373
  def check_text(text, squad):
374
- if 'fake' not in squad:
375
- return {
376
- "bot_score": 0,
377
- "mood_label": "Unavailable",
378
- "grammar_score": 0,
379
- "mood_confidence": 0,
380
- "error": True,
381
- "error_msg": "AI text detector not loaded."
382
- }
383
-
384
- # 1. Bot / AI Check
385
- res_fake = squad['fake'](text[:512])[0]
386
- raw_label = res_fake.get('label', '1')
387
- raw_score = float(res_fake.get('score', 0.5))
388
 
389
- try:
390
- label_id = int(raw_label)
391
- except ValueError:
392
- label_id = 1 if "1" in str(raw_label) else 0
393
 
394
- if label_id == 1:
395
- ai_prob = raw_score
396
- else:
397
- ai_prob = 1 - raw_score
398
-
399
- bot_score = ai_prob * 100.0
400
-
401
- # 2. Mood
402
  mood_label = "Unknown"
403
- if 'mood' in squad:
404
- try:
405
- res_mood = squad['mood'](text[:512])[0]
406
- mood_label = res_mood.get('label', 'Unknown')
407
- except Exception:
408
- mood_label = "Unknown"
 
 
 
 
409
 
410
- # 3. Grammar (CoLA)
411
- grammar_score = 50.0
412
- if 'grammar' in squad:
413
- try:
414
- res_grammar = squad['grammar'](text[:512])[0]
415
- glabel = res_grammar.get('label', 'LABEL_0')
416
- gscore = float(res_grammar.get('score', 0.5))
417
- grammar_score = (gscore if glabel == 'LABEL_1' else (1 - gscore)) * 100.0
418
- except Exception:
419
- grammar_score = 50.0
420
 
421
  return {
422
- "bot_score": bot_score,
423
  "mood_label": mood_label,
424
- "grammar_score": grammar_score,
425
- "mood_confidence": 0,
426
  "error": False,
427
- "error_msg": None
428
  }
429
 
430
- # --- Logic: Analyze Image ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
431
  def check_image(img, squad):
432
- caption_text = "Caption unavailable"
433
- ai_chance = 0.0
 
 
434
 
435
- if 'img_main' in squad:
436
  try:
437
- preds = squad['img_main'](img)
438
- if isinstance(preds, list) and preds:
439
- best = max(preds, key=lambda x: x.get('score', 0))
440
- label = str(best.get('label', '')).lower()
441
- score = float(best.get('score', 0.5))
442
-
443
- if "fake" in label or "ai" in label:
444
- ai_prob = score
445
- elif "real" in label:
446
- ai_prob = 1 - score
447
- else:
448
- ai_prob = score
449
 
450
- ai_chance = ai_prob * 100.0
451
- except Exception:
452
- ai_chance = 0.0
 
 
 
 
 
 
453
 
454
- if 'caption' in squad:
455
  try:
456
- cap_res = squad['caption'](img)
457
- if isinstance(cap_res, list) and cap_res:
458
- caption_text = cap_res[0].get('generated_text', caption_text)
459
  except Exception:
460
  pass
461
 
 
 
 
462
  return {
463
- "ai_chance": ai_chance,
464
- "match": 1.0,
465
- "score_a": ai_chance,
466
- "score_b": ai_chance,
467
- "caption": caption_text
468
  }
469
 
470
- def get_image_from_url(url):
471
- try:
472
- headers = {
473
- 'User-Agent': (
474
- 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
475
- 'AppleWebKit/537.36 (KHTML, like Gecko) '
476
- 'Chrome/91.0.4472.124 Safari/537.36'
477
- )
478
- }
479
- r = requests.get(url, headers=headers, timeout=7, stream=True)
480
- if r.status_code != 200:
481
- return None
482
- return Image.open(io.BytesIO(r.content)).convert("RGB")
483
- except Exception:
484
- return None
485
 
486
- # --- GOOGLE REVERSE IMAGE SEARCH (SerpAPI) ---
487
- def reverse_image_search(image_obj):
488
  """
489
- Performs Google reverse image search using SerpAPI.
490
- Uses Google Reverse Image Search engine (not Google Lens).
491
-
492
- Args:
493
- image_obj: PIL Image object
494
-
495
- Returns:
496
- dict with 'success', 'results', 'error' keys
497
  """
498
- if not SERPAPI_KEY:
499
- return {
500
- "success": False,
501
- "error": "SERPAPI_KEY not configured. Add it to secrets or environment variables.",
502
- "results": []
503
- }
504
-
505
  try:
506
- # Method 1: Try using serpapi library (most reliable)
507
- try:
508
- from serpapi import GoogleSearch
509
-
510
- # Resize image to reasonable size
511
- max_size = 512
512
- img_copy = image_obj.copy()
513
- if img_copy.width > max_size or img_copy.height > max_size:
514
- img_copy.thumbnail((max_size, max_size), Image.LANCZOS)
515
-
516
- # Convert to base64
517
- buffered = io.BytesIO()
518
- img_copy.save(buffered, format="JPEG", quality=75, optimize=True)
519
- img_base64 = base64.b64encode(buffered.getvalue()).decode('utf-8')
520
-
521
- # Use Google Reverse Image Search (not Lens)
522
- params = {
523
- "engine": "google_reverse_image",
524
- "api_key": SERPAPI_KEY,
525
- "image_url": f"data:image/jpeg;base64,{img_base64}"
526
- }
527
-
528
- search = GoogleSearch(params)
529
- results_data = search.get_dict()
530
-
531
- # Check for errors
532
- if "error" in results_data:
533
- # Try fallback method
534
- raise Exception(results_data['error'])
535
-
536
- # Extract inline images or image results
537
- image_results = results_data.get("inline_images", [])
538
- if not image_results:
539
- image_results = results_data.get("image_results", [])
540
-
541
- results = []
542
- for match in image_results[:10]:
543
- results.append({
544
- "title": match.get("title", match.get("source", "No title")),
545
- "link": match.get("link", match.get("original", "")),
546
- "source": match.get("source", "Unknown"),
547
- "thumbnail": match.get("thumbnail", match.get("image", ""))
548
- })
549
-
550
- if results:
551
- return {
552
- "success": True,
553
- "results": results,
554
- "error": None
555
- }
556
-
557
- except ImportError:
558
- pass
559
- except Exception:
560
- pass
561
-
562
- # Method 2: Upload to temporary hosting and use URL
563
- # Using imgbb as temporary image host
564
- try:
565
- # Resize image
566
- max_size = 512
567
- img_copy = image_obj.copy()
568
- if img_copy.width > max_size or img_copy.height > max_size:
569
- img_copy.thumbnail((max_size, max_size), Image.LANCZOS)
570
-
571
- # Convert to base64
572
- buffered = io.BytesIO()
573
- img_copy.save(buffered, format="JPEG", quality=75, optimize=True)
574
- img_base64 = base64.b64encode(buffered.getvalue()).decode('utf-8')
575
-
576
- # Upload to imgbb (free, no account needed)
577
- imgbb_response = requests.post(
578
- "https://api.imgbb.com/1/upload",
579
- data={
580
- "key": "d2f1d3e8c3f5d85e5e9c8b9f7d4f0b68", # Public demo key
581
- "image": img_base64
582
- },
583
- timeout=15
584
- )
585
-
586
- if imgbb_response.status_code == 200:
587
- img_url = imgbb_response.json()['data']['url']
588
-
589
- # Now use this URL with SerpAPI
590
- params = {
591
- "engine": "google_reverse_image",
592
- "image_url": img_url,
593
- "api_key": SERPAPI_KEY
594
- }
595
-
596
- response = requests.get(
597
- "https://serpapi.com/search",
598
- params=params,
599
- timeout=30
600
- )
601
-
602
- if response.status_code == 200:
603
- data = response.json()
604
-
605
- # Extract results
606
- image_results = data.get("inline_images", [])
607
- if not image_results:
608
- image_results = data.get("image_results", [])
609
-
610
- results = []
611
- for match in image_results[:10]:
612
- results.append({
613
- "title": match.get("title", match.get("source", "No title")),
614
- "link": match.get("link", match.get("original", "")),
615
- "source": match.get("source", "Unknown"),
616
- "thumbnail": match.get("thumbnail", match.get("image", ""))
617
- })
618
-
619
- if results:
620
- return {
621
- "success": True,
622
- "results": results,
623
- "error": None
624
- }
625
- except Exception:
626
- pass
627
-
628
- # If all methods fail, return appropriate error
629
- return {
630
- "success": False,
631
- "error": "Could not perform reverse search. Possible reasons: API key invalid, rate limit exceeded, or service temporarily unavailable. Check your SerpAPI dashboard at serpapi.com/dashboard",
632
- "results": []
633
  }
634
-
635
- except requests.exceptions.Timeout:
636
- return {
637
- "success": False,
638
- "error": "Request timeout. Please try again with a smaller image.",
639
- "results": []
 
 
 
 
 
 
 
640
  }
 
641
  except Exception as e:
642
- return {
643
- "success": False,
644
- "error": f"Reverse search failed: {str(e)}",
645
- "results": []
646
- }
647
-
648
- # --- Plotting ---
649
-
650
- def breakdown_chart(stats):
651
- labels = ['AI-Likeness', 'Grammar Quality']
652
- values = [stats['bot_score'], stats['grammar_score']]
653
 
654
- fig, ax = plt.subplots(figsize=(4, 2))
655
- y_pos = np.arange(len(labels))
656
 
657
- ax.barh(y_pos, values, align='center', height=0.6)
658
- ax.set_yticks(y_pos)
 
 
 
 
 
 
659
  ax.set_yticklabels(labels)
660
  ax.invert_yaxis()
661
- ax.set_xlabel('Score (0-100)')
662
  ax.set_xlim(0, 100)
663
-
664
- ax.spines['top'].set_visible(False)
665
- ax.spines['right'].set_visible(False)
666
- ax.spines['left'].set_visible(False)
667
- ax.spines['bottom'].set_color('#DDD')
668
-
669
  plt.tight_layout()
670
  return fig
671
 
672
- def sentence_length_chart(stats):
673
- lens = stats["sentence_lengths"]
674
- fig, ax = plt.subplots(figsize=(4, 2))
675
- ax.hist(lens, bins=min(len(lens), 8) or 1, edgecolor='black')
676
- ax.set_xlabel("Sentence length (words)")
677
- ax.set_ylabel("Count")
 
 
 
 
678
  ax.set_title("Sentence Length Distribution")
679
  plt.tight_layout()
680
  return fig
681
 
682
- def word_freq_chart(stats):
683
- top_words = stats["top_words"]
684
- if not top_words:
685
- fig, ax = plt.subplots(figsize=(4, 2))
686
- ax.text(0.5, 0.5, "Not enough text", ha='center', va='center')
687
- ax.axis('off')
688
- return fig
689
-
690
- words, freqs = zip(*top_words)
691
- fig, ax = plt.subplots(figsize=(4, 2))
692
- x = np.arange(len(words))
693
- ax.bar(x, freqs)
694
- ax.set_xticks(x)
695
- ax.set_xticklabels(words, rotation=45, ha='right')
696
- ax.set_ylabel("Frequency")
697
- ax.set_title("Top Words (excluding stopwords)")
698
  plt.tight_layout()
699
  return fig
700
 
701
- # --- PDF REPORT GENERATION ---
702
 
703
- def generate_pdf_report(platform, review_text, text_res, text_stats, image_info, reverse_search_data=None):
704
- """
705
- Returns PDF bytes. Requires ReportLab.
706
- image_info: dict or None
707
- reverse_search_data: dict with reverse search results or None
708
- """
709
- buffer = io.BytesIO()
710
- c = canvas.Canvas(buffer, pagesize=A4)
711
- width, height = A4
712
- y = height - 50
713
-
714
- def write_line(text, font="Helvetica", size=10, leading=14):
715
- nonlocal y
716
- c.setFont(font, size)
717
- wrapped = textwrap.wrap(text, width=90)
718
- for line in wrapped:
719
- if y < 50:
720
- c.showPage()
721
- y = height - 50
722
- c.setFont(font, size)
723
- c.drawString(50, y, line)
724
- y -= leading
725
-
726
- # Header
727
- c.setFont("Helvetica-Bold", 16)
728
- c.drawString(50, y, "Review Validator Report")
729
- y -= 25
730
- c.setFont("Helvetica", 10)
731
- c.drawString(50, y, f"Generated on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
732
- y -= 15
733
- c.drawString(50, y, f"Platform: {platform}")
734
- y -= 25
735
-
736
- # Scores
737
- write_line("=== Text Analysis ===", font="Helvetica-Bold", size=12)
738
- write_line(f"AI-Likeness Score: {text_res['bot_score']:.1f}%")
739
- write_line(f"Grammar Quality: {text_res['grammar_score']:.1f}%")
740
- write_line(f"Sentiment: {text_res['mood_label']}")
741
- y -= 10
742
-
743
- # Structure stats
744
- write_line("Text Structure:", font="Helvetica-Bold", size=11)
745
- write_line(f"- Sentences: {text_stats['num_sentences']}")
746
- write_line(f"- Words: {text_stats['num_words']}")
747
- write_line(f"- Average sentence length: {text_stats['avg_sentence_len']:.1f} words")
748
- write_line(f"- Sentence length variance: {text_stats['var_sentence_len']:.1f}")
749
- write_line(f"- Vocabulary diversity (TTR): {text_stats['ttr']:.2f}")
750
- y -= 10
751
-
752
- # Review text
753
- write_line("Original Review:", font="Helvetica-Bold", size=11)
754
- write_line(review_text or "[empty review]")
755
- y -= 10
756
-
757
- # Image analysis
758
- if image_info is not None:
759
- write_line("=== Image Analysis ===", font="Helvetica-Bold", size=12)
760
- write_line(f"AI Probability: {image_info['ai_chance']:.1f}%")
761
- write_line(f"Caption (approx): {image_info['caption']}")
762
- y -= 10
763
-
764
- # Reverse search results
765
- if reverse_search_data and reverse_search_data.get('success'):
766
- write_line("=== Reverse Image Search Results ===", font="Helvetica-Bold", size=12)
767
- results = reverse_search_data.get('results', [])
768
- if results:
769
- write_line(f"Found {len(results)} matches online:")
770
- for i, result in enumerate(results[:5], 1):
771
- write_line(f"{i}. {result['title']}")
772
- write_line(f" Source: {result['source']}")
773
- write_line(f" Link: {result['link']}")
774
- y -= 5
775
- else:
776
- write_line("No matches found.")
777
- y -= 10
778
-
779
- c.showPage()
780
- c.save()
781
- pdf_bytes = buffer.getvalue()
782
- buffer.close()
 
 
 
 
 
 
 
 
 
 
 
 
 
783
  return pdf_bytes
784
 
785
- # --- PAGES ---
786
 
 
787
  def landing_page():
788
- st.markdown("""
 
789
  <div class="hero-box">
790
  <div class="hero-title">🛡️ Review Validator</div>
791
  <div class="hero-subtitle">
792
- Advanced AI-powered review and image analysis with graphs, explainability, reverse image search, and exportable reports.
793
  </div>
794
  </div>
795
- """, unsafe_allow_html=True)
 
 
796
 
797
  c1, c2, c3 = st.columns(3)
798
  with c1:
799
- st.markdown("""
 
800
  <div class="feature-card">
801
  <span class="emoji-icon">🤖</span>
802
- <h3>AI Text Detector</h3>
803
- <p>Modern models estimate whether a review looks AI-generated or human-written.</p>
804
  </div>
805
- """, unsafe_allow_html=True)
 
 
806
  with c2:
807
- st.markdown("""
 
808
  <div class="feature-card">
809
  <span class="emoji-icon">📸</span>
810
  <h3>Image Authenticity</h3>
811
- <p>Checks if product photos look real or AI-generated, with approximate captions.</p>
812
  </div>
813
- """, unsafe_allow_html=True)
 
 
814
  with c3:
815
- st.markdown("""
 
816
  <div class="feature-card">
817
- <span class="emoji-icon">🔍</span>
818
- <h3>Reverse Image Search</h3>
819
- <p>Find where the image appears online using Google reverse image search.</p>
820
  </div>
821
- """, unsafe_allow_html=True)
822
-
823
- st.write("")
824
- st.write("")
825
- col1, col2, col3 = st.columns([1, 2, 1])
826
- with col2:
827
- if st.button("🚀 START CHECKING REVIEWS", type="primary", use_container_width=True, key="start_btn"):
828
- st.session_state['page'] = 'detector'
829
  st.rerun()
830
 
831
- def detector_page(squad, warnings_text=None):
832
- # Initialize session state for platform if not exists
833
- if 'platform' not in st.session_state:
834
- st.session_state['platform'] = "Amazon"
835
-
836
- # Header & Selector
837
  c1, c2 = st.columns([3, 1])
838
  with c1:
839
- st.markdown("### 🛒 Select the Website")
840
  platform = st.selectbox(
841
- "Where is this review from?",
842
- ["Amazon", "Flipkart", "Zomato", "Swiggy", "Myntra", "Other"],
843
  label_visibility="collapsed",
844
- key="platform_selector"
845
  )
846
- st.session_state['platform'] = platform
847
  with c2:
848
- if st.button("⬅️ Back Home", key="back_home_btn"):
849
- st.session_state['page'] = 'landing'
850
  st.rerun()
851
 
852
  st.divider()
853
 
854
- if warnings_text:
855
- st.markdown(f"""
856
- <div class="warning-box">
857
- <strong>Note:</strong><br>{warnings_text}
858
- </div>
859
- """, unsafe_allow_html=True)
860
 
861
- tab1, tab2 = st.tabs(["📝 Check Review Text", "📸 Check Product Image"])
862
-
863
- # --- TEXT TAB ---
864
- with tab1:
865
- col1, col2 = st.columns([2, 1])
866
- with col1:
867
- txt_input = st.text_area(
868
  "Paste Review Here:",
869
- height=150,
870
- placeholder="Example: I ordered this yesterday and the quality is amazing...",
871
- key="txt_input"
872
  )
873
- with col2:
874
- st.info("💡 Tip: Paste the full review for the best result.")
875
- strict_mode = st.checkbox("Use Strict AI Mode (safer, but misses some cases)", value=True)
876
-
877
- if st.button("Analyze Text", type="primary", use_container_width=True, key="analyze_text_btn"):
878
- if txt_input.strip():
879
- with st.spinner("Analyzing text..."):
880
- res = check_text(txt_input, squad)
881
- stats = analyze_text_structure(txt_input)
882
- st.session_state['text_res'] = (res, stats, strict_mode, st.session_state['platform'], txt_input)
883
- st.rerun()
884
  else:
885
- st.warning("Please paste a review first.")
886
-
887
- if 'text_res' in st.session_state:
888
- res, stats, strict_mode_saved, platform_saved, review_text_saved = st.session_state['text_res']
889
-
 
 
 
890
  if res.get("error"):
891
- st.error(res.get("error_msg", "Text models failed to load."))
892
  else:
 
893
  st.markdown("---")
894
-
895
- bot_score = res['bot_score']
896
- grammar_score = res['grammar_score']
897
- mood_label = res['mood_label']
898
-
899
- # Thresholds
900
- if strict_mode_saved:
901
- t_high = 90
902
- t_mid = 70
903
- else:
904
- t_high = 75
905
- t_mid = 55
906
-
907
- if bot_score >= t_high:
908
- verdict_text = "🚨 Very likely AI-generated"
909
- verdict_type = "error"
910
- elif bot_score >= t_mid:
911
- verdict_text = "🤔 Suspicious / Mixed"
912
- verdict_type = "warning"
913
- else:
914
- verdict_text = "✅ Likely human-written"
915
- verdict_type = "success"
916
-
917
  k1, k2, k3 = st.columns(3)
918
- color = "red" if bot_score > 50 else "green"
919
  k1.markdown(
920
- f"""<div class="stat-box">
921
- <div class="stat-num" style="color:{color}">{bot_score:.0f}%</div>
922
- <div class="stat-txt">AI-Likeness</div>
923
- </div>""",
924
- unsafe_allow_html=True
925
  )
926
  k2.markdown(
927
- f"""<div class="stat-box">
928
- <div class="stat-num">{grammar_score:.0f}%</div>
929
- <div class="stat-txt">Grammar Quality</div>
930
- </div>""",
931
- unsafe_allow_html=True
932
  )
933
  k3.markdown(
934
- f"""<div class="stat-box">
935
- <div class="stat-num">{mood_label}</div>
936
- <div class="stat-txt">Sentiment</div>
937
- </div>""",
938
- unsafe_allow_html=True
939
  )
940
 
941
- st.write("")
942
  g1, g2, g3 = st.columns(3)
943
  with g1:
944
- st.markdown("#### 📊 Scores")
945
- fig = breakdown_chart(res)
946
- st.pyplot(fig, use_container_width=True)
947
  with g2:
948
- st.markdown("#### 📏 Sentence Lengths")
949
- fig2 = sentence_length_chart(stats)
950
- st.pyplot(fig2, use_container_width=True)
951
  with g3:
952
- st.markdown("#### 🔤 Top Words")
953
- fig3 = word_freq_chart(stats)
954
- st.pyplot(fig3, use_container_width=True)
955
-
956
- st.markdown("#### 💡 Verdict & Explanation")
957
- if verdict_type == "error":
958
- st.error(verdict_text)
959
- elif verdict_type == "warning":
960
- st.warning(verdict_text)
961
- else:
962
- st.success(verdict_text)
963
 
964
- reasons = explain_text(res, stats, strict_mode_saved)
965
- for r in reasons:
966
- st.markdown(f"- {r}")
967
 
968
- st.markdown(
969
- "<small>Note: These scores and explanations are signals, not absolute proof. "
970
- "Always combine them with your own judgement.</small>",
971
- unsafe_allow_html=True
972
- )
973
-
974
- # PDF report button
975
- st.write("")
976
- if HAVE_REPORTLAB:
977
- img_info_for_pdf = st.session_state.get("img_res_for_pdf", None)
978
- reverse_search_for_pdf = st.session_state.get("reverse_search_for_pdf", None)
979
- pdf_bytes = generate_pdf_report(
980
- platform_saved,
981
- review_text_saved,
982
  res,
983
- stats,
984
- img_info_for_pdf,
985
- reverse_search_for_pdf
986
  )
 
 
987
  st.download_button(
988
- "📄 Download PDF Report",
989
- data=pdf_bytes,
990
- file_name="review_validator_report.pdf",
991
  mime="application/pdf",
992
  )
993
- else:
994
- st.info("PDF report requires reportlab. Add `reportlab` to requirements.txt to enable export.")
995
-
996
- # --- IMAGE TAB ---
997
- with tab2:
998
- col_in, col_view = st.columns([1, 1])
999
 
 
 
 
1000
  with col_in:
1001
  st.markdown("#### Step 1: Provide Image")
1002
  method = st.radio(
1003
- "Input Method",
1004
  ["Paste URL", "Upload File"],
1005
  horizontal=True,
1006
- label_visibility="collapsed"
1007
  )
1008
 
1009
  with st.form("image_form"):
1010
  img_file = None
1011
- img_url = None
 
1012
 
1013
  if method == "Paste URL":
1014
- img_url = st.text_input("Paste Image Link:")
 
 
 
 
1015
  else:
1016
- img_file = st.file_uploader("Upload Image", type=['jpg', 'jpeg', 'png'])
1017
-
1018
- strict_img = st.checkbox("Use Strict AI Mode for Images", value=True, key="strict_img_check")
1019
- do_reverse_search = st.checkbox("🔍 Perform Reverse Image Search", value=True, key="reverse_search_check")
1020
- submitted = st.form_submit_button("Scan Image", type="primary")
1021
 
1022
- if submitted:
1023
- target_img = None
1024
- if method == "Paste URL" and img_url:
1025
- target_img = get_image_from_url(img_url)
1026
- elif method == "Upload File" and img_file:
1027
- try:
1028
- target_img = Image.open(img_file).convert("RGB")
1029
- except Exception:
1030
- target_img = None
1031
 
1032
- if target_img is None:
1033
- st.error("Could not read image. Try another link or file.")
 
 
 
 
1034
  else:
1035
- with st.spinner("Scanning image..."):
1036
- data = check_image(target_img, squad)
1037
- st.session_state['img_res'] = (data, strict_img)
1038
- st.session_state['current_img'] = target_img
1039
- # store a simplified version for PDF report
1040
- st.session_state['img_res_for_pdf'] = data
1041
-
1042
- # Perform reverse image search if requested
1043
- if do_reverse_search:
1044
- with st.spinner("Performing reverse image search..."):
1045
- reverse_results = reverse_image_search(target_img)
1046
- st.session_state['reverse_search'] = reverse_results
1047
- st.session_state['reverse_search_for_pdf'] = reverse_results
1048
- else:
1049
- st.session_state['reverse_search'] = None
1050
- st.session_state['reverse_search_for_pdf'] = None
1051
-
1052
- # Force rerun to show results
1053
- st.rerun()
1054
-
1055
- with col_view:
1056
- if 'current_img' in st.session_state:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1057
  st.image(
1058
- st.session_state['current_img'],
1059
  use_column_width=True,
1060
- caption="Analyzed Image"
1061
  )
1062
 
1063
- if 'img_res' in st.session_state:
1064
- data, strict_img = st.session_state['img_res']
1065
- ai_score = data['ai_chance']
1066
- caption = data['caption']
1067
-
1068
- st.markdown("#### Step 2: Analysis Results")
1069
-
1070
- st.markdown(f"""
1071
  <div class="analysis-box">
1072
- <strong>👁️ Visual Caption (approx):</strong><br>
1073
- <em>{caption}</em>
1074
  </div>
1075
- """, unsafe_allow_html=True)
1076
-
1077
- st.write("")
1078
 
1079
- if strict_img:
1080
- t_high = 90
1081
- t_mid = 70
 
 
 
1082
  else:
1083
- t_high = 80
1084
- t_mid = 60
 
 
 
 
 
 
 
1085
 
1086
- if ai_score >= t_high:
1087
- st.error(f"🤖 Very likely AI-generated ({ai_score:.0f}% AI score)")
1088
- elif ai_score >= t_mid:
1089
- st.warning(f"🤔 Suspicious / possibly AI ({ai_score:.0f}% AI score)")
 
 
 
 
 
 
 
 
 
 
 
 
1090
  else:
1091
- st.success(f"📸 Likely real photo ({100 - ai_score:.0f}% real score)")
1092
-
1093
- st.markdown("**Detector Details:**")
1094
- st.progress(ai_score / 100.0, text=f"AI probability: {ai_score:.1f}%")
1095
-
1096
- with st.expander("See raw detector scores"):
1097
- st.write(f"Image AI Score (model): {ai_score:.1f}%")
1098
-
1099
- # Display reverse search results
1100
- if 'reverse_search' in st.session_state and st.session_state['reverse_search'] is not None:
1101
- reverse_data = st.session_state['reverse_search']
1102
-
1103
- st.markdown("---")
1104
- st.markdown("#### 🔍 Reverse Image Search Results")
1105
-
1106
- if reverse_data['success']:
1107
- results = reverse_data['results']
1108
-
1109
- if results:
1110
- st.markdown(f"""
1111
- <div class="reverse-search-box">
1112
- <strong>✅ Found {len(results)} matches online</strong><br>
1113
- <small>This image appears on the following websites:</small>
1114
- </div>
1115
- """, unsafe_allow_html=True)
1116
-
1117
- for i, result in enumerate(results, 1):
1118
- with st.container():
1119
- st.markdown(f"""
1120
- <div class="result-item">
1121
- <strong>{i}. {result['title']}</strong><br>
1122
- <small>📍 Source: {result['source']}</small><br>
1123
- <small>🔗 <a href="{result['link']}" target="_blank">View Original</a></small>
1124
- </div>
1125
- """, unsafe_allow_html=True)
1126
- else:
1127
- st.markdown("""
1128
- <div class="reverse-search-box">
1129
- <strong>🆕 No matches found online</strong><br>
1130
- <small>This could mean the image is original/new, or not indexed by Google yet.</small>
1131
- </div>
1132
- """, unsafe_allow_html=True)
1133
- else:
1134
- st.error(f"❌ {reverse_data['error']}")
1135
- if "SERPAPI_KEY" in reverse_data['error']:
1136
- st.info("💡 To enable reverse image search:\n1. Sign up at https://serpapi.com/\n2. Add your API key to secrets or environment variables")
 
 
 
 
1137
 
1138
- # --- MAIN CONTROLLER ---
1139
  def main():
1140
  inject_custom_css()
1141
 
1142
- if 'page' not in st.session_state:
1143
- st.session_state['page'] = 'landing'
1144
 
1145
- with st.spinner("Loading AI models (first run can take some time)..."):
1146
  squad, err = load_ai_squad()
1147
 
1148
- if squad is None:
1149
- st.error(err or "Failed to load models.")
1150
- st.stop()
1151
-
1152
- warnings_text = None
1153
- if err:
1154
- warnings_text = "Some features may be limited:<br>" + err.replace("\n", "<br>")
1155
 
1156
- if st.session_state['page'] == 'landing':
1157
  landing_page()
1158
  else:
1159
- detector_page(squad, warnings_text=warnings_text)
 
1160
 
1161
  if __name__ == "__main__":
1162
- main()
 
1
  """
2
+ Review Validator - Final Version with SerpAPI Integration
 
3
  """
4
 
5
  import os
6
  import io
7
+ import warnings
8
+ from collections import Counter
9
+
10
  import numpy as np
11
  import streamlit as st
12
  from transformers import pipeline, logging as hf_logging
13
  from PIL import Image
14
  import matplotlib
 
15
  import matplotlib.pyplot as plt
16
  import requests
17
+
18
+ from reportlab.lib.pagesizes import A4
19
+ from reportlab.platypus import (
20
+ SimpleDocTemplate,
21
+ Paragraph,
22
+ Spacer,
23
+ Table,
24
+ TableStyle,
25
+ )
26
+ from reportlab.lib.styles import getSampleStyleSheet
27
+ from reportlab.lib import colors
28
+
29
+ # ------------------- SILENCE NOISE -------------------
 
 
 
30
  warnings.filterwarnings("ignore")
31
  hf_logging.set_verbosity_error()
32
  os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
33
+ matplotlib.use("Agg")
34
 
35
  st.set_page_config(
36
  page_title="Review Validator",
37
  page_icon="🛡️",
38
  layout="wide",
39
+ initial_sidebar_state="collapsed",
40
  )
41
 
42
+ # ------------------- MODEL NAMES -------------------
43
+ MODEL_FAKE = "openai-community/roberta-base-openai-detector"
 
 
 
 
 
 
44
  MODEL_MOOD = "cardiffnlp/twitter-roberta-base-sentiment-latest"
 
 
45
  MODEL_GRAMMAR = "textattack/roberta-base-CoLA"
46
+ MODEL_IMG_A = "dima806/ai_generated_image_detection"
47
+ MODEL_IMG_B = "umm-maybe/AI-image-detector"
 
 
 
48
  MODEL_CAPTION = "Salesforce/blip-image-captioning-base"
49
 
 
50
 
51
+ # ------------------- TOKENS / SECRETS -------------------
52
+ def get_hf_token():
 
 
 
 
 
 
53
  token = os.environ.get("HF_TOKEN")
54
  if token:
55
  return token
 
56
  try:
57
  if hasattr(st, "secrets") and "HF_TOKEN" in st.secrets:
58
  return st.secrets["HF_TOKEN"]
59
  except Exception:
60
  pass
 
61
  return None
62
 
63
+
64
  def get_serpapi_key():
 
 
 
 
 
65
  key = os.environ.get("SERPAPI_KEY")
66
  if key:
67
  return key
 
68
  try:
69
  if hasattr(st, "secrets") and "SERPAPI_KEY" in st.secrets:
70
  return st.secrets["SERPAPI_KEY"]
71
  except Exception:
72
  pass
 
73
  return None
74
 
 
 
75
 
76
+ HF_TOKEN = get_hf_token()
77
+
78
+
79
+ # ------------------- CSS -------------------
80
  def inject_custom_css():
81
+ st.markdown(
82
+ """
83
  <style>
84
+ .stApp {
85
+ background-color: #ffffff;
86
+ color: #333333;
87
+ font-family: "Helvetica Neue", sans-serif;
88
  }
89
+ h1 { color:#2C3E50; font-weight:800; }
90
+ h2 { color:#34495E; font-weight:600; }
 
 
 
91
  .hero-box {
92
+ padding:40px;
93
+ background:linear-gradient(135deg,#667eea 0%,#764ba2 100%);
94
+ border-radius:20px;
95
+ color:white;
96
+ text-align:center;
97
+ margin-bottom:30px;
98
  }
99
+ .hero-title{font-size:3rem;font-weight:bold;margin-bottom:10px;}
100
+ .hero-subtitle{font-size:1.2rem;opacity:0.9;}
101
+
102
+ .feature-card{
103
+ background:#F8F9FA;
104
+ padding:20px;
105
+ border-radius:15px;
106
+ border:1px solid #EEEEEE;
107
+ text-align:center;
108
+ transition:transform 0.2s;
109
  }
110
+ .feature-card:hover{transform:translateY(-5px);border-color:#764ba2;}
111
+ .emoji-icon{font-size:3rem;margin-bottom:10px;display:block;}
112
+
113
+ .stat-box{
114
+ text-align:center;
115
+ padding:15px;
116
+ border-radius:12px;
117
+ background:white;
118
+ box-shadow:0 4px 6px rgba(0,0,0,0.05);
119
+ border:1px solid #EEE;
120
  }
121
+ .stat-num{font-size:24px;font-weight:900;color:#333;}
122
+ .stat-txt{font-size:12px;text-transform:uppercase;color:#777;letter-spacing:1px;}
123
+
124
+ .analysis-box{
125
+ background:#f0f7ff;
126
+ border-left:5px solid #4285F4;
127
+ padding:15px;
128
+ border-radius:5px;
129
+ margin-top:15px;
130
  }
131
+ .stButton>button{
132
+ border-radius:30px;
133
+ font-weight:bold;
134
+ border:none;
135
+ padding:0.5rem 2rem;
136
+ transition:all 0.3s;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
137
  }
138
  </style>
139
+ """,
140
+ unsafe_allow_html=True,
141
+ )
142
 
143
+
144
+ # ------------------- LOAD MODELS -------------------
145
  @st.cache_resource(show_spinner=False)
146
  def load_ai_squad():
 
 
 
 
147
  squad = {}
148
+ if not HF_TOKEN:
149
+ return None, "HF_TOKEN missing. Set it in env or Streamlit secrets."
150
 
 
 
 
151
  try:
152
+ try:
153
+ squad["fake"] = pipeline(
154
+ "text-classification", model=MODEL_FAKE, token=HF_TOKEN
155
+ )
156
+ except Exception as e:
157
+ print("Fake model error:", e)
 
158
 
159
+ try:
160
+ squad["mood"] = pipeline(
161
+ "sentiment-analysis",
162
+ model=MODEL_MOOD,
163
+ tokenizer=MODEL_MOOD,
164
+ token=HF_TOKEN,
165
+ )
166
+ except Exception as e:
167
+ print("Mood model error:", e)
168
 
169
+ try:
170
+ squad["grammar"] = pipeline(
171
+ "text-classification", model=MODEL_GRAMMAR, token=HF_TOKEN
172
+ )
173
+ except Exception as e:
174
+ print("Grammar model error:", e)
 
 
175
 
176
+ try:
177
+ squad["img_a"] = pipeline(
178
+ "image-classification", model=MODEL_IMG_A, token=HF_TOKEN
179
+ )
180
+ squad["img_b"] = pipeline(
181
+ "image-classification", model=MODEL_IMG_B, token=HF_TOKEN
182
+ )
183
+ squad["caption"] = pipeline(
184
+ "image-to-text", model=MODEL_CAPTION, token=HF_TOKEN
185
+ )
186
+ except Exception as e:
187
+ print("Image model error:", e)
188
 
 
 
 
 
 
 
189
  except Exception as e:
190
+ return None, str(e)
191
+
192
+ return squad, None
193
+
194
+
195
+ # ------------------- TEXT HELPERS -------------------
196
+ def compute_text_stats(text: str):
197
+ sentences = [
198
+ s.strip()
199
+ for s in text.replace("!", ".").replace("?", ".").split(".")
200
+ if s.strip()
201
+ ]
202
+ words = text.split()
203
+ word_count = len(words)
204
+ sent_lengths = [len(s.split()) for s in sentences] if sentences else []
205
+ avg_sent_len = np.mean(sent_lengths) if sent_lengths else 0.0
206
+ vocab = {w.lower().strip(".,!?\"'") for w in words if w.strip()}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
207
  vocab_size = len(vocab)
208
+ ttr = (vocab_size / word_count * 100) if word_count > 0 else 0.0
209
+ cleaned = [w.lower().strip(".,!?\"'") for w in words if w.strip()]
210
+ common = Counter(cleaned).most_common(8)
 
 
 
 
211
  return {
212
+ "sentence_count": len(sentences),
213
+ "word_count": word_count,
214
+ "avg_sentence_length": avg_sent_len,
215
+ "vocab_size": vocab_size,
216
+ "type_token_ratio": ttr,
 
217
  "sentence_lengths": sent_lengths,
218
+ "top_words": common,
219
  }
220
 
221
+
222
+ def explain_text(res, stats):
223
+ lines = []
 
 
224
  bot = res["bot_score"]
225
  gram = res["grammar_score"]
226
  mood = res["mood_label"]
 
 
 
 
 
227
 
228
+ if bot > 70:
229
+ lines.append(
230
+ "The AI-likeness score is high, indicating that the review strongly resembles machine-generated text."
231
+ )
232
+ elif bot > 40:
233
+ lines.append(
234
+ "The AI-likeness score is in a borderline range, so the review should be treated with caution."
235
+ )
236
  else:
237
+ lines.append(
238
+ "The AI-likeness score is low, suggesting the review is likely human-written."
239
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
240
 
241
+ if gram > 80:
242
+ lines.append(
243
+ "Grammar quality is unusually clean and consistent, which sometimes correlates with AI-written or heavily edited content."
244
+ )
245
+ elif gram < 40:
246
+ lines.append(
247
+ "Grammar quality is weak, which can indicate spammy content but usually not advanced AI writing."
248
+ )
249
+ else:
250
+ lines.append(
251
+ "Grammar quality is moderate and falls within a typical human writing range."
252
+ )
253
 
254
+ lines.append(
255
+ f"The sentiment model detects a {mood.lower()} tone, which can be cross-checked with the context of the review."
256
+ )
257
+ lines.append(
258
+ f"The review contains {stats['sentence_count']} sentences and {stats['word_count']} words, with an average of {stats['avg_sentence_length']:.1f} words per sentence."
259
+ )
260
+ lines.append(
261
+ f"The vocabulary richness (type-token ratio) is approximately {stats['type_token_ratio']:.1f}%, indicating how repetitive or diverse the language is."
262
+ )
263
+ return "\n\n".join(lines)
264
 
 
265
 
 
266
  def check_text(text, squad):
267
+ if "fake" not in squad:
268
+ return {"error": True}
 
 
 
 
 
 
 
 
 
 
 
 
269
 
270
+ res_fake = squad["fake"](text[:512])[0]
271
+ bot = res_fake["score"] if res_fake["label"] == "Fake" else 1 - res_fake["score"]
 
 
272
 
 
 
 
 
 
 
 
 
273
  mood_label = "Unknown"
274
+ if "mood" in squad:
275
+ res_m = squad["mood"](text[:512])[0]
276
+ mood_label = res_m["label"]
277
+
278
+ grammar_score = 0.5
279
+ if "grammar" in squad:
280
+ res_g = squad["grammar"](text[:512])[0]
281
+ grammar_score = (
282
+ res_g["score"] if res_g["label"] == "LABEL_1" else 1 - res_g["score"]
283
+ )
284
 
285
+ stats = compute_text_stats(text)
 
 
 
 
 
 
 
 
 
286
 
287
  return {
288
+ "bot_score": bot * 100,
289
  "mood_label": mood_label,
290
+ "grammar_score": grammar_score * 100,
291
+ "stats": stats,
292
  "error": False,
 
293
  }
294
 
295
+
296
+ # ------------------- IMAGE HELPERS -------------------
297
+ def get_image_from_url(url: str):
298
+ """
299
+ Returns (PIL.Image or None, error_message or None)
300
+ Handles 403 cleanly instead of throwing exceptions.
301
+ """
302
+ try:
303
+ headers = {
304
+ "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
305
+ "AppleWebKit/537.36 (KHTML, like Gecko) "
306
+ "Chrome/120.0 Safari/537.36"
307
+ }
308
+ r = requests.get(url, headers=headers, timeout=10)
309
+ if r.status_code == 403:
310
+ return None, (
311
+ "The image host returned HTTP 403 (Forbidden). "
312
+ "This usually means the server is blocking automated downloads. "
313
+ "Download the image manually and upload it as a file instead."
314
+ )
315
+ if r.status_code != 200:
316
+ return None, f"Image host returned HTTP {r.status_code}."
317
+ img = Image.open(io.BytesIO(r.content)).convert("RGB")
318
+ return img, None
319
+ except Exception as e:
320
+ return None, f"Error fetching image: {e}"
321
+
322
+
323
  def check_image(img, squad):
324
+ score_a = 0.0
325
+ score_b = 0.0
326
+ caption = "Analysis unavailable."
327
+ ai_words = ["fake", "artificial", "ai", "generated"]
328
 
329
+ if "img_a" in squad:
330
  try:
331
+ for r in squad["img_a"](img):
332
+ if any(w in r["label"].lower() for w in ai_words):
333
+ score_a = max(score_a, r["score"])
334
+ except Exception as e:
335
+ print("img_a error:", e)
 
 
 
 
 
 
 
336
 
337
+ if "img_b" in squad:
338
+ try:
339
+ for r in squad["img_b"](img):
340
+ if any(w in r["label"].lower() for w in ai_words):
341
+ score_b = max(score_b, r["score"])
342
+ except Exception as e:
343
+ print("img_b error:", e)
344
+ else:
345
+ score_b = score_a
346
 
347
+ if "caption" in squad:
348
  try:
349
+ cap_res = squad["caption"](img)
350
+ caption = cap_res[0]["generated_text"]
 
351
  except Exception:
352
  pass
353
 
354
+ avg_ai = (score_a + score_b) / 2
355
+ match = 1.0 - abs(score_a - score_b)
356
+
357
  return {
358
+ "ai_chance": avg_ai * 100,
359
+ "match": match,
360
+ "score_a": score_a * 100,
361
+ "score_b": score_b * 100,
362
+ "caption": caption,
363
  }
364
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
365
 
366
+ # ------------------- SERPAPI REVERSE IMAGE -------------------
367
+ def serpapi_reverse_image_search(image_url: str, api_key: str):
368
  """
369
+ Google Reverse Image Search using SerpAPI.
370
+ Returns dict or None, and error_message if any.
 
 
 
 
 
 
371
  """
372
+ if not api_key:
373
+ return None, "SerpAPI key not configured."
374
+ if not image_url:
375
+ return None, "No image URL provided."
376
+
 
 
377
  try:
378
+ params = {
379
+ "engine": "google_reverse_image",
380
+ "image_url": image_url,
381
+ "api_key": api_key,
382
+ "output": "json",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
383
  }
384
+ resp = requests.get("https://serpapi.com/search", params=params, timeout=25)
385
+ if resp.status_code == 403:
386
+ return None, (
387
+ "SerpAPI returned HTTP 403 (Forbidden). "
388
+ "Check that the API key is valid and you have enough quota."
389
+ )
390
+ if resp.status_code != 200:
391
+ return None, f"SerpAPI HTTP {resp.status_code}: {resp.text[:180]}"
392
+
393
+ data = resp.json()
394
+ result = {
395
+ "best_guess": data.get("image_guess"),
396
+ "visual_matches": data.get("visual_matches", []),
397
  }
398
+ return result, None
399
  except Exception as e:
400
+ return None, f"Error calling SerpAPI: {e}"
 
 
 
 
 
 
 
 
 
 
401
 
 
 
402
 
403
+ # ------------------- PLOTS -------------------
404
+ def breakdown_chart(res):
405
+ labels = ["Bot Probability", "Grammar Quality"]
406
+ vals = [res["bot_score"], res["grammar_score"]]
407
+ fig, ax = plt.subplots(figsize=(4, 2.2))
408
+ y = np.arange(len(labels))
409
+ ax.barh(y, vals)
410
+ ax.set_yticks(y)
411
  ax.set_yticklabels(labels)
412
  ax.invert_yaxis()
 
413
  ax.set_xlim(0, 100)
414
+ for i, v in enumerate(vals):
415
+ ax.text(v + 1, i, f"{v:.0f}%", va="center", fontsize=8)
 
 
 
 
416
  plt.tight_layout()
417
  return fig
418
 
419
+
420
+ def sentence_length_hist(stats):
421
+ fig, ax = plt.subplots(figsize=(4, 2.2))
422
+ if stats["sentence_lengths"]:
423
+ ax.hist(
424
+ stats["sentence_lengths"],
425
+ bins=min(8, len(stats["sentence_lengths"])),
426
+ )
427
+ ax.set_xlabel("Words per sentence")
428
+ ax.set_ylabel("Frequency")
429
  ax.set_title("Sentence Length Distribution")
430
  plt.tight_layout()
431
  return fig
432
 
433
+
434
+ def word_frequency_chart(stats):
435
+ fig, ax = plt.subplots(figsize=(4, 2.2))
436
+ top = stats["top_words"]
437
+ if top:
438
+ words = [w for w, _ in top]
439
+ counts = [c for _, c in top]
440
+ ax.bar(words, counts)
441
+ ax.set_xticklabels(words, rotation=45, ha="right", fontsize=8)
442
+ ax.set_title("Top Word Frequency")
 
 
 
 
 
 
443
  plt.tight_layout()
444
  return fig
445
 
 
446
 
447
+ # ------------------- PDF REPORT -------------------
448
+ def generate_pdf(text_input, text_res, image_res, reverse_res, platform):
449
+ buf = io.BytesIO()
450
+ doc = SimpleDocTemplate(buf, pagesize=A4, leftMargin=30, rightMargin=30)
451
+ styles = getSampleStyleSheet()
452
+ elems = []
453
+
454
+ elems.append(Paragraph("Review Validator Report", styles["Title"]))
455
+ elems.append(Spacer(1, 6))
456
+ elems.append(Paragraph(f"Platform: {platform}", styles["Normal"]))
457
+ elems.append(Spacer(1, 10))
458
+
459
+ if text_input:
460
+ elems.append(Paragraph("Input Review Text", styles["Heading2"]))
461
+ elems.append(Spacer(1, 4))
462
+ safe = text_input.replace("\n", "<br/>")
463
+ elems.append(Paragraph(safe, styles["Normal"]))
464
+ elems.append(Spacer(1, 8))
465
+
466
+ if text_res and not text_res.get("error", False):
467
+ stats = text_res["stats"]
468
+ elems.append(Paragraph("Text Authenticity Analysis", styles["Heading2"]))
469
+ data = [
470
+ ["Bot-likeness", f"{text_res['bot_score']:.1f}%"],
471
+ ["Grammar Quality", f"{text_res['grammar_score']:.1f}%"],
472
+ ["Sentiment", text_res["mood_label"]],
473
+ ["Sentence Count", str(stats["sentence_count"])],
474
+ ["Word Count", str(stats["word_count"])],
475
+ ["Avg. Sentence Length", f"{stats['avg_sentence_length']:.1f}"],
476
+ ["Type-Token Ratio", f"{stats['type_token_ratio']:.1f}%"],
477
+ ]
478
+ tbl = Table(data, hAlign="LEFT")
479
+ tbl.setStyle(
480
+ TableStyle(
481
+ [
482
+ ("BACKGROUND", (0, 0), (-1, 0), colors.lightgrey),
483
+ ("GRID", (0, 0), (-1, -1), 0.25, colors.grey),
484
+ ("BOX", (0, 0), (-1, -1), 0.25, colors.black),
485
+ ]
486
+ )
487
+ )
488
+ elems.append(tbl)
489
+ elems.append(Spacer(1, 8))
490
+
491
+ explanation = explain_text(text_res, stats)
492
+ elems.append(Paragraph("Interpretation", styles["Heading3"]))
493
+ for para in explanation.split("\n\n"):
494
+ elems.append(Paragraph(para, styles["Normal"]))
495
+ elems.append(Spacer(1, 3))
496
+
497
+ if image_res:
498
+ elems.append(Spacer(1, 8))
499
+ elems.append(Paragraph("Image Authenticity Analysis", styles["Heading2"]))
500
+ data2 = [
501
+ ["AI-likeness (avg)", f"{image_res['ai_chance']:.1f}%"],
502
+ ["Model A Score", f"{image_res['score_a']:.1f}%"],
503
+ ["Model B Score", f"{image_res['score_b']:.1f}%"],
504
+ ["Model Agreement", f"{image_res['match']*100:.1f}%"],
505
+ ]
506
+ t2 = Table(data2, hAlign="LEFT")
507
+ t2.setStyle(
508
+ TableStyle(
509
+ [
510
+ ("BACKGROUND", (0, 0), (-1, 0), colors.lightgrey),
511
+ ("GRID", (0, 0), (-1, -1), 0.25, colors.grey),
512
+ ("BOX", (0, 0), (-1, -1), 0.25, colors.black),
513
+ ]
514
+ )
515
+ )
516
+ elems.append(t2)
517
+ elems.append(Spacer(1, 4))
518
+ elems.append(Paragraph(f"Caption: {image_res['caption']}", styles["Normal"]))
519
+
520
+ if reverse_res:
521
+ elems.append(Spacer(1, 8))
522
+ elems.append(Paragraph("Reverse Image Search (SerpAPI)", styles["Heading2"]))
523
+ best = reverse_res.get("best_guess")
524
+ count = reverse_res.get("count", 0)
525
+ elems.append(Paragraph(f"Visual matches found: {count}", styles["Normal"]))
526
+ if best:
527
+ elems.append(Paragraph(f"Google best guess: {best}", styles["Normal"]))
528
+ links = reverse_res.get("top_links", [])
529
+ if links:
530
+ elems.append(Spacer(1, 4))
531
+ elems.append(Paragraph("Top Matching Sources:", styles["Heading3"]))
532
+ for item in links:
533
+ line = f"{item.get('title') or item.get('link')} (source: {item.get('source')})"
534
+ elems.append(Paragraph(line, styles["Normal"]))
535
+ elems.append(Spacer(1, 2))
536
+
537
+ doc.build(elems)
538
+ pdf_bytes = buf.getvalue()
539
+ buf.close()
540
  return pdf_bytes
541
 
 
542
 
543
+ # ------------------- UI: LANDING -------------------
544
  def landing_page():
545
+ st.markdown(
546
+ """
547
  <div class="hero-box">
548
  <div class="hero-title">🛡️ Review Validator</div>
549
  <div class="hero-subtitle">
550
+ Detect AI-written reviews, AI-generated product images, and reused images via Google Reverse Image Search.
551
  </div>
552
  </div>
553
+ """,
554
+ unsafe_allow_html=True,
555
+ )
556
 
557
  c1, c2, c3 = st.columns(3)
558
  with c1:
559
+ st.markdown(
560
+ """
561
  <div class="feature-card">
562
  <span class="emoji-icon">🤖</span>
563
+ <h3>Text Authenticity</h3>
564
+ <p>Transformer-based models estimate how likely a review is written by AI.</p>
565
  </div>
566
+ """,
567
+ unsafe_allow_html=True,
568
+ )
569
  with c2:
570
+ st.markdown(
571
+ """
572
  <div class="feature-card">
573
  <span class="emoji-icon">📸</span>
574
  <h3>Image Authenticity</h3>
575
+ <p>Dual detectors and captioning analyze whether an image is real or AI-generated.</p>
576
  </div>
577
+ """,
578
+ unsafe_allow_html=True,
579
+ )
580
  with c3:
581
+ st.markdown(
582
+ """
583
  <div class="feature-card">
584
+ <span class="emoji-icon">🔎</span>
585
+ <h3>Reverse Search</h3>
586
+ <p>SerpAPI + Google Reverse Image API to see where else the image appears online.</p>
587
  </div>
588
+ """,
589
+ unsafe_allow_html=True,
590
+ )
591
+
592
+ _, mid, _ = st.columns([1, 2, 1])
593
+ with mid:
594
+ if st.button("🚀 START CHECKING REVIEWS", type="primary", use_container_width=True):
595
+ st.session_state["page"] = "detector"
596
  st.rerun()
597
 
598
+
599
+ # ------------------- UI: DETECTOR -------------------
600
+ def detector_page(squad):
 
 
 
601
  c1, c2 = st.columns([3, 1])
602
  with c1:
603
+ st.markdown("### 🛒 Select Platform")
604
  platform = st.selectbox(
605
+ "Platform", ["Amazon", "Flipkart", "Zomato", "Swiggy", "Myntra", "Other"],
 
606
  label_visibility="collapsed",
 
607
  )
 
608
  with c2:
609
+ if st.button("⬅️ Back Home"):
610
+ st.session_state["page"] = "landing"
611
  st.rerun()
612
 
613
  st.divider()
614
 
615
+ tab_text, tab_img = st.tabs(["📝 Text Review", "📸 Product Image"])
 
 
 
 
 
616
 
617
+ # -------- TEXT TAB --------
618
+ with tab_text:
619
+ col_left, col_right = st.columns([2, 1])
620
+ with col_left:
621
+ txt = st.text_area(
 
 
622
  "Paste Review Here:",
623
+ height=180,
624
+ placeholder="Example: I ordered this yesterday and it exceeded expectations...",
 
625
  )
626
+ with col_right:
627
+ st.info("Tip: Paste full review text for more accurate analysis.")
628
+ if st.button("Analyze Text", type="primary", use_container_width=True):
629
+ if not txt.strip():
630
+ st.error("Please paste a review first.")
 
 
 
 
 
 
631
  else:
632
+ with st.spinner("Analyzing review..."):
633
+ res = check_text(txt.strip(), squad)
634
+ st.session_state["text_res"] = res
635
+ st.session_state["text_raw"] = txt.strip()
636
+ st.session_state["platform"] = platform
637
+
638
+ if "text_res" in st.session_state:
639
+ res = st.session_state["text_res"]
640
  if res.get("error"):
641
+ st.error("Text models failed to load. Check HF_TOKEN.")
642
  else:
643
+ stats = res["stats"]
644
  st.markdown("---")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
645
  k1, k2, k3 = st.columns(3)
646
+ color = "red" if res["bot_score"] > 50 else "green"
647
  k1.markdown(
648
+ f'<div class="stat-box"><div class="stat-num" style="color:{color}">{res["bot_score"]:.0f}%</div><div class="stat-txt">Bot Chance</div></div>',
649
+ unsafe_allow_html=True,
 
 
 
650
  )
651
  k2.markdown(
652
+ f'<div class="stat-box"><div class="stat-num">{res["grammar_score"]:.0f}%</div><div class="stat-txt">Grammar</div></div>',
653
+ unsafe_allow_html=True,
 
 
 
654
  )
655
  k3.markdown(
656
+ f'<div class="stat-box"><div class="stat-num">{stats["word_count"]}</div><div class="stat-txt">Total Words</div></div>',
657
+ unsafe_allow_html=True,
 
 
 
658
  )
659
 
 
660
  g1, g2, g3 = st.columns(3)
661
  with g1:
662
+ st.pyplot(breakdown_chart(res))
 
 
663
  with g2:
664
+ st.pyplot(sentence_length_hist(stats))
 
 
665
  with g3:
666
+ st.pyplot(word_frequency_chart(stats))
 
 
 
 
 
 
 
 
 
 
667
 
668
+ st.markdown("#### Explanation")
669
+ st.markdown(explain_text(res, stats))
 
670
 
671
+ st.markdown("---")
672
+ if st.button("Generate PDF (Text Only)", use_container_width=False):
673
+ pdf = generate_pdf(
674
+ st.session_state.get("text_raw", ""),
 
 
 
 
 
 
 
 
 
 
675
  res,
676
+ st.session_state.get("img_res"),
677
+ st.session_state.get("reverse_search_results"),
678
+ st.session_state.get("platform", platform),
679
  )
680
+ st.session_state["pdf_text"] = pdf
681
+ if "pdf_text" in st.session_state:
682
  st.download_button(
683
+ "⬇️ Download Text Analysis PDF",
684
+ data=st.session_state["pdf_text"],
685
+ file_name="review_validator_text.pdf",
686
  mime="application/pdf",
687
  )
 
 
 
 
 
 
688
 
689
+ # -------- IMAGE TAB --------
690
+ with tab_img:
691
+ col_in, col_out = st.columns([1, 1])
692
  with col_in:
693
  st.markdown("#### Step 1: Provide Image")
694
  method = st.radio(
695
+ "Input type",
696
  ["Paste URL", "Upload File"],
697
  horizontal=True,
698
+ label_visibility="collapsed",
699
  )
700
 
701
  with st.form("image_form"):
702
  img_file = None
703
+ url = ""
704
+ auto_reverse = False
705
 
706
  if method == "Paste URL":
707
+ url = st.text_input("Image URL")
708
+ auto_reverse = st.checkbox(
709
+ "Also perform Google Reverse Image Search on this URL",
710
+ value=True,
711
+ )
712
  else:
713
+ img_file = st.file_uploader(
714
+ "Upload Image", type=["jpg", "jpeg", "png"]
715
+ )
 
 
716
 
717
+ submitted = st.form_submit_button(
718
+ "Analyze Image", type="primary", use_container_width=True
719
+ )
 
 
 
 
 
 
720
 
721
+ if submitted:
722
+ target = None
723
+ err_msg = None
724
+ if method == "Paste URL":
725
+ if not url.strip():
726
+ st.error("Please enter a valid image URL.")
727
  else:
728
+ img, err = get_image_from_url(url.strip())
729
+ if err:
730
+ st.error(err)
731
+ else:
732
+ target = img
733
+ st.session_state["last_image_url"] = url.strip()
734
+ else:
735
+ if not img_file:
736
+ st.error("Please upload an image file.")
737
+ else:
738
+ try:
739
+ target = Image.open(img_file).convert("RGB")
740
+ st.session_state["last_image_url"] = None
741
+ except Exception as e:
742
+ st.error(f"Error reading image: {e}")
743
+
744
+ if target is not None:
745
+ with st.spinner("Running image authenticity checks..."):
746
+ img_res = check_image(target, squad)
747
+ st.session_state["current_img"] = target
748
+ st.session_state["img_res"] = img_res
749
+
750
+ # Auto reverse search if URL + checkbox + key available
751
+ if method == "Paste URL" and auto_reverse:
752
+ serp_key = get_serpapi_key()
753
+ if not serp_key:
754
+ st.warning(
755
+ "SerpAPI key not configured. Skipping reverse image search."
756
+ )
757
+ else:
758
+ with st.spinner("Performing reverse image search via SerpAPI..."):
759
+ rev, err = serpapi_reverse_image_search(
760
+ url.strip(), serp_key
761
+ )
762
+ if err:
763
+ st.error(err)
764
+ elif rev:
765
+ matches = rev.get("visual_matches", [])
766
+ st.session_state["reverse_search_results"] = {
767
+ "best_guess": rev.get("best_guess"),
768
+ "count": len(matches),
769
+ "top_links": [
770
+ {
771
+ "title": m.get("title"),
772
+ "link": m.get("link"),
773
+ "source": m.get("source"),
774
+ }
775
+ for m in matches[:5]
776
+ ],
777
+ }
778
+
779
+ with col_out:
780
+ if "current_img" in st.session_state:
781
  st.image(
782
+ st.session_state["current_img"],
783
  use_column_width=True,
784
+ caption="Analyzed Image",
785
  )
786
 
787
+ if "img_res" in st.session_state:
788
+ data = st.session_state["img_res"]
789
+ ai = data["ai_chance"]
790
+ st.markdown("#### Step 2: Image Analysis Result")
791
+ st.markdown(
792
+ f"""
 
 
793
  <div class="analysis-box">
794
+ <strong>Visual Caption:</strong><br/>
795
+ {data['caption']}
796
  </div>
797
+ """,
798
+ unsafe_allow_html=True,
799
+ )
800
 
801
+ if data["match"] < 0.6:
802
+ st.warning(
803
+ "Detectors disagree significantly. Image may be heavily edited or ambiguous."
804
+ )
805
+ elif ai > 60:
806
+ st.error(f"Likely AI-generated image ({ai:.0f}% probability).")
807
  else:
808
+ st.success(
809
+ f"Likely real photograph ({100 - ai:.0f}% probability)."
810
+ )
811
+
812
+ st.progress(ai / 100.0, text=f"AI-likeness: {ai:.1f}%")
813
+ with st.expander("Detector Breakdown"):
814
+ st.write(f"Model A: {data['score_a']:.1f}%")
815
+ st.write(f"Model B: {data['score_b']:.1f}%")
816
+ st.write(f"Agreement: {data['match']*100:.1f}%")
817
 
818
+ st.markdown("---")
819
+ st.markdown("### 🔎 Reverse Image Search (Manual Call)")
820
+
821
+ r_col1, r_col2 = st.columns([2, 1])
822
+ with r_col1:
823
+ manual_url = st.text_input(
824
+ "Public image URL (optional, for manual reverse search):",
825
+ value=st.session_state.get("last_image_url", "") or "",
826
+ )
827
+ with r_col2:
828
+ if st.button("Run Reverse Search", use_container_width=True):
829
+ key = get_serpapi_key()
830
+ if not key:
831
+ st.error("SerpAPI key not configured.")
832
+ elif not manual_url.strip():
833
+ st.error("Please enter an image URL.")
834
  else:
835
+ with st.spinner("Calling SerpAPI Google Reverse Image API..."):
836
+ rev, err = serpapi_reverse_image_search(
837
+ manual_url.strip(), key
838
+ )
839
+ if err:
840
+ st.error(err)
841
+ elif rev:
842
+ matches = rev.get("visual_matches", [])
843
+ st.success("Reverse image search completed.")
844
+ if rev.get("best_guess"):
845
+ st.write(f"Google best guess: {rev['best_guess']}")
846
+ st.write(f"Total visual matches: {len(matches)}")
847
+ if matches:
848
+ st.markdown("**Top sources:**")
849
+ for m in matches[:5]:
850
+ st.markdown(
851
+ f"- [{m.get('title') or m.get('link')}]({m.get('link')}) _(source: {m.get('source')})_"
852
+ )
853
+ st.session_state["reverse_search_results"] = {
854
+ "best_guess": rev.get("best_guess"),
855
+ "count": len(matches),
856
+ "top_links": [
857
+ {
858
+ "title": m.get("title"),
859
+ "link": m.get("link"),
860
+ "source": m.get("source"),
861
+ }
862
+ for m in matches[:5]
863
+ ],
864
+ }
865
+
866
+ st.markdown("---")
867
+ if st.button("Generate Full PDF (Text + Image + Reverse)", use_container_width=False):
868
+ pdf_full = generate_pdf(
869
+ st.session_state.get("text_raw", ""),
870
+ st.session_state.get("text_res"),
871
+ st.session_state.get("img_res"),
872
+ st.session_state.get("reverse_search_results"),
873
+ st.session_state.get("platform", "Unknown"),
874
+ )
875
+ st.session_state["pdf_full"] = pdf_full
876
+
877
+ if "pdf_full" in st.session_state:
878
+ st.download_button(
879
+ "⬇️ Download Full Analysis PDF",
880
+ data=st.session_state["pdf_full"],
881
+ file_name="review_validator_full.pdf",
882
+ mime="application/pdf",
883
+ )
884
+
885
 
886
+ # ------------------- MAIN -------------------
887
def main():
    """Application entry point.

    Injects the custom CSS, ensures the navigation state exists, loads the
    AI model bundle (with a spinner), and then routes to either the landing
    page or the detector page based on session state.
    """
    inject_custom_css()

    # First visit: default to the landing page.
    if "page" not in st.session_state:
        st.session_state["page"] = "landing"

    # Model loading is cached upstream; the spinner only shows on cold start.
    with st.spinner("Loading AI models..."):
        squad, err = load_ai_squad()

    # The app cannot function without the models; surface the loader's
    # error message and stop rendering.
    if not squad:
        st.error(err)
        return

    # Simple two-page router driven by session state.
    current_page = st.session_state["page"]
    if current_page == "landing":
        landing_page()
    else:
        detector_page(squad)
905
 
906
# Standard script guard: launch the Streamlit app only when this file is
# executed directly (not when imported as a module).
if __name__ == "__main__":
    main()