akshit4857 commited on
Commit
4f47bb5
·
verified ·
1 Parent(s): 67ede07

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +678 -933
src/streamlit_app.py CHANGED
@@ -1,1162 +1,907 @@
1
  """
2
- Review Validator - Advanced Edition
3
- With explainability graphs + PDF report download + Google Reverse Image Search
4
  """
5
 
6
  import os
7
  import io
8
- import time
9
- import base64
 
10
  import numpy as np
11
  import streamlit as st
12
  from transformers import pipeline, logging as hf_logging
13
  from PIL import Image
14
  import matplotlib
15
- matplotlib.use('Agg') # use non-GUI backend for Streamlit
16
  import matplotlib.pyplot as plt
17
  import requests
18
- import math
19
- import warnings
20
- import re
21
- from collections import Counter
22
- from datetime import datetime
23
- import textwrap
24
-
25
- # Try to import ReportLab for PDF generation
26
- try:
27
- from reportlab.lib.pagesizes import A4
28
- from reportlab.pdfgen import canvas
29
- HAVE_REPORTLAB = True
30
- except ImportError:
31
- HAVE_REPORTLAB = False
32
-
33
- # --- Setup: Silence the technical noise ---
34
  warnings.filterwarnings("ignore")
35
  hf_logging.set_verbosity_error()
36
  os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
 
37
 
38
  st.set_page_config(
39
  page_title="Review Validator",
40
  page_icon="🛡️",
41
  layout="wide",
42
- initial_sidebar_state="collapsed"
43
  )
44
 
45
- # ==========================================
46
- # 🧠 MODELS (Better public detectors)
47
- # ==========================================
48
-
49
- # 1. Text AI Detector: ModernBERT-based detector (0 = human, 1 = AI)
50
- MODEL_FAKE = "AICodexLab/answerdotai-ModernBERT-base-ai-detector"
51
-
52
- # 2. Mood Scanner: Sentiment model
53
  MODEL_MOOD = "cardiffnlp/twitter-roberta-base-sentiment-latest"
54
-
55
- # 3. Grammar Checker: Acceptability (CoLA)
56
  MODEL_GRAMMAR = "textattack/roberta-base-CoLA"
57
-
58
- # 4. Image Detector: Modern real vs fake classifier
59
- MODEL_IMG_MAIN = "prithivMLmods/Mirage-Photo-Classifier"
60
-
61
- # 5. Image Captioner (Optional): Describes the image content
62
  MODEL_CAPTION = "Salesforce/blip-image-captioning-base"
63
 
64
- # ==========================================
65
 
66
- # --- Robust Secrets Management (NON-CRASHING) ---
67
- def get_token():
68
- """
69
- Safely retrieves HF_TOKEN.
70
- Priority 1: Env var (Spaces)
71
- Priority 2: Streamlit Secrets (Local)
72
- Optional – app still runs if missing.
73
- """
74
  token = os.environ.get("HF_TOKEN")
75
  if token:
76
  return token
77
-
78
  try:
79
  if hasattr(st, "secrets") and "HF_TOKEN" in st.secrets:
80
  return st.secrets["HF_TOKEN"]
81
  except Exception:
82
  pass
83
-
84
  return None
85
 
 
86
  def get_serpapi_key():
87
- """
88
- Safely retrieves SERPAPI_KEY.
89
- Priority 1: Env var
90
- Priority 2: Streamlit Secrets
91
- """
92
  key = os.environ.get("SERPAPI_KEY")
93
  if key:
94
  return key
95
-
96
  try:
97
  if hasattr(st, "secrets") and "SERPAPI_KEY" in st.secrets:
98
  return st.secrets["SERPAPI_KEY"]
99
  except Exception:
100
  pass
101
-
102
  return None
103
 
104
- HF_TOKEN = get_token()
105
- SERPAPI_KEY = get_serpapi_key()
106
 
107
- # --- Custom CSS ---
 
 
 
108
  def inject_custom_css():
109
- st.markdown("""
 
110
  <style>
111
- .stApp {
112
- background-color: #FFFFFF;
113
- color: #333333;
114
- font-family: 'Helvetica Neue', sans-serif;
115
  }
116
-
117
- h1, h2, h3 { color: #2C3E50; }
118
- h1 { font-weight: 800; }
119
- h2 { font-weight: 600; }
120
-
121
  .hero-box {
122
- padding: 40px;
123
- background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
124
- border-radius: 20px;
125
- color: white;
126
- text-align: center;
127
- margin-bottom: 30px;
128
  }
129
- .hero-title { font-size: 3rem; font-weight: bold; margin-bottom: 10px; }
130
- .hero-subtitle { font-size: 1.2rem; opacity: 0.9; }
131
-
132
- .feature-card {
133
- background: #F8F9FA;
134
- padding: 20px;
135
- border-radius: 15px;
136
- border: 1px solid #EEEEEE;
137
- text-align: center;
138
- transition: transform 0.2s, box-shadow 0.2s;
139
  }
140
- .feature-card:hover {
141
- transform: translateY(-5px);
142
- border-color: #764ba2;
143
- box-shadow: 0 4px 12px rgba(0,0,0,0.06);
 
 
 
 
 
 
144
  }
145
- .emoji-icon { font-size: 3rem; margin-bottom: 10px; display: block; }
146
-
147
- .stat-box {
148
- text-align: center;
149
- padding: 15px;
150
- border-radius: 12px;
151
- background: white;
152
- box-shadow: 0 4px 6px rgba(0,0,0,0.05);
153
- border: 1px solid #EEE;
154
  }
155
- .stat-num { font-size: 24px; font-weight: 900; color: #333; }
156
- .stat-txt { font-size: 12px; text-transform: uppercase; color: #777; letter-spacing: 1px; }
157
-
158
- .analysis-box {
159
- background: #f0f7ff;
160
- border-left: 5px solid #4285F4;
161
- padding: 15px;
162
- border-radius: 5px;
163
- margin-top: 15px;
164
- }
165
-
166
- .warning-box {
167
- background: #fff6e5;
168
- border-left: 5px solid #ffb74d;
169
- padding: 10px 15px;
170
- border-radius: 5px;
171
- font-size: 0.85rem;
172
- margin-top: 8px;
173
- }
174
-
175
- .reverse-search-box {
176
- background: #f0fff4;
177
- border-left: 5px solid #48bb78;
178
- padding: 15px;
179
- border-radius: 5px;
180
- margin-top: 15px;
181
- }
182
-
183
- .result-item {
184
- background: white;
185
- padding: 12px;
186
- border-radius: 8px;
187
- margin: 8px 0;
188
- border: 1px solid #e2e8f0;
189
- transition: box-shadow 0.2s;
190
- }
191
- .result-item:hover {
192
- box-shadow: 0 2px 8px rgba(0,0,0,0.1);
193
- }
194
-
195
- .stButton>button {
196
- border-radius: 30px;
197
- font-weight: bold;
198
- border: none;
199
- padding: 0.6rem 2.2rem;
200
- transition: all 0.3s;
201
- }
202
- .stButton>button:hover {
203
- transform: translateY(-1px);
204
- box-shadow: 0 3px 8px rgba(0,0,0,0.15);
205
  }
206
  </style>
207
- """, unsafe_allow_html=True)
 
 
208
 
209
- # --- Load Models (Safe Mode, No Hard Crash) ---
 
210
  @st.cache_resource(show_spinner=False)
211
  def load_ai_squad():
212
- """
213
- Load all models. Never hard-crash the app.
214
- If some models fail, we still return partial squad.
215
- """
216
  squad = {}
217
- errors = []
 
218
 
219
- token_arg = {"token": HF_TOKEN} if HF_TOKEN else {}
220
-
221
- # TEXT MODELS
222
  try:
223
- squad['fake'] = pipeline(
224
- "text-classification",
225
- model=MODEL_FAKE,
226
- **token_arg
227
- )
228
- except Exception as e:
229
- errors.append(f"Fake detector: {e}")
230
 
231
- try:
232
- squad['mood'] = pipeline(
233
- "sentiment-analysis",
234
- model=MODEL_MOOD,
235
- tokenizer=MODEL_MOOD,
236
- **token_arg
237
- )
238
- except Exception as e:
239
- errors.append(f"Mood model: {e}")
240
 
241
- try:
242
- squad['grammar'] = pipeline(
243
- "text-classification",
244
- model=MODEL_GRAMMAR,
245
- **token_arg
246
- )
247
- except Exception as e:
248
- errors.append(f"Grammar model: {e}")
249
 
250
- # IMAGE MODELS
251
- try:
252
- squad['img_main'] = pipeline(
253
- "image-classification",
254
- model=MODEL_IMG_MAIN,
255
- **token_arg
256
- )
257
- except Exception as e:
258
- errors.append(f"Image main model: {e}")
 
 
 
259
 
260
- try:
261
- squad['caption'] = pipeline(
262
- "image-to-text",
263
- model=MODEL_CAPTION,
264
- **token_arg
265
- )
266
  except Exception as e:
267
- errors.append(f"Caption model: {e}")
268
-
269
- if not squad:
270
- return None, "No models could be loaded. Check internet / HF token / requirements."
271
-
272
- err_msg = "\n".join(errors) if errors else None
273
- return squad, err_msg
274
-
275
- # --- Utility: Basic text stats for explainability ---
276
- STOPWORDS = set([
277
- "the","a","an","is","are","am","and","or","in","on","at","of","to","for",
278
- "this","that","it","was","with","as","by","be","from","has","have","had",
279
- "i","you","we","they","he","she","my","our","their","your"
280
- ])
281
-
282
- def split_sentences(text: str):
283
- # simple sentence splitter
284
- parts = re.split(r'[.!?]+', text)
285
- return [s.strip() for s in parts if s.strip()]
286
-
287
- def tokenize_words(text: str):
288
- tokens = re.findall(r"[A-Za-z']+", text.lower())
289
- return tokens
290
-
291
- def analyze_text_structure(text: str):
292
- sentences = split_sentences(text)
293
- words = tokenize_words(text)
294
-
295
- num_sentences = max(len(sentences), 1)
296
- num_words = len(words)
297
-
298
- sent_lengths = [len(tokenize_words(s)) for s in sentences] or [0]
299
- avg_sent_len = sum(sent_lengths) / len(sent_lengths)
300
- var_sent_len = float(np.var(sent_lengths)) if len(sent_lengths) > 1 else 0.0
301
-
302
- # vocabulary diversity
303
- vocab = set(w for w in words if w not in STOPWORDS)
304
  vocab_size = len(vocab)
305
- ttr = (vocab_size / num_words) if num_words > 0 else 0.0 # type-token ratio
306
-
307
- # top words
308
- filtered = [w for w in words if w not in STOPWORDS]
309
- counter = Counter(filtered)
310
- top_words = counter.most_common(10)
311
-
312
  return {
313
- "num_sentences": num_sentences,
314
- "num_words": num_words,
315
- "avg_sentence_len": avg_sent_len,
316
- "var_sentence_len": var_sent_len,
317
- "ttr": ttr,
318
- "top_words": top_words,
319
  "sentence_lengths": sent_lengths,
 
320
  }
321
 
322
- def explain_text(res, stats, strict_mode: bool):
323
- """
324
- Heuristic explanation based on AI score + grammar + structure.
325
- Returns list of bullet strings.
326
- """
327
  bot = res["bot_score"]
328
  gram = res["grammar_score"]
329
  mood = res["mood_label"]
330
- avg_len = stats["avg_sentence_len"]
331
- var_len = stats["var_sentence_len"]
332
- ttr = stats["ttr"]
333
-
334
- reasons = []
335
 
336
- # AI-likeness
337
- if bot >= 85:
338
- reasons.append("High AI-likeness score model strongly associates this style with AI text.")
339
- elif bot >= 65:
340
- reasons.append("Moderate AI-likeness score – some patterns resemble AI-generated writing.")
 
 
 
341
  else:
342
- reasons.append("Low AI-likeness score – style leans closer to typical human-written reviews.")
343
-
344
- # Grammar
345
- if gram >= 85 and bot >= 70:
346
- reasons.append("Grammar is near-perfect and very consistent, which is common in AI text.")
347
- elif gram >= 85 and bot < 50:
348
- reasons.append("Grammar is very clean but the AI score is low, could be a careful human reviewer.")
349
- elif gram < 60:
350
- reasons.append("Grammar has noticeable imperfections, more typical of casual human writing.")
351
-
352
- # Sentence structure
353
- if var_len < 5 and avg_len > 12 and bot >= 70:
354
- reasons.append("Sentence length is very uniform and long, which often appears in AI outputs.")
355
- elif var_len > 15:
356
- reasons.append("Sentence length varies a lot, which is more natural for human writing.")
357
-
358
- # Vocabulary diversity
359
- if ttr < 0.3 and bot >= 70:
360
- reasons.append("Vocabulary diversity is low despite longer text, hinting at templated or generated style.")
361
- elif ttr > 0.45:
362
- reasons.append("Vocabulary diversity is relatively high, which often indicates a human author.")
363
 
364
- # Mood-based explanation
365
- reasons.append(f"Overall sentiment detected: **{mood}**.")
 
 
 
 
 
 
 
 
 
 
366
 
367
- if strict_mode:
368
- reasons.append("Strict mode: thresholds are higher, so AI flags are more conservative but precise.")
 
 
 
 
 
 
 
 
369
 
370
- return reasons
371
 
372
- # --- Logic: Analyze Text ---
373
  def check_text(text, squad):
374
- if 'fake' not in squad:
375
- return {
376
- "bot_score": 0,
377
- "mood_label": "Unavailable",
378
- "grammar_score": 0,
379
- "mood_confidence": 0,
380
- "error": True,
381
- "error_msg": "AI text detector not loaded."
382
- }
383
-
384
- # 1. Bot / AI Check
385
- res_fake = squad['fake'](text[:512])[0]
386
- raw_label = res_fake.get('label', '1')
387
- raw_score = float(res_fake.get('score', 0.5))
388
 
389
- try:
390
- label_id = int(raw_label)
391
- except ValueError:
392
- label_id = 1 if "1" in str(raw_label) else 0
393
 
394
- if label_id == 1:
395
- ai_prob = raw_score
396
- else:
397
- ai_prob = 1 - raw_score
398
-
399
- bot_score = ai_prob * 100.0
400
-
401
- # 2. Mood
402
  mood_label = "Unknown"
403
- if 'mood' in squad:
404
- try:
405
- res_mood = squad['mood'](text[:512])[0]
406
- mood_label = res_mood.get('label', 'Unknown')
407
- except Exception:
408
- mood_label = "Unknown"
 
 
 
 
409
 
410
- # 3. Grammar (CoLA)
411
- grammar_score = 50.0
412
- if 'grammar' in squad:
413
- try:
414
- res_grammar = squad['grammar'](text[:512])[0]
415
- glabel = res_grammar.get('label', 'LABEL_0')
416
- gscore = float(res_grammar.get('score', 0.5))
417
- grammar_score = (gscore if glabel == 'LABEL_1' else (1 - gscore)) * 100.0
418
- except Exception:
419
- grammar_score = 50.0
420
 
421
  return {
422
- "bot_score": bot_score,
423
  "mood_label": mood_label,
424
- "grammar_score": grammar_score,
425
- "mood_confidence": 0,
426
  "error": False,
427
- "error_msg": None
428
  }
429
 
430
- # --- Logic: Analyze Image ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
431
  def check_image(img, squad):
432
- caption_text = "Caption unavailable"
433
- ai_chance = 0.0
 
 
434
 
435
- if 'img_main' in squad:
436
  try:
437
- preds = squad['img_main'](img)
438
- if isinstance(preds, list) and preds:
439
- best = max(preds, key=lambda x: x.get('score', 0))
440
- label = str(best.get('label', '')).lower()
441
- score = float(best.get('score', 0.5))
442
-
443
- if "fake" in label or "ai" in label:
444
- ai_prob = score
445
- elif "real" in label:
446
- ai_prob = 1 - score
447
- else:
448
- ai_prob = score
449
 
450
- ai_chance = ai_prob * 100.0
451
- except Exception:
452
- ai_chance = 0.0
 
 
 
 
 
 
453
 
454
- if 'caption' in squad:
455
  try:
456
- cap_res = squad['caption'](img)
457
- if isinstance(cap_res, list) and cap_res:
458
- caption_text = cap_res[0].get('generated_text', caption_text)
459
  except Exception:
460
  pass
461
 
 
 
 
462
  return {
463
- "ai_chance": ai_chance,
464
- "match": 1.0,
465
- "score_a": ai_chance,
466
- "score_b": ai_chance,
467
- "caption": caption_text
468
  }
469
 
470
- def get_image_from_url(url):
471
- try:
472
- headers = {
473
- 'User-Agent': (
474
- 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
475
- 'AppleWebKit/537.36 (KHTML, like Gecko) '
476
- 'Chrome/91.0.4472.124 Safari/537.36'
477
- )
478
- }
479
- r = requests.get(url, headers=headers, timeout=7, stream=True)
480
- if r.status_code != 200:
481
- return None
482
- return Image.open(io.BytesIO(r.content)).convert("RGB")
483
- except Exception:
484
- return None
485
 
486
- # --- GOOGLE REVERSE IMAGE SEARCH (SerpAPI) ---
487
- def reverse_image_search(image_obj):
488
  """
489
- Performs Google reverse image search using SerpAPI.
490
- Uses Google Reverse Image Search engine (not Google Lens).
491
-
492
- Args:
493
- image_obj: PIL Image object
494
-
495
- Returns:
496
- dict with 'success', 'results', 'error' keys
497
  """
498
- if not SERPAPI_KEY:
499
- return {
500
- "success": False,
501
- "error": "SERPAPI_KEY not configured. Add it to secrets or environment variables.",
502
- "results": []
503
- }
504
-
505
  try:
506
- # Method 1: Try using serpapi library (most reliable)
507
- try:
508
- from serpapi import GoogleSearch
509
-
510
- # Resize image to reasonable size
511
- max_size = 512
512
- img_copy = image_obj.copy()
513
- if img_copy.width > max_size or img_copy.height > max_size:
514
- img_copy.thumbnail((max_size, max_size), Image.LANCZOS)
515
-
516
- # Convert to base64
517
- buffered = io.BytesIO()
518
- img_copy.save(buffered, format="JPEG", quality=75, optimize=True)
519
- img_base64 = base64.b64encode(buffered.getvalue()).decode('utf-8')
520
-
521
- # Use Google Reverse Image Search (not Lens)
522
- params = {
523
- "engine": "google_reverse_image",
524
- "api_key": SERPAPI_KEY,
525
- "image_url": f"data:image/jpeg;base64,{img_base64}"
526
- }
527
-
528
- search = GoogleSearch(params)
529
- results_data = search.get_dict()
530
-
531
- # Check for errors
532
- if "error" in results_data:
533
- # Try fallback method
534
- raise Exception(results_data['error'])
535
-
536
- # Extract inline images or image results
537
- image_results = results_data.get("inline_images", [])
538
- if not image_results:
539
- image_results = results_data.get("image_results", [])
540
-
541
- results = []
542
- for match in image_results[:10]:
543
- results.append({
544
- "title": match.get("title", match.get("source", "No title")),
545
- "link": match.get("link", match.get("original", "")),
546
- "source": match.get("source", "Unknown"),
547
- "thumbnail": match.get("thumbnail", match.get("image", ""))
548
- })
549
-
550
- if results:
551
- return {
552
- "success": True,
553
- "results": results,
554
- "error": None
555
- }
556
-
557
- except ImportError:
558
- pass
559
- except Exception:
560
- pass
561
-
562
- # Method 2: Upload to temporary hosting and use URL
563
- # Using imgbb as temporary image host
564
- try:
565
- # Resize image
566
- max_size = 512
567
- img_copy = image_obj.copy()
568
- if img_copy.width > max_size or img_copy.height > max_size:
569
- img_copy.thumbnail((max_size, max_size), Image.LANCZOS)
570
-
571
- # Convert to base64
572
- buffered = io.BytesIO()
573
- img_copy.save(buffered, format="JPEG", quality=75, optimize=True)
574
- img_base64 = base64.b64encode(buffered.getvalue()).decode('utf-8')
575
-
576
- # Upload to imgbb (free, no account needed)
577
- imgbb_response = requests.post(
578
- "https://api.imgbb.com/1/upload",
579
- data={
580
- "key": "d2f1d3e8c3f5d85e5e9c8b9f7d4f0b68", # Public demo key
581
- "image": img_base64
582
- },
583
- timeout=15
584
- )
585
-
586
- if imgbb_response.status_code == 200:
587
- img_url = imgbb_response.json()['data']['url']
588
-
589
- # Now use this URL with SerpAPI
590
- params = {
591
- "engine": "google_reverse_image",
592
- "image_url": img_url,
593
- "api_key": SERPAPI_KEY
594
- }
595
-
596
- response = requests.get(
597
- "https://serpapi.com/search",
598
- params=params,
599
- timeout=30
600
- )
601
-
602
- if response.status_code == 200:
603
- data = response.json()
604
-
605
- # Extract results
606
- image_results = data.get("inline_images", [])
607
- if not image_results:
608
- image_results = data.get("image_results", [])
609
-
610
- results = []
611
- for match in image_results[:10]:
612
- results.append({
613
- "title": match.get("title", match.get("source", "No title")),
614
- "link": match.get("link", match.get("original", "")),
615
- "source": match.get("source", "Unknown"),
616
- "thumbnail": match.get("thumbnail", match.get("image", ""))
617
- })
618
-
619
- if results:
620
- return {
621
- "success": True,
622
- "results": results,
623
- "error": None
624
- }
625
- except Exception:
626
- pass
627
-
628
- # If all methods fail, return appropriate error
629
- return {
630
- "success": False,
631
- "error": "Could not perform reverse search. Possible reasons: API key invalid, rate limit exceeded, or service temporarily unavailable. Check your SerpAPI dashboard at serpapi.com/dashboard",
632
- "results": []
633
  }
634
-
635
- except requests.exceptions.Timeout:
636
- return {
637
- "success": False,
638
- "error": "Request timeout. Please try again with a smaller image.",
639
- "results": []
 
 
 
 
 
 
 
640
  }
 
641
  except Exception as e:
642
- return {
643
- "success": False,
644
- "error": f"Reverse search failed: {str(e)}",
645
- "results": []
646
- }
647
-
648
- # --- Plotting ---
649
-
650
- def breakdown_chart(stats):
651
- labels = ['AI-Likeness', 'Grammar Quality']
652
- values = [stats['bot_score'], stats['grammar_score']]
653
 
654
- fig, ax = plt.subplots(figsize=(4, 2))
655
- y_pos = np.arange(len(labels))
656
 
657
- ax.barh(y_pos, values, align='center', height=0.6)
658
- ax.set_yticks(y_pos)
 
 
 
 
 
 
659
  ax.set_yticklabels(labels)
660
  ax.invert_yaxis()
661
- ax.set_xlabel('Score (0-100)')
662
  ax.set_xlim(0, 100)
663
-
664
- ax.spines['top'].set_visible(False)
665
- ax.spines['right'].set_visible(False)
666
- ax.spines['left'].set_visible(False)
667
- ax.spines['bottom'].set_color('#DDD')
668
-
669
  plt.tight_layout()
670
  return fig
671
 
672
- def sentence_length_chart(stats):
673
- lens = stats["sentence_lengths"]
674
- fig, ax = plt.subplots(figsize=(4, 2))
675
- ax.hist(lens, bins=min(len(lens), 8) or 1, edgecolor='black')
676
- ax.set_xlabel("Sentence length (words)")
677
- ax.set_ylabel("Count")
 
 
 
 
678
  ax.set_title("Sentence Length Distribution")
679
  plt.tight_layout()
680
  return fig
681
 
682
- def word_freq_chart(stats):
683
- top_words = stats["top_words"]
684
- if not top_words:
685
- fig, ax = plt.subplots(figsize=(4, 2))
686
- ax.text(0.5, 0.5, "Not enough text", ha='center', va='center')
687
- ax.axis('off')
688
- return fig
689
-
690
- words, freqs = zip(*top_words)
691
- fig, ax = plt.subplots(figsize=(4, 2))
692
- x = np.arange(len(words))
693
- ax.bar(x, freqs)
694
- ax.set_xticks(x)
695
- ax.set_xticklabels(words, rotation=45, ha='right')
696
- ax.set_ylabel("Frequency")
697
- ax.set_title("Top Words (excluding stopwords)")
698
  plt.tight_layout()
699
  return fig
700
 
701
- # --- PDF REPORT GENERATION ---
702
 
703
- def generate_pdf_report(platform, review_text, text_res, text_stats, image_info, reverse_search_data=None):
704
- """
705
- Returns PDF bytes. Requires ReportLab.
706
- image_info: dict or None
707
- reverse_search_data: dict with reverse search results or None
708
- """
709
- buffer = io.BytesIO()
710
- c = canvas.Canvas(buffer, pagesize=A4)
711
- width, height = A4
712
- y = height - 50
713
-
714
- def write_line(text, font="Helvetica", size=10, leading=14):
715
- nonlocal y
716
- c.setFont(font, size)
717
- wrapped = textwrap.wrap(text, width=90)
718
- for line in wrapped:
719
- if y < 50:
720
- c.showPage()
721
- y = height - 50
722
- c.setFont(font, size)
723
- c.drawString(50, y, line)
724
- y -= leading
725
-
726
- # Header
727
- c.setFont("Helvetica-Bold", 16)
728
- c.drawString(50, y, "Review Validator Report")
729
- y -= 25
730
- c.setFont("Helvetica", 10)
731
- c.drawString(50, y, f"Generated on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
732
- y -= 15
733
- c.drawString(50, y, f"Platform: {platform}")
734
- y -= 25
735
-
736
- # Scores
737
- write_line("=== Text Analysis ===", font="Helvetica-Bold", size=12)
738
- write_line(f"AI-Likeness Score: {text_res['bot_score']:.1f}%")
739
- write_line(f"Grammar Quality: {text_res['grammar_score']:.1f}%")
740
- write_line(f"Sentiment: {text_res['mood_label']}")
741
- y -= 10
742
-
743
- # Structure stats
744
- write_line("Text Structure:", font="Helvetica-Bold", size=11)
745
- write_line(f"- Sentences: {text_stats['num_sentences']}")
746
- write_line(f"- Words: {text_stats['num_words']}")
747
- write_line(f"- Average sentence length: {text_stats['avg_sentence_len']:.1f} words")
748
- write_line(f"- Sentence length variance: {text_stats['var_sentence_len']:.1f}")
749
- write_line(f"- Vocabulary diversity (TTR): {text_stats['ttr']:.2f}")
750
- y -= 10
751
-
752
- # Review text
753
- write_line("Original Review:", font="Helvetica-Bold", size=11)
754
- write_line(review_text or "[empty review]")
755
- y -= 10
756
-
757
- # Image analysis
758
- if image_info is not None:
759
- write_line("=== Image Analysis ===", font="Helvetica-Bold", size=12)
760
- write_line(f"AI Probability: {image_info['ai_chance']:.1f}%")
761
- write_line(f"Caption (approx): {image_info['caption']}")
762
- y -= 10
763
-
764
- # Reverse search results
765
- if reverse_search_data and reverse_search_data.get('success'):
766
- write_line("=== Reverse Image Search Results ===", font="Helvetica-Bold", size=12)
767
- results = reverse_search_data.get('results', [])
768
- if results:
769
- write_line(f"Found {len(results)} matches online:")
770
- for i, result in enumerate(results[:5], 1):
771
- write_line(f"{i}. {result['title']}")
772
- write_line(f" Source: {result['source']}")
773
- write_line(f" Link: {result['link']}")
774
- y -= 5
775
- else:
776
- write_line("No matches found.")
777
- y -= 10
778
-
779
- c.showPage()
780
- c.save()
781
- pdf_bytes = buffer.getvalue()
782
- buffer.close()
 
 
 
 
 
 
 
 
 
 
 
 
 
783
  return pdf_bytes
784
 
785
- # --- PAGES ---
786
 
 
787
  def landing_page():
788
- st.markdown("""
 
789
  <div class="hero-box">
790
  <div class="hero-title">🛡️ Review Validator</div>
791
  <div class="hero-subtitle">
792
- Advanced AI-powered review and image analysis with graphs, explainability, reverse image search, and exportable reports.
793
  </div>
794
  </div>
795
- """, unsafe_allow_html=True)
 
 
796
 
797
  c1, c2, c3 = st.columns(3)
798
  with c1:
799
- st.markdown("""
 
800
  <div class="feature-card">
801
  <span class="emoji-icon">🤖</span>
802
- <h3>AI Text Detector</h3>
803
- <p>Modern models estimate whether a review looks AI-generated or human-written.</p>
804
  </div>
805
- """, unsafe_allow_html=True)
 
 
806
  with c2:
807
- st.markdown("""
 
808
  <div class="feature-card">
809
  <span class="emoji-icon">📸</span>
810
  <h3>Image Authenticity</h3>
811
- <p>Checks if product photos look real or AI-generated, with approximate captions.</p>
812
  </div>
813
- """, unsafe_allow_html=True)
 
 
814
  with c3:
815
- st.markdown("""
 
816
  <div class="feature-card">
817
- <span class="emoji-icon">🔍</span>
818
- <h3>Reverse Image Search</h3>
819
- <p>Find where the image appears online using Google reverse image search.</p>
820
  </div>
821
- """, unsafe_allow_html=True)
822
-
823
- st.write("")
824
- st.write("")
825
- col1, col2, col3 = st.columns([1, 2, 1])
826
- with col2:
827
- if st.button("🚀 START CHECKING REVIEWS", type="primary", use_container_width=True, key="start_btn"):
828
- st.session_state['page'] = 'detector'
829
  st.rerun()
830
 
831
- def detector_page(squad, warnings_text=None):
832
- # Initialize session state for platform if not exists
833
- if 'platform' not in st.session_state:
834
- st.session_state['platform'] = "Amazon"
835
-
836
- # Header & Selector
837
  c1, c2 = st.columns([3, 1])
838
  with c1:
839
- st.markdown("### 🛒 Select the Website")
840
  platform = st.selectbox(
841
- "Where is this review from?",
842
- ["Amazon", "Flipkart", "Zomato", "Swiggy", "Myntra", "Other"],
843
  label_visibility="collapsed",
844
- key="platform_selector"
845
  )
846
- st.session_state['platform'] = platform
847
  with c2:
848
- if st.button("⬅️ Back Home", key="back_home_btn"):
849
- st.session_state['page'] = 'landing'
850
  st.rerun()
851
 
852
  st.divider()
853
 
854
- if warnings_text:
855
- st.markdown(f"""
856
- <div class="warning-box">
857
- <strong>Note:</strong><br>{warnings_text}
858
- </div>
859
- """, unsafe_allow_html=True)
860
 
861
- tab1, tab2 = st.tabs(["📝 Check Review Text", "📸 Check Product Image"])
862
-
863
- # --- TEXT TAB ---
864
- with tab1:
865
- col1, col2 = st.columns([2, 1])
866
- with col1:
867
- txt_input = st.text_area(
868
  "Paste Review Here:",
869
- height=150,
870
- placeholder="Example: I ordered this yesterday and the quality is amazing...",
871
- key="txt_input"
872
  )
873
- with col2:
874
- st.info("💡 Tip: Paste the full review for the best result.")
875
- strict_mode = st.checkbox("Use Strict AI Mode (safer, but misses some cases)", value=True)
876
-
877
- if st.button("Analyze Text", type="primary", use_container_width=True, key="analyze_text_btn"):
878
- if txt_input.strip():
879
- with st.spinner("Analyzing text..."):
880
- res = check_text(txt_input, squad)
881
- stats = analyze_text_structure(txt_input)
882
- st.session_state['text_res'] = (res, stats, strict_mode, st.session_state['platform'], txt_input)
883
- st.rerun()
884
  else:
885
- st.warning("Please paste a review first.")
886
-
887
- if 'text_res' in st.session_state:
888
- res, stats, strict_mode_saved, platform_saved, review_text_saved = st.session_state['text_res']
889
-
 
 
 
890
  if res.get("error"):
891
- st.error(res.get("error_msg", "Text models failed to load."))
892
  else:
 
893
  st.markdown("---")
894
-
895
- bot_score = res['bot_score']
896
- grammar_score = res['grammar_score']
897
- mood_label = res['mood_label']
898
-
899
- # Thresholds
900
- if strict_mode_saved:
901
- t_high = 90
902
- t_mid = 70
903
- else:
904
- t_high = 75
905
- t_mid = 55
906
-
907
- if bot_score >= t_high:
908
- verdict_text = "🚨 Very likely AI-generated"
909
- verdict_type = "error"
910
- elif bot_score >= t_mid:
911
- verdict_text = "🤔 Suspicious / Mixed"
912
- verdict_type = "warning"
913
- else:
914
- verdict_text = "✅ Likely human-written"
915
- verdict_type = "success"
916
-
917
  k1, k2, k3 = st.columns(3)
918
- color = "red" if bot_score > 50 else "green"
919
  k1.markdown(
920
- f"""<div class="stat-box">
921
- <div class="stat-num" style="color:{color}">{bot_score:.0f}%</div>
922
- <div class="stat-txt">AI-Likeness</div>
923
- </div>""",
924
- unsafe_allow_html=True
925
  )
926
  k2.markdown(
927
- f"""<div class="stat-box">
928
- <div class="stat-num">{grammar_score:.0f}%</div>
929
- <div class="stat-txt">Grammar Quality</div>
930
- </div>""",
931
- unsafe_allow_html=True
932
  )
933
  k3.markdown(
934
- f"""<div class="stat-box">
935
- <div class="stat-num">{mood_label}</div>
936
- <div class="stat-txt">Sentiment</div>
937
- </div>""",
938
- unsafe_allow_html=True
939
  )
940
 
941
- st.write("")
942
  g1, g2, g3 = st.columns(3)
943
  with g1:
944
- st.markdown("#### 📊 Scores")
945
- fig = breakdown_chart(res)
946
- st.pyplot(fig, use_container_width=True)
947
  with g2:
948
- st.markdown("#### 📏 Sentence Lengths")
949
- fig2 = sentence_length_chart(stats)
950
- st.pyplot(fig2, use_container_width=True)
951
  with g3:
952
- st.markdown("#### 🔤 Top Words")
953
- fig3 = word_freq_chart(stats)
954
- st.pyplot(fig3, use_container_width=True)
955
-
956
- st.markdown("#### 💡 Verdict & Explanation")
957
- if verdict_type == "error":
958
- st.error(verdict_text)
959
- elif verdict_type == "warning":
960
- st.warning(verdict_text)
961
- else:
962
- st.success(verdict_text)
963
 
964
- reasons = explain_text(res, stats, strict_mode_saved)
965
- for r in reasons:
966
- st.markdown(f"- {r}")
967
 
968
- st.markdown(
969
- "<small>Note: These scores and explanations are signals, not absolute proof. "
970
- "Always combine them with your own judgement.</small>",
971
- unsafe_allow_html=True
972
- )
973
-
974
- # PDF report button
975
- st.write("")
976
- if HAVE_REPORTLAB:
977
- img_info_for_pdf = st.session_state.get("img_res_for_pdf", None)
978
- reverse_search_for_pdf = st.session_state.get("reverse_search_for_pdf", None)
979
- pdf_bytes = generate_pdf_report(
980
- platform_saved,
981
- review_text_saved,
982
  res,
983
- stats,
984
- img_info_for_pdf,
985
- reverse_search_for_pdf
986
  )
 
 
987
  st.download_button(
988
- "📄 Download PDF Report",
989
- data=pdf_bytes,
990
- file_name="review_validator_report.pdf",
991
  mime="application/pdf",
992
  )
993
- else:
994
- st.info("PDF report requires reportlab. Add `reportlab` to requirements.txt to enable export.")
995
-
996
- # --- IMAGE TAB ---
997
- with tab2:
998
- col_in, col_view = st.columns([1, 1])
999
 
 
 
 
1000
  with col_in:
1001
  st.markdown("#### Step 1: Provide Image")
1002
  method = st.radio(
1003
- "Input Method",
1004
  ["Paste URL", "Upload File"],
1005
  horizontal=True,
1006
- label_visibility="collapsed"
1007
  )
1008
 
1009
  with st.form("image_form"):
1010
  img_file = None
1011
- img_url = None
 
1012
 
1013
  if method == "Paste URL":
1014
- img_url = st.text_input("Paste Image Link:")
 
 
 
 
1015
  else:
1016
- img_file = st.file_uploader("Upload Image", type=['jpg', 'jpeg', 'png'])
1017
-
1018
- strict_img = st.checkbox("Use Strict AI Mode for Images", value=True, key="strict_img_check")
1019
- do_reverse_search = st.checkbox("🔍 Perform Reverse Image Search", value=True, key="reverse_search_check")
1020
- submitted = st.form_submit_button("Scan Image", type="primary")
1021
 
1022
- if submitted:
1023
- target_img = None
1024
- if method == "Paste URL" and img_url:
1025
- target_img = get_image_from_url(img_url)
1026
- elif method == "Upload File" and img_file:
1027
- try:
1028
- target_img = Image.open(img_file).convert("RGB")
1029
- except Exception:
1030
- target_img = None
1031
 
1032
- if target_img is None:
1033
- st.error("Could not read image. Try another link or file.")
 
 
 
 
1034
  else:
1035
- with st.spinner("Scanning image..."):
1036
- data = check_image(target_img, squad)
1037
- st.session_state['img_res'] = (data, strict_img)
1038
- st.session_state['current_img'] = target_img
1039
- # store a simplified version for PDF report
1040
- st.session_state['img_res_for_pdf'] = data
1041
-
1042
- # Perform reverse image search if requested
1043
- if do_reverse_search:
1044
- with st.spinner("Performing reverse image search..."):
1045
- reverse_results = reverse_image_search(target_img)
1046
- st.session_state['reverse_search'] = reverse_results
1047
- st.session_state['reverse_search_for_pdf'] = reverse_results
1048
- else:
1049
- st.session_state['reverse_search'] = None
1050
- st.session_state['reverse_search_for_pdf'] = None
1051
-
1052
- # Force rerun to show results
1053
- st.rerun()
1054
-
1055
- with col_view:
1056
- if 'current_img' in st.session_state:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1057
  st.image(
1058
- st.session_state['current_img'],
1059
  use_column_width=True,
1060
- caption="Analyzed Image"
1061
  )
1062
 
1063
- if 'img_res' in st.session_state:
1064
- data, strict_img = st.session_state['img_res']
1065
- ai_score = data['ai_chance']
1066
- caption = data['caption']
1067
-
1068
- st.markdown("#### Step 2: Analysis Results")
1069
-
1070
- st.markdown(f"""
1071
  <div class="analysis-box">
1072
- <strong>👁️ Visual Caption (approx):</strong><br>
1073
- <em>{caption}</em>
1074
  </div>
1075
- """, unsafe_allow_html=True)
1076
-
1077
- st.write("")
1078
 
1079
- if strict_img:
1080
- t_high = 90
1081
- t_mid = 70
 
 
 
1082
  else:
1083
- t_high = 80
1084
- t_mid = 60
 
 
 
 
 
 
 
1085
 
1086
- if ai_score >= t_high:
1087
- st.error(f"🤖 Very likely AI-generated ({ai_score:.0f}% AI score)")
1088
- elif ai_score >= t_mid:
1089
- st.warning(f"🤔 Suspicious / possibly AI ({ai_score:.0f}% AI score)")
 
 
 
 
 
 
 
 
 
 
 
 
1090
  else:
1091
- st.success(f"📸 Likely real photo ({100 - ai_score:.0f}% real score)")
1092
-
1093
- st.markdown("**Detector Details:**")
1094
- st.progress(ai_score / 100.0, text=f"AI probability: {ai_score:.1f}%")
1095
-
1096
- with st.expander("See raw detector scores"):
1097
- st.write(f"Image AI Score (model): {ai_score:.1f}%")
1098
-
1099
- # Display reverse search results
1100
- if 'reverse_search' in st.session_state and st.session_state['reverse_search'] is not None:
1101
- reverse_data = st.session_state['reverse_search']
1102
-
1103
- st.markdown("---")
1104
- st.markdown("#### 🔍 Reverse Image Search Results")
1105
-
1106
- if reverse_data['success']:
1107
- results = reverse_data['results']
1108
-
1109
- if results:
1110
- st.markdown(f"""
1111
- <div class="reverse-search-box">
1112
- <strong>✅ Found {len(results)} matches online</strong><br>
1113
- <small>This image appears on the following websites:</small>
1114
- </div>
1115
- """, unsafe_allow_html=True)
1116
-
1117
- for i, result in enumerate(results, 1):
1118
- with st.container():
1119
- st.markdown(f"""
1120
- <div class="result-item">
1121
- <strong>{i}. {result['title']}</strong><br>
1122
- <small>📍 Source: {result['source']}</small><br>
1123
- <small>🔗 <a href="{result['link']}" target="_blank">View Original</a></small>
1124
- </div>
1125
- """, unsafe_allow_html=True)
1126
- else:
1127
- st.markdown("""
1128
- <div class="reverse-search-box">
1129
- <strong>🆕 No matches found online</strong><br>
1130
- <small>This could mean the image is original/new, or not indexed by Google yet.</small>
1131
- </div>
1132
- """, unsafe_allow_html=True)
1133
- else:
1134
- st.error(f"❌ {reverse_data['error']}")
1135
- if "SERPAPI_KEY" in reverse_data['error']:
1136
- st.info("💡 To enable reverse image search:\n1. Sign up at https://serpapi.com/\n2. Add your API key to secrets or environment variables")
 
 
 
 
1137
 
1138
- # --- MAIN CONTROLLER ---
1139
  def main():
1140
  inject_custom_css()
1141
 
1142
- if 'page' not in st.session_state:
1143
- st.session_state['page'] = 'landing'
1144
 
1145
- with st.spinner("Loading AI models (first run can take some time)..."):
1146
  squad, err = load_ai_squad()
1147
 
1148
- if squad is None:
1149
- st.error(err or "Failed to load models.")
1150
- st.stop()
1151
-
1152
- warnings_text = None
1153
- if err:
1154
- warnings_text = "Some features may be limited:<br>" + err.replace("\n", "<br>")
1155
 
1156
- if st.session_state['page'] == 'landing':
1157
  landing_page()
1158
  else:
1159
- detector_page(squad, warnings_text=warnings_text)
 
1160
 
1161
  if __name__ == "__main__":
1162
- main()
 
1
  """
2
+ Review Validator - Final Version with SerpAPI Integration
 
3
  """
4
 
5
  import os
6
  import io
7
+ import warnings
8
+ from collections import Counter
9
+
10
  import numpy as np
11
  import streamlit as st
12
  from transformers import pipeline, logging as hf_logging
13
  from PIL import Image
14
  import matplotlib
 
15
  import matplotlib.pyplot as plt
16
  import requests
17
+
18
+ from reportlab.lib.pagesizes import A4
19
+ from reportlab.platypus import (
20
+ SimpleDocTemplate,
21
+ Paragraph,
22
+ Spacer,
23
+ Table,
24
+ TableStyle,
25
+ )
26
+ from reportlab.lib.styles import getSampleStyleSheet
27
+ from reportlab.lib import colors
28
+
29
+ # ------------------- SILENCE NOISE -------------------
 
 
 
30
  warnings.filterwarnings("ignore")
31
  hf_logging.set_verbosity_error()
32
  os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
33
+ matplotlib.use("Agg")
34
 
35
  st.set_page_config(
36
  page_title="Review Validator",
37
  page_icon="🛡️",
38
  layout="wide",
39
+ initial_sidebar_state="collapsed",
40
  )
41
 
42
+ # ------------------- MODEL NAMES -------------------
43
+ MODEL_FAKE = "openai-community/roberta-base-openai-detector"
 
 
 
 
 
 
44
  MODEL_MOOD = "cardiffnlp/twitter-roberta-base-sentiment-latest"
 
 
45
  MODEL_GRAMMAR = "textattack/roberta-base-CoLA"
46
+ MODEL_IMG_A = "dima806/ai_generated_image_detection"
47
+ MODEL_IMG_B = "umm-maybe/AI-image-detector"
 
 
 
48
  MODEL_CAPTION = "Salesforce/blip-image-captioning-base"
49
 
 
50
 
51
+ # ------------------- TOKENS / SECRETS -------------------
52
+ def get_hf_token():
 
 
 
 
 
 
53
  token = os.environ.get("HF_TOKEN")
54
  if token:
55
  return token
 
56
  try:
57
  if hasattr(st, "secrets") and "HF_TOKEN" in st.secrets:
58
  return st.secrets["HF_TOKEN"]
59
  except Exception:
60
  pass
 
61
  return None
62
 
63
+
64
  def get_serpapi_key():
 
 
 
 
 
65
  key = os.environ.get("SERPAPI_KEY")
66
  if key:
67
  return key
 
68
  try:
69
  if hasattr(st, "secrets") and "SERPAPI_KEY" in st.secrets:
70
  return st.secrets["SERPAPI_KEY"]
71
  except Exception:
72
  pass
 
73
  return None
74
 
 
 
75
 
76
+ HF_TOKEN = get_hf_token()
77
+
78
+
79
+ # ------------------- CSS -------------------
80
  def inject_custom_css():
81
+ st.markdown(
82
+ """
83
  <style>
84
+ .stApp {
85
+ background-color: #ffffff;
86
+ color: #333333;
87
+ font-family: "Helvetica Neue", sans-serif;
88
  }
89
+ h1 { color:#2C3E50; font-weight:800; }
90
+ h2 { color:#34495E; font-weight:600; }
 
 
 
91
  .hero-box {
92
+ padding:40px;
93
+ background:linear-gradient(135deg,#667eea 0%,#764ba2 100%);
94
+ border-radius:20px;
95
+ color:white;
96
+ text-align:center;
97
+ margin-bottom:30px;
98
  }
99
+ .hero-title{font-size:3rem;font-weight:bold;margin-bottom:10px;}
100
+ .hero-subtitle{font-size:1.2rem;opacity:0.9;}
101
+
102
+ .feature-card{
103
+ background:#F8F9FA;
104
+ padding:20px;
105
+ border-radius:15px;
106
+ border:1px solid #EEEEEE;
107
+ text-align:center;
108
+ transition:transform 0.2s;
109
  }
110
+ .feature-card:hover{transform:translateY(-5px);border-color:#764ba2;}
111
+ .emoji-icon{font-size:3rem;margin-bottom:10px;display:block;}
112
+
113
+ .stat-box{
114
+ text-align:center;
115
+ padding:15px;
116
+ border-radius:12px;
117
+ background:white;
118
+ box-shadow:0 4px 6px rgba(0,0,0,0.05);
119
+ border:1px solid #EEE;
120
  }
121
+ .stat-num{font-size:24px;font-weight:900;color:#333;}
122
+ .stat-txt{font-size:12px;text-transform:uppercase;color:#777;letter-spacing:1px;}
123
+
124
+ .analysis-box{
125
+ background:#f0f7ff;
126
+ border-left:5px solid #4285F4;
127
+ padding:15px;
128
+ border-radius:5px;
129
+ margin-top:15px;
130
  }
131
+ .stButton>button{
132
+ border-radius:30px;
133
+ font-weight:bold;
134
+ border:none;
135
+ padding:0.5rem 2rem;
136
+ transition:all 0.3s;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
137
  }
138
  </style>
139
+ """,
140
+ unsafe_allow_html=True,
141
+ )
142
 
143
+
144
+ # ------------------- LOAD MODELS -------------------
145
  @st.cache_resource(show_spinner=False)
146
  def load_ai_squad():
 
 
 
 
147
  squad = {}
148
+ if not HF_TOKEN:
149
+ return None, "HF_TOKEN missing. Set it in env or Streamlit secrets."
150
 
 
 
 
151
  try:
152
+ try:
153
+ squad["fake"] = pipeline(
154
+ "text-classification", model=MODEL_FAKE, token=HF_TOKEN
155
+ )
156
+ except Exception as e:
157
+ print("Fake model error:", e)
 
158
 
159
+ try:
160
+ squad["mood"] = pipeline(
161
+ "sentiment-analysis",
162
+ model=MODEL_MOOD,
163
+ tokenizer=MODEL_MOOD,
164
+ token=HF_TOKEN,
165
+ )
166
+ except Exception as e:
167
+ print("Mood model error:", e)
168
 
169
+ try:
170
+ squad["grammar"] = pipeline(
171
+ "text-classification", model=MODEL_GRAMMAR, token=HF_TOKEN
172
+ )
173
+ except Exception as e:
174
+ print("Grammar model error:", e)
 
 
175
 
176
+ try:
177
+ squad["img_a"] = pipeline(
178
+ "image-classification", model=MODEL_IMG_A, token=HF_TOKEN
179
+ )
180
+ squad["img_b"] = pipeline(
181
+ "image-classification", model=MODEL_IMG_B, token=HF_TOKEN
182
+ )
183
+ squad["caption"] = pipeline(
184
+ "image-to-text", model=MODEL_CAPTION, token=HF_TOKEN
185
+ )
186
+ except Exception as e:
187
+ print("Image model error:", e)
188
 
 
 
 
 
 
 
189
  except Exception as e:
190
+ return None, str(e)
191
+
192
+ return squad, None
193
+
194
+
195
+ # ------------------- TEXT HELPERS -------------------
196
+ def compute_text_stats(text: str):
197
+ sentences = [
198
+ s.strip()
199
+ for s in text.replace("!", ".").replace("?", ".").split(".")
200
+ if s.strip()
201
+ ]
202
+ words = text.split()
203
+ word_count = len(words)
204
+ sent_lengths = [len(s.split()) for s in sentences] if sentences else []
205
+ avg_sent_len = np.mean(sent_lengths) if sent_lengths else 0.0
206
+ vocab = {w.lower().strip(".,!?\"'") for w in words if w.strip()}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
207
  vocab_size = len(vocab)
208
+ ttr = (vocab_size / word_count * 100) if word_count > 0 else 0.0
209
+ cleaned = [w.lower().strip(".,!?\"'") for w in words if w.strip()]
210
+ common = Counter(cleaned).most_common(8)
 
 
 
 
211
  return {
212
+ "sentence_count": len(sentences),
213
+ "word_count": word_count,
214
+ "avg_sentence_length": avg_sent_len,
215
+ "vocab_size": vocab_size,
216
+ "type_token_ratio": ttr,
 
217
  "sentence_lengths": sent_lengths,
218
+ "top_words": common,
219
  }
220
 
221
+
222
+ def explain_text(res, stats):
223
+ lines = []
 
 
224
  bot = res["bot_score"]
225
  gram = res["grammar_score"]
226
  mood = res["mood_label"]
 
 
 
 
 
227
 
228
+ if bot > 70:
229
+ lines.append(
230
+ "The AI-likeness score is high, indicating that the review strongly resembles machine-generated text."
231
+ )
232
+ elif bot > 40:
233
+ lines.append(
234
+ "The AI-likeness score is in a borderline range, so the review should be treated with caution."
235
+ )
236
  else:
237
+ lines.append(
238
+ "The AI-likeness score is low, suggesting the review is likely human-written."
239
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
240
 
241
+ if gram > 80:
242
+ lines.append(
243
+ "Grammar quality is unusually clean and consistent, which sometimes correlates with AI-written or heavily edited content."
244
+ )
245
+ elif gram < 40:
246
+ lines.append(
247
+ "Grammar quality is weak, which can indicate spammy content but usually not advanced AI writing."
248
+ )
249
+ else:
250
+ lines.append(
251
+ "Grammar quality is moderate and falls within a typical human writing range."
252
+ )
253
 
254
+ lines.append(
255
+ f"The sentiment model detects a {mood.lower()} tone, which can be cross-checked with the context of the review."
256
+ )
257
+ lines.append(
258
+ f"The review contains {stats['sentence_count']} sentences and {stats['word_count']} words, with an average of {stats['avg_sentence_length']:.1f} words per sentence."
259
+ )
260
+ lines.append(
261
+ f"The vocabulary richness (type-token ratio) is approximately {stats['type_token_ratio']:.1f}%, indicating how repetitive or diverse the language is."
262
+ )
263
+ return "\n\n".join(lines)
264
 
 
265
 
 
266
  def check_text(text, squad):
267
+ if "fake" not in squad:
268
+ return {"error": True}
 
 
 
 
 
 
 
 
 
 
 
 
269
 
270
+ res_fake = squad["fake"](text[:512])[0]
271
+ bot = res_fake["score"] if res_fake["label"] == "Fake" else 1 - res_fake["score"]
 
 
272
 
 
 
 
 
 
 
 
 
273
  mood_label = "Unknown"
274
+ if "mood" in squad:
275
+ res_m = squad["mood"](text[:512])[0]
276
+ mood_label = res_m["label"]
277
+
278
+ grammar_score = 0.5
279
+ if "grammar" in squad:
280
+ res_g = squad["grammar"](text[:512])[0]
281
+ grammar_score = (
282
+ res_g["score"] if res_g["label"] == "LABEL_1" else 1 - res_g["score"]
283
+ )
284
 
285
+ stats = compute_text_stats(text)
 
 
 
 
 
 
 
 
 
286
 
287
  return {
288
+ "bot_score": bot * 100,
289
  "mood_label": mood_label,
290
+ "grammar_score": grammar_score * 100,
291
+ "stats": stats,
292
  "error": False,
 
293
  }
294
 
295
+
296
+ # ------------------- IMAGE HELPERS -------------------
297
+ def get_image_from_url(url: str):
298
+ """
299
+ Returns (PIL.Image or None, error_message or None)
300
+ Handles 403 cleanly instead of throwing exceptions.
301
+ """
302
+ try:
303
+ headers = {
304
+ "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
305
+ "AppleWebKit/537.36 (KHTML, like Gecko) "
306
+ "Chrome/120.0 Safari/537.36"
307
+ }
308
+ r = requests.get(url, headers=headers, timeout=10)
309
+ if r.status_code == 403:
310
+ return None, (
311
+ "The image host returned HTTP 403 (Forbidden). "
312
+ "This usually means the server is blocking automated downloads. "
313
+ "Download the image manually and upload it as a file instead."
314
+ )
315
+ if r.status_code != 200:
316
+ return None, f"Image host returned HTTP {r.status_code}."
317
+ img = Image.open(io.BytesIO(r.content)).convert("RGB")
318
+ return img, None
319
+ except Exception as e:
320
+ return None, f"Error fetching image: {e}"
321
+
322
+
323
  def check_image(img, squad):
324
+ score_a = 0.0
325
+ score_b = 0.0
326
+ caption = "Analysis unavailable."
327
+ ai_words = ["fake", "artificial", "ai", "generated"]
328
 
329
+ if "img_a" in squad:
330
  try:
331
+ for r in squad["img_a"](img):
332
+ if any(w in r["label"].lower() for w in ai_words):
333
+ score_a = max(score_a, r["score"])
334
+ except Exception as e:
335
+ print("img_a error:", e)
 
 
 
 
 
 
 
336
 
337
+ if "img_b" in squad:
338
+ try:
339
+ for r in squad["img_b"](img):
340
+ if any(w in r["label"].lower() for w in ai_words):
341
+ score_b = max(score_b, r["score"])
342
+ except Exception as e:
343
+ print("img_b error:", e)
344
+ else:
345
+ score_b = score_a
346
 
347
+ if "caption" in squad:
348
  try:
349
+ cap_res = squad["caption"](img)
350
+ caption = cap_res[0]["generated_text"]
 
351
  except Exception:
352
  pass
353
 
354
+ avg_ai = (score_a + score_b) / 2
355
+ match = 1.0 - abs(score_a - score_b)
356
+
357
  return {
358
+ "ai_chance": avg_ai * 100,
359
+ "match": match,
360
+ "score_a": score_a * 100,
361
+ "score_b": score_b * 100,
362
+ "caption": caption,
363
  }
364
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
365
 
366
+ # ------------------- SERPAPI REVERSE IMAGE -------------------
367
+ def serpapi_reverse_image_search(image_url: str, api_key: str):
368
  """
369
+ Google Reverse Image Search using SerpAPI.
370
+ Returns dict or None, and error_message if any.
 
 
 
 
 
 
371
  """
372
+ if not api_key:
373
+ return None, "SerpAPI key not configured."
374
+ if not image_url:
375
+ return None, "No image URL provided."
376
+
 
 
377
  try:
378
+ params = {
379
+ "engine": "google_reverse_image",
380
+ "image_url": image_url,
381
+ "api_key": api_key,
382
+ "output": "json",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
383
  }
384
+ resp = requests.get("https://serpapi.com/search", params=params, timeout=25)
385
+ if resp.status_code == 403:
386
+ return None, (
387
+ "SerpAPI returned HTTP 403 (Forbidden). "
388
+ "Check that the API key is valid and you have enough quota."
389
+ )
390
+ if resp.status_code != 200:
391
+ return None, f"SerpAPI HTTP {resp.status_code}: {resp.text[:180]}"
392
+
393
+ data = resp.json()
394
+ result = {
395
+ "best_guess": data.get("image_guess"),
396
+ "visual_matches": data.get("visual_matches", []),
397
  }
398
+ return result, None
399
  except Exception as e:
400
+ return None, f"Error calling SerpAPI: {e}"
 
 
 
 
 
 
 
 
 
 
401
 
 
 
402
 
403
+ # ------------------- PLOTS -------------------
404
+ def breakdown_chart(res):
405
+ labels = ["Bot Probability", "Grammar Quality"]
406
+ vals = [res["bot_score"], res["grammar_score"]]
407
+ fig, ax = plt.subplots(figsize=(4, 2.2))
408
+ y = np.arange(len(labels))
409
+ ax.barh(y, vals)
410
+ ax.set_yticks(y)
411
  ax.set_yticklabels(labels)
412
  ax.invert_yaxis()
 
413
  ax.set_xlim(0, 100)
414
+ for i, v in enumerate(vals):
415
+ ax.text(v + 1, i, f"{v:.0f}%", va="center", fontsize=8)
 
 
 
 
416
  plt.tight_layout()
417
  return fig
418
 
419
+
420
+ def sentence_length_hist(stats):
421
+ fig, ax = plt.subplots(figsize=(4, 2.2))
422
+ if stats["sentence_lengths"]:
423
+ ax.hist(
424
+ stats["sentence_lengths"],
425
+ bins=min(8, len(stats["sentence_lengths"])),
426
+ )
427
+ ax.set_xlabel("Words per sentence")
428
+ ax.set_ylabel("Frequency")
429
  ax.set_title("Sentence Length Distribution")
430
  plt.tight_layout()
431
  return fig
432
 
433
+
434
+ def word_frequency_chart(stats):
435
+ fig, ax = plt.subplots(figsize=(4, 2.2))
436
+ top = stats["top_words"]
437
+ if top:
438
+ words = [w for w, _ in top]
439
+ counts = [c for _, c in top]
440
+ ax.bar(words, counts)
441
+ ax.set_xticklabels(words, rotation=45, ha="right", fontsize=8)
442
+ ax.set_title("Top Word Frequency")
 
 
 
 
 
 
443
  plt.tight_layout()
444
  return fig
445
 
 
446
 
447
+ # ------------------- PDF REPORT -------------------
448
+ def generate_pdf(text_input, text_res, image_res, reverse_res, platform):
449
+ buf = io.BytesIO()
450
+ doc = SimpleDocTemplate(buf, pagesize=A4, leftMargin=30, rightMargin=30)
451
+ styles = getSampleStyleSheet()
452
+ elems = []
453
+
454
+ elems.append(Paragraph("Review Validator Report", styles["Title"]))
455
+ elems.append(Spacer(1, 6))
456
+ elems.append(Paragraph(f"Platform: {platform}", styles["Normal"]))
457
+ elems.append(Spacer(1, 10))
458
+
459
+ if text_input:
460
+ elems.append(Paragraph("Input Review Text", styles["Heading2"]))
461
+ elems.append(Spacer(1, 4))
462
+ safe = text_input.replace("\n", "<br/>")
463
+ elems.append(Paragraph(safe, styles["Normal"]))
464
+ elems.append(Spacer(1, 8))
465
+
466
+ if text_res and not text_res.get("error", False):
467
+ stats = text_res["stats"]
468
+ elems.append(Paragraph("Text Authenticity Analysis", styles["Heading2"]))
469
+ data = [
470
+ ["Bot-likeness", f"{text_res['bot_score']:.1f}%"],
471
+ ["Grammar Quality", f"{text_res['grammar_score']:.1f}%"],
472
+ ["Sentiment", text_res["mood_label"]],
473
+ ["Sentence Count", str(stats["sentence_count"])],
474
+ ["Word Count", str(stats["word_count"])],
475
+ ["Avg. Sentence Length", f"{stats['avg_sentence_length']:.1f}"],
476
+ ["Type-Token Ratio", f"{stats['type_token_ratio']:.1f}%"],
477
+ ]
478
+ tbl = Table(data, hAlign="LEFT")
479
+ tbl.setStyle(
480
+ TableStyle(
481
+ [
482
+ ("BACKGROUND", (0, 0), (-1, 0), colors.lightgrey),
483
+ ("GRID", (0, 0), (-1, -1), 0.25, colors.grey),
484
+ ("BOX", (0, 0), (-1, -1), 0.25, colors.black),
485
+ ]
486
+ )
487
+ )
488
+ elems.append(tbl)
489
+ elems.append(Spacer(1, 8))
490
+
491
+ explanation = explain_text(text_res, stats)
492
+ elems.append(Paragraph("Interpretation", styles["Heading3"]))
493
+ for para in explanation.split("\n\n"):
494
+ elems.append(Paragraph(para, styles["Normal"]))
495
+ elems.append(Spacer(1, 3))
496
+
497
+ if image_res:
498
+ elems.append(Spacer(1, 8))
499
+ elems.append(Paragraph("Image Authenticity Analysis", styles["Heading2"]))
500
+ data2 = [
501
+ ["AI-likeness (avg)", f"{image_res['ai_chance']:.1f}%"],
502
+ ["Model A Score", f"{image_res['score_a']:.1f}%"],
503
+ ["Model B Score", f"{image_res['score_b']:.1f}%"],
504
+ ["Model Agreement", f"{image_res['match']*100:.1f}%"],
505
+ ]
506
+ t2 = Table(data2, hAlign="LEFT")
507
+ t2.setStyle(
508
+ TableStyle(
509
+ [
510
+ ("BACKGROUND", (0, 0), (-1, 0), colors.lightgrey),
511
+ ("GRID", (0, 0), (-1, -1), 0.25, colors.grey),
512
+ ("BOX", (0, 0), (-1, -1), 0.25, colors.black),
513
+ ]
514
+ )
515
+ )
516
+ elems.append(t2)
517
+ elems.append(Spacer(1, 4))
518
+ elems.append(Paragraph(f"Caption: {image_res['caption']}", styles["Normal"]))
519
+
520
+ if reverse_res:
521
+ elems.append(Spacer(1, 8))
522
+ elems.append(Paragraph("Reverse Image Search (SerpAPI)", styles["Heading2"]))
523
+ best = reverse_res.get("best_guess")
524
+ count = reverse_res.get("count", 0)
525
+ elems.append(Paragraph(f"Visual matches found: {count}", styles["Normal"]))
526
+ if best:
527
+ elems.append(Paragraph(f"Google best guess: {best}", styles["Normal"]))
528
+ links = reverse_res.get("top_links", [])
529
+ if links:
530
+ elems.append(Spacer(1, 4))
531
+ elems.append(Paragraph("Top Matching Sources:", styles["Heading3"]))
532
+ for item in links:
533
+ line = f"{item.get('title') or item.get('link')} (source: {item.get('source')})"
534
+ elems.append(Paragraph(line, styles["Normal"]))
535
+ elems.append(Spacer(1, 2))
536
+
537
+ doc.build(elems)
538
+ pdf_bytes = buf.getvalue()
539
+ buf.close()
540
  return pdf_bytes
541
 
 
542
 
543
+ # ------------------- UI: LANDING -------------------
544
  def landing_page():
545
+ st.markdown(
546
+ """
547
  <div class="hero-box">
548
  <div class="hero-title">🛡️ Review Validator</div>
549
  <div class="hero-subtitle">
550
+ Detect AI-written reviews, AI-generated product images, and reused images via Google Reverse Image Search.
551
  </div>
552
  </div>
553
+ """,
554
+ unsafe_allow_html=True,
555
+ )
556
 
557
  c1, c2, c3 = st.columns(3)
558
  with c1:
559
+ st.markdown(
560
+ """
561
  <div class="feature-card">
562
  <span class="emoji-icon">🤖</span>
563
+ <h3>Text Authenticity</h3>
564
+ <p>Transformer-based models estimate how likely a review is written by AI.</p>
565
  </div>
566
+ """,
567
+ unsafe_allow_html=True,
568
+ )
569
  with c2:
570
+ st.markdown(
571
+ """
572
  <div class="feature-card">
573
  <span class="emoji-icon">📸</span>
574
  <h3>Image Authenticity</h3>
575
+ <p>Dual detectors and captioning analyze whether an image is real or AI-generated.</p>
576
  </div>
577
+ """,
578
+ unsafe_allow_html=True,
579
+ )
580
  with c3:
581
+ st.markdown(
582
+ """
583
  <div class="feature-card">
584
+ <span class="emoji-icon">🔎</span>
585
+ <h3>Reverse Search</h3>
586
+ <p>SerpAPI + Google Reverse Image API to see where else the image appears online.</p>
587
  </div>
588
+ """,
589
+ unsafe_allow_html=True,
590
+ )
591
+
592
+ _, mid, _ = st.columns([1, 2, 1])
593
+ with mid:
594
+ if st.button("🚀 START CHECKING REVIEWS", type="primary", use_container_width=True):
595
+ st.session_state["page"] = "detector"
596
  st.rerun()
597
 
598
+
599
+ # ------------------- UI: DETECTOR -------------------
600
+ def detector_page(squad):
 
 
 
601
  c1, c2 = st.columns([3, 1])
602
  with c1:
603
+ st.markdown("### 🛒 Select Platform")
604
  platform = st.selectbox(
605
+ "Platform", ["Amazon", "Flipkart", "Zomato", "Swiggy", "Myntra", "Other"],
 
606
  label_visibility="collapsed",
 
607
  )
 
608
  with c2:
609
+ if st.button("⬅️ Back Home"):
610
+ st.session_state["page"] = "landing"
611
  st.rerun()
612
 
613
  st.divider()
614
 
615
+ tab_text, tab_img = st.tabs(["📝 Text Review", "📸 Product Image"])
 
 
 
 
 
616
 
617
+ # -------- TEXT TAB --------
618
+ with tab_text:
619
+ col_left, col_right = st.columns([2, 1])
620
+ with col_left:
621
+ txt = st.text_area(
 
 
622
  "Paste Review Here:",
623
+ height=180,
624
+ placeholder="Example: I ordered this yesterday and it exceeded expectations...",
 
625
  )
626
+ with col_right:
627
+ st.info("Tip: Paste full review text for more accurate analysis.")
628
+ if st.button("Analyze Text", type="primary", use_container_width=True):
629
+ if not txt.strip():
630
+ st.error("Please paste a review first.")
 
 
 
 
 
 
631
  else:
632
+ with st.spinner("Analyzing review..."):
633
+ res = check_text(txt.strip(), squad)
634
+ st.session_state["text_res"] = res
635
+ st.session_state["text_raw"] = txt.strip()
636
+ st.session_state["platform"] = platform
637
+
638
+ if "text_res" in st.session_state:
639
+ res = st.session_state["text_res"]
640
  if res.get("error"):
641
+ st.error("Text models failed to load. Check HF_TOKEN.")
642
  else:
643
+ stats = res["stats"]
644
  st.markdown("---")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
645
  k1, k2, k3 = st.columns(3)
646
+ color = "red" if res["bot_score"] > 50 else "green"
647
  k1.markdown(
648
+ f'<div class="stat-box"><div class="stat-num" style="color:{color}">{res["bot_score"]:.0f}%</div><div class="stat-txt">Bot Chance</div></div>',
649
+ unsafe_allow_html=True,
 
 
 
650
  )
651
  k2.markdown(
652
+ f'<div class="stat-box"><div class="stat-num">{res["grammar_score"]:.0f}%</div><div class="stat-txt">Grammar</div></div>',
653
+ unsafe_allow_html=True,
 
 
 
654
  )
655
  k3.markdown(
656
+ f'<div class="stat-box"><div class="stat-num">{stats["word_count"]}</div><div class="stat-txt">Total Words</div></div>',
657
+ unsafe_allow_html=True,
 
 
 
658
  )
659
 
 
660
  g1, g2, g3 = st.columns(3)
661
  with g1:
662
+ st.pyplot(breakdown_chart(res))
 
 
663
  with g2:
664
+ st.pyplot(sentence_length_hist(stats))
 
 
665
  with g3:
666
+ st.pyplot(word_frequency_chart(stats))
 
 
 
 
 
 
 
 
 
 
667
 
668
+ st.markdown("#### Explanation")
669
+ st.markdown(explain_text(res, stats))
 
670
 
671
+ st.markdown("---")
672
+ if st.button("Generate PDF (Text Only)", use_container_width=False):
673
+ pdf = generate_pdf(
674
+ st.session_state.get("text_raw", ""),
 
 
 
 
 
 
 
 
 
 
675
  res,
676
+ st.session_state.get("img_res"),
677
+ st.session_state.get("reverse_search_results"),
678
+ st.session_state.get("platform", platform),
679
  )
680
+ st.session_state["pdf_text"] = pdf
681
+ if "pdf_text" in st.session_state:
682
  st.download_button(
683
+ "⬇️ Download Text Analysis PDF",
684
+ data=st.session_state["pdf_text"],
685
+ file_name="review_validator_text.pdf",
686
  mime="application/pdf",
687
  )
 
 
 
 
 
 
688
 
689
+ # -------- IMAGE TAB --------
690
+ with tab_img:
691
+ col_in, col_out = st.columns([1, 1])
692
  with col_in:
693
  st.markdown("#### Step 1: Provide Image")
694
  method = st.radio(
695
+ "Input type",
696
  ["Paste URL", "Upload File"],
697
  horizontal=True,
698
+ label_visibility="collapsed",
699
  )
700
 
701
  with st.form("image_form"):
702
  img_file = None
703
+ url = ""
704
+ auto_reverse = False
705
 
706
  if method == "Paste URL":
707
+ url = st.text_input("Image URL")
708
+ auto_reverse = st.checkbox(
709
+ "Also perform Google Reverse Image Search on this URL",
710
+ value=True,
711
+ )
712
  else:
713
+ img_file = st.file_uploader(
714
+ "Upload Image", type=["jpg", "jpeg", "png"]
715
+ )
 
 
716
 
717
+ submitted = st.form_submit_button(
718
+ "Analyze Image", type="primary", use_container_width=True
719
+ )
 
 
 
 
 
 
720
 
721
+ if submitted:
722
+ target = None
723
+ err_msg = None
724
+ if method == "Paste URL":
725
+ if not url.strip():
726
+ st.error("Please enter a valid image URL.")
727
  else:
728
+ img, err = get_image_from_url(url.strip())
729
+ if err:
730
+ st.error(err)
731
+ else:
732
+ target = img
733
+ st.session_state["last_image_url"] = url.strip()
734
+ else:
735
+ if not img_file:
736
+ st.error("Please upload an image file.")
737
+ else:
738
+ try:
739
+ target = Image.open(img_file).convert("RGB")
740
+ st.session_state["last_image_url"] = None
741
+ except Exception as e:
742
+ st.error(f"Error reading image: {e}")
743
+
744
+ if target is not None:
745
+ with st.spinner("Running image authenticity checks..."):
746
+ img_res = check_image(target, squad)
747
+ st.session_state["current_img"] = target
748
+ st.session_state["img_res"] = img_res
749
+
750
+ # Auto reverse search if URL + checkbox + key available
751
+ if method == "Paste URL" and auto_reverse:
752
+ serp_key = get_serpapi_key()
753
+ if not serp_key:
754
+ st.warning(
755
+ "SerpAPI key not configured. Skipping reverse image search."
756
+ )
757
+ else:
758
+ with st.spinner("Performing reverse image search via SerpAPI..."):
759
+ rev, err = serpapi_reverse_image_search(
760
+ url.strip(), serp_key
761
+ )
762
+ if err:
763
+ st.error(err)
764
+ elif rev:
765
+ matches = rev.get("visual_matches", [])
766
+ st.session_state["reverse_search_results"] = {
767
+ "best_guess": rev.get("best_guess"),
768
+ "count": len(matches),
769
+ "top_links": [
770
+ {
771
+ "title": m.get("title"),
772
+ "link": m.get("link"),
773
+ "source": m.get("source"),
774
+ }
775
+ for m in matches[:5]
776
+ ],
777
+ }
778
+
779
+ with col_out:
780
+ if "current_img" in st.session_state:
781
  st.image(
782
+ st.session_state["current_img"],
783
  use_column_width=True,
784
+ caption="Analyzed Image",
785
  )
786
 
787
+ if "img_res" in st.session_state:
788
+ data = st.session_state["img_res"]
789
+ ai = data["ai_chance"]
790
+ st.markdown("#### Step 2: Image Analysis Result")
791
+ st.markdown(
792
+ f"""
 
 
793
  <div class="analysis-box">
794
+ <strong>Visual Caption:</strong><br/>
795
+ {data['caption']}
796
  </div>
797
+ """,
798
+ unsafe_allow_html=True,
799
+ )
800
 
801
+ if data["match"] < 0.6:
802
+ st.warning(
803
+ "Detectors disagree significantly. Image may be heavily edited or ambiguous."
804
+ )
805
+ elif ai > 60:
806
+ st.error(f"Likely AI-generated image ({ai:.0f}% probability).")
807
  else:
808
+ st.success(
809
+ f"Likely real photograph ({100 - ai:.0f}% probability)."
810
+ )
811
+
812
+ st.progress(ai / 100.0, text=f"AI-likeness: {ai:.1f}%")
813
+ with st.expander("Detector Breakdown"):
814
+ st.write(f"Model A: {data['score_a']:.1f}%")
815
+ st.write(f"Model B: {data['score_b']:.1f}%")
816
+ st.write(f"Agreement: {data['match']*100:.1f}%")
817
 
818
+ st.markdown("---")
819
+ st.markdown("### 🔎 Reverse Image Search (Manual Call)")
820
+
821
+ r_col1, r_col2 = st.columns([2, 1])
822
+ with r_col1:
823
+ manual_url = st.text_input(
824
+ "Public image URL (optional, for manual reverse search):",
825
+ value=st.session_state.get("last_image_url", "") or "",
826
+ )
827
+ with r_col2:
828
+ if st.button("Run Reverse Search", use_container_width=True):
829
+ key = get_serpapi_key()
830
+ if not key:
831
+ st.error("SerpAPI key not configured.")
832
+ elif not manual_url.strip():
833
+ st.error("Please enter an image URL.")
834
  else:
835
+ with st.spinner("Calling SerpAPI Google Reverse Image API..."):
836
+ rev, err = serpapi_reverse_image_search(
837
+ manual_url.strip(), key
838
+ )
839
+ if err:
840
+ st.error(err)
841
+ elif rev:
842
+ matches = rev.get("visual_matches", [])
843
+ st.success("Reverse image search completed.")
844
+ if rev.get("best_guess"):
845
+ st.write(f"Google best guess: {rev['best_guess']}")
846
+ st.write(f"Total visual matches: {len(matches)}")
847
+ if matches:
848
+ st.markdown("**Top sources:**")
849
+ for m in matches[:5]:
850
+ st.markdown(
851
+ f"- [{m.get('title') or m.get('link')}]({m.get('link')}) _(source: {m.get('source')})_"
852
+ )
853
+ st.session_state["reverse_search_results"] = {
854
+ "best_guess": rev.get("best_guess"),
855
+ "count": len(matches),
856
+ "top_links": [
857
+ {
858
+ "title": m.get("title"),
859
+ "link": m.get("link"),
860
+ "source": m.get("source"),
861
+ }
862
+ for m in matches[:5]
863
+ ],
864
+ }
865
+
866
+ st.markdown("---")
867
+ if st.button("Generate Full PDF (Text + Image + Reverse)", use_container_width=False):
868
+ pdf_full = generate_pdf(
869
+ st.session_state.get("text_raw", ""),
870
+ st.session_state.get("text_res"),
871
+ st.session_state.get("img_res"),
872
+ st.session_state.get("reverse_search_results"),
873
+ st.session_state.get("platform", "Unknown"),
874
+ )
875
+ st.session_state["pdf_full"] = pdf_full
876
+
877
+ if "pdf_full" in st.session_state:
878
+ st.download_button(
879
+ "⬇️ Download Full Analysis PDF",
880
+ data=st.session_state["pdf_full"],
881
+ file_name="review_validator_full.pdf",
882
+ mime="application/pdf",
883
+ )
884
+
885
 
886
+ # ------------------- MAIN -------------------
887
def main():
    """Application entry point.

    Injects the custom CSS, ensures the navigation state exists, loads the
    AI model bundle (with a spinner), and then routes to either the landing
    page or the detector page based on session state.
    """
    inject_custom_css()

    # First visit: default to the landing page.
    if "page" not in st.session_state:
        st.session_state["page"] = "landing"

    # Model loading is cached upstream; the spinner only shows on cold start.
    with st.spinner("Loading AI models..."):
        squad, err = load_ai_squad()

    # The app cannot function without the models; surface the loader's
    # error message and stop rendering.
    if not squad:
        st.error(err)
        return

    # Simple two-page router driven by session state.
    current_page = st.session_state["page"]
    if current_page == "landing":
        landing_page()
    else:
        detector_page(squad)
905
 
906
# Standard script guard: launch the Streamlit app only when this file is
# executed directly (not when imported as a module).
if __name__ == "__main__":
    main()