Spaces:

sahinomercan
/

sahinhukuk-bot

Sleeping

App Files Community

sahinomercan committed on Nov 1

Commit

ef3ea72

verified ·

1 Parent(s): a2ea8df

Update app.py

Browse files

Files changed (1) hide show

app.py +37 -52

app.py CHANGED Viewed

@@ -1,6 +1,6 @@
 import gradio as gr
 from sentence_transformers import SentenceTransformer, util
-from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, pipeline
 import torch
 import os
 import pickle
@@ -10,38 +10,27 @@ import re
 TEXT_FILE = "icerik.txt"
 EMBEDDING_CACHE_FILE = "embeddings.pkl"
 EMBEDDING_MODEL_NAME = "all-MiniLM-L6-v2"
-GENERATOR_MODEL_NAME = "google/flan-t5-small"
-THRESHOLD = 0.55 # Minimum güven skoru (Retrieval için)
-# İLETİŞİM BİLGİLERİ (Güncel ve Entegre)
 PHONE_MAIN = '+90 531 294 22 34'
 PHONE_LANDLINE_EXT = '0232 464 41 00 (Dahili 165)'
 WHATSAPP_LINK = 'http://wa.me/905312942234'
 APPOINTMENT_LINK = 'https://calendar.app.google/JT9A1oGHVGopNZ9y8'
 MAP_LINK = 'https://maps.app.goo.gl/PLsBy9afjiRB9WDb6'
-EMAIL_ADDRESS = '[email protected]'
 # --- 2. MODELLERİN YÜKLENMESİ ve VERİ YÜKLEME ---
 print("Modeller yükleniyor...")
 embedding_model = SentenceTransformer(EMBEDDING_MODEL_NAME)
-# Generative Model (PyTorch) Yükleniyor
-try:
-    generator_tokenizer = AutoTokenizer.from_pretrained(GENERATOR_MODEL_NAME)
-    generator_model = AutoModelForSeq2SeqLM.from_pretrained(
-        GENERATOR_MODEL_NAME,
-        device_map="auto" # CPU'ya yüklenmesini sağlar
-    )
-    generator_pipeline = pipeline(
-        "text2text-generation",
-        model=generator_model,
-        tokenizer=generator_tokenizer
-    )
-    print("PyTorch Generative AI modeli yüklendi.")
-except Exception as e:
-    print(f"Generative AI YÜKLEME HATASI: {e}")
-    generator_pipeline = None
 def load_documents():
@@ -66,9 +55,8 @@ def load_documents():
 docs, embeddings = load_documents()
-# --- 3. POST-PROCESSİNG ve UTILS ---
 def post_process_context(context: str) -> str:
-    """Metin temizliği yapar ve düzenli bir görünüm için HTML'e dönüştürür."""
     paragraphs = context.split('\n\n')
     cleaned_paragraphs = []
     for p in paragraphs:
@@ -79,15 +67,15 @@ def post_process_context(context: str) -> str:
     return "<br> • " + "<br> • ".join(cleaned_paragraphs)
-# --- 4. ANA MANTIK (Generative RAG) ---
 def answer_question(question: str):
-    if generator_pipeline is None or not docs or embeddings is None:
-        return "<div style='font-family: Arial; color: #8b0000;'><h3>Sistem Hatası:</h3><p>Lokal bilgi kaynakları veya Yapay Zeka motoru yüklenemedi. Lütfen 'icerik.txt' dosyasını kontrol edin ve tekrar deneyin.</p></div>"
-    # 1. Retrieval (Çekme)
     question_embedding = embedding_model.encode(question, convert_to_tensor=True)
     scores = util.pytorch_cos_sim(question_embedding, embeddings)[0]
     top_k = min(3, len(docs))
@@ -95,7 +83,7 @@ def answer_question(question: str):
     selected_context = []
     for score, idx in zip(top_results.values, top_results.indices):
-        if score.item() >= THRESHOLD:
             selected_context.append(docs[idx.item()])
     if not selected_context:
@@ -111,25 +99,23 @@ def answer_question(question: str):
     full_context = "\n\n".join(selected_context)
-    # 2. Generation (Üretme)
-    rag_prompt = f"""
-    Sana sunulan metni (Bağlam) kullanarak, aşağıdaki soruyu SADECE TÜRKÇE ve mantıklı cümlelerle özetle. Cevabın 2-3 cümleyi geçmesin ve sadece bağlamdaki bilgilere dayanmalıdır. Eğer bağlamda kesin cevap yoksa 'Net bir cevap bulunamadı.' de.
-    Soru: {question}
-    Bağlam: {full_context}
-    Özet Cevap:
-    """
-    generated_output = generator_pipeline(
-        rag_prompt,
-        max_new_tokens=100,
-        do_sample=False,
-        temperature=0.01
-    )
-    synthesized_answer = generated_output[0]['generated_text'].strip()
     # 3. Final Sunum
     processed_context_html = post_process_context(full_context)
@@ -139,19 +125,19 @@ def answer_question(question: str):
       <h3 style="color: #003366;">🧾 Sorunuz</h3>
       <p><strong>{question}</strong></p>
-      <h3 style="color: #003366;">✅ Ön Rehberlik ve Yol Haritası</h3>
       <p style="background-color: #eef7ff; border-left: 5px solid #005580; padding: 15px; border-radius: 5px; font-weight: bold;">
-        {synthesized_answer}
       </p>
-      <h3 style="color: #003366; margin-top: 20px;">📚 Kaynak Metin (Analiz Özeti)</h3>
       <p style="font-size: 14px; color: #333; padding: 0 15px 0 15px;">
         {processed_context_html}
       </p>
       <h3 style="color: #8b0000; margin-top: 25px;">📢 Kişisel Değerlendirme ve Görüşme</h3>
       <p style="font-size: 16px;">
-        Bu özet bilgi, genel durumunuz hakkında bir fikir verir. Detaylı durum tespiti ve kişisel yol haritanız için hemen bizimle iletişime geçin:
         <br><br>
         <a href='{WHATSAPP_LINK}' style='color: #003366; font-weight: bold; text-decoration: underline;'>📞 WhatsApp Üzerinden Hızlı İletişim</a>
         <br>
@@ -162,7 +148,7 @@ def answer_question(question: str):
     return answer_html
 # --- 5. GRADIO ARAYÜZÜ ---
-# Tüm iletişim bilgileri ve Randevu/Harita linkleri buraya entegre edildi.
 header_info = """
 <div style="font-family: Arial; padding: 15px; background: linear-gradient(to right, #003366, #005580); color: white; border-radius: 10px;">
   <h2 style="color: white;">Şahin Hukuk | Akıllı Asistan</h2>
@@ -181,7 +167,6 @@ header_info = """
     MAP_LINK=MAP_LINK
 )
 interface = gr.Interface(
     fn=answer_question,
     inputs=gr.Textbox(label="Sorunuzu buraya yazın", lines=2, placeholder="Örn: Kira tespit davası nedir?"),

 import gradio as gr
 from sentence_transformers import SentenceTransformer, util
+from transformers import pipeline # T5 yerine standart pipeline kullanacağız
 import torch
 import os
 import pickle
 TEXT_FILE = "icerik.txt"
 EMBEDDING_CACHE_FILE = "embeddings.pkl"
 EMBEDDING_MODEL_NAME = "all-MiniLM-L6-v2"
+# YENİ MODEL: Türkçe için eğitilmiş, "saçmalamayan" Extractive QA modeli
+QA_MODEL_NAME = "savasy/bert-base-turkish-squad"
+RETRIEVAL_THRESHOLD = 0.55 # Paragraf bulma benzerlik skoru
+QA_THRESHOLD = 0.40 # Cevap çıkarma güven skoru
+# İLETİŞİM BİLGİLERİ (Arayüze Entegre Edilecek)
 PHONE_MAIN = '+90 531 294 22 34'
 PHONE_LANDLINE_EXT = '0232 464 41 00 (Dahili 165)'
 WHATSAPP_LINK = 'http://wa.me/905312942234'
 APPOINTMENT_LINK = 'https://calendar.app.google/JT9A1oGHVGopNZ9y8'
 MAP_LINK = 'https://maps.app.goo.gl/PLsBy9afjiRB9WDb6'
 # --- 2. MODELLERİN YÜKLENMESİ ve VERİ YÜKLEME ---
 print("Modeller yükleniyor...")
+# 1. Paragraf Bulucu (Retrieval)
 embedding_model = SentenceTransformer(EMBEDDING_MODEL_NAME)
+# 2. Cevap Çıkarıcı (Extraction)
+qa_pipeline = pipeline("question-answering", model=QA_MODEL_NAME, tokenizer=QA_MODEL_NAME)
+print("Tüm modeller yüklendi.")
 def load_documents():
 docs, embeddings = load_documents()
+# --- 3. POST-PROCESSİNG (Sadece Kaynak Gösterimi İçin) ---
 def post_process_context(context: str) -> str:
     paragraphs = context.split('\n\n')
     cleaned_paragraphs = []
     for p in paragraphs:
     return "<br> • " + "<br> • ".join(cleaned_paragraphs)
+# --- 4. ANA MANTIK (Extractive RAG) ---
 def answer_question(question: str):
+    if qa_pipeline is None or not docs or embeddings is None:
+        return "<div style='font-family: Arial; color: #8b0000;'><h3>Sistem Hatası:</h3><p>Lokal bilgi kaynakları veya Yapay Zeka motoru yüklenemedi.</p></div>"
+    # 1. Retrieval (Çekme): En alakalı paragrafları bul
     question_embedding = embedding_model.encode(question, convert_to_tensor=True)
     scores = util.pytorch_cos_sim(question_embedding, embeddings)[0]
     top_k = min(3, len(docs))
     selected_context = []
     for score, idx in zip(top_results.values, top_results.indices):
+        if score.item() >= RETRIEVAL_THRESHOLD:
             selected_context.append(docs[idx.item()])
     if not selected_context:
     full_context = "\n\n".join(selected_context)
+    # 2. Extraction (Cevabı Çıkarma) - YENİ KISIM
+    # Model, bu metin içinden sorunun cevabını bulur
+    qa_result = qa_pipeline(question=question, context=full_context)
+    answer = qa_result['answer']
+    score = qa_result['score']
+    # Güven skoru düşükse (cevap alakasızsa) veya cevap çok kısaysa
+    if score < QA_THRESHOLD or len(answer) < 10:
+        return f"""
+        <div style="font-family: Arial; color: #8b0000;">
+          <h3>Bilgi bankamızda bu soruya yönelik net bir cevap bulunamadı.</h3>
+          <p>Lütfen ofisimizle iletişime geçin.
+              <a href='{WHATSAPP_LINK}' style='color: #8b0000; text-decoration: underline;'>WhatsApp üzerinden bize ulaşabilirsiniz</a>.
+          </p>
+        </div>
+        """
     # 3. Final Sunum
     processed_context_html = post_process_context(full_context)
       <h3 style="color: #003366;">🧾 Sorunuz</h3>
       <p><strong>{question}</strong></p>
+      <h3 style="color: #003366;">✅ Yapay Zeka Yanıtı (Çıkarım)</h3>
       <p style="background-color: #eef7ff; border-left: 5px solid #005580; padding: 15px; border-radius: 5px; font-weight: bold;">
+        {answer}
       </p>
+      <h3 style="color: #003366; margin-top: 20px;">📚 Cevabın Alındığı Kaynak Metinler</h3>
       <p style="font-size: 14px; color: #333; padding: 0 15px 0 15px;">
         {processed_context_html}
       </p>
       <h3 style="color: #8b0000; margin-top: 25px;">📢 Kişisel Değerlendirme ve Görüşme</h3>
       <p style="font-size: 16px;">
+        Detaylı durum tespiti ve kişisel yol haritanız için hemen bizimle iletişime geçin:
         <br><br>
         <a href='{WHATSAPP_LINK}' style='color: #003366; font-weight: bold; text-decoration: underline;'>📞 WhatsApp Üzerinden Hızlı İletişim</a>
         <br>
     return answer_html
 # --- 5. GRADIO ARAYÜZÜ ---
+# (Arayüz kodunda değişiklik yok, tüm iletişim bilgileri yerinde)
 header_info = """
 <div style="font-family: Arial; padding: 15px; background: linear-gradient(to right, #003366, #005580); color: white; border-radius: 10px;">
   <h2 style="color: white;">Şahin Hukuk | Akıllı Asistan</h2>
     MAP_LINK=MAP_LINK
 )
 interface = gr.Interface(
     fn=answer_question,
     inputs=gr.Textbox(label="Sorunuzu buraya yazın", lines=2, placeholder="Örn: Kira tespit davası nedir?"),