Delete app.py
app.py
DELETED
@@ -1,159 +0,0 @@
import gradio as gr
from huggingface_hub import InferenceClient
import os
import faiss
import numpy as np
from sentence_transformers import SentenceTransformer
import PyPDF2
import pytesseract
from pdf2image import convert_from_path
import gdown
import pickle

# --- Configuration ---
MODEL_NAME = "openai/gpt-oss-20b"
SECURE_HF_TOKEN = os.environ.get("HF_TOKEN")

if not SECURE_HF_TOKEN:
    raise ValueError("HF_TOKEN environment variable not set. Add a Secret in Space settings.")

client = InferenceClient(token=SECURE_HF_TOKEN, model=MODEL_NAME)
embedder = SentenceTransformer("all-MiniLM-L6-v2")
chunks, sources = [], []

# --- Google Drive PDFs: FILE_ID → Subject ---
drive_files = {
    "1wP4uK_GA6rjg_4maZpmbDNSynhvZnCNo": "PSM",
    "1LtwDGeWLF357elmtbA-R_uujHqmM2jC_": "PSM2",
    "1wnqsdt0st5wy60zAg7DKZFeAl_vMgu4T": "FMT concise reddy",
    "1BhySXMqZxcnLSccq-D0UB9-rGb1YIsN1": "FMT",
    "1sNoc8qLR5VznT28MIrJ0CvCgW6OunF_v": "Pediatrics",
    "1s9772ypXMudsLSdHn1xhtGcZCFFghYB0": "Medicine1",
    "1sAAwpNCqfbjB-d5GqImj50qZzUDopvtR": "Gynae",
    "1rvgHxpzvE7v4Ed13UXBTaxn3GXljfccE": "ENT",
    "1vd7wg3HlQanVl8Nk-W90wJOyYRzunu0g": "Ophthalmology",
    # Add remaining FILE_IDs here
}

notes_folder = "notes"
os.makedirs(notes_folder, exist_ok=True)
cache_file = os.path.join(notes_folder, "embeddings_cache.pkl")

# --- Load cached embeddings if available ---
if os.path.exists(cache_file):
    with open(cache_file, "rb") as f:
        chunks, sources, embeddings = pickle.load(f)
    dim = embeddings.shape[1]
    index = faiss.IndexFlatL2(dim)
    index.add(np.array(embeddings).astype("float32"))
else:
    # --- Download PDFs and extract text safely ---
    for file_id, subject in drive_files.items():
        output_path = os.path.join(notes_folder, f"{subject}.pdf")
        if not os.path.exists(output_path):
            url = f"https://drive.google.com/uc?id={file_id}"
            try:
                gdown.download(url, output_path, quiet=False)
            except Exception as e:
                print(f"Warning: Could not download {subject}: {e}")
                continue  # skip this PDF

        # Extract text
        text = ""
        try:
            reader = PyPDF2.PdfReader(output_path)
            text = " ".join([p.extract_text() for p in reader.pages if p.extract_text()])
        except Exception:
            text = ""

        # OCR if text is empty
        if not text.strip():
            try:
                images = convert_from_path(output_path)
                for img in images:
                    text += pytesseract.image_to_string(img) + " "
            except Exception as e:
                print(f"Warning: OCR failed for {subject}: {e}")
                continue

        # Split into chunks
        file_chunks = [text[i:i + 500] for i in range(0, len(text), 500)]
        chunks.extend(file_chunks)
        sources.extend([subject] * len(file_chunks))

    # Build FAISS index
    if chunks:
        embeddings = embedder.encode(chunks)
        dim = embeddings.shape[1]
        index = faiss.IndexFlatL2(dim)
        index.add(np.array(embeddings).astype("float32"))
        # Save to cache
        with open(cache_file, "wb") as f:
            pickle.dump((chunks, sources, embeddings), f)
    else:
        index = None

# --- Chat respond function with source display ---
def respond(message, history: list, system_message, max_tokens, temperature, top_p):
    context = ""
    source_names = set()
    if index is not None and len(chunks) > 0:
        query_emb = embedder.encode([message])
        query_emb = np.array(query_emb).astype("float32")
        k = min(3, len(chunks))
        D, I = index.search(query_emb, k=k)
        retrieved_chunks = [chunks[i] for i in I[0] if i != -1]
        retrieved_sources = [sources[i] for i in I[0] if i != -1]
        if retrieved_chunks:
            context = "\n".join(retrieved_chunks)
            source_names.update(retrieved_sources)

    messages = [{"role": "system", "content": system_message}]
    messages.extend(history)

    source_text = ""
    if source_names:
        source_text = "Sources: " + ", ".join(sorted(source_names)) + "\n\n"

    prompt_content = f"{source_text}Answer using the following notes if relevant:\n{context}\n\nQuestion: {message}"
    messages.append({"role": "user", "content": prompt_content})

    response = ""
    for message_chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        choices = message_chunk.choices
        token = ""
        if len(choices) and choices[0].delta.content:
            token = choices[0].delta.content
        response += token
        yield response

# --- Gradio interface ---
chatbot = gr.ChatInterface(
    respond,
    type="messages",
    additional_inputs=[
        gr.Textbox(value="Hey, need help?", label="System message"),
        gr.Slider(1, 5000, value=3000, step=1, label="Max new tokens"),
        gr.Slider(0.1, 4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(0.1, 1.0, value=0.95, step=0.05, label="Top-p"),
    ],
)

with gr.Blocks() as demo:
    with gr.Sidebar():
        gr.Markdown("# AI Chatbot with Google Drive PDFs & Source Display")
        gr.Markdown("Handles failed downloads, OCR, caching, and shows sources automatically")
    chatbot.render()

if __name__ == "__main__":
    demo.launch()
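For reference, the retrieval step of the deleted app.py can be exercised on its own. The sketch below is not part of the original file: it assumes the cache that app.py writes (notes/embeddings_cache.pkl, pickled as the tuple (chunks, sources, embeddings)) already exists, and the sample query string is made up for illustration. It rebuilds the FAISS index from the cached embeddings and prints the top matches with their subject labels, mirroring what respond() does before calling the model.

import pickle
import numpy as np
import faiss
from sentence_transformers import SentenceTransformer

# Assumption: this cache was produced by the app.py shown above.
with open("notes/embeddings_cache.pkl", "rb") as f:
    chunks, sources, embeddings = pickle.load(f)

embedder = SentenceTransformer("all-MiniLM-L6-v2")
index = faiss.IndexFlatL2(embeddings.shape[1])
index.add(np.array(embeddings).astype("float32"))

query = "example question about one of the uploaded notes"  # hypothetical query
query_emb = np.array(embedder.encode([query])).astype("float32")
D, I = index.search(query_emb, min(3, len(chunks)))
for dist, idx in zip(D[0], I[0]):
    if idx != -1:
        print(f"[{sources[idx]}] L2 distance {dist:.2f}: {chunks[idx][:120]}")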