import os

# Configure the Hugging Face cache before importing transformers: the library
# resolves its cache paths at import time, so these variables must be set first.
CACHE_DIR = "/app/.cache/huggingface"
os.makedirs(CACHE_DIR, exist_ok=True)
os.environ["HF_HOME"] = CACHE_DIR
os.environ["TRANSFORMERS_CACHE"] = CACHE_DIR  # legacy name, kept for older transformers releases

from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from llama_cpp import Llama
|
app = FastAPI(
    title="MGZON Smart Assistant",
    description="Combines the fine-tuned T5 model with Mistral-7B (GGUF) inside the Space",
)
|
# Load the fine-tuned FLAN-T5 model from the Hub once, at startup.
T5_REPO = "MGZON/mgzon-flan-t5-base"
try:
    t5_tokenizer = AutoTokenizer.from_pretrained(T5_REPO, cache_dir=CACHE_DIR)
    t5_model = AutoModelForSeq2SeqLM.from_pretrained(T5_REPO, cache_dir=CACHE_DIR)
except Exception as e:
    raise RuntimeError(f"Failed to load the T5 model from {T5_REPO}: {e}") from e
|
# Locate the quantized Mistral weights fetched during the image build.
gguf_path = os.path.abspath("models/mistral-7b-instruct-v0.1.Q4_K_M.gguf")
if not os.path.exists(gguf_path):
    raise RuntimeError(
        f"Mistral .gguf file not found at {gguf_path}. "
        "Make sure setup.sh was executed during the build."
    )

try:
    mistral = Llama(
        model_path=gguf_path,
        n_ctx=2048,    # context window in tokens
        n_threads=8,   # CPU threads; tune to the Space's vCPU count
    )
except Exception as e:
    raise RuntimeError(f"Failed to load the Mistral model from {gguf_path}: {e}") from e
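# Optional startup smoke test, left commented out: a minimal sketch that makes
# each model generate a single token so load failures surface at boot rather
# than on the first request. Uncomment at the cost of slower startup.
# _ids = t5_tokenizer("ping", return_tensors="pt")
# _ = t5_model.generate(**_ids, max_new_tokens=1)
# _ = mistral("ping", max_tokens=1)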
|
class AskRequest(BaseModel):
    question: str
    max_new_tokens: int = 150
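# Illustrative request body for /ask (values are examples only):
# {"question": "What is MGZON?", "max_new_tokens": 100}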
|
@app.post("/ask")
def ask(req: AskRequest):
    q = req.question.strip()
    if not q:
        raise HTTPException(status_code=400, detail="Empty question")

    try:
        # Simple keyword routing: questions mentioning MGZON or the T5 model go
        # to the fine-tuned FLAN-T5; everything else goes to Mistral.
        if any(tok in q.lower() for tok in ["mgzon", "flan", "t5"]):
            inputs = t5_tokenizer(q, return_tensors="pt", truncation=True, max_length=256)
            out_ids = t5_model.generate(**inputs, max_new_tokens=req.max_new_tokens)
            answer = t5_tokenizer.decode(out_ids[0], skip_special_tokens=True)
            model_name = "MGZON-FLAN-T5"
        else:
            out = mistral(prompt=q, max_tokens=req.max_new_tokens)
            answer = out["choices"][0]["text"].strip()
            model_name = "Mistral-7B-GGUF"
        return {"model": model_name, "response": answer}
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error while processing the request: {e}")
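# Example call (illustrative):
#   curl -X POST http://localhost:7860/ask \
#        -H "Content-Type: application/json" \
#        -d '{"question": "Hello"}'
#
# Local entry point: a minimal sketch assuming uvicorn is installed and the
# usual Hugging Face Spaces port (7860); adjust to your deployment.
if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=7860)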