MGZON committed on
Commit c6e2d82 · verified · 1 Parent(s): 23be6d3

Update app.py

Files changed (1)
  1. app.py +52 -37
app.py CHANGED
@@ -1,44 +1,59 @@
- # app.py
+ import os
  from fastapi import FastAPI, HTTPException
  from pydantic import BaseModel
-
- import os
-
- os.environ["TRANSFORMERS_CACHE"] = "/tmp/.cache"
-
-
- from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
-
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
+ from llama_cpp import Llama

  app = FastAPI(
-     title="MGZON FLAN-T5 API",
-     description="API for MGZON FLAN-T5 model",
+     title="MGZON Smart Assistant",
+     description="Combines the fine-tuned T5 model with Mistral 7B (GGUF) inside the Space",
  )

- MODEL_NAME = "MGZON/mgzon-flan-t5-base"
- tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
- model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME)
-
-
- class PromptRequest(BaseModel):
-     prompt: str
-     max_new_tokens: int = 100
-
+ # -------------------------------------------------
+ # 1️⃣ Load the fine-tuned T5 model from the Hub
+ # -------------------------------------------------
+ T5_REPO = "MGZON/mgzon-flan-t5-base"  # edit here if you uploaded it under another name
+ t5_tokenizer = AutoTokenizer.from_pretrained(T5_REPO)
+ t5_model = AutoModelForSeq2SeqLM.from_pretrained(T5_REPO)
+
+ # -------------------------------------------------
+ # 2️⃣ Load the Mistral .gguf file (downloaded by setup.sh)
+ # -------------------------------------------------
+ gguf_path = os.path.abspath("models/mistral-7b-instruct-v0.1.Q4_K_M.gguf")
+ mistral = Llama(
+     model_path=gguf_path,
+     n_ctx=2048,
+     n_threads=8,  # number of threads available inside the Space
+     # if the Space has a GPU, you can add n_gpu_layers=35
+ )

- @app.post("/generate")
- def generate(req: PromptRequest):
-     """
-     Call the model and return the generated text.
-     """
-     try:
-         inputs = tokenizer(req.prompt, return_tensors="pt")
-         out_ids = model.generate(
-             **inputs,
-             max_new_tokens=req.max_new_tokens,
-             num_beams=4,
-             early_stopping=True,
-         )
-         text = tokenizer.decode(out_ids[0], skip_special_tokens=True)
-         return {"generated_text": text}
-     except Exception as e:
-         raise HTTPException(status_code=500, detail=str(e))
+ # -------------------------------------------------
+ # 3️⃣ Define the request schema
+ # -------------------------------------------------
+ class AskRequest(BaseModel):
+     question: str
+     max_new_tokens: int = 150  # number of new tokens to generate
+
+ # -------------------------------------------------
+ # 4️⃣ The /ask endpoint
+ # -------------------------------------------------
+ @app.post("/ask")
+ def ask(req: AskRequest):
+     q = req.question.strip()
+     if not q:
+         raise HTTPException(status_code=400, detail="Empty question")
+
+     # Simple routing logic to pick a model:
+     if any(tok in q.lower() for tok in ["mgzon", "flan", "t5"]):
+         # ----- use T5 -----
+         inputs = t5_tokenizer(q, return_tensors="pt", truncation=True, max_length=256)
+         out_ids = t5_model.generate(**inputs, max_new_tokens=req.max_new_tokens)
+         answer = t5_tokenizer.decode(out_ids[0], skip_special_tokens=True)
+         model_name = "MGZON-FLAN-T5"
+     else:
+         # ----- use Mistral -----
+         out = mistral(prompt=q, max_tokens=req.max_new_tokens)
+         answer = out["choices"][0]["text"].strip()
+         model_name = "Mistral-7B-GGUF"
+
+     return {"model": model_name, "response": answer}