import os
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from llama_cpp import Llama
from huggingface_hub import hf_hub_download
import logging
from fastapi.responses import StreamingResponse
# --- IMPORT FOR THE NEW (CORS) PERMISSION ---
from fastapi.middleware.cors import CORSMiddleware
# --- LOGGING SETUP ---
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# --- MODEL SETTINGS (RAG REMOVED) ---
MODEL_REPO = "Qwen/Qwen2.5-3B-Instruct-GGUF"
MODEL_FILE = "qwen2.5-3b-instruct-q2_k.gguf"
# --- STEP 1: DOWNLOAD THE MODEL ---
logger.info(f"Downloading {MODEL_FILE} from the Hugging Face Hub...")
try:
    model_path = hf_hub_download(
        repo_id=MODEL_REPO,
        filename=MODEL_FILE
    )
    logger.info(f"Model downloaded successfully to {model_path}.")
except Exception as e:
    logger.error(f"Model download failed: {e}")
    raise
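# hf_hub_download stores the file in the default Hugging Face cache directory. If that
# location is not writable in the deployment environment, the call also accepts a
# cache_dir argument; the path below is only an illustrative example:
# model_path = hf_hub_download(repo_id=MODEL_REPO, filename=MODEL_FILE, cache_dir="/data/models")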
# --- STEP 2: LOAD THE MODEL ---
logger.info("Loading the GGUF model...")
try:
    llm = Llama(
        model_path=model_path,
        n_ctx=4096,        # context window size in tokens
        n_gpu_layers=0,    # 0 = no layers offloaded to a GPU
        verbose=True
    )
    logger.info("Model loaded successfully.")
except Exception as e:
    logger.error(f"Error while loading the model: {e}")
    raise
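# With n_gpu_layers=0, inference runs entirely on the CPU. llama-cpp-python also accepts
# an n_threads argument if the default thread count needs tuning; this is a sketch, not
# part of the original configuration:
# llm = Llama(model_path=model_path, n_ctx=4096, n_gpu_layers=0, n_threads=os.cpu_count(), verbose=True)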
# --- STEP 3: FastAPI APPLICATION ---
app = FastAPI(
    title="Qwen 2.5 API (Streaming - No RAG)",
    description="Streams only the Qwen 2.5 model (no RAG)."
)
# --- STEP 4: CORS (CROSS-ORIGIN) SETTINGS ---
origins = [
    "https://deede.tr",
    "http://deede.tr",
    "*"  # allow all origins (the simplest option)
]
app.add_middleware(
    CORSMiddleware,
    allow_origins=origins,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
# --- END OF CORS SETUP ---
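# Note: including "*" alongside the explicit deede.tr origins effectively allows every
# origin. Per the FastAPI documentation, a wildcard origin cannot be combined with
# allow_credentials=True for credentialed requests; if cookies or auth headers are
# needed, list only the explicit origins above.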
class ChatRequest(BaseModel):
    prompt: str
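# The /api/chat endpoint expects a JSON body matching this model, e.g. {"prompt": "Merhaba"}.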
# --- STEP 5: SIMPLE STREAM GENERATOR (RAG REMOVED) ---
async def stream_generator(messages):
    """
    Streams only the tokens coming from the LLM.
    """
    try:
        stream = llm.create_chat_completion(
            messages=messages,
            max_tokens=1024,
            temperature=0.7,
            stream=True
        )
        for chunk in stream:
            # Chunks follow the OpenAI-style chat-completion delta format.
            content = chunk['choices'][0]['delta'].get('content', None)
            if content:
                yield content
    except Exception as e:
        logger.error(f"LLM stream error: {e}")
        yield f" [LLM STREAM ERROR: {e}]"
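# Note: create_chat_completion(stream=True) in llama-cpp-python yields chunks
# synchronously, so the event loop is blocked while each token is produced. For a
# single-user Space this is usually acceptable; handing StreamingResponse a plain
# (non-async) generator instead would let Starlette iterate it in a threadpool.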
@app.get("/")
def read_root():
    return {"status": "Streaming API is running (No RAG)", "model_repo": MODEL_REPO}
# --- STEP 6: CHAT ENDPOINT ---
@app.post("/api/chat")
async def chat_with_rag(request: ChatRequest):
    user_prompt = request.prompt
    logger.info(f"Incoming prompt: {user_prompt}")
    # --- BUILD THE LLM PROMPT (WITHOUT RAG CONTEXT) ---
    messages = [
        {
            "role": "system",
            # Turkish system prompt: "You are Qwen, an AI assistant created by Alibaba
            # Cloud. Answer the questions you are asked in a helpful way."
            "content": "Sen Qwen, Alibaba Cloud tarafından yaratılmış bir yapay zeka asistanısın. Sana sorulan sorulara yardımcı olacak şekilde cevap ver."
        },
        {
            "role": "user",
            "content": user_prompt
        }
    ]
    # --- START THE STREAM ---
    return StreamingResponse(
        stream_generator(messages),
        media_type="text/plain"  # we no longer send a source URL
    )
# --- STEP 7: START THE APPLICATION ---
if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)
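# A minimal sketch of calling the streaming endpoint from a terminal (assumes the app
# is reachable at localhost:7860; curl's -N flag disables output buffering so tokens
# appear as they arrive):
#
#   curl -N -X POST http://localhost:7860/api/chat \
#        -H "Content-Type: application/json" \
#        -d '{"prompt": "Merhaba, nasılsın?"}'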