import logging

from fastapi import FastAPI
from fastapi.responses import StreamingResponse
# --- IMPORT FOR THE NEW CORS PERMISSION ---
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
from llama_cpp import Llama
from huggingface_hub import hf_hub_download

# --- LOGGING SETUP ---
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# --- MODEL SETTINGS (RAG REMOVED) ---
MODEL_REPO = "Qwen/Qwen2.5-3B-Instruct-GGUF"
MODEL_FILE = "qwen2.5-3b-instruct-q2_k.gguf"

# --- STEP 1: DOWNLOAD THE MODEL ---
logger.info(f"Downloading {MODEL_FILE} from the Hugging Face Hub...")
try:
    model_path = hf_hub_download(
        repo_id=MODEL_REPO,
        filename=MODEL_FILE
    )
    logger.info(f"Model downloaded successfully to {model_path}.")
except Exception as e:
    logger.error(f"Model download failed: {e}")
    raise

# --- STEP 2: LOAD THE MODEL ---
logger.info("Loading the GGUF model...")
try:
    llm = Llama(
        model_path=model_path,
        n_ctx=4096,
        n_gpu_layers=0,  # CPU-only inference
        verbose=True
    )
    logger.info("Model loaded successfully.")
except Exception as e:
    logger.error(f"Error while loading the model: {e}")
    raise

# --- STEP 3: FastAPI APPLICATION ---
app = FastAPI(
    title="Qwen 2.5 API (Streaming - No RAG)",
    description="Streams responses from the Qwen 2.5 model only."
)

# --- STEP 4: CORS (CROSS-ORIGIN) SETTINGS ---
origins = [
    "https://deede.tr",
    "http://deede.tr",
    "*"  # Allow all origins (easiest); this makes the explicit entries above redundant
]

app.add_middleware(
    CORSMiddleware,
    allow_origins=origins,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
# --- END OF CORS SETUP ---


class ChatRequest(BaseModel):
    prompt: str


# --- STEP 5: SIMPLE STREAM GENERATOR (RAG REMOVED) ---
async def stream_generator(messages):
    """
    Streams only the tokens coming from the LLM.
    Note: llama_cpp streaming is synchronous, so this loop blocks the event
    loop while tokens are being generated.
    """
    try:
        stream = llm.create_chat_completion(
            messages=messages,
            max_tokens=1024,
            temperature=0.7,
            stream=True
        )
        for chunk in stream:
            content = chunk['choices'][0]['delta'].get('content', None)
            if content:
                yield content
    except Exception as e:
        logger.error(f"LLM stream error: {e}")
        yield f" [LLM STREAM ERROR: {e}]"


@app.get("/")
def read_root():
    return {"status": "Streaming API is running (No RAG)", "model_repo": MODEL_REPO}


# --- STEP 6: CHAT ENDPOINT ---
@app.post("/api/chat")
async def chat(request: ChatRequest):
    user_prompt = request.prompt
    logger.info(f"Incoming prompt: {user_prompt}")

    # --- BUILD THE LLM PROMPT (WITHOUT RAG CONTEXT) ---
    messages = [
        {
            "role": "system",
            "content": "You are Qwen, an AI assistant created by Alibaba Cloud. Answer the questions you are asked in a helpful way."
        },
        {
            "role": "user",
            "content": user_prompt
        }
    ]

    # --- START THE STREAM ---
    return StreamingResponse(
        stream_generator(messages),
        media_type="text/plain"
        # We no longer send the source URL
    )


# --- STEP 7: START THE APPLICATION ---
if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)
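
# --- USAGE SKETCH (illustrative only, not part of the app) ---
# A minimal client for the /api/chat streaming endpoint, assuming the server
# is running locally on the default port set above and that the `requests`
# package is installed. The prompt text is made up for illustration; run this
# from a separate file or shell session, not from this module.
#
#   import requests
#
#   with requests.post(
#       "http://localhost:7860/api/chat",
#       json={"prompt": "Introduce yourself in one sentence."},
#       stream=True,
#   ) as resp:
#       resp.raise_for_status()
#       # Print tokens as they arrive instead of waiting for the full reply.
#       for chunk in resp.iter_content(chunk_size=None, decode_unicode=True):
#           print(chunk, end="", flush=True)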