Spaces:

xset
/

aibot

Sleeping

App Files Files Community

xset committed on May 18

Commit

0d25792

1 Parent(s): 29209e8

Тыц

Browse files

Files changed (3) hide show

Dockerfile +19 -0
main.py +57 -0
requirements.txt +8 -0

Dockerfile ADDED Viewed

	@@ -0,0 +1,19 @@

+# Container image for the FastAPI text-generation service in main.py.
+FROM python:3.9-slim
+WORKDIR /app
+# Install git and a C/C++ build toolchain, then clean the apt caches in the
+# same RUN step so they are not kept in the resulting layer.
+RUN apt-get update && \
+apt-get install -y --no-install-recommends git g++ make && \
+apt-get clean && \
+rm -rf /var/lib/apt/lists/*
+# requirements.txt is copied and installed before main.py, so editing only the
+# application code does not invalidate the dependency layer.
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+COPY main.py .
+# Point the Hugging Face cache at a directory under /tmp.
+ENV HF_HOME=/tmp/huggingface-cache
+# Turn off parallelism in the tokenizers library.
+ENV TOKENIZERS_PARALLELISM=false
+# The service listens on port 7860 (same port as in the uvicorn command below).
+EXPOSE 7860
+CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]

main.py ADDED Viewed

	@@ -0,0 +1,57 @@

+from fastapi import FastAPI, HTTPException
+from pydantic import BaseModel
+from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
+import torch
+import numpy as np
+# Проверка версии NumPy
+assert np.__version__.startswith('1.'), f"Несовместимая версия NumPy: {np.__version__}"
+app = FastAPI()
+class RequestData(BaseModel):
+    """Request body accepted by the POST /generate endpoint."""
+    # Text handed to the generation pipeline as the prompt.
+    prompt: str
+    # Requested number of new tokens; the /generate handler caps it at 100.
+    max_tokens: int = 50
+MODEL_NAME = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
+try:
+    # Загрузка модели с явным указанием device_map
+    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+    model = AutoModelForCausalLM.from_pretrained(
+        MODEL_NAME,
+        torch_dtype=torch.float32,
+        device_map="auto",
+        low_cpu_mem_usage=True
+    )
+    # Создаем pipeline без указания device
+    generator = pipeline(
+        "text-generation",
+        model=model,
+        tokenizer=tokenizer
+    )
+except Exception as e:
+    print(f"Ошибка загрузки модели: {str(e)}")
+    generator = None
+@app.post("/generate")
+async def generate_text(request: RequestData):
+    if not generator:
+        raise HTTPException(status_code=503, detail="Модель не загружена")
+    try:
+        output = generator(
+            request.prompt,
+            max_new_tokens=min(request.max_tokens, 100),
+            do_sample=False,
+            num_beams=1,
+            temperature=0.7,
+        )
+        return {"response": output[0]["generated_text"]}
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+@app.get("/health")
+async def health_check():
+    return {"status": "ok" if generator else "unavailable"}

requirements.txt ADDED Viewed

	@@ -0,0 +1,8 @@

+# Pinned runtime dependencies for main.py.
+# The PyTorch CPU wheel index is declared on its own line: pip treats index
+# options as file-wide settings and does not apply them when they are appended
+# to a single requirement line. --extra-index-url (rather than --index-url)
+# keeps PyPI available for every other package listed here.
+--extra-index-url https://download.pytorch.org/whl/cpu
+fastapi==0.109.0
+uvicorn==0.27.0
+torch==2.2.1
+transformers==4.40.2
+accelerate==0.29.3
+sentencepiece==0.2.0
+numpy==1.26.4
+protobuf==3.20.3