xset committed on
Commit
0d25792
·
1 Parent(s): 29209e8
Files changed (3) hide show
  1. Dockerfile +19 -0
  2. main.py +57 -0
  3. requirements.txt +8 -0
Dockerfile ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Slim Python 3.9 base image.
FROM python:3.9-slim

WORKDIR /app

# Install git and the C++ build tooling, then drop the apt caches in the
# same layer so they do not end up in the image.
RUN apt-get update \
    && apt-get install -y --no-install-recommends git g++ make \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# Dependencies are installed before the application code is copied so this
# layer can be reused when only main.py changes.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

COPY main.py .

# Hugging Face cache directory under /tmp; tokenizers parallelism disabled.
ENV HF_HOME=/tmp/huggingface-cache \
    TOKENIZERS_PARALLELISM=false

# uvicorn serves the FastAPI app on all interfaces, port 7860.
EXPOSE 7860
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
main.py ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, HTTPException
2
+ from pydantic import BaseModel
3
+ from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
4
+ import torch
5
+ import numpy as np
6
+
7
# Fail fast when an incompatible NumPy major version is installed.
# An explicit raise is used instead of `assert` so the guard still runs
# when Python is started with -O (which strips assert statements).
if not np.__version__.startswith('1.'):
    raise RuntimeError(f"Несовместимая версия NumPy: {np.__version__}")

# ASGI application object; the route decorators below register on it.
app = FastAPI()
11
+
12
class RequestData(BaseModel):
    """Request body for the text-generation endpoint."""

    # Text handed to the generation pipeline.
    prompt: str
    # Requested number of new tokens; 50 when the client omits it.
    max_tokens: int = 50
15
+
16
MODEL_NAME = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

# Stays None when loading fails, so the module still imports and the
# endpoints can report that the model is unavailable.
generator = None
try:
    # Load the tokenizer and the model; device placement is delegated to
    # device_map="auto".
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    load_options = {
        "torch_dtype": torch.float32,
        "device_map": "auto",
        "low_cpu_mem_usage": True,
    }
    model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, **load_options)

    # Build the pipeline without passing a device argument.
    generator = pipeline("text-generation", model=model, tokenizer=tokenizer)
except Exception as load_error:
    # Best-effort startup: log the failure and keep serving without a model.
    print(f"Ошибка загрузки модели: {load_error!s}")
37
+
38
@app.post("/generate")
async def generate_text(request: RequestData):
    """Generate a greedy completion for ``request.prompt``.

    Args:
        request: Parsed request body carrying the prompt and the requested
            number of new tokens.

    Returns:
        A dict ``{"response": <generated text>}`` taken from the first
        pipeline result.

    Raises:
        HTTPException: 503 when the model was not loaded at startup,
            500 when generation raises.
    """
    if generator is None:
        raise HTTPException(status_code=503, detail="Модель не загружена")

    # Clamp the budget to [1, 100]: the upper bound caps the cost of a single
    # request, the lower bound keeps zero/negative values from reaching
    # generate() and surfacing as a 500.
    new_token_budget = max(1, min(request.max_tokens, 100))

    try:
        # Greedy decoding. `temperature` is intentionally not passed: it is
        # only used when do_sample=True, and combining it with
        # do_sample=False makes transformers emit a warning.
        # NOTE(review): this call is synchronous and runs inside an async
        # handler, so other requests wait while it executes.
        output = generator(
            request.prompt,
            max_new_tokens=new_token_budget,
            do_sample=False,
            num_beams=1,
        )
        return {"response": output[0]["generated_text"]}
    except Exception as exc:
        # NOTE(review): the raw exception text is returned to the client.
        raise HTTPException(status_code=500, detail=str(exc)) from exc
54
+
55
@app.get("/health")
async def health_check():
    """Report whether the text-generation pipeline is available."""
    if generator:
        status = "ok"
    else:
        status = "unavailable"
    return {"status": status}
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
fastapi==0.109.0
uvicorn==0.27.0
# Index options are global pip options and belong on their own line;
# appended to a requirement line they are not applied to that requirement.
# --extra-index-url keeps PyPI available for the other packages.
--extra-index-url https://download.pytorch.org/whl/cpu
torch==2.2.1
transformers==4.40.2
accelerate==0.29.3
sentencepiece==0.2.0
numpy==1.26.4
protobuf==3.20.3