Update app.py
Browse files
app.py
CHANGED
|
@@ -43,15 +43,8 @@ async def root():
|
|
| 43 |
@app.get("/health")
|
| 44 |
async def health_check():
|
| 45 |
logger.info(f"Health check endpoint called at {time.time()}")
|
| 46 |
-
if not models_loaded:
|
| 47 |
-
logger.info("Returning 'loading' status")
|
| 48 |
-
return JSONResponse(
|
| 49 |
-
content={"status": "loading", "message": "Models are still loading"},
|
| 50 |
-
headers={"Cache-Control": "no-cache", "Connection": "keep-alive"}
|
| 51 |
-
)
|
| 52 |
-
logger.info("Returning 'healthy' status")
|
| 53 |
return JSONResponse(
|
| 54 |
-
content={"status": "healthy"},
|
| 55 |
headers={"Cache-Control": "no-cache", "Connection": "keep-alive"}
|
| 56 |
)
|
| 57 |
|
|
@@ -88,9 +81,9 @@ async def load_models():
|
|
| 88 |
logger.info(f"Loading Mistral model from {gguf_path}")
|
| 89 |
mistral = Llama(
|
| 90 |
model_path=gguf_path,
|
| 91 |
-
n_ctx=
|
| 92 |
-
n_threads=1, #
|
| 93 |
-
n_batch=
|
| 94 |
verbose=True
|
| 95 |
)
|
| 96 |
logger.info(f"Successfully loaded Mistral model from {gguf_path} in {time.time() - start_time} seconds")
|
|
@@ -154,7 +147,7 @@ if __name__ == "__main__":
|
|
| 154 |
port=8080,
|
| 155 |
log_level="info",
|
| 156 |
workers=1,
|
| 157 |
-
timeout_keep_alive=
|
| 158 |
-
limit_concurrency=
|
| 159 |
-
limit_max_requests=
|
| 160 |
)
|
|
|
|
| 43 |
@app.get("/health")
async def health_check():
    """Readiness probe: reports 'healthy' once models are loaded, else 'loading'.

    Always returns HTTP 200 with a no-cache header so load balancers
    re-poll instead of caching a stale status.
    """
    logger.info(f"Health check endpoint called at {time.time()}")
    # Single source of truth for the status string; models_loaded is the
    # module-level flag flipped by the model-loading routine.
    status = "healthy" if models_loaded else "loading"
    no_cache_headers = {"Cache-Control": "no-cache", "Connection": "keep-alive"}
    return JSONResponse(content={"status": status}, headers=no_cache_headers)
|
| 50 |
|
|
|
|
| 81 |
logger.info(f"Loading Mistral model from {gguf_path}")
|
| 82 |
mistral = Llama(
|
| 83 |
model_path=gguf_path,
|
| 84 |
+
n_ctx=512, # reduced n_ctx further to cut memory usage
|
| 85 |
+
n_threads=1, # just one thread
|
| 86 |
+
n_batch=128, # reduced n_batch further
|
| 87 |
verbose=True
|
| 88 |
)
|
| 89 |
logger.info(f"Successfully loaded Mistral model from {gguf_path} in {time.time() - start_time} seconds")
|
|
|
|
| 147 |
port=8080,
|
| 148 |
log_level="info",
|
| 149 |
workers=1,
|
| 150 |
+
timeout_keep_alive=15, # shorter keep-alive timeout
|
| 151 |
+
limit_concurrency=5, # further reduce concurrent connections
|
| 152 |
+
limit_max_requests=50 # further reduce the maximum request count
|
| 153 |
)
|