import os
import json

import gradio as gr
import numpy as np

# ==============================
# 1) CONFIG
# ==============================
MODEL_DIR = "rubert_tiny2_toxic_minprep"
MAX_LEN = 256
DEFAULT_THRESHOLD = 0.65

# Best-effort override of the default decision threshold from the exported
# inference config; a missing or corrupt file silently keeps the default.
cfg_path = os.path.join(MODEL_DIR, "inference_config.json")
try:
    if os.path.exists(cfg_path):
        with open(cfg_path, "r", encoding="utf-8") as f:
            DEFAULT_THRESHOLD = float(
                json.load(f).get("threshold_val", DEFAULT_THRESHOLD)
            )
except Exception:
    # Deliberate best-effort: the app must still start without the config.
    pass

# ==============================
# 2) MODEL LOADING
# ==============================
# Global holder so UI code can check TRANSFORMER["loaded"] instead of
# crashing when the checkpoint (or torch/transformers itself) is unavailable.
TRANSFORMER = {"model": None, "tokenizer": None, "device": "cpu", "loaded": False}
try:
    import torch
    from transformers import AutoTokenizer, AutoModelForSequenceClassification

    device = "cuda" if torch.cuda.is_available() else "cpu"
    tokenizer = AutoTokenizer.from_pretrained(MODEL_DIR, local_files_only=True)
    model = AutoModelForSequenceClassification.from_pretrained(
        MODEL_DIR, local_files_only=True
    )
    model.to(device).eval()
    TRANSFORMER.update({
        "model": model,
        "tokenizer": tokenizer,
        "device": device,
        "loaded": True,
    })
    print(f"[INFO] Модель загружена из: {MODEL_DIR} | device={device}")
except Exception as e:
    print(f"[WARN] Не удалось загрузить модель из '{MODEL_DIR}': {e}")

# ==============================
# 3) INFERENCE
# ==============================
def infer(comment: str, threshold: float):
    """Classify a comment as toxic / non-toxic.

    Args:
        comment: raw user text; None/blank is treated as "no input".
        threshold: probability cut-off for the "Токсичный" verdict.

    Returns:
        (verdict, distribution) where distribution maps the two Russian
        labels to probabilities. For blank input or an unloaded model the
        distribution defaults to fully non-toxic.
    """
    text = (comment or "").strip()
    if not text:
        return "—", {"Токсичный": 0.0, "Не токсичный": 1.0}
    if not TRANSFORMER["loaded"]:
        return "⚠️ Модель не загружена", {"Токсичный": 0.0, "Не токсичный": 1.0}

    import torch  # only reachable when the model (and torch) loaded above

    tok = TRANSFORMER["tokenizer"](
        text, return_tensors="pt", truncation=True, max_length=MAX_LEN
    )
    tok = {k: v.to(TRANSFORMER["device"]) for k, v in tok.items()}
    with torch.inference_mode():
        logits = TRANSFORMER["model"](**tok).logits
        # Class index 1 is the "toxic" logit — TODO confirm against training label order.
        p_toxic = float(torch.softmax(logits, dim=1)[0, 1].detach().cpu().item())

    verdict = "Токсичный" if p_toxic >= threshold else "Не токсичный"
    dist = {"Токсичный": p_toxic, "Не токсичный": 1.0 - p_toxic}
    return verdict, dist

# ==============================
# 4) UI WRAPPERS
# ==============================
def predict_for_ui(comment: str, threshold: float):
    """Call infer() and format the result as an HTML card for the UI.

    NOTE(review): the original HTML markup was lost when this chunk was
    extracted; only the dynamic fields (labels and the ``{p_toxic:.1%}``
    probability) were recovered. The markup below is a minimal
    reconstruction — confirm it against the deployed version.
    """
    verdict, dist = infer(comment, threshold)
    p_toxic = dist["Токсичный"]
    if verdict == "Токсичный":
        return f"""<div class="result result-toxic">
  <div class="verdict">Токсичный</div>
  <div class="prob">Вероятность: {p_toxic:.1%}</div>
</div>"""
    if verdict == "Не токсичный":
        return f"""<div class="result result-ok">
  <div class="verdict">Не токсичный</div>
  <div class="prob">Вероятность: {p_toxic:.1%}</div>
</div>"""
    # Fallback (blank input or model not loaded): show the verdict text as-is.
    return f"""<div class="result result-neutral">{verdict}</div>"""
—
{DESCRIPTION}
—