Spaces:
Runtime error
Runtime error
| from fastapi import FastAPI | |
| from pydantic import BaseModel | |
| from typing import List, Optional | |
| from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline | |
| import torch | |
| # ---------------------------- | |
| # FastAPI Initialization | |
| # ---------------------------- | |
# Application object for the service; title and version are the metadata
# handed to FastAPI.
app = FastAPI(
    title="Resume NER Service",
    version="2.0",
)
| # ---------------------------- | |
| # Load Model | |
| # ---------------------------- | |
# Hub identifier of the checkpoint; used for both the tokenizer and the model.
MODEL_NAME = "yashpwr/resume-ner-bert-v2"

# Loading happens once, at import time, so the objects below are shared by
# every call in this module.
print("Loading model... (this may take a minute)")
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForTokenClassification.from_pretrained(MODEL_NAME)

# Hugging Face pipeline (simple mode): wraps the same model/tokenizer pair and
# groups token predictions with the "simple" aggregation strategy.
ner_pipeline = pipeline(
    task="token-classification",
    model=model,
    tokenizer=tokenizer,
    aggregation_strategy="simple",
)
print("Model loaded successfully!")
| # ---------------------------- | |
| # Request & Response Schemas | |
| # ---------------------------- | |
class ResumeText(BaseModel):
    """Input schema for an entity-extraction request."""

    # Text to run entity extraction on.
    text: str

    # Minimum score an entity needs in order to be returned.
    confidence_threshold: float = 0.5

    # "simple" | "advanced"
    mode: str = "simple"
class Entity(BaseModel):
    """One extracted entity as returned by the extraction functions."""

    # Entity type (e.g. the pipeline's ``entity_group`` or the BIO tag with
    # its "B-"/"I-" prefix removed).
    label: str

    # Surface text of the entity.
    text: str

    # Start and end offsets of the entity as reported by the extractor.
    start: int
    end: int

    # Score attached to the entity by the extractor.
    confidence: float
| # ---------------------------- | |
| # Advanced Extraction Function | |
| # ---------------------------- | |
def extract_entities_with_confidence(
    text: str, confidence_threshold: float = 0.5
) -> List[dict]:
    """Run the token-classification model on ``text`` and merge BIO tags.

    Tokens are labelled with the arg-max class of the model output. A ``B-X``
    token opens a new entity of type ``X``; following ``I-X`` tokens of the
    same type extend it; an ``O`` token closes it. ``I-`` tokens that do not
    match the open entity (or arrive when none is open) are ignored and leave
    the open entity untouched. An entity's confidence is the minimum softmax
    probability over its tokens.

    Args:
        text: Input text. Tokenization truncates to 256 tokens, so anything
            beyond that is not analysed.
        confidence_threshold: Entities whose confidence is below this value
            are dropped.

    Returns:
        A list of dicts with keys ``label``, ``text``, ``start``, ``end`` and
        ``confidence``. ``start``/``end`` are plain ``int`` character offsets
        into ``text`` and ``text`` is exactly ``text[start:end]``.
    """
    encoded = tokenizer(
        text,
        return_tensors="pt",
        truncation=True,
        max_length=256,
        padding=True,
        return_offsets_mapping=True,
    )
    # The offset mapping is tokenizer metadata, not a model input. It must be
    # removed before the forward pass, otherwise ``model(**encoded)`` receives
    # an unexpected ``offset_mapping`` keyword argument. ``tolist()`` yields
    # plain Python ints so the returned offsets are not 0-dim tensors.
    offsets = encoded.pop("offset_mapping")[0].tolist()

    with torch.no_grad():
        logits = model(**encoded).logits

    # Single sequence in the batch: take row 0, then best class per token.
    probabilities = torch.softmax(logits, dim=2)[0]
    best_scores, best_ids = probabilities.max(dim=1)
    token_scores = best_scores.tolist()
    token_label_ids = best_ids.tolist()

    def passes_threshold(entity):
        return entity is not None and entity["confidence"] >= confidence_threshold

    id2label = model.config.id2label
    entities = []
    current = None

    for (start, end), label_id, score in zip(offsets, token_label_ids, token_scores):
        # Special tokens ([CLS], [SEP], padding) carry an empty (0, 0) span.
        if start == 0 and end == 0:
            continue

        label = id2label[label_id]

        if label.startswith("B-"):
            if passes_threshold(current):
                entities.append(current)
            current = {
                "label": label[2:],
                "text": text[start:end],
                "start": start,
                "end": end,
                "confidence": score,
            }
        elif label.startswith("I-"):
            if current is not None and label[2:] == current["label"]:
                current["end"] = end
                # Re-slice the source text instead of joining token pieces
                # with spaces: sub-word tokens would otherwise be split apart
                # and the text would no longer match the reported offsets.
                current["text"] = text[current["start"]:end]
                current["confidence"] = min(current["confidence"], score)
        elif label == "O":
            if passes_threshold(current):
                entities.append(current)
            current = None

    # Flush an entity that runs up to the end of the sequence.
    if passes_threshold(current):
        entities.append(current)

    return entities
| # ---------------------------- | |
| # API Endpoint | |
| # ---------------------------- | |
# NOTE(review): no route decorator (e.g. ``@app.post(...)``) is visible on this
# function in the reviewed source, so as shown it is not registered with
# ``app`` — confirm whether a decorator was lost.
def extract_entities(resume: ResumeText):
    """Return the entities found in ``resume.text``.

    ``resume.mode == "simple"`` runs the Hugging Face pipeline and keeps the
    groups whose score reaches ``resume.confidence_threshold``. Any other mode
    value goes through ``extract_entities_with_confidence`` (the "advanced"
    path), which applies the threshold itself.
    """
    threshold = resume.confidence_threshold

    if resume.mode != "simple":
        advanced_hits = extract_entities_with_confidence(resume.text, threshold)
        return [Entity(**fields) for fields in advanced_hits]

    kept = []
    for group in ner_pipeline(resume.text):
        if group["score"] >= threshold:
            kept.append(
                Entity(
                    label=group["entity_group"],
                    text=group["word"],
                    start=group["start"],
                    end=group["end"],
                    confidence=group["score"],
                )
            )
    return kept
| # ---------------------------- | |
| # Health Check | |
| # ---------------------------- | |
# NOTE(review): no route decorator (e.g. ``@app.get(...)``) is visible on this
# function in the reviewed source — confirm it is registered with ``app``.
def health_check():
    """Return a fixed status payload that names the configured model."""
    payload = {"status": "OK", "model": MODEL_NAME}
    return payload