# Source: ahzamkidwai — "Deploy model-service from GitHub" (commit df1ed9d)
from fastapi import FastAPI
from pydantic import BaseModel
from typing import List, Optional
from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline
import torch
# ----------------------------
# FastAPI Initialization
# ----------------------------
app = FastAPI(title="Resume NER Service", version="2.0")
# ----------------------------
# Load Model
# ----------------------------
# Fine-tuned BERT token-classification checkpoint pulled from the HF Hub.
MODEL_NAME = "yashpwr/resume-ner-bert-v2"
# NOTE: model download/load happens at import time, so the first start of the
# service blocks here (network + disk) before the server can accept requests.
print("Loading model... (this may take a minute)")
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForTokenClassification.from_pretrained(MODEL_NAME)
# Hugging Face pipeline (simple mode)
# aggregation_strategy="simple" merges word pieces into whole-word entity
# groups, so pipeline results carry `entity_group`/`word`/`score` keys.
ner_pipeline = pipeline(
    "token-classification",
    model=model,
    tokenizer=tokenizer,
    aggregation_strategy="simple"
)
print("Model loaded successfully!")
# ----------------------------
# Request & Response Schemas
# ----------------------------
class ResumeText(BaseModel):
    """Request body for the /extract endpoint."""

    # Raw resume text to run NER over.
    text: str
    # Entities scoring below this threshold are dropped from the response.
    confidence_threshold: float = 0.5
    # Extraction strategy: "simple" (HF pipeline) or anything else falls
    # through to the custom "advanced" decoder in the endpoint.
    mode: str = "simple"  # "simple" | "advanced"
class Entity(BaseModel):
    """One extracted entity with character offsets into the input text."""

    # Entity type as produced by the model (BIO prefix stripped in advanced
    # mode; pipeline `entity_group` in simple mode).
    label: str
    # The matched span of the input text.
    text: str
    # Start character offset (inclusive).
    start: int
    # End character offset (exclusive).
    end: int
    # Model confidence for this entity, in [0, 1].
    confidence: float
# ----------------------------
# Advanced Extraction Function
# ----------------------------
def extract_entities_with_confidence(text: str, confidence_threshold: float = 0.5):
    """Run token-level NER over *text* and merge BIO tags into entities.

    Args:
        text: Raw resume text to tag (truncated to 256 wordpiece tokens).
        confidence_threshold: Minimum confidence an entity must reach to be
            included in the result.

    Returns:
        List of dicts with keys ``label``, ``text``, ``start``, ``end`` and
        ``confidence``. ``start``/``end`` are character offsets into *text*;
        ``confidence`` is the minimum token confidence across the entity.
    """
    inputs = tokenizer(
        text,
        return_tensors="pt",
        truncation=True,
        max_length=256,
        padding=True,
        return_offsets_mapping=True,
    )
    # BUG FIX: the model's forward() does not accept `offset_mapping`, so it
    # must be removed from `inputs` before `model(**inputs)` — previously it
    # was left in, which raises a TypeError at inference time.
    offset_mapping = inputs.pop("offset_mapping")[0]

    with torch.no_grad():
        outputs = model(**inputs)

    predictions = torch.argmax(outputs.logits, dim=2)
    probabilities = torch.softmax(outputs.logits, dim=2)

    entities = []
    current_entity = None

    def _flush(entity):
        # Keep a finished entity only if it clears the confidence bar.
        if entity and entity["confidence"] >= confidence_threshold:
            entities.append(entity)

    for i, (pred, offset) in enumerate(zip(predictions[0], offset_mapping)):
        label = model.config.id2label[pred.item()]
        confidence = probabilities[0][i][pred].item()
        # BUG FIX: convert offsets to plain ints (they are tensor scalars).
        start, end = int(offset[0]), int(offset[1])

        # Special tokens ([CLS], [SEP], padding) carry a (0, 0) offset span.
        if start == 0 and end == 0:
            continue

        if label.startswith("B-"):
            _flush(current_entity)
            current_entity = {
                "label": label[2:],
                "text": text[start:end],
                "start": start,
                "end": end,
                "confidence": confidence,
            }
        elif label.startswith("I-") and current_entity:
            if label[2:] == current_entity["label"]:
                # BUG FIX: slice the original text via offsets instead of
                # joining token pieces with " " — wordpiece continuations
                # ("Jo" + "##hn") must not be space-separated, and the
                # offsets give the exact source span.
                current_entity["end"] = end
                current_entity["text"] = text[current_entity["start"]:end]
                current_entity["confidence"] = min(
                    current_entity["confidence"], confidence
                )
        elif label == "O":
            _flush(current_entity)
            current_entity = None

    # Don't drop an entity that runs to the end of the sequence.
    _flush(current_entity)
    return entities
# ----------------------------
# API Endpoint
# ----------------------------
@app.post("/extract", response_model=List[Entity])
def extract_entities(resume: ResumeText):
    """
    Extract entities from resume text.
    Mode = "simple" -> uses pipeline
    Mode = "advanced" -> custom extraction with confidence scores
    """
    if resume.mode != "simple":
        # Custom decoder: per-token confidences with exact character offsets.
        raw = extract_entities_with_confidence(
            resume.text, resume.confidence_threshold
        )
        return [Entity(**item) for item in raw]

    # Hugging Face pipeline path, filtered by the caller's threshold.
    found = []
    for hit in ner_pipeline(resume.text):
        if hit["score"] < resume.confidence_threshold:
            continue
        found.append(
            Entity(
                label=hit["entity_group"],
                text=hit["word"],
                start=hit["start"],
                end=hit["end"],
                confidence=hit["score"],
            )
        )
    return found
# ----------------------------
# Health Check
# ----------------------------
@app.get("/health")
def health_check():
    """Liveness probe: report service status and the model being served."""
    payload = {"status": "OK"}
    payload["model"] = MODEL_NAME
    return payload