| """ | |
| Defines a wrapper class of RobertaPreTrainedModel model to do regression on text data. | |
| Based on: https://www.kaggle.com/code/sumantindurkhya/bert-for-regression | |
| """ | |
| from typing import Optional, Tuple, Union | |
| from tqdm import tqdm, trange | |
| import numpy as np | |
| import torch | |
| import torch.nn.functional as F | |
| import torch.utils.checkpoint | |
| from torch import nn | |
| from torch.nn import BCEWithLogitsLoss, CrossEntropyLoss, MSELoss | |
| from transformers import BertModel, BertPreTrainedModel, RobertaPreTrainedModel, RobertaModel | |
class RobertaRegressor(RobertaPreTrainedModel):
    def __init__(self, config, num_outputs=1, dropout=0.1, freeze_bert=False):
        super().__init__(config)
        self.num_outputs = num_outputs
        self.roberta = RobertaModel(config)
        if freeze_bert:
            # freeze the roberta parameters so only the regression head is trained
            for param in self.roberta.parameters():
                param.requires_grad = False
        # regression head: pooled output -> 128 -> num_outputs
        self.classifier = nn.Linear(config.hidden_size, 128)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(dropout)
        self.tanh = nn.Tanh()
        self.regressor = nn.Linear(128, num_outputs)
    def forward(self, input_ids, attention_mask):
        # encode the text and take the pooled representation of the first token
        base_out = self.roberta(input_ids=input_ids, attention_mask=attention_mask)
        pooled = base_out.pooler_output
        out = self.classifier(pooled)
        out = self.dropout(out)
        out = self.relu(out)
        # note: applying tanh after relu restricts the activations to [0, 1)
        out = self.tanh(out)
        out = self.dropout(out)
        out = self.regressor(out)
        return out
    def predict(self, text: str, tokenizer, device, numpy=True) -> Union[np.ndarray, torch.Tensor]:
        # tokenize a single text and run it through the model without tracking gradients
        encoded = tokenizer.encode_plus(text, padding=True, truncation=True, return_tensors='pt')
        input_ids = encoded['input_ids'].to(device)
        attention_mask = encoded['attention_mask'].to(device)
        with torch.no_grad():
            output = self(input_ids, attention_mask).squeeze()
        if numpy:
            return output.cpu().numpy()
        return output
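
# Example usage for RobertaRegressor (a minimal sketch; the 'roberta-base'
# checkpoint and the sample text are illustrative assumptions):
#
#     from transformers import RobertaTokenizer
#
#     device = 'cuda' if torch.cuda.is_available() else 'cpu'
#     tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
#     model = RobertaRegressor.from_pretrained('roberta-base', num_outputs=1).to(device)
#     model.eval()
#     score = model.predict("An example sentence to score.", tokenizer, device)
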
class RobertaSeqMultiRegressor(RobertaPreTrainedModel):
    """
    A wrapper class around RobertaPreTrainedModel to do multi-output regression on text data.
    This models the task of predicting multiple outputs from a single text input.
    The problem is formulated sequentially: the model predicts each output
    conditioned on the previously predicted outputs.
    This approach suits problems where the outputs are correlated,
    such as probability distributions, where the outputs must sum to 1;
    it is analogous to predicting the next word in a sentence, where the
    model conditions on the words predicted so far.
    The model is similar to RobertaRegressor, except that the head is
    sequential: the outputs of the earlier heads are fed as extra inputs
    to the next head, similar to how an RNN works.
    """
    def __init__(self, config, num_outputs=1, dropout=0.1, freeze_bert=False):
        super().__init__(config)
        self.num_outputs = num_outputs
        self.roberta = RobertaModel(config)
        if freeze_bert:
            # freeze the roberta parameters so only the regression heads are trained
            for param in self.roberta.parameters():
                param.requires_grad = False
        # head i takes the pooled representation concatenated with the i
        # previously predicted values and produces a single output
        for i in range(num_outputs):
            setattr(self, f"classifier_{i}", nn.Linear(config.hidden_size + i, 128))
            setattr(self, f"regressor_{i}", nn.Linear(128, 1))
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(dropout)
        self.tanh = nn.Tanh()

    def forward(self, input_ids, attention_mask):
        # encode the text once, then predict the outputs one at a time,
        # conditioning each head on the values predicted so far
        base_out = self.roberta(input_ids=input_ids, attention_mask=attention_mask)
        pooled = base_out.pooler_output
        outputs = []
        for i in range(self.num_outputs):
            head_in = torch.cat([pooled] + outputs, dim=-1) if outputs else pooled
            out = getattr(self, f"classifier_{i}")(head_in)
            out = self.dropout(out)
            out = self.relu(out)
            out = self.tanh(out)
            out = getattr(self, f"regressor_{i}")(out)
            outputs.append(out)
        # shape: (batch_size, num_outputs)
        return torch.cat(outputs, dim=-1)
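
# Example usage for RobertaSeqMultiRegressor (a minimal sketch; the checkpoint
# name and input are illustrative assumptions):
#
#     from transformers import RobertaTokenizer
#
#     tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
#     model = RobertaSeqMultiRegressor.from_pretrained('roberta-base', num_outputs=4)
#     enc = tokenizer("some text", return_tensors='pt')
#     preds = model(enc['input_ids'], enc['attention_mask'])  # shape (1, 4)
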
def sum_diff_loss(output, target):
    # L1 distance between predictions and targets, summed over all elements
    return torch.sum(torch.abs(output - target))
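
# Example: sum_diff_loss(torch.tensor([0.2, 0.5]), torch.tensor([0.1, 0.7]))
# returns |0.2 - 0.1| + |0.5 - 0.7| ≈ 0.3
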
def evaluate(model, criterion, dataloader, device):
    # compute the mean loss over the dataloader without updating the model
    model.eval()
    mean_loss, count = 0.0, 0
    with torch.no_grad():
        for input_ids, attention_mask, target in dataloader:
            input_ids, attention_mask, target = input_ids.to(device), attention_mask.to(device), target.to(device)
            output = model(input_ids, attention_mask)
            mean_loss += criterion(output.squeeze(), target.type_as(output)).item()
            count += 1
    return mean_loss / count

def train(model, criterion, optimizer, train_loader, val_loader, epochs, device):
    for epoch in trange(epochs, desc="Epoch"):
        model.train()
        train_loss = 0.0
        for input_ids, attention_mask, target in train_loader:
            optimizer.zero_grad()
            input_ids, attention_mask, target = input_ids.to(device), attention_mask.to(device), target.to(device)
            output = model(input_ids=input_ids, attention_mask=attention_mask)
            loss = criterion(output.squeeze(), target.type_as(output))
            loss.backward()
            optimizer.step()
            train_loss += loss.item()
        print(f"Epoch {epoch} complete! Training loss: {train_loss / len(train_loader)}")
        val_loss = evaluate(model=model, criterion=criterion, dataloader=val_loader, device=device)
        print(f"Epoch {epoch} complete! Validation loss: {val_loss}")
def multi_reg_loss(loss='mse', sum_diff_penalty: float = 0.0):
    """
    Builds a loss function that penalizes the difference between the sums of the
    predicted and actual values for multi-output regression.
    This guides the model towards predictions where
    sum(y_hat1, y_hat2, ...) = sum(y1, y2, ...),
    e.g. in task d we have sum(label1, label2, label3, label4) = 1,
    since the labels form a probability distribution.

    Parameters
    ----------
    loss : str, optional
        The base loss function to use, by default 'mse'.
        Available options: 'mse' and 'cross_entropy',
        for mean squared error and cross entropy loss respectively.
    sum_diff_penalty : float, optional
        The weight applied to the squared difference between the sums of the
        predicted and actual values, by default 0.0 (no penalty).
    """
    if loss == 'mse':
        loss_func = F.mse_loss
    elif loss == 'cross_entropy':
        loss_func = F.cross_entropy
    else:
        raise ValueError("Invalid loss function. Available options: 'mse' and 'cross_entropy'")

    def reg_loss(input, target):
        # first compute the base loss
        base = loss_func(input, target)
        # then penalize the squared difference between the sums of the
        # predicted and actual values
        sum_diff = torch.square(torch.sum(input) - torch.sum(target))
        return base + sum_diff_penalty * sum_diff

    return reg_loss
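
# Example (a minimal sketch): an MSE loss with a sum-difference penalty,
# applied to a prediction that should sum to 1 but sums to 0.9:
#
#     criterion = multi_reg_loss(loss='mse', sum_diff_penalty=0.5)
#     pred = torch.tensor([0.1, 0.2, 0.3, 0.3])    # sums to 0.9
#     target = torch.tensor([0.1, 0.2, 0.3, 0.4])  # sums to 1.0
#     loss = criterion(pred, target)               # mse + 0.5 * (0.9 - 1.0) ** 2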