Spaces:

nlpUc3mStudents
/

DepNLP-MentalRiskEs

Sleeping

DepNLP-MentalRiskEs / src /berta_finetuning.py

made simple functional streamlit app to host the model

1eba40c over 2 years ago

955 Bytes

	from transformers import pipeline
	from transformers import AutoTokenizer, AutoModelForSequenceClassification
	from datasets import Dataset, load_dataset#, Features, Value, ClassLabe

	# Fetch the dataset from the Hugging Face Hub; the 'train' and 'test'
	# splits are used below.
	ds = load_dataset('nlpUc3mStudents/mental-risk-c')

	# Convert both splits to pandas. Only the train split is aggregated
	# below; test_df stays as the raw per-row frame.
	train_df = ds['train'].to_pandas()
	test_df = ds['test'].to_pandas()

	# Every column from position 4 onwards is treated as a label column.
	# NOTE(review): assumes the first four columns are non-label fields
	# (subject_id, message, ...) -- confirm against the dataset schema.
	label_names = train_df.columns[4:].tolist()

	# Collapse the train split to one row per subject: all of a subject's
	# messages are joined into a single string and the first value of each
	# label column is kept.
	# The separator is a plain ' | ' -- a pipe needs no backslash in a normal
	# string literal, and writing one is an invalid escape sequence that also
	# leaks a literal backslash into the joined text.
	train_by_subjectid = (
	    train_df.groupby('subject_id')
	    .agg({
	        'message': lambda msgs: ' | '.join(msgs),
	        **{col: 'first' for col in label_names},
	    })
	    .reset_index()
	)

	# Back to a datasets.Dataset. Note that this rebinds train_df from the
	# pandas frame to the aggregated Dataset.
	train_df = Dataset.from_pandas(train_by_subjectid)

	# Hub identifier of the checkpoint used as the starting point.
	model_name = "hackathon-somos-nlp-2023/roberta-base-bne-finetuned-suicide-es"

	# Tokenizer loaded from the same checkpoint.
	tokenizer = AutoTokenizer.from_pretrained(model_name)

	# Placeholder: per the original note, the checkpoint was trained with
	# 2 labels while this task needs 4, so the classification head still has
	# to be replaced. No model is loaded yet.
	model = None