```python
import pandas as pd
from datasets import Dataset
from transformers import (
    AutoModelForTokenClassification,
    AutoTokenizer,
    Trainer,
    TrainingArguments,
    DataCollatorForTokenClassification,
)
from huggingface_hub import notebook_login

# Login to Hugging Face Hub (make sure your Space is set to private if needed)
notebook_login()

# Step 1: Load luxury fashion dataset (replace with actual dataset file)
df = pd.read_csv("luxury_apparel_data.csv")
df = df[['brand', 'category', 'description', 'price']].dropna()

# Step 2: Generate labelled search queries. Every query follows the fixed
# template "{brand} {category} under {price} AED", so word-level NER tags can
# be derived directly from it. Without a "labels" column the Trainer cannot
# compute a loss and training fails. BRAND/CATEGORY/PRICE is one possible
# tag scheme; adjust it to your data.
label_list = ["O", "B-BRAND", "I-BRAND", "B-CATEGORY", "I-CATEGORY", "B-PRICE"]
label2id = {label: i for i, label in enumerate(label_list)}
id2label = {i: label for label, i in label2id.items()}

def build_example(row):
    brand = str(row['brand']).split()
    category = str(row['category']).split()
    words = brand + category + ["under", str(row['price']), "AED"]
    tags = (["B-BRAND"] + ["I-BRAND"] * (len(brand) - 1)
            + ["B-CATEGORY"] + ["I-CATEGORY"] * (len(category) - 1)
            + ["O", "B-PRICE", "O"])
    return {"words": words, "ner_tags": [label2id[t] for t in tags]}

hf_dataset = Dataset.from_list([build_example(row) for _, row in df.iterrows()])

# Step 3: Tokenization — align the word-level tags with BERT's sub-word tokens
model_name = "dslim/bert-base-NER"
tokenizer = AutoTokenizer.from_pretrained(model_name)

def tokenize_and_align_labels(batch):
    tokenized = tokenizer(batch['words'], truncation=True, is_split_into_words=True)
    labels = []
    for i, word_tags in enumerate(batch['ner_tags']):
        previous, label_ids = None, []
        for word_id in tokenized.word_ids(batch_index=i):
            if word_id is None:
                label_ids.append(-100)   # special tokens: ignored by the loss
            elif word_id != previous:
                label_ids.append(word_tags[word_id])
            else:
                label_ids.append(-100)   # label only the first sub-token of a word
            previous = word_id
        labels.append(label_ids)
    tokenized['labels'] = labels
    return tokenized

hf_dataset = hf_dataset.map(tokenize_and_align_labels, batched=True,
                            remove_columns=hf_dataset.column_names)
splits = hf_dataset.train_test_split(test_size=0.1)  # hold out 10% for evaluation

# Step 4: Fine-tune the pretrained NER model. The classification head is
# re-initialised because our label set differs from the checkpoint's.
model = AutoModelForTokenClassification.from_pretrained(
    model_name,
    num_labels=len(label_list),
    id2label=id2label,
    label2id=label2id,
    ignore_mismatched_sizes=True,
)

training_args = TrainingArguments(
    output_dir="./luxury_ner_model",
    evaluation_strategy="epoch",
    save_strategy="epoch",
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=3,
    logging_dir="./logs",
    logging_steps=500,
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=splits['train'],
    eval_dataset=splits['test'],
    tokenizer=tokenizer,
    data_collator=DataCollatorForTokenClassification(tokenizer),
)

trainer.train()

# Save model and tokenizer to the Hugging Face Hub
model.push_to_hub("luxury-fashion-ner")
tokenizer.push_to_hub("luxury-fashion-ner")
```
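
After training, a quick sanity check helps confirm the model tags queries as intended. Below is a minimal inference sketch using the `pipeline` API; the repo id `<username>/luxury-fashion-ner` is a placeholder for wherever `push_to_hub` published the model, and the printed output is illustrative, not actual model output.

```python
from transformers import pipeline

# Hypothetical repo id — replace <username> with the account push_to_hub used.
ner = pipeline(
    "token-classification",
    model="<username>/luxury-fashion-ner",
    aggregation_strategy="simple",  # merge sub-tokens into whole-entity spans
)

print(ner("Gucci handbags under 5000 AED"))
# Illustrative output shape (scores omitted):
# [{'entity_group': 'BRAND', 'word': 'Gucci', ...},
#  {'entity_group': 'CATEGORY', 'word': 'handbags', ...},
#  {'entity_group': 'PRICE', 'word': '5000', ...}]
```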