YAML Metadata
Warning:
empty or missing yaml metadata in repo card
(https://huggingface.co/docs/hub/model-cards#model-card-metadata)
ViCLIP - Vietnamese CLIP Text Encoder
This model is a Vietnamese adaptation of the CLIP text encoder, trained on Vietnamese data.
Model Description
- Text encoder based on PhoBERT
- Projection head to align with CLIP embedding space
- Optimized for Vietnamese text understanding
Usage
import torch
import torch.nn as nn
import torch.nn.functional as F
from huggingface_hub import hf_hub_download
from transformers import AutoModel, AutoTokenizer
class PhoCLIPTextModel(nn.Module):
    """Text encoder plus projection head loaded from ``kienhoang123/ViCLIP``.

    ``forward`` runs the encoder, takes the hidden state at sequence position 0
    and feeds it through ``text_proj``.
    """

    def __init__(self):
        super().__init__()
        # Load text encoder
        self.text_encoder = AutoModel.from_pretrained("kienhoang123/ViCLIP")
        # Download the checkpoint that carries the text projection head
        checkpoint_path = hf_hub_download(
            repo_id="kienhoang123/ViCLIP", filename="model.pt"
        )
        # map_location="cpu" lets a checkpoint saved from a GPU load on CPU-only
        # machines; load_state_dict() copies the values into this module's own
        # parameters afterwards, so the final weights are the same.
        # NOTE(review): torch.load unpickles the file -- only load checkpoints
        # you trust (consider weights_only=True where PyTorch supports it).
        state_dict = torch.load(checkpoint_path, map_location="cpu")
        # The projection head has to exist before load_state_dict() can fill
        # it and before forward() can call it.
        self.text_proj = self._build_text_proj(state_dict)
        self.load_state_dict(state_dict)

    @staticmethod
    def _build_text_proj(state_dict):
        """Create an ``nn.Linear`` sized to match ``text_proj.weight``.

        NOTE(review): assumes the projection head is a single linear layer
        stored under the ``text_proj.`` prefix -- confirm against the training
        code. Any other layout is rejected instead of being guessed at.

        Args:
            state_dict: mapping of parameter names to tensors.

        Returns:
            An ``nn.Linear`` whose weight shape equals that of
            ``state_dict["text_proj.weight"]``; it has a bias only when
            ``text_proj.bias`` is present.

        Raises:
            ValueError: if there is no 2-D ``text_proj.weight`` entry.
        """
        weight = state_dict.get("text_proj.weight")
        if weight is None or weight.dim() != 2:
            found = sorted(k for k in state_dict if k.startswith("text_proj."))
            raise ValueError(
                "cannot build text_proj: expected a 2-D 'text_proj.weight' "
                f"entry in the checkpoint, found text_proj keys: {found}"
            )
        out_features, in_features = weight.shape
        return nn.Linear(
            in_features, out_features, bias="text_proj.bias" in state_dict
        )

    def forward(self, input_ids, attention_mask=None):
        """Return the projected embedding for each sequence in the batch.

        Args:
            input_ids: token ids passed straight to the text encoder.
            attention_mask: optional mask passed straight to the text encoder.

        Returns:
            The output of ``text_proj`` applied to the encoder's hidden state
            at sequence position 0 (not normalized).
        """
        # Get text embeddings
        text_outputs = self.text_encoder(
            input_ids=input_ids,
            attention_mask=attention_mask,
            return_dict=True,
        )
        # Use the first token's hidden state as the sentence representation
        text_cls = text_outputs.last_hidden_state[:, 0, :]
        text_proj = self.text_proj(text_cls)
        return text_proj
# Load the tokenizer from the same repo as the model
tokenizer = AutoTokenizer.from_pretrained("kienhoang123/ViCLIP")

# Tokenize one sentence, padded/truncated to a fixed length of 77
text = "This is an example Vietnamese text"
inputs = tokenizer(
    text,
    max_length=77,
    padding="max_length",
    truncation=True,
    return_tensors="pt",
)

# Build the model and switch it to evaluation mode
model = PhoCLIPTextModel().eval()

# Inference only, so skip gradient tracking
with torch.no_grad():
    embedding = model(inputs["input_ids"], inputs["attention_mask"])
    # L2-normalize along the last dimension
    normalized_embedding = F.normalize(embedding, p=2, dim=-1)
- Downloads last month
- 6
Inference Providers
NEW
This model isn't deployed by any Inference Provider.
🙋
Ask for provider support