File size: 3,389 Bytes
56dc80a 42fcd4b 56dc80a 6df77de 56dc80a 764fd79 56dc80a 6df77de 56dc80a 764fd79 56dc80a 6df77de 56dc80a 764fd79 56dc80a 6df77de 764fd79 6df77de 56dc80a 6df77de 56dc80a 6df77de 9725557 6df77de 9725557 6df77de 9725557 6df77de 9725557 6df77de 9725557 56dc80a 6df77de 56dc80a 764fd79 6df77de 764fd79 ec6cfd9 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 |
import streamlit as st
import sacrebleu
from bert_score import score as bert_score
import jieba
# Function to calculate BLEU score
def calculate_bleu(translations, references):
    """Return the corpus-level BLEU score of ``translations``.

    ``references`` is one reference stream, parallel to ``translations``.
    It is wrapped in a list before being handed to
    ``sacrebleu.corpus_bleu``, i.e. it is passed as the only stream.
    """
    reference_streams = [references]
    bleu = sacrebleu.corpus_bleu(translations, reference_streams)
    return bleu.score
# Function to calculate TER score
def calculate_ter(translations, references):
    """Return the corpus-level TER score of ``translations``.

    ``references`` is one reference stream, parallel to ``translations``.
    It is wrapped in a list before being handed to
    ``sacrebleu.corpus_ter``, i.e. it is passed as the only stream.
    """
    reference_streams = [references]
    ter = sacrebleu.corpus_ter(translations, reference_streams)
    return ter.score
# Function to calculate CHRF score
def calculate_chrf(translations, references):
    """Return the corpus-level chrF score of ``translations``.

    ``references`` is one reference stream, parallel to ``translations``.
    It is wrapped in a list before being handed to
    ``sacrebleu.corpus_chrf``, i.e. it is passed as the only stream.
    """
    reference_streams = [references]
    chrf = sacrebleu.corpus_chrf(translations, reference_streams)
    return chrf.score
# Function to calculate BERTScore
def calculate_bertscore(translations, references, lang):
    """Return the mean BERTScore F1 over all translation/reference pairs.

    ``lang`` is forwarded unchanged as the ``lang`` argument of
    ``bert_score``. Precision and recall are computed by the library but
    only F1 is reported; the mean is returned as a plain Python number.
    """
    _precision, _recall, f1 = bert_score(translations, references, lang=lang)
    mean_f1 = f1.mean()
    return mean_f1.item()
# Streamlit app: page heading and the usage hint rendered beneath it.
page_title = "Machine Translation Quality Evaluation"
usage_hint = (
    "Input the translated text and the reference translation "
    "to compute BLEU, TER, CHRF, and BERTScore metrics."
)
st.title(page_title)
st.write(usage_hint)
# Supported languages as (display name, language code) pairs. The order here
# is the order in which the names appear in the mapping built from it.
_LANGUAGE_TABLE = (
    ("English", "en"),
    ("Chinese", "zh"),
    ("French", "fr"),
    ("German", "de"),
    ("Spanish", "es"),
    ("Russian", "ru"),
    ("Japanese", "ja"),
    ("Korean", "ko"),
    ("Arabic", "ar"),
    ("Italian", "it"),
    ("Dutch", "nl"),
    ("Portuguese", "pt"),
    ("Turkish", "tr"),
    ("Polish", "pl"),
    ("Czech", "cs"),
    ("Swedish", "sv"),
    ("Danish", "da"),
    ("Finnish", "fi"),
    ("Greek", "el"),
    ("Hungarian", "hu"),
    ("Indonesian", "id"),
    ("Norwegian", "no"),
    ("Romanian", "ro"),
    ("Thai", "th"),
    ("Vietnamese", "vi"),
    ("Hebrew", "he"),
    ("Hindi", "hi"),
    ("Bengali", "bn"),
    ("Tamil", "ta"),
    ("Urdu", "ur"),
    # Placeholder entry; "other" is not a real language code.
    ("Other", "other"),
)

# Display name -> language code.
languages = dict(_LANGUAGE_TABLE)
# Language selection
language_names = list(languages)
source_lang = st.selectbox("Select Source Language", language_names)
target_lang = st.selectbox("Select Target Language", language_names)


def _resolve_lang_code(selected, prompt):
    """Return the language code to use for the selected language name.

    A listed language maps straight to its code in ``languages``. When
    "Other" is selected, a text input labelled ``prompt`` is rendered
    (pre-filled with the placeholder code) and the typed value is returned
    with surrounding whitespace removed and lower-cased, so that input such
    as " ZH " compares equal to the lower-case codes used elsewhere.
    """
    code = languages[selected]
    if selected != "Other":
        return code
    typed = st.text_input(prompt, value=code)
    return typed.strip().lower()


# Input fields for custom language codes if "Other" is selected
source_lang_code = _resolve_lang_code(
    source_lang, "Enter Source Language Code (ISO 639-1):"
)
target_lang_code = _resolve_lang_code(
    target_lang, "Enter Target Language Code (ISO 639-1):"
)
# Input fields for translations and references; both boxes share one height.
text_area_height = 200
translation_input = st.text_area("Translated Text", height=text_area_height)
reference_input = st.text_area("Reference Translation", height=text_area_height)
# Evaluate button: compute and display all four metrics for the entered pair.
if st.button("Evaluate"):
    # Strip before the emptiness check so whitespace-only input is rejected
    # instead of being scored as an empty segment.
    hypothesis = translation_input.strip()
    reference = reference_input.strip()
    if hypothesis and reference:
        # Each metric helper takes parallel lists; this app scores one pair.
        translations = [hypothesis]
        references = [reference]
        # Handle tokenization if necessary (e.g., for Chinese).
        # Both the translation and the reference are written in the target
        # language, so only the target code decides whether to segment with
        # jieba. Keying on the source language as well would re-tokenise
        # non-Chinese text (e.g. zh -> en) and skew the scores.
        if target_lang_code == "zh":
            translations = [" ".join(jieba.cut(text)) for text in translations]
            references = [" ".join(jieba.cut(text)) for text in references]
        bleu_score = calculate_bleu(translations, references)
        ter_score = calculate_ter(translations, references)
        chrf_score = calculate_chrf(translations, references)
        bertscore = calculate_bertscore(translations, references, target_lang_code)
        st.write(f"**BLEU Score:** {bleu_score:.2f}")
        st.write(f"**TER Score:** {ter_score:.2f}")
        st.write(f"**CHRF Score:** {chrf_score:.2f}")
        st.write(f"**BERTScore:** {bertscore:.2f}")
    else:
        st.error("Please provide both translated text and reference translation.")