Update app.py
app.py CHANGED

@@ -4,7 +4,6 @@ from bert_score import score as bert_score
 import jieba
 import traceback
 
-# Function definitions remain the same
 def calculate_bleu(translations, references):
     return sacrebleu.corpus_bleu(translations, [references]).score
 
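Note: the calculate_bleu helper shown above is unchanged by this commit; it wraps sacrebleu's corpus-level BLEU. A minimal sketch of how it can be exercised outside Streamlit, assuming sacrebleu is installed (the sentences below are invented for illustration):

import sacrebleu

def calculate_bleu(translations, references):
    # sacrebleu expects a list of hypothesis strings and a list of reference lists
    return sacrebleu.corpus_bleu(translations, [references]).score

hypotheses = ["the cat sat on the mat"]          # hypothetical system output
references = ["the cat is sitting on the mat"]   # hypothetical reference
print(calculate_bleu(hypotheses, references))    # corpus BLEU score as a float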
@@ -18,30 +17,44 @@ def calculate_bertscore(translations, references, lang):
     P, R, F1 = bert_score(translations, references, lang=lang)
     return F1.mean().item()
 
-
+def tokenize_text(text, lang_code):
+    if lang_code == "zh":
+        return ' '.join(jieba.cut(text))
+    # For other languages, we'll use a simple space-based tokenization
+    # This might not be ideal for all languages, but it's a start
+    return ' '.join(text.split())
+
 st.title("Machine Translation Quality Evaluation")
 st.write("Input the translated text and the reference translation to compute BLEU, TER, CHRF, and BERTScore metrics.")
 
-
-
+languages = {
+    "English": "en", "Chinese": "zh", "French": "fr", "German": "de", "Spanish": "es",
+    "Russian": "ru", "Japanese": "ja", "Korean": "ko", "Arabic": "ar", "Italian": "it",
+    "Dutch": "nl", "Portuguese": "pt", "Turkish": "tr", "Polish": "pl", "Czech": "cs",
+    "Swedish": "sv", "Danish": "da", "Finnish": "fi", "Greek": "el", "Hungarian": "hu",
+    "Indonesian": "id", "Norwegian": "no", "Romanian": "ro", "Thai": "th", "Vietnamese": "vi",
+    "Hebrew": "he", "Hindi": "hi", "Bengali": "bn", "Tamil": "ta", "Urdu": "ur", "Other": "other"
+}
+
+source_lang = st.selectbox("Select Source Language", list(languages.keys()))
+target_lang = st.selectbox("Select Target Language", list(languages.keys()))
+
+source_lang_code = st.text_input("Enter Source Language Code (ISO 639-1):", value=languages[source_lang]) if source_lang == "Other" else languages[source_lang]
+target_lang_code = st.text_input("Enter Target Language Code (ISO 639-1):", value=languages[target_lang]) if target_lang == "Other" else languages[target_lang]
+
+translation_input = st.text_area("Translated Text", height=200)
+reference_input = st.text_area("Reference Translation", height=200)
 
-# Evaluate button
 if st.button("Evaluate"):
     if translation_input and reference_input:
         try:
-            translations = [translation_input.strip()]
-            references = [reference_input.strip()]
+            translations = [tokenize_text(translation_input.strip(), target_lang_code)]
+            references = [tokenize_text(reference_input.strip(), target_lang_code)]
 
-            st.write("Debug: Inputs received")
+            st.write("Debug: Inputs received and tokenized")
             st.write(f"Translation: {translations}")
             st.write(f"Reference: {references}")
 
-            # Handle tokenization if necessary (e.g., for Chinese)
-            if source_lang_code == "zh" or target_lang_code == "zh":
-                translations = [' '.join(jieba.cut(text)) for text in translations]
-                references = [' '.join(jieba.cut(text)) for text in references]
-                st.write("Debug: Chinese tokenization applied")
-
             st.write("Debug: Calculating scores...")
 
             bleu_score = calculate_bleu(translations, references)