Spaces:

BACKENDAPI2024
/

aiisc-watermarking-modelv3

Sleeping

App Files Files Community

jgyasu committed on Jul 8, 2024

Commit

960f419

verified ·

1 Parent(s): 7baf701

Update app.py

Browse files

Files changed (1) hide show

app.py +19 -221

app.py CHANGED Viewed

@@ -29,232 +29,29 @@ from termcolor import colored
 import nltk
 from nltk.translate.bleu_score import sentence_bleu
 from transformers import BertTokenizer, BertModel
-import graphviz
 import gradio as gr
 from tree import generate_plot
 from paraphraser import generate_paraphrase
 nltk.download('stopwords')
-# Function to Find the Longest Common Substring Words Subsequence
-def longest_common_subss(original_sentence, paraphrased_sentences):
-    stop_words = set(stopwords.words('english'))
-    original_sentence_lower = original_sentence.lower()
-    paraphrased_sentences_lower = [s.lower() for s in paraphrased_sentences]
-    paraphrased_sentences_no_stopwords = []
-    for sentence in paraphrased_sentences_lower:
-        words = re.findall(r'\b\w+\b', sentence)
-        filtered_sentence = ' '.join([word for word in words if word not in stop_words])
-        paraphrased_sentences_no_stopwords.append(filtered_sentence)
-    results = []
-    for sentence in paraphrased_sentences_no_stopwords:
-        common_words = set(original_sentence_lower.split()) & set(sentence.split())
-        for word in common_words:
-            sentence = sentence.replace(word, colored(word, 'green'))
-        results.append({
-            "Original Sentence": original_sentence_lower,
-            "Paraphrased Sentence": sentence,
-            "Substrings Word Pair": common_words
-        })
-    return results
-# Function to Find Common Substring Word between each paraphrase sentences
-def common_substring_word(original_sentence, paraphrased_sentences):
-    stop_words = set(stopwords.words('english'))
-    original_sentence_lower = original_sentence.lower()
-    paraphrased_sentences_lower = [s.lower() for s in paraphrased_sentences]
-    paraphrased_sentences_no_stopwords = []
-    for sentence in paraphrased_sentences_lower:
-        words = re.findall(r'\b\w+\b', sentence)
-        filtered_sentence = ' '.join([word for word in words if word not in stop_words])
-        paraphrased_sentences_no_stopwords.append(filtered_sentence)
-    results = []
-    for idx, sentence in enumerate(paraphrased_sentences_no_stopwords):
-        common_words = set(original_sentence_lower.split()) & set(sentence.split())
-        common_substrings = ', '.join(sorted(common_words))
-        for word in common_words:
-            sentence = sentence.replace(word, colored(word, 'green'))
-        results.append({
-            f"Paraphrased Sentence {idx+1}": sentence,
-            "Common Substrings": common_substrings
-        })
-    return results
-import re
-from nltk.corpus import stopwords
-def find_common_subsequences(sentence, str_list):
-    stop_words = set(stopwords.words('english'))
-    sentence = sentence.lower()
-    str_list = [s.lower() for s in str_list]
-    def is_present(lcs, str_list):
-        for string in str_list:
-            if lcs not in string:
-                return False
-        return True
-    def remove_stop_words_and_special_chars(sentence):
-        sentence = re.sub(r'[^\w\s]', '', sentence)
-        words = sentence.split()
-        filtered_words = [word for word in words if word.lower() not in stop_words]
-        return " ".join(filtered_words)
-    sentence = remove_stop_words_and_special_chars(sentence)
-    str_list = [remove_stop_words_and_special_chars(s) for s in str_list]
-    words = sentence.split(" ")
-    common_grams = []
-    added_phrases = set()
-    def is_covered(subseq, added_phrases):
-        for phrase in added_phrases:
-            if subseq in phrase:
-                return True
-        return False
-    for i in range(len(words) - 4):
-        penta = " ".join(words[i:i+5])
-        if is_present(penta, str_list):
-            common_grams.append(penta)
-            added_phrases.add(penta)
-    for i in range(len(words) - 3):
-        quad = " ".join(words[i:i+4])
-        if is_present(quad, str_list) and not is_covered(quad, added_phrases):
-            common_grams.append(quad)
-            added_phrases.add(quad)
-    for i in range(len(words) - 2):
-        tri = " ".join(words[i:i+3])
-        if is_present(tri, str_list) and not is_covered(tri, added_phrases):
-            common_grams.append(tri)
-            added_phrases.add(tri)
-    for i in range(len(words) - 1):
-        bi = " ".join(words[i:i+2])
-        if is_present(bi, str_list) and not is_covered(bi, added_phrases):
-            common_grams.append(bi)
-            added_phrases.add(bi)
-    for i in range(len(words)):
-        uni = words[i]
-        if is_present(uni, str_list) and not is_covered(uni, added_phrases):
-            common_grams.append(uni)
-            added_phrases.add(uni)
-    return common_grams
-def llm_output(prompt):
-    return prompt, prompt
-def highlight_phrases_with_colors(sentences, phrases):
-    color_map = {}
-    color_index = 0
-    highlighted_html = []
-    idx = 1
-    for sentence in sentences:
-        sentence_with_idx = f"{idx}. {sentence}"
-        idx += 1
-        highlighted_sentence = sentence_with_idx
-        phrase_count = 0
-        words = re.findall(r'\b\w+\b', sentence)
-        word_index = 1
-        for phrase in phrases:
-            if phrase not in color_map:
-                color_map[phrase] = f'hsl({color_index * 60 % 360}, 70%, 80%)'
-                color_index += 1
-            escaped_phrase = re.escape(phrase)
-            pattern = rf'\b{escaped_phrase}\b'
-            highlighted_sentence, num_replacements = re.subn(
-                pattern,
-                lambda m, count=phrase_count, color=color_map[phrase], index=word_index: (
-                    f'<span style="background-color: {color}; font-weight: bold;'
-                    f' padding: 2px 4px; border-radius: 2px; position: relative;">'
-                    f'<span style="background-color: black; color: white; border-radius: 50%;'
-                    f' padding: 2px 5px; margin-right: 5px;">{index}</span>'
-                    f'{m.group(0)}'
-                    f'</span>'
-                ),
-                highlighted_sentence,
-                flags=re.IGNORECASE
-            )
-            if num_replacements > 0:
-                phrase_count += 1
-                word_index += 1
-        highlighted_html.append(highlighted_sentence)
-    final_html = "<br><br>".join(highlighted_html)
-    return f'''
-    <div style="border: solid 1px #; padding: 16px; background-color: #FFFFFF; color: #374151; box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1); border-radius: 2px;">
-    <h3 style="margin-top: 0; font-size: 1em; color: #111827;">Paraphrased And Highlighted Text</h3>
-    <div style="background-color: #F5F5F5; line-height: 1.6; padding: 15px; border-radius: 2px;">{final_html}</div>
-    </div>
-    '''
-import re
-def highlight_phrases_with_colors_single_sentence(sentence, phrases):
-    color_map = {}
-    color_index = 0
-    highlighted_sentence = sentence
-    phrase_count = 0
-    words = re.findall(r'\b\w+\b', sentence)
-    word_index = 1
-    for phrase in phrases:
-        if phrase not in color_map:
-            color_map[phrase] = f'hsl({color_index * 60 % 360}, 70%, 80%)'
-            color_index += 1
-        escaped_phrase = re.escape(phrase)
-        pattern = rf'\b{escaped_phrase}\b'
-        highlighted_sentence, num_replacements = re.subn(
-            pattern,
-            lambda m, count=phrase_count, color=color_map[phrase], index=word_index: (
-                f'<span style="background-color: {color}; font-weight: bold;'
-                f' padding: 2px 4px; border-radius: 2px; position: relative;">'
-                f'<span style="background-color: black; color: white; border-radius: 50%;'
-                f' padding: 2px 5px; margin-right: 5px;">{index}</span>'
-                f'{m.group(0)}'
-                f'</span>'
-            ),
-            highlighted_sentence,
-            flags=re.IGNORECASE
-        )
-        if num_replacements > 0:
-            phrase_count += 1
-            word_index += 1
-    final_html = highlighted_sentence
-    return f'''
-    <div style="border: solid 1px #; padding: 16px; background-color: #FFFFFF; color: #374151; box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1); border-radius: 2px;">
-    <h3 style="margin-top: 0; font-size: 1em; color: #111827;">Selected Sentence</h3>
-    <div style="background-color: #F5F5F5; line-height: 1.6; padding: 15px; border-radius: 2px;">{final_html}</div>
-    </div>
-    '''
 # Function for the Gradio interface
 def model(prompt):
-    generated, sentence = llm_output(prompt)
-    res = generate_paraphrase(sentence)
-    common_subs = longest_common_subss(sentence, res)
-    common_grams = find_common_subsequences(sentence, res)
-    for i in range(len(common_subs)):
-        common_subs[i]["Paraphrased Sentence"] = res[i]
-    generated_highlighted = highlight_phrases_with_colors_single_sentence(generated, common_grams)
-    result = highlight_phrases_with_colors(res, common_grams)
     tree = generate_plot(sentence)
-    return generated, generated_highlighted, result, tree
-with gr.Blocks(theme = gr.themes.Monochrome()) as demo:
-    gr.Markdown("# Paraphrases the Text and Highlights the Non-melting Points")
     with gr.Row():
         user_input = gr.Textbox(label="User Prompt")
@@ -263,21 +60,22 @@ with gr.Blocks(theme = gr.themes.Monochrome()) as demo:
         submit_button = gr.Button("Submit")
         clear_button = gr.Button("Clear")
-    with gr.Row():
-        ai_output = gr.Textbox(label="AI-generated Text (Llama3)")
     with gr.Row():
         selected_sentence = gr.HTML()
     with gr.Row():
         html_output = gr.HTML()
     with gr.Row():
         tree = gr.Plot()
-    submit_button.click(model, inputs=user_input, outputs=[ai_output, selected_sentence, html_output, tree])
     clear_button.click(lambda: "", inputs=None, outputs=user_input)
-    clear_button.click(lambda: "", inputs=None, outputs=[ai_output, selected_sentence, html_output, tree])
 # Launch the demo
-demo.launch(share=True)

 import nltk
 from nltk.translate.bleu_score import sentence_bleu
 from transformers import BertTokenizer, BertModel
 import gradio as gr
 from tree import generate_plot
 from paraphraser import generate_paraphrase
+from lcs import find_common_subsequences
+from highlighter import highlight_common_words
 nltk.download('stopwords')
 # Function for the Gradio interface
 def model(prompt):
+    """Run the paraphrase / common-phrase / highlighting pipeline for one prompt.
+
+    The prompt is passed to generate_paraphrase, the result of
+    find_common_subsequences(prompt, paraphrases) is computed, and
+    highlight_common_words is applied to both the prompt and the paraphrases.
+
+    Returns a 4-tuple, in the order the Submit button's outputs are wired:
+    (highlighted prompt, highlighted paraphrases, discarded sentences, plot).
+    NOTE(review): the exact types returned by the imported helpers are not
+    visible here -- confirm they match gr.HTML / gr.Plot expectations.
+    """
+    sentence = prompt
+    paraphrased_sentences = generate_paraphrase(sentence)
+    common_grams = find_common_subsequences(sentence, paraphrased_sentences)
+    highlighted_user_prompt = highlight_common_words(common_grams, [sentence])  # Pass the sentence as a list
+    highlighted_paraphrased_sentences = highlight_common_words(common_grams, paraphrased_sentences)  # common_grams first, then the sentences
+    # Always an empty list here: no sentence is ever added to it before returning.
+    discarded_sentences = []
     tree = generate_plot(sentence)
+    return highlighted_user_prompt, highlighted_paraphrased_sentences, discarded_sentences, tree
+with gr.Blocks(theme=gr.themes.Monochrome()) as demo:
+    gr.Markdown("# **AIISC Watermarking Model**")
     with gr.Row():
         user_input = gr.Textbox(label="User Prompt")
         submit_button = gr.Button("Submit")
         clear_button = gr.Button("Clear")
     with gr.Row():
         selected_sentence = gr.HTML()
     with gr.Row():
         html_output = gr.HTML()
+    with gr.Row():
+        discarded_sentences = gr.Textbox(label="Discarded Sentences")
     with gr.Row():
         tree = gr.Plot()
+    submit_button.click(model, inputs=user_input, outputs=[selected_sentence, html_output, discarded_sentences, tree])
     clear_button.click(lambda: "", inputs=None, outputs=user_input)
+    clear_button.click(lambda: "", inputs=None, outputs=[selected_sentence, html_output, discarded_sentences, tree])
 # Launch the demo
+demo.launch(share=True)