Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -1,12 +1,13 @@
|
|
1 |
# -*- coding: utf-8 -*-
|
2 |
-
"""
|
3 |
|
4 |
Automatically generated by Colab.
|
5 |
|
6 |
Original file is located at
|
7 |
-
https://colab.research.google.com/drive/
|
8 |
"""
|
9 |
|
|
|
10 |
|
11 |
import gradio as gr
|
12 |
|
@@ -299,97 +300,14 @@ def generate_paraphrase(question):
|
|
299 |
res = paraphrase(question, para_tokenizer, para_model)
|
300 |
return res
|
301 |
|
302 |
-
# question = "The official position of the United States on the Russia Ukraine war has been consistent in supporting Ukraine ’s sovereignty , territorial integrity, and the peaceful resolution of the conflict."
|
303 |
-
|
304 |
question = "Following the declaration of the State of Israel in 1948, neighboring Arab states invaded. The war ended with Israel controlling a significant portion of the territory. Many Palestinians became refugees."
|
305 |
|
306 |
-
res = generate_paraphrase(question)
|
307 |
-
|
308 |
-
res
|
309 |
-
|
310 |
-
longest_common_subss(question, res)
|
311 |
-
|
312 |
import nltk
|
313 |
nltk.download('punkt')
|
314 |
-
|
315 |
import re
|
316 |
from nltk.corpus import stopwords
|
317 |
from nltk.tokenize import word_tokenize
|
318 |
|
319 |
-
def non_melting_points(original_sentence, paraphrased_sentences):
    """Return the content words shared by the original sentence and every paraphrase.

    Each sentence is lower-cased and tokenized with NLTK, then filtered to
    alphabetic, non-stopword tokens; the intersection of those token sets
    across the original and all paraphrases is returned as a set of words.
    """
    stop_words = set(stopwords.words('english'))

    def content_words(text):
        # Alphabetic, non-stopword tokens of the lower-cased text.
        tokens = word_tokenize(text.lower())
        return {tok for tok in tokens if tok.isalpha() and tok not in stop_words}

    shared = content_words(original_sentence)
    for para in paraphrased_sentences:
        shared &= content_words(para)
    return shared
|
332 |
-
|
333 |
-
#Function to get the first sentence from a paragraph
|
334 |
-
|
335 |
-
import re
|
336 |
-
|
337 |
-
def get_first_sentence(paragraph):
    """Return the first sentence of *paragraph*.

    A sentence boundary is taken to be a period followed by optional
    whitespace and a capital letter (evidence that another sentence
    follows).  When such a boundary exists, the text up to and including
    the first period is returned; otherwise the paragraph is returned
    unchanged.
    """
    boundary = re.search(r'([^.]*\.[\s]*[A-Z])', paragraph)
    if boundary is None:
        # No "period + capital" boundary: treat the whole paragraph as
        # a single sentence (covers trailing-period and no-period text).
        return paragraph
    candidate = boundary.group(0).strip()
    pieces = candidate.split('.')
    if len(pieces) > 1:
        return pieces[0] + '.'
    return candidate
|
347 |
-
|
348 |
-
|
349 |
-
#Initializing llama3
|
350 |
-
|
351 |
-
# import json
|
352 |
-
# import torch
|
353 |
-
# from transformers import (AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, pipeline)
|
354 |
-
|
355 |
-
# config_data = json.load(open("config.json"))
|
356 |
-
# HF_TOKEN = config_data["HF_TOKEN"]
|
357 |
-
|
358 |
-
# model_name = "meta-llama/Meta-Llama-3-8B"
|
359 |
-
|
360 |
-
# bnb_config = BitsAndBytesConfig(
|
361 |
-
# load_in_4bit=True,
|
362 |
-
# bnb_4bit_use_double_quant=True,
|
363 |
-
# bnb_4bit_quant_type="nf4",
|
364 |
-
# bnb_4bit_compute_dtype=torch.bfloat16
|
365 |
-
# )
|
366 |
-
|
367 |
-
# tokenizer = AutoTokenizer.from_pretrained(model_name, token=HF_TOKEN)
|
368 |
-
# tokenizer.pad_token = tokenizer.eos_token
|
369 |
-
|
370 |
-
# model = AutoModelForCausalLM.from_pretrained(
|
371 |
-
# model_name,
|
372 |
-
# device_map="auto",
|
373 |
-
# quantization_config=bnb_config,
|
374 |
-
# token=HF_TOKEN
|
375 |
-
# )
|
376 |
-
|
377 |
-
# text_generator = pipeline(
|
378 |
-
# "text-generation",
|
379 |
-
# model=model,
|
380 |
-
# tokenizer=tokenizer,
|
381 |
-
# max_new_tokens=512,
|
382 |
-
# )
|
383 |
-
|
384 |
-
# # llm_result = text_generator("write about nazism")
|
385 |
-
|
386 |
-
# llm_result
|
387 |
-
|
388 |
-
# llm_result[0]["generated_text"].split('.')
|
389 |
-
|
390 |
-
|
391 |
-
#Finds LCS
|
392 |
-
|
393 |
import re
|
394 |
from nltk.corpus import stopwords
|
395 |
|
@@ -467,8 +385,6 @@ common_grams
|
|
467 |
common_gram_words = [word for gram in common_grams for word in gram.split()]
|
468 |
common_gram_words
|
469 |
|
470 |
-
import re
|
471 |
-
|
472 |
def llm_output(prompt):
|
473 |
# sequences = text_generator(prompt)
|
474 |
# gen_text = sequences[0]["generated_text"]
|
@@ -478,45 +394,114 @@ def llm_output(prompt):
|
|
478 |
return prompt,prompt
|
479 |
|
480 |
import re
|
481 |
-
|
482 |
-
|
483 |
-
|
484 |
-
|
485 |
-
|
486 |
-
|
487 |
-
|
488 |
-
|
489 |
-
|
490 |
-
|
491 |
-
|
492 |
-
|
493 |
-
|
494 |
-
|
495 |
-
|
496 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
497 |
|
498 |
def model(prompt):
|
499 |
generated,sentence = llm_output(prompt)
|
500 |
res = generate_paraphrase(sentence)
|
501 |
common_subs = longest_common_subss(sentence,res)
|
502 |
-
|
503 |
common_grams = find_common_subsequences(sentence,res)
|
504 |
-
common_gram_words = [word for gram in common_grams for word in gram.split()]
|
505 |
for i in range(len(common_subs)):
|
506 |
common_subs[i]["Paraphrased Sentence"] = res[i]
|
507 |
-
result =
|
508 |
return generated, result
|
509 |
|
510 |
-
#
|
511 |
|
512 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
513 |
|
514 |
-
demo = gr.Interface(
|
515 |
-
fn=model,
|
516 |
-
inputs=gr.Textbox(label="User Prompt"),
|
517 |
-
outputs=[gr.Textbox(label="AI-generated Text (Llama3)"), gr.HTML()],
|
518 |
-
title="Paraphrases the Text and Highlights the Non-melting Points",
|
519 |
-
theme=gr.themes.Soft()
|
520 |
-
)
|
521 |
|
522 |
-
demo.launch(share=True)
|
|
|
1 |
# -*- coding: utf-8 -*-
|
2 |
+
"""text-paraphraser.ipynb
|
3 |
|
4 |
Automatically generated by Colab.
|
5 |
|
6 |
Original file is located at
|
7 |
+
https://colab.research.google.com/drive/1pFGR4uvXMMWVJFQeFmn--arumSxqa5Yy
|
8 |
"""
|
9 |
|
10 |
+
!pip install gradio
|
11 |
|
12 |
import gradio as gr
|
13 |
|
|
|
300 |
res = paraphrase(question, para_tokenizer, para_model)
|
301 |
return res
|
302 |
|
|
|
|
|
303 |
question = "Following the declaration of the State of Israel in 1948, neighboring Arab states invaded. The war ended with Israel controlling a significant portion of the territory. Many Palestinians became refugees."
|
304 |
|
|
|
|
|
|
|
|
|
|
|
|
|
305 |
import nltk
|
306 |
nltk.download('punkt')
|
|
|
307 |
import re
|
308 |
from nltk.corpus import stopwords
|
309 |
from nltk.tokenize import word_tokenize
|
310 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
311 |
import re
|
312 |
from nltk.corpus import stopwords
|
313 |
|
|
|
385 |
common_gram_words = [word for gram in common_grams for word in gram.split()]
|
386 |
common_gram_words
|
387 |
|
|
|
|
|
388 |
def llm_output(prompt):
|
389 |
# sequences = text_generator(prompt)
|
390 |
# gen_text = sequences[0]["generated_text"]
|
|
|
394 |
return prompt,prompt
|
395 |
|
396 |
import re
|
397 |
+
import html
|
398 |
+
|
399 |
+
def highlight_phrases_with_colors(sentences, phrases):
    """Render *sentences* as a styled HTML card with each phrase highlighted.

    Every distinct phrase is assigned its own background colour (HSL hue
    stepped by 60 degrees) the first time it is seen, shared across all
    sentences.  Each occurrence is wrapped in a coloured <span> carrying a
    small black circular badge with a running number.  Matching is
    case-insensitive and anchored on word boundaries.  Returns a
    self-contained HTML string suitable for a gr.HTML() output.
    """
    color_map = {}   # phrase -> CSS colour; persists across sentences
    color_index = 0  # next hue step to hand out

    # Generate HTML for highlighting each sentence
    highlighted_html = []
    idx = 1  # 1-based sentence number prefixed to each sentence
    for sentence in sentences:
        sentence_with_idx = f"{idx}. {sentence}"
        idx += 1
        # Escape first so user text cannot inject markup; the spans we
        # insert below are added after escaping and stay intact.
        highlighted_sentence = html.escape(sentence_with_idx)
        phrase_count = 0  # number of phrases that matched in this sentence

        # NOTE(review): `words` is computed but never read — presumably a
        # leftover from an earlier per-word numbering scheme; confirm.
        words = re.findall(r'\b\w+\b', sentence)
        word_index = 1  # badge number shown next to a highlighted phrase

        # Highlight each phrase with a unique color and number
        for phrase in phrases:
            if phrase not in color_map:
                # First sighting of this phrase anywhere: assign next colour.
                color_map[phrase] = f'hsl({color_index * 60 % 360}, 70%, 80%)'
                color_index += 1

            escaped_phrase = re.escape(phrase)
            pattern = rf'\b{escaped_phrase}\b'
            # Default arguments freeze the current colour/badge number at
            # lambda creation (avoids the late-binding closure pitfall).
            # NOTE(review): `count` is captured but never used in the body.
            highlighted_sentence, num_replacements = re.subn(
                pattern,
                lambda m, count=phrase_count, color=color_map[phrase], index=word_index: (
                    f'<span style="background-color: {color}; font-weight: bold;'
                    f' padding: 2px 4px; border-radius: 2px; position: relative;">'
                    f'<span style="background-color: black; color: white; border-radius: 50%;'
                    f' padding: 2px 5px; margin-right: 5px;">{index}</span>'
                    f'{m.group(0)}'
                    f'</span>'
                ),
                highlighted_sentence,
                flags=re.IGNORECASE
            )
            if num_replacements > 0:
                phrase_count += 1
                word_index += 1  # bumped once per phrase that matched, not per occurrence

        highlighted_html.append(highlighted_sentence)

    # Join sentences with a blank line between them
    final_html = "<br><br>".join(highlighted_html)

    # Wrap in a container div for styling
    return f'''
    <div style="border: solid 1px #; padding: 16px; background-color: #FFFFFF; color: #374151; box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1); border-radius: 12px;">
        <h3 style="margin-top: 0; font-size: 1.25em; color: #111827;">Paraphrased And Highlighted Text</h3>
        <div style="background-color: #F5F5F5; line-height: 1.6; padding: 15px; border-radius: 12px;">{final_html}</div>
    </div>
    '''
|
454 |
|
455 |
def model(prompt):
    """End-to-end pipeline behind the Gradio UI.

    Feeds the prompt through llm_output, paraphrases the resulting
    sentence, computes the common subsequences/grams between the sentence
    and its paraphrases, and returns (generated text, highlighted HTML).
    """
    generated, sentence = llm_output(prompt)
    paraphrases = generate_paraphrase(sentence)
    common_subs = longest_common_subss(sentence, paraphrases)
    # non_melting = non_melting_points(sentence, paraphrases)
    common_grams = find_common_subsequences(sentence, paraphrases)
    # common_gram_words = [word for gram in common_grams for word in gram.split()]
    # Attach each paraphrase to its corresponding LCS record.
    for i, record in enumerate(common_subs):
        record["Paraphrased Sentence"] = paraphrases[i]
    highlighted = highlight_phrases_with_colors(paraphrases, common_grams)
    return generated, highlighted
|
466 |
|
467 |
+
# model(question)
|
468 |
|
469 |
+
# --- Gradio UI -----------------------------------------------------------
# Monochrome-themed Blocks layout: one prompt textbox in; the AI-generated
# text and the highlighted-paraphrase HTML out.
with gr.Blocks(theme = gr.themes.Monochrome()) as demo:
    gr.Markdown("# Paraphrases the Text and Highlights the Non-melting Points")

    with gr.Row():
        user_input = gr.Textbox(label="User Prompt")

    with gr.Row():
        submit_button = gr.Button("Submit")
        clear_button = gr.Button("Clear")

    with gr.Row():
        ai_output = gr.Textbox(label="AI-generated Text (Llama3)")

    with gr.Row():
        # NOTE(review): this box is only ever cleared, never written to by
        # any handler below — confirm whether it is still needed.
        selected_sentence = gr.Textbox(label="Selected Sentence")

    with gr.Row():
        html_output = gr.HTML()

    with gr.Row():
        # Event wiring.  Submit runs the full model() pipeline; the two
        # Clear bindings blank the input and the output widgets.
        # NOTE(review): indentation reconstructed — the listeners may have
        # sat directly under the Blocks context in the original; behaviour
        # is the same either way.
        # NOTE(review): the second Clear handler returns a single "" for
        # three outputs — Gradio expects one value per output; verify.
        submit_button.click(model, inputs=user_input, outputs=[ai_output, html_output])
        clear_button.click(lambda: "", inputs=None, outputs=user_input)
        clear_button.click(lambda: "", inputs=None, outputs=[ai_output, selected_sentence, html_output])

# Launch the demo
demo.launch(share=True)

# !pip install pyngrok

# Alternative tunnelled launch via ngrok, kept for reference.
# NOTE(review): the original comment embedded a live ngrok auth token —
# redacted here; rotate the token and load it from config/env instead.
# from pyngrok import ngrok, conf
# conf.get_default().auth_token = '<REDACTED>'
# public_url = ngrok.connect(7861).public_url
# print(public_url)

# demo.queue().launch(server_port=7861, inline=False, share=False, debug=True)
# demo.launch(share=True,debug=True,inline = False)
|
506 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
507 |
|
|