jgyasu committed on
Commit
3a1a87f
1 Parent(s): b265c4f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +101 -116
app.py CHANGED
@@ -1,12 +1,13 @@
1
  # -*- coding: utf-8 -*-
2
- """watermark_intern.ipynb
3
 
4
  Automatically generated by Colab.
5
 
6
  Original file is located at
7
- https://colab.research.google.com/drive/1SyerXj0c3UyLSYmdL4TBBzWhwvMJ3JwJ
8
  """
9
 
 
10
 
11
  import gradio as gr
12
 
@@ -299,97 +300,14 @@ def generate_paraphrase(question):
299
  res = paraphrase(question, para_tokenizer, para_model)
300
  return res
301
 
302
- # question = "The official position of the United States on the Russia Ukraine war has been consistent in supporting Ukraine ’s sovereignty , territorial integrity, and the peaceful resolution of the conflict."
303
-
304
  question = "Following the declaration of the State of Israel in 1948, neighboring Arab states invaded. The war ended with Israel controlling a significant portion of the territory. Many Palestinians became refugees."
305
 
306
- res = generate_paraphrase(question)
307
-
308
- res
309
-
310
- longest_common_subss(question, res)
311
-
312
  import nltk
313
  nltk.download('punkt')
314
-
315
  import re
316
  from nltk.corpus import stopwords
317
  from nltk.tokenize import word_tokenize
318
 
319
- def non_melting_points(original_sentence, paraphrased_sentences):
320
- stop_words = set(stopwords.words('english'))
321
-
322
- def tokenize_and_filter(sentence):
323
- words = word_tokenize(sentence.lower())
324
- filtered_words = {word for word in words if word.isalpha() and word not in stop_words}
325
- return filtered_words
326
- original_words = tokenize_and_filter(original_sentence)
327
- paraphrased_words_list = [tokenize_and_filter(sentence) for sentence in paraphrased_sentences]
328
- common_words = original_words
329
- for words in paraphrased_words_list:
330
- common_words &= words
331
- return common_words
332
-
333
- #Function to get the first sentence from a paragraph
334
-
335
- import re
336
-
337
- def get_first_sentence(paragraph):
338
- match = re.search(r'([^.]*\.[\s]*[A-Z])', paragraph)
339
- if match:
340
- first_sentence = match.group(0)
341
- first_sentence = first_sentence.strip()
342
- if len(first_sentence.split('.')) > 1:
343
- return first_sentence.split('.')[0] + '.'
344
- return first_sentence
345
- else:
346
- return paragraph
347
-
348
-
349
- #Initializing llama3
350
-
351
- # import json
352
- # import torch
353
- # from transformers import (AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, pipeline)
354
-
355
- # config_data = json.load(open("config.json"))
356
- # HF_TOKEN = config_data["HF_TOKEN"]
357
-
358
- # model_name = "meta-llama/Meta-Llama-3-8B"
359
-
360
- # bnb_config = BitsAndBytesConfig(
361
- # load_in_4bit=True,
362
- # bnb_4bit_use_double_quant=True,
363
- # bnb_4bit_quant_type="nf4",
364
- # bnb_4bit_compute_dtype=torch.bfloat16
365
- # )
366
-
367
- # tokenizer = AutoTokenizer.from_pretrained(model_name, token=HF_TOKEN)
368
- # tokenizer.pad_token = tokenizer.eos_token
369
-
370
- # model = AutoModelForCausalLM.from_pretrained(
371
- # model_name,
372
- # device_map="auto",
373
- # quantization_config=bnb_config,
374
- # token=HF_TOKEN
375
- # )
376
-
377
- # text_generator = pipeline(
378
- # "text-generation",
379
- # model=model,
380
- # tokenizer=tokenizer,
381
- # max_new_tokens=512,
382
- # )
383
-
384
- # # llm_result = text_generator("write about nazism")
385
-
386
- # llm_result
387
-
388
- # llm_result[0]["generated_text"].split('.')
389
-
390
-
391
- #Finds LCS
392
-
393
  import re
394
  from nltk.corpus import stopwords
395
 
@@ -467,8 +385,6 @@ common_grams
467
  common_gram_words = [word for gram in common_grams for word in gram.split()]
468
  common_gram_words
469
 
470
- import re
471
-
472
  def llm_output(prompt):
473
  # sequences = text_generator(prompt)
474
  # gen_text = sequences[0]["generated_text"]
@@ -478,45 +394,114 @@ def llm_output(prompt):
478
  return prompt,prompt
479
 
480
  import re
481
-
482
- def generate_html_output(results,common_grams,common_gram_words):
483
- html_output = "<table border='1'>"
484
- html_output += "<tr><th>Original Sentence</th><th>Paraphrased Sentence</th><th>Common Substrings</th><th>Non Melting Points</th></tr>"
485
-
486
- for result in results:
487
- original_sentence = result[f"Original Sentence"]
488
- paraphrased_sentence = result[f"Paraphrased Sentence"]
489
- common_substrings = result[f"Substrings Word Pair"]
490
- # Highlight common substrings in the paraphrased sentence
491
- for word in common_gram_words:
492
- paraphrased_sentence = re.sub(r'\b' + re.escape(word) + r'\b', f'<span style="color:green">{word}</span>', paraphrased_sentence, flags=re.IGNORECASE)
493
- html_output += f"<tr><td>{original_sentence}</td><td>{paraphrased_sentence}</td><td>{common_substrings}</td><td>{common_grams}</td></tr>"
494
- html_output += "</table>"
495
- return html_output
496
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
497
 
498
  def model(prompt):
499
  generated,sentence = llm_output(prompt)
500
  res = generate_paraphrase(sentence)
501
  common_subs = longest_common_subss(sentence,res)
502
- non_melting = non_melting_points(sentence, res)
503
  common_grams = find_common_subsequences(sentence,res)
504
- common_gram_words = [word for gram in common_grams for word in gram.split()]
505
  for i in range(len(common_subs)):
506
  common_subs[i]["Paraphrased Sentence"] = res[i]
507
- result = generate_html_output(common_subs,common_grams,common_gram_words)
508
  return generated, result
509
 
510
- # final = model(question)
511
 
512
- import gradio as gr
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
513
 
514
- demo = gr.Interface(
515
- fn=model,
516
- inputs=gr.Textbox(label="User Prompt"),
517
- outputs=[gr.Textbox(label="AI-generated Text (Llama3)"), gr.HTML()],
518
- title="Paraphrases the Text and Highlights the Non-melting Points",
519
- theme=gr.themes.Soft()
520
- )
521
 
522
- demo.launch(share=True)
 
1
  # -*- coding: utf-8 -*-
2
+ """text-paraphraser.ipynb
3
 
4
  Automatically generated by Colab.
5
 
6
  Original file is located at
7
+ https://colab.research.google.com/drive/1pFGR4uvXMMWVJFQeFmn--arumSxqa5Yy
8
  """
9
 
10
+ !pip install gradio
11
 
12
  import gradio as gr
13
 
 
300
  res = paraphrase(question, para_tokenizer, para_model)
301
  return res
302
 
 
 
303
  question = "Following the declaration of the State of Israel in 1948, neighboring Arab states invaded. The war ended with Israel controlling a significant portion of the territory. Many Palestinians became refugees."
304
 
 
 
 
 
 
 
305
  import nltk
306
  nltk.download('punkt')
 
307
  import re
308
  from nltk.corpus import stopwords
309
  from nltk.tokenize import word_tokenize
310
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
311
  import re
312
  from nltk.corpus import stopwords
313
 
 
385
  common_gram_words = [word for gram in common_grams for word in gram.split()]
386
  common_gram_words
387
 
 
 
388
  def llm_output(prompt):
389
  # sequences = text_generator(prompt)
390
  # gen_text = sequences[0]["generated_text"]
 
394
  return prompt,prompt
395
 
396
  import re
397
+ import html
398
+
399
def highlight_phrases_with_colors(sentences, phrases):
    """Render *sentences* as HTML, wrapping every occurrence of each phrase
    in *phrases* in a colored, numbered ``<span>``.

    Each distinct phrase gets a background color assigned in first-seen
    order (cycling through 6 HSL hues), and within each sentence every
    phrase that matched gets the next sequential black circular badge
    number.  Sentences are prefixed "1. ", "2. ", ... and joined with
    blank lines inside a styled container div.

    Args:
        sentences: iterable of plain-text sentences to display.
        phrases: iterable of plain-text phrases to highlight; matching is
            case-insensitive and bounded by ``\\b`` word boundaries.

    Returns:
        A single HTML string (a container ``<div>``) suitable for
        ``gr.HTML()``.
    """
    color_map = {}    # phrase -> assigned hsl() color (stable across sentences)
    color_index = 0   # next hue slot; 6 hues, then the cycle repeats

    highlighted_html = []
    for idx, sentence in enumerate(sentences, start=1):
        # Escape the sentence first so user-supplied text cannot inject markup.
        highlighted_sentence = html.escape(f"{idx}. {sentence}")
        badge_number = 1  # sequential badge for each phrase that matched here

        for phrase in phrases:
            if phrase not in color_map:
                color_map[phrase] = f'hsl({color_index * 60 % 360}, 70%, 80%)'
                color_index += 1

            # Bug fix: we search the *escaped* sentence, so the phrase must be
            # escaped the same way — otherwise phrases containing &, <, > or
            # quotes could never match (the original escaped only the sentence).
            pattern = rf'\b{re.escape(html.escape(phrase))}\b'
            highlighted_sentence, num_replacements = re.subn(
                pattern,
                lambda m, color=color_map[phrase], index=badge_number: (
                    f'<span style="background-color: {color}; font-weight: bold;'
                    f' padding: 2px 4px; border-radius: 2px; position: relative;">'
                    f'<span style="background-color: black; color: white; border-radius: 50%;'
                    f' padding: 2px 5px; margin-right: 5px;">{index}</span>'
                    f'{m.group(0)}'
                    f'</span>'
                ),
                highlighted_sentence,
                flags=re.IGNORECASE
            )
            if num_replacements > 0:
                badge_number += 1

        highlighted_html.append(highlighted_sentence)

    # Join sentences with blank lines between them.
    final_html = "<br><br>".join(highlighted_html)

    # NOTE(review): "border: solid 1px #" is malformed CSS (no color value),
    # so browsers drop the border; kept as-is to preserve current appearance.
    return f'''
    <div style="border: solid 1px #; padding: 16px; background-color: #FFFFFF; color: #374151; box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1); border-radius: 12px;">
        <h3 style="margin-top: 0; font-size: 1.25em; color: #111827;">Paraphrased And Highlighted Text</h3>
        <div style="background-color: #F5F5F5; line-height: 1.6; padding: 15px; border-radius: 12px;">{final_html}</div>
    </div>
    '''
454
 
455
def model(prompt):
    """End-to-end pipeline behind the Gradio Submit button.

    Passes *prompt* through the (currently stubbed) LLM wrapper, paraphrases
    the resulting sentence, computes the n-grams common to all paraphrases,
    and renders the paraphrases as highlighted HTML.

    Args:
        prompt: user-supplied text from the UI textbox.

    Returns:
        Tuple ``(generated_text, highlighted_html)`` matching the two
        Gradio output components.
    """
    generated, sentence = llm_output(prompt)
    paraphrases = generate_paraphrase(sentence)
    common_subs = longest_common_subss(sentence, paraphrases)
    common_grams = find_common_subsequences(sentence, paraphrases)
    # Attach each paraphrase to its substring record (the lists are parallel).
    for record, para_text in zip(common_subs, paraphrases):
        record["Paraphrased Sentence"] = para_text
    result = highlight_phrases_with_colors(paraphrases, common_grams)
    return generated, result
466
 
467
+ # model(question)
468
 
469
# Gradio UI: prompt in, AI text + highlighted paraphrases out.
with gr.Blocks(theme=gr.themes.Monochrome()) as demo:
    gr.Markdown("# Paraphrases the Text and Highlights the Non-melting Points")

    with gr.Row():
        user_input = gr.Textbox(label="User Prompt")

    with gr.Row():
        submit_button = gr.Button("Submit")
        clear_button = gr.Button("Clear")

    with gr.Row():
        ai_output = gr.Textbox(label="AI-generated Text (Llama3)")

    with gr.Row():
        selected_sentence = gr.Textbox(label="Selected Sentence")

    with gr.Row():
        html_output = gr.HTML()

    submit_button.click(model, inputs=user_input, outputs=[ai_output, html_output])
    # Bug fix: the original wired a single-value lambda (`lambda: ""`) to
    # three output components — Gradio requires one return value per output,
    # so clicking Clear raised at runtime.  One handler now resets all four.
    clear_button.click(
        lambda: ("", "", "", ""),
        inputs=None,
        outputs=[user_input, ai_output, selected_sentence, html_output],
    )

# Launch the demo
demo.launch(share=True)
496
+
497
+ # !pip install pyngrok
498
+
499
+ # from pyngrok import ngrok, conf
500
+ # conf.get_default().auth_token = "<NGROK_AUTH_TOKEN>"  # SECURITY: never commit real tokens — the token previously committed here was exposed publicly and must be revoked; load it from an env var or config.json instead
501
+ # public_url = ngrok.connect(7861).public_url
502
+ # print(public_url)
503
+
504
+ # demo.queue().launch(server_port=7861, inline=False, share=False, debug=True)
505
+ # demo.launch(share=True,debug=True,inline = False)
506
 
 
 
 
 
 
 
 
507