File size: 1,838 Bytes
12c6e97
0f150a5
 
 
 
 
 
 
 
 
b1356e4
 
 
 
 
 
0f150a5
 
b1356e4
12c6e97
0f150a5
 
 
 
b1356e4
 
0f150a5
b1356e4
 
 
 
 
0f150a5
b1356e4
 
 
 
 
 
 
 
 
0f150a5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12c6e97
0f150a5
12c6e97
 
0f150a5
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
from gensim.parsing.preprocessing import STOPWORDS
import wikipedia
import gradio as gr
from gradio.mix import Parallel
import requests
import nltk
from nltk.tokenize import word_tokenize
from nltk.tokenize import sent_tokenize
import re
nltk.download('punkt')
import numpy as np
import pandas as pd
from os import path
from PIL import Image
from wordcloud import WordCloud, STOPWORDS, ImageColorGenerator
import matplotlib.pyplot as plt


def _build_query(text):
    """Return the non-stop-word tokens of *text*, lower-cased and concatenated.

    Tokens are joined without a separator, so the result is a single
    space-free string used as the Wikipedia lookup title.
    """
    # STOPWORDS is whatever the module-level imports bound last; it is extended
    # here with a few question-style filler words and punctuation tokens.
    ignored = STOPWORDS.union({
        'likes', 'play', '.', ',', 'like', "don't", '?',
        'use', 'choose', 'important', 'better',
    })
    tokens = word_tokenize(text.lower())
    return "".join(tok for tok in tokens if tok not in ignored).replace(" ", "")


def _summarize_content(content, max_paragraphs=6, min_length=500):
    """Build a short summary from the leading paragraphs of *content*.

    Only the first *max_paragraphs* non-empty paragraphs are considered, and
    of those only the ones longer than *min_length* characters contribute:
    each contributes its text up to the first "." (with the "." restored).
    """
    # Strip "== Heading ==" style markers before splitting into paragraphs.
    without_headings = re.sub(r'\=.+\=', '', content)
    paragraphs = [p for p in without_headings.split('\n\n') if p]
    # Slicing (rather than indexing 0..5) keeps short articles from raising
    # IndexError when they have fewer than max_paragraphs paragraphs.
    first_sentences = [
        p.split(".")[0] + "."
        for p in paragraphs[:max_paragraphs]
        if len(p) > min_length
    ]
    return ' '.join(first_sentences).replace("\n", "")


def opendomain(text, wikipedia_language="en"):
    """Answer a free-text question with a short summary of a Wikipedia page.

    The question is reduced to its non-stop-word tokens, the resulting string
    is looked up with ``wikipedia.page``, a word cloud of the page content is
    drawn on the current pyplot figure, and the first sentence of each
    sufficiently long leading paragraph is returned.

    Args:
        text: The user's question.
        wikipedia_language: Language code passed to ``wikipedia.set_lang``;
            a falsy value leaves the client's current language untouched.

    Returns:
        The summary sentences joined by single spaces with newlines removed.
        An empty string is returned when nothing is left of *text* after
        stop-word removal, or when no leading paragraph is long enough.

    Raises:
        Exceptions from the wikipedia client and from word-cloud generation
        are not caught here and propagate to the caller.
    """
    query = _build_query(text)
    if wikipedia_language:
        wikipedia.set_lang(wikipedia_language)

    # Nothing to look up (blank input or only stop words): skip the request
    # instead of asking Wikipedia for an empty title.
    if not query:
        return ""

    content = wikipedia.page(query).content

    # Clear the current figure first so repeated calls do not keep stacking
    # images onto the same axes.
    # NOTE(review): the figure is only drawn, never returned or saved here.
    plt.clf()
    wordcloud = WordCloud(font_path="HelveticaWorld-Regular.ttf").generate(content)
    plt.imshow(wordcloud, interpolation='bilinear')
    plt.axis("off")

    return _summarize_content(content)


# Expose opendomain through a Gradio interface: a single Textbox (lines=5) as
# input and plain text as output. Only one input is wired up, so
# opendomain's wikipedia_language parameter keeps its default value.
iface = gr.Interface(
    fn=opendomain,
    inputs=[gr.inputs.Textbox(lines=5)],
    outputs="text",
)
# Launch the interface when this module is executed.
iface.launch()