from gensim.parsing.preprocessing import STOPWORDS
import wikipedia
import gradio as gr
import nltk
from nltk.tokenize import word_tokenize
import re
from wordcloud import WordCloud
import matplotlib.pyplot as plt

nltk.download('punkt')


def opendomain(text, wikipedia_language="en"):
    # Strip stopwords and common question words so only the topic keywords remain.
    question_words = STOPWORDS.union(
        {'likes', 'play', '.', ',', 'like', "don't", '?', 'use',
         'choose', 'important', 'better'}
    )
    tokens = word_tokenize(text.lower())
    keywords = [t for t in tokens if t not in question_words]
    # Join with spaces (the original joined with "" and produced a single
    # concatenated blob, which made the Wikipedia lookup fail).
    query = " ".join(keywords)

    if wikipedia_language:
        wikipedia.set_lang(wikipedia_language)

    # Look up the Wikipedia page for the extracted topic. Note that
    # wikipedia.page() can raise DisambiguationError or PageError for
    # ambiguous or missing topics.
    page = wikipedia.page(query)
    content = page.content

    # Render a word cloud of the article. The font file must exist next to
    # the script; the figure is drawn but not returned to the Gradio UI.
    wordcloud = WordCloud(font_path="HelveticaWorld-Regular.ttf").generate(content)
    plt.imshow(wordcloud, interpolation='bilinear')
    plt.axis("off")

    # Drop section headings (e.g. "== History ==") and split into paragraphs.
    body = re.sub(r'\=.+\=', '', content)
    paragraphs = [p for p in body.split('\n\n') if p != '']

    # Crude extractive summary: take the first sentence of each paragraph
    # longer than 500 characters, among the first six paragraphs. Slicing
    # with [:6] avoids the IndexError the original range(6) loop hit on
    # short articles.
    answer = []
    for paragraph in paragraphs[:6]:
        if len(paragraph) > 500:
            answer.append(paragraph.split(".")[0])

    summary = " ".join(sentence + "." for sentence in answer)
    return summary.replace("\n", "")


iface = gr.Interface(fn=opendomain, inputs=gr.Textbox(lines=5), outputs="text")
iface.launch()
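
# A minimal smoke test that bypasses the Gradio UI -- a sketch only: the
# query below is an arbitrary example, and the call needs network access
# to Wikipedia plus the font file referenced above. Run it in place of
# iface.launch() (launch() blocks, so code after it won't execute until
# the server stops):
#
#     print(opendomain("Tell me something important about Albert Einstein"))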