import re

import gradio as gr
import matplotlib.pyplot as plt
import nltk
import wikipedia
from gensim.parsing.preprocessing import STOPWORDS
from nltk.tokenize import word_tokenize
from wordcloud import WordCloud

nltk.download('punkt')


def opendomain(text, wikipedia_language="en"):
    try:
        # Strip standard stop words plus a few question-specific words and
        # punctuation so only the topic terms are sent to the Wikipedia lookup.
        question_words = STOPWORDS.union(
            {'likes', 'play', '.', ',', 'like', "don't", '?', 'use',
             'choose', 'important', 'better'}
        )
        tokens = word_tokenize(text.lower())
        keywords = [token for token in tokens if token not in question_words]
        query = " ".join(keywords)

        if wikipedia_language:
            wikipedia.set_lang(wikipedia_language)

        page = wikipedia.page(query)
        content = page.content

        # Render a word cloud of the article into the current matplotlib figure.
        # This is a side effect only: the figure is never returned through the
        # Gradio interface. The font file is assumed to sit next to this script.
        wordcloud = WordCloud(font_path="HelveticaWorld-Regular.ttf").generate(content)
        plt.imshow(wordcloud, interpolation='bilinear')
        plt.axis("off")
    except Exception:
        return "Please write a correct Wikipedia article name OR question"

    # Drop section headings such as "== History ==" and split into paragraphs.
    plain_text = re.sub(r'=.+=', '', content)
    paragraphs = [p for p in plain_text.split('\n\n') if p != '']

    # From the article's first six paragraphs, keep the first three sentences
    # of every paragraph longer than 500 characters as summary points.
    answer = []
    for paragraph in paragraphs[:6]:
        if len(paragraph) > 500:
            answer.append(paragraph.split(".")[0:3])

    # Join the collected sentences back together, one blank line per paragraph.
    final = ""
    for sentences in answer:
        joint_value = ".".join(sentences)
        final = joint_value if final == "" else f"{final}.\n\n{joint_value}"
    return final


iface = gr.Interface(fn=opendomain, inputs=[gr.inputs.Textbox(lines=5)], outputs="text")
iface.launch()
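
# Example (assumed usage, not part of the original script): typing a topic such
# as "machine learning" into the textbox fetches the matching Wikipedia article
# and returns the first three sentences of each sufficiently long paragraph
# among the article's first six paragraphs, e.g.
#
#     opendomain("What is machine learning?")
#     -> "Machine learning (ML) is ... .\n\n..."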