|
from gensim.parsing.preprocessing import STOPWORDS |
|
import wikipedia |
|
import gradio as gr |
|
from gradio.mix import Parallel |
|
import requests |
|
import nltk |
|
from nltk.tokenize import word_tokenize |
|
from nltk.tokenize import sent_tokenize |
|
import re |
|
nltk.download('punkt') |
|
import numpy as np |
|
import pandas as pd |
|
from os import path |
|
from PIL import Image |
|
from wordcloud import WordCloud, STOPWORDS, ImageColorGenerator |
|
import matplotlib.pyplot as plt |
|
|
|
|
|
def opendomain(text, wikipedia_language="en"):
    """Summarize the Wikipedia article matching a free-form question.

    Strips stopwords/question words from *text*, looks the remaining keywords
    up on Wikipedia, and returns the first three sentences of each long
    paragraph among the article's first six paragraphs.

    Args:
        text: A question or article name, e.g. "who is Albert Einstein?".
        wikipedia_language: Two-letter Wikipedia language code (default "en").

    Returns:
        A plain-text extractive summary, or an apology string when the
        article lookup fails.
    """
    try:
        # Words to drop from the query before the article lookup.
        question_words = STOPWORDS.union(set(
            ['likes', 'play', '.', ',', 'like', "don't", '?', 'use',
             'choose', 'important', 'better']))

        tokens = word_tokenize(text.lower())
        keywords = [tok for tok in tokens if tok not in question_words]
        # BUG FIX: the original used "".join(...), which glued the tokens
        # together ("alberteinstein") and broke the lookup; the stray
        # .replace(" ", "") on the page call showed a space-join was intended.
        query = " ".join(keywords)

        if wikipedia_language:
            wikipedia.set_lang(wikipedia_language)

        et_page = wikipedia.page(query)
        content = et_page.content

        # Word-cloud rendering kept from the original; it is a display-only
        # side effect and does not contribute to the returned text.
        wordcloud = WordCloud(font_path="HelveticaWorld-Regular.ttf").generate(content)
        plt.imshow(wordcloud, interpolation='bilinear')
        plt.axis("off")
    except Exception:
        # Lookup/tokenization failed (page missing, disambiguation, ...);
        # narrowed from a bare `except:` so Ctrl-C still works.
        return "Please write correct wikipedia article name OR question"

    # Drop "== Section ==" headings, then split the article into paragraphs.
    final_out = re.sub(r'\=.+\=', '', content)
    result = list(filter(lambda x: x != '', final_out.split('\n\n')))

    # Collect the first three sentences of each long (>500 chars) paragraph.
    # BUG FIX: slicing result[:6] instead of range(6) avoids an IndexError
    # on articles with fewer than six paragraphs.
    answer = []
    for paragraph in result[:6]:
        if len(paragraph) > 500:
            answer.append(paragraph.split(".")[0:3])

    # Re-join sentence fragments, separating paragraphs with blank lines.
    final = ""
    for value in answer:
        joint_value = ".".join(value)
        if final == "":
            final += joint_value
        else:
            final = f"{final}.\n\n{joint_value}"

    return final
|
|
|
|
|
# Wire the question-answering function into a simple Gradio text-in/text-out UI.
iface = gr.Interface(
    fn=opendomain,
    inputs=[gr.inputs.Textbox(lines=5)],
    outputs="text",
)
iface.launch()
|
|
|
|