|
import gradio as gr |
|
import wikipedia |
|
import numpy as np |
|
import pandas as pd |
|
from os import path |
|
from PIL import Image |
|
from wordcloud import WordCloud, STOPWORDS, ImageColorGenerator |
|
import matplotlib.pyplot as plt |
|
|
|
|
|
def wikipediaScrap(article_name, wikipedia_language="en"):
    """Fetch the text content of a Wikipedia article and draw its word cloud.

    Args:
        article_name: Title of the article to look up. Spaces are removed
            from the title before the lookup.
        wikipedia_language: Language code handed to ``wikipedia.set_lang``.
            A falsy value leaves the library's current language untouched.

    Returns:
        The ``content`` attribute of the resolved page, or an empty string
        when ``article_name`` is empty, blank or ``None``.

    Raises:
        Any exception raised by ``wikipedia.page`` (for example for a missing
        or ambiguous title) or by ``WordCloud`` (for example when the font
        file cannot be loaded) propagates unchanged.

    Side effects:
        Clears the current pyplot figure and draws the word cloud on it with
        the axes hidden. The figure is neither shown nor saved here.
    """
    # Nothing to look up: answer with empty text instead of sending a blank
    # query to Wikipedia. This guard runs before any library call.
    if not article_name or not article_name.strip():
        return ""

    if wikipedia_language:
        wikipedia.set_lang(wikipedia_language)

    # NOTE(review): the title is looked up with every space removed
    # ("Eiffel Tower" -> "EiffelTower"). Presumably this relies on the
    # library's search/suggestion step to resolve the real title -- confirm
    # that this is intended before changing it.
    et_page = wikipedia.page(article_name.replace(" ", ""))
    content = et_page.content

    # The font file is resolved relative to the working directory.
    wordcloud = WordCloud(font_path="HelveticaWorld-Regular.ttf").generate(content)

    # Clear the shared pyplot figure first so repeated calls in a long-running
    # server do not pile one image on top of another.
    plt.clf()
    plt.imshow(wordcloud, interpolation='bilinear')
    plt.axis("off")

    return content
|
|
|
|
|
# Build the scraping UI: an input row (article name + start button), an output
# row (article text) and a row of clickable examples.
# The two declarations of the #dsd_button rule are separated by ';' -- with a
# ',' the whole rule is invalid CSS and the button stays unstyled.
with gr.Blocks(css="footer {visibility: hidden} #dsd_button {background: purple; color: white}") as demo:
    with gr.Row():
        inp = gr.Textbox(placeholder="Enter the name of wikipedia article", label="Wikipedia article name")
        btn = gr.Button("Start Scraping", elem_id="dsd_button")

    with gr.Row():
        content = gr.Textbox(label="Content")

    with gr.Row():
        # NOTE(review): with cache_examples=True Gradio is expected to run
        # `fn` on every example ahead of time, which needs network access
        # when the app is built -- confirm for the installed Gradio version.
        gr.Examples(
            examples=[["Eiffel Tower"], ["Eiffel tower"]],
            fn=wikipediaScrap,
            inputs=[inp],
            outputs=[content],
            cache_examples=True,
        )

    # Without a click handler the "Start Scraping" button does nothing.
    btn.click(fn=wikipediaScrap, inputs=[inp], outputs=[content])

# Launch the Blocks app defined above so that the custom CSS, the button and
# the examples are actually served. The name `interface` is kept bound to the
# launched app for any code that refers to it.
interface = demo

interface.launch(inline=False)