| import gradio as gr |
| import wikipedia |
| import numpy as np |
| import pandas as pd |
| from os import path |
| from PIL import Image |
| from wordcloud import WordCloud, STOPWORDS, ImageColorGenerator |
| import matplotlib.pyplot as plt |
|
|
def wikipediaScrap(article_name, wikipedia_language="en"):
    """Fetch a Wikipedia article and render a word cloud of its text.

    Args:
        article_name: Title passed to ``wikipedia.page``.
        wikipedia_language: Wikipedia language code such as ``"en"``. A
            blank or ``None`` value falls back to ``"en"``.

    Returns:
        A 5-tuple ``(title, content, page_url, linked_pages, figure)``.
        ``linked_pages`` is the article's link titles joined with newlines,
        and ``figure`` is a matplotlib ``Figure`` showing the word cloud.

    Raises:
        Whatever ``wikipedia.page`` raises for the given title; nothing is
        caught here.
    """
    # Imported locally so the figure can be built without pyplot.
    from matplotlib.figure import Figure

    # The language setting is process-wide in the wikipedia package, so set
    # it on every call; otherwise a blank field would silently reuse the
    # language chosen by the previous request.
    language = (wikipedia_language or "").strip() or "en"
    wikipedia.set_lang(language)

    et_page = wikipedia.page(article_name)
    title = et_page.title
    content = et_page.content
    page_url = et_page.url
    linked_pages = et_page.links

    cloud = WordCloud(font_path="HelveticaWorld-Regular.ttf").generate(content)

    # A standalone Figure per call: pyplot's implicit "current figure" is
    # shared global state, so drawing through it would stack every request's
    # image onto the same axes and is unsafe under concurrent requests.
    figure = Figure()
    axes = figure.subplots()
    axes.imshow(cloud, interpolation="bilinear")
    axes.axis("off")

    return title, content, page_url, "\n".join(linked_pages), figure
|
|
|
|
|
|
# Page layout: article name + language inputs, a trigger button, then the
# scraped title/URL next to the word cloud, followed by the full article text
# and the list of linked articles.
with gr.Blocks(
    # Declarations inside a CSS rule are separated by ';' (a ',' makes the
    # rule invalid and the button styling is dropped).
    css="footer {visibility: hidden} #dsd_button {background: purple; color: white}"
) as demo:
    with gr.Row():
        inp = gr.Textbox(
            placeholder="Enter the name of wikipedia article",
            label="Wikipedia article name",
        )
        lan = gr.Textbox(placeholder="Enter the language code", label="Language")
    btn = gr.Button("Start Scraping", elem_id="dsd_button")

    with gr.Row():
        with gr.Column():
            gr.Markdown("""## About""")
            title = gr.Textbox(label="Article title")
            url = gr.Textbox(label="Article URL")
        with gr.Column():
            gr.Markdown("""## Wordcloud""")
            wordcloud = gr.Plot()

    gr.Markdown("""### Content""")
    with gr.Row():
        content = gr.Textbox(label="Content")

    gr.Markdown("""### Linked Articles""")
    with gr.Row():
        linked = gr.Textbox(label="Linked Articles")

    # Order must match the tuple returned by wikipediaScrap:
    # (title, content, page_url, linked_pages, figure).
    result_components = [title, content, url, linked, wordcloud]

    with gr.Row():
        gr.Examples(
            examples=[["Eiffel Tower", "en"], ["Eiffel tower", "ur"]],
            fn=wikipediaScrap,
            inputs=[inp, lan],
            outputs=result_components,
            cache_examples=True,
        )

    # Connect the button to the scraper so clicking it fills the outputs.
    btn.click(fn=wikipediaScrap, inputs=[inp, lan], outputs=result_components)

# `interface` is kept as an alias of the Blocks app for any code that still
# refers to the app by that name.
interface = demo

demo.launch(inline=False)