# File size: 1,524 Bytes
# f805bf8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
import gradio as gr
import wikipedia
import numpy as np
import pandas as pd
from os import path
from PIL import Image
from wordcloud import WordCloud, STOPWORDS, ImageColorGenerator
import matplotlib.pyplot as plt


def wikipediaScrap(article_name, wikipedia_language="en"):
    """Fetch a Wikipedia article and return its text content.

    As a side effect, a word cloud built from the article text is drawn on
    the current matplotlib figure; the figure is not part of the return
    value.

    Args:
        article_name: Title passed to ``wikipedia.page``.
        wikipedia_language: Language code passed to ``wikipedia.set_lang``.
            An empty, whitespace-only or ``None`` value falls back to
            ``"en"``.

    Returns:
        The ``content`` attribute of the page returned by
        ``wikipedia.page``.

    Exceptions raised by ``wikipedia.page`` or by ``WordCloud`` (for
    example when the font file cannot be loaded) are not caught here and
    propagate to the caller.
    """
    # The language is configured on the ``wikipedia`` module itself, not per
    # call, so it has to be (re)applied on every request. Skipping the call
    # for a blank value would silently reuse the language chosen by the
    # previous request instead of the default.
    language = wikipedia_language.strip() if wikipedia_language else ""
    wikipedia.set_lang(language or "en")

    page = wikipedia.page(article_name)
    content = page.content

    # Create and generate a word cloud image:
    wordcloud = WordCloud(font_path="HelveticaWorld-Regular.ttf").generate(content)

    # Display the generated image. Clear the current figure first so that
    # repeated calls do not keep stacking images on the same axes.
    plt.clf()
    plt.imshow(wordcloud, interpolation='bilinear')
    plt.axis("off")

    return content


# Build the UI: two input text boxes (article name and language code), a
# button that triggers the scrape, an output text box for the article
# content, and a row of clickable examples.
# NOTE: CSS declarations are separated by ';'. With a ',' the whole
# "#dsd_button" rule is invalid and the button styling is dropped.
with gr.Blocks(css="footer {visibility: hidden} #dsd_button {background: purple; color: white}") as demo:
    with gr.Row():
        inp = gr.Textbox(placeholder="Enter the name of wikipedia article", label="Wikipedia article name")
        lan = gr.Textbox(placeholder="Enter the language code", label="Language")
    btn = gr.Button("Start Scraping", elem_id="dsd_button")
    with gr.Row():
        content = gr.Textbox(label="Content")
    with gr.Row():
        # Examples only fill the article-name box, so wikipediaScrap runs
        # with its default language for them.
        gr.Examples(
            examples=[["Eiffel Tower"], ["Eiffel tower"]], fn=wikipediaScrap, inputs=[inp],
            outputs=[content], cache_examples=True)

    # Wire the button to the scraper; without this the button does nothing.
    # Event listeners must be registered inside the Blocks context.
    btn.click(fn=wikipediaScrap, inputs=[inp, lan], outputs=[content])

# Launch the Blocks app defined above rather than a second gr.Interface that
# re-uses its components; otherwise the layout, CSS and examples are never
# served. The ``interface`` name is kept as an alias for existing references.
interface = demo

interface.launch(inline=False)