File size: 1,615 Bytes
49664ed |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 |
import gradio as gr
import Wikidata_Text_Parser as wtr
import sqlite3
def process_input(qid, progress=gr.Progress()):
    """Parse the references of a Wikidata item and store the results in SQLite.

    Runs the ``Wikidata_Text_Parser`` pipeline in order (claim parsing,
    property filtering, URL parsing, HTML parsing, claim-text and HTML-text
    generation) while reporting progress to the Gradio UI. The two resulting
    tables are written to ``wikidata_claims_refs_parsed.db`` as ``claim_text``
    and ``html_text``, replacing any existing tables with those names.

    Args:
        qid: Wikidata entity identifier typed by the user (e.g. ``"Q42"``).
        progress: Gradio progress tracker. It must be declared as a default
            argument: Gradio looks for a ``gr.Progress`` default in the
            handler's signature and injects a tracker bound to the running
            event. An instance created inside the function body is not
            connected to the UI.

    Returns:
        A one-line summary string with the number of rows in the HTML-text
        table and the QID that was processed.
    """
    wtr.claimParser(qid)

    progress(0.20, desc="Filtering properties...")
    # The returned frame is not used in this function; the call is kept
    # because later steps presumably depend on its side effects -- TODO confirm.
    wtr.propertyFiltering(qid)

    progress(0.40, desc="Parsing URLs...")
    url_set = wtr.urlParser()

    progress(0.60, desc="Parsing HTML...")
    html_set = wtr.htmlParser(url_set)

    progress(0.80, desc="Generating claim text...")
    claim_text = wtr.claim2text(html_set)  # Claims generation

    # Previously this step reported 100% with the claim-text label although
    # HTML-text extraction and the database write were still pending.
    progress(0.90, desc="Generating HTML text...")
    html_text = wtr.html2text(html_set)

    # Cast every column to str before writing, so all values are stored as
    # text (presumably some columns hold objects SQLite cannot bind directly).
    claim_text = claim_text.astype(str)
    html_text = html_text.astype(str)

    conn = sqlite3.connect('wikidata_claims_refs_parsed.db')
    try:
        claim_text.to_sql('claim_text', conn, if_exists='replace', index=False)
        html_text.to_sql('html_text', conn, if_exists='replace', index=False)
        conn.commit()
    finally:
        # Always release the database handle, even if a write fails.
        conn.close()

    return f"{html_text.shape[0]} HTMl documents collection via references of {qid}"
# Build the single-page UI: a short description, a QID input box, a result
# box, and a button that runs ``process_input`` on the entered QID.
with gr.Blocks() as demo:
    gr.Markdown(
        """
# Reference Quality Verification Tool
This is a tool for verifying the reference quality of Wikidata claims related to the target entity item.
Parsing could take 3~5 mins depending on the number of references.
"""
    )

    qid_box = gr.Textbox(
        placeholder="Input QID (i.e. Q42)",
        label="Input QID",
    )
    result_box = gr.Textbox(label="Parsing result")
    start_button = gr.Button("Start parsing")

    # Clicking the button passes the QID text to process_input and shows the
    # returned summary string in the result box.
    start_button.click(
        fn=process_input,
        inputs=qid_box,
        outputs=result_box,
    )
# Start the Gradio server only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    demo.launch()