File size: 3,911 Bytes
eb40ae9 ac403f9 7afc79c ac403f9 eb40ae9 23333c7 eb40ae9 23333c7 eb40ae9 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 |
import gradio as gr
import pandas as pd
import numpy as np
import tempfile
import random
import os
import json
from pathlib import Path
from cxglearner.parser import Parser
from cxglearner.config import DefaultConfigs, Config
from cxglearner.utils import init_logger
from cxglearner.utils.utils_cxs import convert_slots_to_str
# --- Runtime setup: log/cache locations, parser, and examplar data ---------
# Both the log folder and the parser cache live under the system temp dir.
temp_dir = tempfile.gettempdir()
log_dir = Path(temp_dir).joinpath("logs")
log_dir.mkdir(exist_ok=True)
cahce_dir = Path(temp_dir).joinpath("cache")

# English default configuration, with the log file redirected into log_dir.
config = Config(DefaultConfigs.eng)
config.experiment.log_path = log_dir.joinpath("eng.log")
logger = init_logger(config)
parser = Parser(
    config=config,
    version="1.1",
    logger=logger,
    cache_dir=cahce_dir,
)

# Sample rows shown in the clickable example list of the UI.
examples = [["she should be more polite with the customers."]]
# Cap on how many examplars are drawn for one construction.
MAX_EXAMPLAR = 10

# Examplar table loaded once at start-up; the path is relative to the
# current working directory.
with open("data/learner_examplar_1.1.json", "r", encoding="utf-8") as fp:
    examplars = json.load(fp)
logger.debug(len(examplars))
def fill_input_box(example):
    """Return the first field of a clicked example row.

    ``example`` is indexed with ``[0]``; whatever sits there is handed back
    unchanged so it can be written into the callback's output component.
    """
    first_field = example[0]
    return first_field
def parse_text(text):
    """Encode and parse ``text`` and build the values for the result widgets.

    Args:
        text: Sentence taken from the input box. A falsy value (empty string
            or ``None``) short-circuits to empty widgets.

    Returns:
        A 3-tuple ``(encoded_elements, radio_display, cons_df)``:

        * ``encoded_elements`` -- ``np.vstack`` of the encoder's ``"lexical"``,
          ``"upos"/"spaCy"`` and ``"xpos"/"spaCy"`` sequences (one row each).
        * ``radio_display`` -- a ``gr.Radio`` whose choices are labels of the
          form ``"<id> | <slots> | <start+1>-<end>"``, with the first label
          pre-selected; an empty Radio when nothing was parsed.
        * ``cons_df`` -- a one-column DataFrame holding up to ``MAX_EXAMPLAR``
          distinct examplars for the first construction, or an empty
          DataFrame when there is no construction or no examplar entry.
    """
    if not text:
        return gr.Dataframe(), gr.update(choices=[], value=None), gr.Dataframe()

    encoded = parser.encoder.encode(text, raw=True)
    encoded_elements = np.vstack((
        np.array(encoded["lexical"]),
        np.array(encoded["upos"]["spaCy"]),
        np.array(encoded["xpos"]["spaCy"]),
    ))

    # Only the first element of the parser output is consumed here.
    # NOTE(review): presumably one entry per input sentence -- confirm
    # against the Parser API.
    labels = []
    slot_keys = []
    for cxs in parser.parse(text)[0]:
        slots = convert_slots_to_str(parser.cxs_decoder[cxs[0]], parser.encoder, logger)
        labels.append("{} | {} | {}-{}".format(cxs[0], slots, cxs[1] + 1, cxs[2]))
        # Keep the slot string itself so the examplar lookup does not have to
        # re-split the label on "|" (which breaks if the slots contain "|").
        slot_keys.append(str(slots).strip())

    if not labels:
        empty_radio = gr.Radio(label="Constructions", choices=[])
        return encoded_elements, empty_radio, pd.DataFrame()

    radio_display = gr.Radio(
        label="Constructions", choices=labels, interactive=True, value=labels[0]
    )

    first_key = slot_keys[0]
    if first_key in examplars:
        candidates = examplars[first_key]
        # random.sample draws without replacement, so the table never shows
        # the same examplar twice (random.choices could repeat entries).
        exams = random.sample(candidates, k=min(MAX_EXAMPLAR, len(candidates)))
        cons_df = pd.DataFrame(exams, columns=[first_key])
    else:
        cons_df = pd.DataFrame()
    return encoded_elements, radio_display, cons_df
def refresh_examplar(option: str, *, pool=None, limit=None):
    """Draw a fresh set of examplars for the selected construction label.

    Args:
        option: Radio label of the form ``"<id> | <slots> | <start>-<end>"``.
            An empty or ``None`` value yields an empty DataFrame.
        pool: Mapping from slot string to a list of examplars. Defaults to
            the module-level ``examplars`` table.
        limit: Maximum number of examplars to return. Defaults to the
            module-level ``MAX_EXAMPLAR``.

    Returns:
        A one-column DataFrame (column name = slot string) holding at most
        ``limit`` distinct examplars, or an empty DataFrame when the slot
        string has no entry in ``pool``.
    """
    if pool is None:
        pool = examplars
    if limit is None:
        limit = MAX_EXAMPLAR
    if not option:
        return pd.DataFrame()

    # Peel off the first and last "|"-separated fields; everything between
    # them is the slot string, even if it contains "|" itself.
    # The id field used to be passed through eval() although its value was
    # never used; it is ignored now so UI-supplied text is never executed.
    _, _, remainder = option.partition("|")
    slots, _, _ = remainder.rpartition("|")
    key = slots.strip()

    if key not in pool:
        return pd.DataFrame()

    candidates = pool[key]
    # Sample without replacement so no examplar is listed twice.
    exams = random.sample(candidates, k=min(limit, len(candidates)))
    return pd.DataFrame(exams, columns=[key])
def clear_text():
    """Build the blank values used to reset the interface.

    Returns a 4-tuple, in order: an empty string, an empty DataFrame, a
    "Constructions" Radio with no choices, and another empty DataFrame.
    """
    blank_text = ""
    blank_encoding = pd.DataFrame()
    blank_choices = gr.Radio(label="Constructions", choices=[])
    blank_examplars = pd.DataFrame()
    return blank_text, blank_encoding, blank_choices, blank_examplars
# Gradio UI definition and event wiring for the parser demo.
# NOTE(review): leading indentation is missing in this copy of the file, so
# which statements sit inside which `with` context (Blocks / Column / Row)
# cannot be determined from here -- confirm the intended layout nesting
# against the original before editing.
with gr.Blocks() as demo:
with gr.Column():
gr.Markdown("## CxGLearner Parser")
with gr.Row():
# Free-text input holding the sentence to parse.
input_text = gr.Textbox(label="Input Text", placeholder="Enter a sentence here...")
with gr.Row():
# Clickable list of sample sentences taken from `examples`.
dataset = gr.Dataset(components=[input_text],
samples=examples,
label="Click an example")
clear_buttton = gr.Button("Clear")
parser_button = gr.Button("Parse")
with gr.Column():
gr.Markdown("### Results of Encoding and Parsing")
# Table receiving the first value returned by parse_text.
enc_display = gr.Dataframe()
# Radio list of parsed constructions; starts with no choices.
cxs_display = gr.Radio(label="Constructions", choices=[])
with gr.Column():
gr.Markdown("### Examplars")
# Table receiving the examplar DataFrame.
cons_display = gr.Dataframe()
# "Parse": run parse_text on the input text and fill all three result widgets.
parser_button.click(fn=parse_text, inputs=[input_text], outputs=[enc_display, cxs_display, cons_display])
# "Clear": reset the text box and the three result widgets via clear_text.
clear_buttton.click(fn=clear_text, inputs=[], outputs=[input_text, enc_display, cxs_display, cons_display])
# Clicking a sample copies its first field into the text box.
dataset.click(fn=fill_input_box, inputs=dataset, outputs=input_text)
# Selecting a construction redraws the examplar table for it.
cxs_display.select(refresh_examplar, inputs=[cxs_display], outputs=cons_display)
# Start the Gradio server for the `demo` app.
demo.launch()