|
|
|
from functools import lru_cache

import gradio as gr
import pandas as pd
from transformers import pipeline, AutoModelForQuestionAnswering, AutoTokenizer, AutoModelForTokenClassification
|
|
|
|
|
# Checkpoint identifiers handed to ``from_pretrained`` for each task.
qa_model_name = "erdometo/xlm-roberta-base-finetuned-TQuad2"
token_classification_model_name = "akdeniz27/convbert-base-turkish-cased-ner"


def _load_checkpoint(model_cls, checkpoint):
    """Return ``(model, tokenizer)`` for *checkpoint*, loading the model first."""
    return model_cls.from_pretrained(checkpoint), AutoTokenizer.from_pretrained(checkpoint)


# Both model/tokenizer pairs are loaded once, when the module is executed.
qa_model, qa_tokenizer = _load_checkpoint(
    AutoModelForQuestionAnswering, qa_model_name
)
token_classification_model, token_classification_tokenizer = _load_checkpoint(
    AutoModelForTokenClassification, token_classification_model_name
)
|
def tabulazier(output):
    """Merge adjacent same-type entity fragments and return them as a table.

    An entry is fused into the row before it when it starts exactly where the
    preceding entry ends and both share the same ``entity_group``. The fused
    row gets the concatenated ``word`` and the later ``end``; every other
    field (for example the score) stays as it was on the first fragment.

    Args:
        output: Sequence of dicts, each with at least the keys ``word``,
            ``start``, ``end`` and ``entity_group``. The dicts are not
            modified.

    Returns:
        A ``pandas.DataFrame`` with one row per merged entity and every ``#``
        character removed from the ``word`` column. An empty ``output``
        yields an empty DataFrame.
    """
    merged = []
    previous = None
    for entity in output:
        is_continuation = (
            previous is not None
            and entity["start"] == previous["end"]
            and entity["entity_group"] == previous["entity_group"]
        )
        if is_continuation:
            merged[-1]["word"] += entity["word"]
            merged[-1]["end"] = entity["end"]
        else:
            # Store a copy: merging into the caller's own dict would silently
            # alter data the caller may still be using elsewhere.
            merged.append(dict(entity))
        previous = entity

    df = pd.DataFrame(merged)
    # With no entities the frame has no columns at all, so only clean up the
    # ``word`` column when it actually exists.
    if "word" in df.columns:
        df["word"] = df["word"].str.replace("#", "", regex=False)
    return df
|
|
|
|
|
|
|
@lru_cache(maxsize=None)
def _get_pipeline(pipeline_type):
    """Build the pipeline for *pipeline_type* once and reuse it afterwards."""
    if pipeline_type == "question-answering":
        return pipeline("question-answering", model=qa_model, tokenizer=qa_tokenizer)
    return pipeline(
        "ner",
        model=token_classification_model,
        tokenizer=token_classification_tokenizer,
        aggregation_strategy="simple",
    )


def predict(pipeline_type, question, context):
    """Run the selected pipeline and return values for the two output widgets.

    Args:
        pipeline_type: ``"question-answering"`` or ``"token-classification"``.
        question: Question text; only used by the question-answering branch.
        context: Text to search for an answer in, or to tag with entities.

    Returns:
        A two-element list. For question answering both elements are the same
        ``[(answer, score)]`` list. For token classification the first element
        is a ``gr.HighlightedText`` built from the text and its entities, and
        the second is the entity table produced by ``tabulazier``.

    Raises:
        gr.Error: If ``pipeline_type`` is neither of the two supported values
            (for instance when nothing has been selected yet).
    """
    if pipeline_type == "question-answering":
        result = _get_pipeline(pipeline_type)(question=question, context=context)
        response = [(result["answer"], result.get("score", None))]
        return [response, response]

    if pipeline_type == "token-classification":
        result = _get_pipeline(pipeline_type)(context)
        highlighted_text = {"text": context, "entities": result}
        if result:
            # Give the table builder its own copies so that any merging it
            # does cannot change the entities shown in the highlighted text.
            table = tabulazier([dict(entity) for entity in result])
        else:
            # Nothing was recognised: show an empty table.
            table = pd.DataFrame()
        return [gr.HighlightedText(highlighted_text), table]

    # Falling through would return None, which cannot fill two outputs.
    raise gr.Error(
        "Please choose a pipeline: 'question-answering' or 'token-classification'."
    )
|
|
|
|
|
# Input widgets, listed in the positional order of ``predict``'s parameters:
# the pipeline selector followed by two plain text inputs.
input_components = [
    gr.Dropdown(
        choices=["question-answering", "token-classification"],
        label="Choose Pipeline",
    ),
    "text",
    "text",
]

# Output widgets matching the two values ``predict`` returns.
output_components = [gr.Highlight(), gr.Dataframe()]

iface = gr.Interface(
    fn=predict,
    inputs=input_components,
    outputs=output_components,
)

# Start serving the interface as soon as the script runs.
iface.launch(debug=False)
|
|