Spaces:

SantiagoMoreno-UdeA
/

NER_RC

Runtime error

App Files Files Community

NER_RC / src /graph /GUI.py

SantiagoMoreno-UdeA

Sync complete

7bc122c 10 months ago

raw

history blame

10.8 kB

	# -*- coding: utf-8 -*-
	"""
	Created on Fri Nov 11 16:01:08 2022

	@author: Santiago Moreno
	"""

	import os
	import gradio as gr
	import sys
	import json


	# Absolute path of the directory containing this file; the model and
	# script paths used further down are built relative to it.
	default_path = os.path.dirname(os.path.abspath(__file__))
	#default_path = default_path.replace('\\', '/')

	# Make this directory the process working directory and put the sibling
	# ``scripts`` directory first on the import search path. Both statements
	# run before the project imports that follow.
	os.chdir(default_path)
	sys.path.insert(0, default_path+'/../scripts')

	from src.scripts.functionsner import use_model, tag_sentence, json_to_txt, training_model, characterize_data, upsampling_data, usage_cuda, copy_data
	from src.scripts.functionsrc import use_model_rc, training_model_rc, usage_cuda_rc

	# Directory entries found under models/NER and models/RC at import time;
	# the GUI uses these lists as the choices of its "Model" radio buttons.
	models_NER = os.listdir(default_path+'/../../models/NER')
	models_rc = os.listdir(default_path+'/../../models/RC')

	#-------------------------------------------Functions-----------------------------------------------

	#--------------------------------------NER-----------------------------------
	def Trainer(fast, model_name, standard, input_dir, Upsampling, Cuda):
	    """Train a new NER model, yielding status messages for the GUI.

	    This is a generator: each yielded string is one status update.

	    Args:
	        fast: If truthy, train for 1 epoch; otherwise for 20 epochs.
	        model_name: Name of the new model, forwarded to ``training_model``.
	        standard: If truthy, ``input_dir`` is handed to ``copy_data``;
	            otherwise it is handed to ``json_to_txt``.
	        input_dir: Path of the input data directory.
	        Upsampling: If truthy, run ``upsampling_data`` before training on
	            every entity whose value in the dict returned by
	            ``characterize_data`` is below 200 (presumably an occurrence
	            count -- confirm against ``characterize_data``).
	        Cuda: Truthiness is forwarded to ``usage_cuda``; the returned text
	            is shown in the first status message.

	    Yields:
	        str: Progress banners followed by either an error message or the
	        final "Training complete" message.
	    """
	    epochs = 1 if fast else 20
	    cuda_info = usage_cuda(bool(Cuda))

	    if standard:
	        copy_data(input_dir)
	    else:
	        error = json_to_txt(input_dir)
	        # The helpers signal failure by returning an int error code.
	        if type(error) is int:
	            yield 'Error processing the input documents, code error {}'.format(error)
	            # Stop here: continuing would train on unconverted data and
	            # overwrite this error message with later status updates.
	            return

	    if Upsampling:
	        yield cuda_info+'\n'+'-'*20+'Upsampling'+'-'*20
	        entities_dict = characterize_data()
	        entities = list(entities_dict)
	        entities_to_upsample = [
	            entity for entity, value in entities_dict.items() if value < 200
	        ]
	        upsampling_data(entities_to_upsample, 0.8, entities)
	        yield '-'*20+'Training'+'-'*20
	    else:
	        yield cuda_info+'\n'+'-'*20+'Training'+'-'*20

	    error = training_model(model_name, epochs)
	    if type(error) is int:
	        yield 'Error training the model, code error {}'.format(error)
	    else:
	        yield 'Training complete, model {} could be found at models/{}'.format(model_name,model_name)


	def Tagger_sentence(Model, Sentence, Cuda):
	    """Tag a single sentence with a NER model, yielding GUI updates.

	    Args:
	        Model: Model identifier forwarded to ``tag_sentence``.
	        Sentence: Text to tag.
	        Cuda: Truthiness is forwarded to ``usage_cuda``; the returned text
	            is shown in the first status message.

	    Yields:
	        First a status banner string, then either an error string (when
	        ``tag_sentence`` returns an int error code) or the value stored
	        under the ``'Highligth'`` key of the result.
	    """
	    cuda_info = usage_cuda(bool(Cuda))
	    yield cuda_info+'\n'+'-'*20+'Tagging'+'-'*20

	    results = tag_sentence(Sentence, Model)
	    if type(results) is int:
	        yield "Error {}, see documentation".format(results)
	    else:
	        # NOTE: 'Highligth' (sic) is the key as spelled in the original
	        # code; it must match what tag_sentence returns.
	        yield results['Highligth']

	def Tagger_json(Model, Input_file, Output_file, Cuda):
	    """Tag a document with a NER model, yielding GUI updates.

	    Args:
	        Model: Model identifier forwarded to ``use_model``.
	        Input_file: Uploaded file object; only its ``.name`` attribute
	            (the path) is used.
	        Output_file: Path where the tagged document is written.
	        Cuda: Truthiness is forwarded to ``usage_cuda``; the returned text
	            is shown in the first status message.

	    Yields:
	        tuple: ``(highlight_or_message, json_payload, Output_file)``.
	        The first tuple carries a status banner and an empty dict; the
	        second carries either an error message with an empty dict, or the
	        text/entities highlight dict together with the full result.
	    """
	    cuda_info = usage_cuda(bool(Cuda))

	    # Write a placeholder first so Output_file exists before it is yielded
	    # (presumably so the file output has something to serve -- confirm).
	    with open(Output_file, "w", encoding='utf-8') as write_file:
	        json.dump({'error':'error'}, write_file)

	    yield cuda_info+'\n'+'-'*20+'Tagging'+'-'*20, {}, Output_file

	    results = use_model(Model, Input_file.name, Output_file)
	    if type(results) is int:
	        yield "Error {}, see documentation".format(results), {}, Output_file
	    else:
	        yield { "text" : results['text'], 'entities': results['entities']}, results, Output_file


	#--------------------RC-------------------------------
	def Trainer_RC(fast, model_name, input_file, rel2id_file, Cuda):
	    """Train a new RC model, yielding status messages for the GUI.

	    Args:
	        fast: If truthy, train for 1 epoch; otherwise for 200 epochs.
	        model_name: Name of the new model, forwarded to
	            ``training_model_rc``.
	        input_file: Uploaded training file object; only ``.name`` is used.
	        rel2id_file: Uploaded rel2id file object; only ``.name`` is used.
	        Cuda: Truthiness is forwarded to ``usage_cuda_rc``; the returned
	            text is shown in the first status message.

	    Yields:
	        str: A "Training" banner, then either an error message (when
	        ``training_model_rc`` returns an int error code) or the final
	        "Training complete" message.
	    """
	    epochs = 1 if fast else 200
	    cuda_info = usage_cuda_rc(bool(Cuda))

	    yield cuda_info+'\n'+'-'*20+'Training'+'-'*20

	    error = training_model_rc(model_name, input_file.name, rel2id_file.name, epochs)
	    if type(error) is int:
	        yield 'Error training the model, code error {}'.format(error)
	    else:
	        yield 'Training complete, model {} could be found at models/{}'.format(model_name,model_name)


	def Tagger_document_RC(Model, Input_file, Output_file, Cuda):
	    """Apply an RC model to a document, yielding GUI updates.

	    Args:
	        Model: Model identifier forwarded to ``use_model_rc``.
	        Input_file: Uploaded file object; only its ``.name`` attribute
	            (the path) is used.
	        Output_file: Path where the output JSON is written.
	        Cuda: Truthiness is forwarded to ``usage_cuda_rc``.

	    Yields:
	        tuple: ``(json_payload, Output_file)``. The first payload is
	        ``{'cuda': <message>}``; the second is the model result, or an
	        empty dict when ``use_model_rc`` returns an int error code.
	    """
	    cuda_info = usage_cuda_rc(bool(Cuda))

	    # Write a placeholder so Output_file exists before it is first yielded.
	    placeholder = {'error':'error'}
	    with open(Output_file, "w", encoding='utf-8') as handle:
	        json.dump(placeholder, handle)

	    yield {'cuda':cuda_info}, Output_file

	    outcome = use_model_rc(Model, Input_file.name, Output_file)
	    payload = {} if type(outcome) is int else outcome
	    yield payload, Output_file


	#---------------------------------GUI-------------------------------------
	def execute_GUI():
	    """Build the Gradio interface for NER and RC and launch it.

	    The interface has two top-level tabs:

	    * "NER": a "Tagger" tab (with "Sentence" and "Document" sub-tabs) and
	      a "Trainer" tab.
	    * "RC": a "Tagger Document" tab and a "Trainer" tab.

	    Each button is connected to the matching module-level generator
	    (``Tagger_sentence``, ``Tagger_json``, ``Trainer``,
	    ``Tagger_document_RC``, ``Trainer_RC``). The function refreshes the
	    module-level ``models_NER`` list, enables the queue and calls
	    ``demo.launch()``.

	    NOTE(review): the widget nesting (what sits inside each Row/Column)
	    was reconstructed from the statement order -- confirm the layout.
	    """
	    global models_NER

	    # Build the model directories from default_path, like the module-level
	    # listings do, so refreshing does not depend on the working directory.
	    ner_models_dir = default_path+'/../../models/NER'
	    rc_models_dir = default_path+'/../../models/RC'

	    def refresh_choices(models_dir):
	        """Return a change handler that reloads radio choices from models_dir."""
	        return lambda value: gr.update(choices=list(os.listdir(models_dir)))

	    with gr.Blocks(title='NER', css="#title {font-size: 150% } #sub {font-size: 120% } ") as demo:

	        gr.Markdown("Named Entity Recognition(NER) and Relation Classification (RC) by GITA and Pratec Group S.A.S.",elem_id="title")
	        gr.Markdown("Software developed by Santiago Moreno, Daniel Escobar, and Rafael Orozco",elem_id="sub")
	        gr.Markdown("Named Entity Recognition(NER) and Relation Classification (RC) System.")

	        with gr.Tab("NER"):
	            gr.Markdown("Use Tagger to apply NER from a pretrained model in a sentence or a given document in INPUT (.JSON) format.")
	            gr.Markdown("Use Trainer to train a new NER model from a directory of documents in PRATECH (.JSON) format.")
	            with gr.Tab("Tagger"):
	                with gr.Tab("Sentence"):
	                    with gr.Row():
	                        with gr.Column():
	                            sentence_model = gr.Radio(list(models_NER), label='Model')
	                            sentence_inputs = [
	                                sentence_model,
	                                gr.Textbox(placeholder="Enter sentence here...", label='Sentence'),
	                                gr.Radio([True,False], label='CUDA', value=False),
	                            ]
	                            tagger_sen = gr.Button("Tag")
	                        sentence_output = gr.HighlightedText()

	                    tagger_sen.click(Tagger_sentence, inputs=sentence_inputs, outputs=sentence_output)
	                    # Reload the model list whenever the selection changes.
	                    sentence_model.change(fn=refresh_choices(ner_models_dir), inputs=sentence_model, outputs=sentence_model)
	                    gr.Examples(
	                        examples=[
	                            ['CCC',"Camara de comercio de medellín. El ciudadano JAIME JARAMILLO VELEZ identificado con C.C. 12546987 ingresó al plantel el día 1/01/2022"],
	                            ['CCC',"Razón Social GASEOSAS GLACIAR S.A.S, ACTIVIDAD PRINCIPAL fabricación y distribución de bebidas endulzadas"]
	                        ],
	                        inputs=sentence_inputs
	                    )

	                with gr.Tab("Document"):
	                    with gr.Row():
	                        with gr.Column():
	                            document_model = gr.Radio(list(models_NER), label='Model')
	                            document_inputs = [
	                                document_model,
	                                gr.File(label='Input data file'),
	                                gr.Textbox(placeholder="Enter path here...", label='Output data file path'), #value='../../data/Tagged/document_tagged.json'),
	                                gr.Radio([True,False], label='CUDA', value=False),
	                            ]
	                            tagger_json = gr.Button("Tag")
	                        document_outputs = [
	                            gr.HighlightedText(),
	                            gr.JSON(),
	                            gr.File(),
	                        ]

	                    # Refresh the module-level listing (kept from the
	                    # original; the radios above used the previous value).
	                    models_NER = os.listdir(ner_models_dir)

	                    tagger_json.click(Tagger_json, inputs=document_inputs, outputs=document_outputs)
	                    document_model.change(fn=refresh_choices(ner_models_dir), inputs=document_model, outputs=document_model)

	            with gr.Tab("Trainer"):
	                with gr.Row():
	                    with gr.Column():
	                        ner_train_inputs = [
	                            gr.Radio([True,False], label='Fast training', value=False),
	                            gr.Textbox(placeholder="Enter model name here...", label='New model name'),
	                            gr.Radio([True,False], label='Standard input', value=False),
	                            gr.Textbox(placeholder="Enter path here...", label='Input data directory path'),
	                            gr.Radio([True,False], label='Upsampling', value=False),
	                            gr.Radio([True,False], label='CUDA', value=False),
	                        ]
	                        ner_trainer = gr.Button("Train")
	                    ner_train_output = gr.TextArea(placeholder="Output information", label='Output')

	                # Fix: the original never connected this button, so the
	                # NER "Train" button did nothing and Trainer was unused.
	                ner_trainer.click(Trainer, inputs=ner_train_inputs, outputs=ner_train_output)

	        with gr.Tab("RC"):
	            gr.Markdown("Use Tagger to apply RC from a pretrained model in document in (.TXT) CONLL04 format.")
	            gr.Markdown("Use Trainer to train a new RC model from a file (.TXT) CONLL04 format and the rel2id file (.JSON).")
	            with gr.Tab("Tagger Document"):
	                with gr.Row():
	                    with gr.Column():
	                        rc_model = gr.Radio(list(models_rc), label='Model')
	                        rc_inputs = [
	                            rc_model,
	                            gr.File(label='Input data file'),
	                            gr.Textbox(placeholder="Enter path here...", label='Output data file path (.JSON)'), #value='../../data/Tagged/document_tagged.json'),
	                            gr.Radio([True,False], label='CUDA', value=False),
	                        ]
	                        rc_tagger = gr.Button("Tag")
	                    rc_outputs = [
	                        gr.JSON(),
	                        gr.File(),
	                    ]

	                rc_tagger.click(Tagger_document_RC, inputs=rc_inputs, outputs=rc_outputs)
	                rc_model.change(fn=refresh_choices(rc_models_dir), inputs=rc_model, outputs=rc_model)

	            with gr.Tab("Trainer"):
	                with gr.Row():
	                    with gr.Column():
	                        rc_train_inputs = [
	                            gr.Radio([True,False], label='Fast training', value=True),
	                            gr.Textbox(placeholder="Enter model name here...", label='New model name'),
	                            gr.File(label='Input train file (.TXT)'),
	                            gr.File(label='Input rel2id file (.JSON)'),
	                            gr.Radio([True,False], label='CUDA', value=False),
	                        ]
	                        rc_trainer = gr.Button("Train")
	                    rc_train_output = gr.TextArea(placeholder="Output information", label='Output')

	                rc_trainer.click(Trainer_RC, inputs=rc_train_inputs, outputs=rc_train_output)

	    # The handlers are generators; the queue is enabled before launching
	    # so their intermediate yields can be streamed to the page.
	    demo.queue()
	    demo.launch()