import gradio as gr
from transformers import AutoModelForSequenceClassification, AutoTokenizer
import pandas as pd
import random
import torch
README = """
# Movie Review Score Discriminator
Enter a movie review and the model classifies it as positive or negative.
You can choose between the Korean and English versions of the model.
## Usage
"""
id2label = {0: "NEGATIVE", 1: "POSITIVE"}
label2id = {"NEGATIVE": 0, "POSITIVE": 1}
title = "Movie Review Score Discriminator"
description = "Enter a movie review and the model classifies it as positive or negative. You can choose between the Korean and English versions of the model."
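
# Encode a single review as fixed-length (max 64 tokens) PyTorch tensors.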
def tokenized_data(tokenizer, inputs):
    return tokenizer.batch_encode_plus(
        [inputs],
        return_tensors="pt",
        padding="max_length",
        max_length=64,
        truncation=True)
examples_eng = ["the greatest musicians ", "cold movie "]
examples_kor = ["긍정", "부정"]  # Korean for "positive", "negative"
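
# Build the Gradio examples table: pick two random rows from examples.csv;
# each row contributes one English example (column 0) and one Korean example (column 1).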
examples = []
df = pd.read_csv('examples.csv', sep='\t', index_col='Unnamed: 0')
for i in range(2):
    idx = random.randint(0, 50)
    examples.append(['Eng', df.iloc[idx, 0]])
    examples.append(['Kor', df.iloc[idx, 1]])
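
# English model: roberta-base for binary sentiment classification, with weights
# loaded from a local checkpoint saved after 1900 training steps (roberta-base-1900.pt).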
eng_model_name = "roberta-base"
eng_step = 1900
eng_tokenizer = AutoTokenizer.from_pretrained(eng_model_name)
eng_file_name = "{}-{}.pt".format(eng_model_name, eng_step)
eng_state_dict = torch.load(eng_file_name, map_location="cpu")  # load onto CPU so the app also runs without a GPU
eng_model = AutoModelForSequenceClassification.from_pretrained(
    eng_model_name, num_labels=2, id2label=id2label, label2id=label2id,
    state_dict=eng_state_dict
)
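
# Korean model: klue/roberta-small for binary sentiment classification, with weights
# loaded from a local checkpoint saved after 2400 training steps (klue_roberta-small-2400.pt).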
kor_model_name = "klue_roberta-small"
kor_step = 2400
kor_tokenizer = AutoTokenizer.from_pretrained(kor_model_name.replace('_', '/'))
kor_file_name = "{}-{}.pt".format(kor_model_name, kor_step)
kor_state_dict = torch.load(kor_file_name, map_location="cpu")  # load onto CPU so the app also runs without a GPU
kor_model = AutoModelForSequenceClassification.from_pretrained(
    kor_model_name.replace('_', '/'), num_labels=2, id2label=id2label, label2id=label2id,
    state_dict=kor_state_dict
)
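
# Gradio prediction function: pick the model/tokenizer for the selected language,
# run a forward pass without gradients, and return the predicted label.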
def builder(lang, text):
    if lang == 'Eng':
        model = eng_model
        tokenizer = eng_tokenizer
    else:
        model = kor_model
        tokenizer = kor_tokenizer

    inputs = tokenized_data(tokenizer, text)
    model.eval()
    with torch.no_grad():
        logits = model(input_ids=inputs['input_ids'],
                       attention_mask=inputs['attention_mask']).logits

    prediction = torch.argmax(logits, axis=1)
    return id2label[prediction.item()]
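
# Alternative prediction function kept for the commented-out demo2 below;
# it is not used by the active interface.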
def builder2(inputs):
    return eng_model(inputs)
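
# Interface wiring: language dropdown + free-text review in, predicted label out.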
demo = gr.Interface(builder, inputs=[gr.inputs.Dropdown(['Eng', 'Kor']), "text"], outputs="text",
                    title=title, description=description, examples=examples)
# demo2 = gr.Interface(builder2, inputs="text", outputs="text",
# title=title, theme="peach",
# allow_flagging="auto",
# description=description, examples=examples)
# demo3 = gr.Interface.load("models/mdj1412/movie_review_score_discriminator_eng", inputs="text", outputs="text",
# title=title, theme="peach",
# allow_flagging="auto",
# description=description, examples=examples)
if __name__ == "__main__":
# print(examples)
demo.launch()
# demo3.launch()