Spaces:

yangheng
/

Multilingual-Aspect-Based-Sentiment-Analysis

Running

App Files Files Community

Multilingual-Aspect-Based-Sentiment-Analysis / app.py

yangheng

update

7de7186 almost 2 years ago

raw history blame

No virus

4.6 kB

	import os
	import random
	import gradio as gr
	import pandas as pd
	import requests

	from pyabsa import ATEPCCheckpointManager
	from pyabsa.functional.dataset.dataset_manager import download_datasets_from_github, ABSADatasetList, detect_infer_dataset

	# Startup step: run PyABSA's dataset download helper against the current
	# working directory before any dataset lookup happens.
	download_datasets_from_github(os.getcwd())

	# Lookup table: dataset name -> dataset object, one entry per item of
	# ABSADatasetList().
	dataset_items = {entry.name: entry for entry in ABSADatasetList()}

	# Visitor-badge endpoint. It is requested once per inference call and is
	# also embedded as an image at the bottom of the page.
	URL = 'https://api.visitorbadge.io/api/combined?path=https%3A%2F%2Fhuggingface.co%2Fspaces%2Fyangheng%2Fpyabsa_inference&label=Inference%20Count&labelColor=%2337d67a&countColor=%23f47373&style=flat&labelStyle=none'

	def get_example(dataset):
	    """Return the de-duplicated example sentences of one dataset.

	    The inference file(s) for ``dataset`` are located with
	    ``detect_infer_dataset`` (task ``'apc'``). Every line is cut at the
	    ``!sent!`` marker and all ``[ASP]`` tags are removed, so only the text
	    in front of the marker remains.

	    Args:
	        dataset: Dataset name; must be a key of ``dataset_items``.

	    Returns:
	        The cleaned lines as a list, duplicates removed, in order of first
	        occurrence. An empty list when nothing was detected.
	    """
	    task = 'apc'
	    detected = detect_infer_dataset(dataset_items[dataset], task)

	    # NOTE(review): the function returns after processing the first detected
	    # entry (the return sits inside this loop) -- confirm that later entries
	    # are intentionally ignored.
	    for entry in detected:
	        # An entry may be a single path or a collection of paths.
	        paths = [entry] if isinstance(entry, str) else entry

	        raw_lines = []
	        for path in paths:
	            print('loading: {}'.format(path))
	            # Context manager closes the handle even if reading fails.
	            with open(path, 'r', encoding='utf-8') as fin:
	                raw_lines.extend(fin)

	        cleaned = []
	        for line in raw_lines:
	            text, marker, _ = line.partition('!sent!')
	            if not marker:
	                # No marker on this line: strip only the line ending.
	                # Slicing at find() == -1 would drop the last real
	                # character of a final line without a trailing newline.
	                text = line.rstrip('\n')
	            cleaned.append(text.replace('[ASP]', ''))

	        # dict keys keep insertion order, giving an ordered de-duplication
	        # in linear time instead of sorting by list.index.
	        return list(dict.fromkeys(cleaned))

	    # Nothing detected for this dataset.
	    return []


	# Example pool, built once at startup: dataset name -> result of
	# get_example() for that name.
	dataset_dict = {
	    entry.name: get_example(entry.name)
	    for entry in ABSADatasetList()
	}

	# Aspect extractor obtained from the 'multilingual-256-2' checkpoint; it is
	# created once here and reused by every inference call.
	aspect_extractor = ATEPCCheckpointManager.get_aspect_extractor(
	    checkpoint='multilingual-256-2',
	)


	def perform_inference(text, dataset):
	    """Run aspect extraction with sentiment prediction on one sentence.

	    Args:
	        text: Sentence to analyse. When empty, a random example of
	            ``dataset`` is taken from ``dataset_dict`` instead.
	        dataset: Dataset name (key of ``dataset_dict``) used only to pick
	            the random example.

	    Returns:
	        A ``(table, text)`` tuple. ``table`` is a ``pandas.DataFrame`` with
	        the columns ``aspect``, ``sentiment``, ``confidence`` (rounded to
	        four decimals) and ``position``, built from the first extraction
	        result. ``text`` is the sentence that was actually analysed.
	    """
	    if not text:
	        text = random.choice(dataset_dict[dataset])

	    extraction = aspect_extractor.extract_aspect(
	        inference_source=[text],
	        pred_sentiment=True,
	    )
	    # A single sentence was submitted, so only the first result is used.
	    prediction = extraction[0]

	    table = pd.DataFrame({
	        'aspect': prediction['aspect'],
	        'sentiment': prediction['sentiment'],
	        'confidence': [round(score, 4) for score in prediction['confidence']],
	        'position': prediction['position'],
	    })

	    # The counter ping is best-effort: bound it with a timeout and never let
	    # a network failure discard a prediction that is already computed.
	    try:
	        requests.get(URL, timeout=5)
	    except requests.RequestException as err:
	        print('visitor counter request failed: {}'.format(err))

	    return table, '{}'.format(text)


	# Page definition. Components are created in display order inside the Blocks
	# context: header and usage notes, then a two-column row (inputs on the
	# left, outputs on the right), then the visitor badges.
	with gr.Blocks() as demo:
	    # --- Header and usage notes ---
	    gr.Markdown("# <p align='center'>Multilingual Aspect-based Sentiment Analysis !</p>")
	    gr.Markdown("""### Repo: [PyABSA](https://github.com/yangheng95/PyABSA)
	### Author: [Heng Yang](https://github.com/yangheng95) (杨恒)
	[![Downloads](https://pepy.tech/badge/pyabsa)](https://pepy.tech/project/pyabsa)
	[![Downloads](https://pepy.tech/badge/pyabsa/month)](https://pepy.tech/project/pyabsa)
	""")
	    gr.Markdown("Your input text should be no more than 80 words, that's the longest text we used in training. However, you can train your own model using 512 max length")
	    gr.Markdown("You don't need to split each Chinese (Korean, etc.) token as the provided examples, just input the natural language text.")
	    gr.Markdown("请确保输入的文本长度不超过200词，这是训练时的最大文本长度，过长将不会获得结果")
	    gr.Markdown("提供的中文等其他非拉丁语系数据集采用了空格分字，这是早期数据集的遗留问题，预测时不用对中文等语言进行空格分字")

	    # Collects the result tables created below.
	    output_dfs = []

	    with gr.Row():
	        # --- Left column: sentence input and dataset choice ---
	        with gr.Column():
	            input_sentence = gr.Textbox(
	                placeholder='Leave this box blank and choose a dataset will give you a random example...',
	                label="Example:",
	            )
	            gr.Markdown("You can find the datasets at [github.com/yangheng95/ABSADatasets](https://github.com/yangheng95/ABSADatasets/tree/v1.2/datasets/text_classification)")
	            # Every dataset name except the last list entry is offered.
	            dataset_ids = gr.Radio(
	                choices=[entry.name for entry in ABSADatasetList()[:-1]],
	                value='Laptop14',
	                label="Datasets",
	            )
	            inference_button = gr.Button("Let's go!")
	            gr.Markdown("There is a [demo](https://huggingface.co/spaces/yangheng/PyABSA-ATEPC-Chinese) specialized for the Chinese langauge")
	            gr.Markdown("This demo support many other language as well, you can try and explore the results of other languages by yourself.")

	        # --- Right column: analysed sentence and prediction table ---
	        with gr.Column():
	            output_text = gr.TextArea(label="Example:")
	            output_df = gr.DataFrame(label="Prediction Results:")
	            output_dfs.append(output_df)

	        # perform_inference returns (table, text), hence this output order.
	        inference_button.click(
	            fn=perform_inference,
	            inputs=[input_sentence, dataset_ids],
	            outputs=[output_df, output_text],
	        )

	    # --- Visitor badges ---
	    gr.Markdown("![visitor badge](https://visitor-badge.glitch.me/badge?page_id=https://huggingface.co/spaces/yangheng/Multilingual-Aspect-Based-Sentiment-Analysis)")
	    gr.Markdown(f"![Visitors]({URL})")

	# Start the web server, requesting a public share link.
	demo.launch(share=True)