Spaces:

fpessanha
/

EmotionAnnotation

Running

App Files Files Community

EmotionAnnotation / load_and_save.py

fpessanha

Fix: Push new IDs

6e901b3 about 4 hours ago

raw

history blame contribute delete

11.5 kB

	import gradio as gr
	import pandas as pd
	import os
	import gradio as gr
	from pathlib import Path
	from huggingface_hub import login
	from mutagen.mp3 import MP3
	from mutagen.wave import WAVE
	import json
	from text_explanations import *
	from utils import *
	from datetime import datetime

	# Valid participant IDs mapped to a group number. The group number selects
	# which 'group_<n>_v2.csv' file list is loaded for that participant.
	possible_ids = {
	    'Tiger-001': 0,
	    'Falcon-002': 0,
	    'Elephant-003': 1,
	    'Panther-004': 1,
	    'Zebra-005': 2,
	    'Wolf-006': 2,
	    'Koala-007': 3,
	    'Otter-008': 3,
	    'Leopard-009': 4,
	    'Panda-010': 4,
	    'Cheetah-011': 5,
	    'Gorilla-012': 5,
	    'Dolphin-013': 6,
	    'Lynx-014': 6,
	    'Moose-015': 7,
	    'Raccoon-016': 7,
	    'Rabbit-017': 0,
	    'Eagle-018': 8,
	    'Jaguar-019': 8,
	}

	# Root directory under which file lists, audio files and annotations live.
	persistent_storage = Path('/data')
	# Value of the "password_files" environment variable (None when it is unset).
	password_files = os.environ.get("password_files")

	def load_first_example(annotations_df, file_list_df, id, completed, index):
	    """Load the first example to display for a participant and update the counters.

	    When an annotation file for the participant already exists in persistent
	    storage it is read back, and the index/completed counters are derived
	    from the number of stored rows. Otherwise an empty annotation dataframe
	    is created and the counters are left as passed in.

	    Parameters:
	    * annotations_df: annotation dataframe (replaced by the loaded or a new one)
	    * file_list_df: files to annotate
	    * id: participant ID
	    * completed: number of examples annotated
	    * index: current index (in the files to annotate list)

	    Returns (as one flat tuple):
	    * annotations_df: dataframe with current annotations
	    * sentence, audio_path, emotion, confidence, comments, n_clicks,
	      start, end, duration: the values returned by load_example
	    * completed: updated number of completed annotations
	    * index: updated current index
	    """
	    saved_path = f'{persistent_storage}/{id}_annotations.csv'

	    if not os.path.exists(saved_path):
	        # Nothing saved yet for this participant: start from an empty table.
	        annotations_df = pd.DataFrame(
	            columns=['sample_id', 'sentence', 'emotion', 'confidence', 'comments', 'n_clicks']
	        )
	    else:
	        annotations_df = pd.read_csv(saved_path, keep_default_na=False)
	        n_saved = len(annotations_df)
	        # Resume right after the stored annotations, never past the last file.
	        index = min(len(file_list_df) - 1, n_saved)
	        completed = n_saved

	    example = load_example(annotations_df, file_list_df, index)
	    return (annotations_df, *example, completed, index)


	def load_example(annotations_df, file_list_df, index):
	    """Load the example in row #index of file_list_df together with any stored annotation.

	    An index past the end of the file list is clamped to the last row and the
	    user is warned that the last example has been reached.

	    Parameters:
	    * annotations_df: dataframe with current annotations
	    * file_list_df: files to annotate (columns sample_id, sentence, start and end are read)
	    * index: current index

	    Returns:
	    * sentence: current sentence
	    * audio_path: current_audio path
	    * ann['emotion']: current emotion
	    * ann['confidence']: current confidence
	    * ann['comments']: current comments
	    * ann['n_clicks']: current number of clicks
	    * start: current start
	    * end: current end
	    * duration: current sentence duration

	    Raises:
	    * gr.Error: if file_list_df has no rows
	    """
	    n_files = len(file_list_df)
	    if n_files == 0:
	        # Without this guard the clamp below would produce index -1 and fail
	        # with an opaque positional-indexing error.
	        raise gr.Error("There are no files to annotate.")

	    reached_end = index >= n_files
	    if reached_end:
	        # Stay on the last example instead of indexing out of range.
	        index = n_files - 1

	    row = file_list_df.iloc[index]
	    sample_id = row["sample_id"]
	    # Audio files are stored in a sub-folder named after the part of the
	    # sample id before the first '-'.
	    audio_path = os.path.join(persistent_storage, 'files_to_annotate_2round', sample_id.split('-')[0], sample_id + '.wav')
	    sentence = row["sentence"]

	    # If the user already made an annotation for this example, gradio will return said annotation.
	    # NOTE(review): the lookup is positional, so it relies on annotations_df
	    # rows being in the same order as file_list_df - confirm.
	    if index < len(annotations_df):
	        ann = annotations_df.iloc[index].to_dict()
	    else:
	        ann = {"sample_id": sample_id, "emotion": 'Blank', "confidence": 'Blank',
	               "comments": '', "n_clicks": 0}

	    start = row['start']
	    end = row['end']
	    duration = get_audio_duration(audio_path)
	    print(f'start/end/duration (load example) - {start} {end} {duration}')

	    if reached_end:
	        gr.Warning("This is the last example, well done!")

	    return sentence, audio_path, ann['emotion'], ann['confidence'], ann["comments"], ann['n_clicks'], start, end, duration


	def save_annotation(annotations_df, file_list_df, emotions, confidence, comments, n_clicks, participant_id, ann_completed, current_index):
	    """Save the annotation for the current example.

	    The annotation is stored in annotations_df (updating the existing row for
	    the sample, or appending a new one), then the whole dataframe is written
	    to the participant's CSV file and to a timestamped backup copy in the
	    'temp' sub-folder of the persistent storage.

	    Parameters:
	    * annotations_df: dataframe with all annotations so far
	    * file_list_df: list of files to annotate
	    * emotions, confidence, comments, n_clicks: annotations to save
	    * participant_id: to indicate where to save the annotations
	    * ann_completed: number of annotations completed
	    * current_index: current index

	    Return:
	    * annotations_df: updated annotations_df
	    * ann_completed: updated number of annotations completed
	    """
	    row = file_list_df.iloc[current_index]
	    sample_id = row["sample_id"]
	    sentence = row["sentence"]

	    # Update or append annotation
	    same_sample = annotations_df["sample_id"] == sample_id
	    if same_sample.any():
	        annotations_df.loc[same_sample, ["emotion", "confidence", "comments", "n_clicks"]] = \
	            [emotions, confidence, comments, n_clicks]
	    else:
	        annotations_df.loc[len(annotations_df)] = [sample_id, sentence, emotions, confidence, comments, n_clicks]
	        # Only a newly annotated sample increases the completed counter.
	        ann_completed += 1

	    annotations_df.to_csv(f"{persistent_storage}/{participant_id}_annotations.csv", index=False)  # Save to a CSV file

	    # Timestamped backup copy. Make sure the backup folder exists first,
	    # otherwise the write fails even though the main file was saved.
	    os.makedirs(os.path.join(persistent_storage, 'temp'), exist_ok=True)
	    timestamp = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
	    annotations_df.to_csv(f"{persistent_storage}/temp/{participant_id}_annotations_{timestamp}.csv", index=False)  # Save to a CSV file

	    return annotations_df, ann_completed

	def next_example(annotations_df, file_list_df, emotions, confidence, comments, n_clicks, participant_id, start, end, duration, ann_completed, current_index):
	    """Save the current annotation and move to the next example.

	    Nothing is saved and the position is kept when the emotion or the
	    confidence is still 'Blank'. On the last example the annotation is saved
	    but the position is kept. Whenever the position is kept, the values the
	    user entered are returned unchanged.

	    Parameters:
	    * annotations_df: current annotation dataframe
	    * file_list_df: all files to annotate
	    * emotions, confidence, comments, n_clicks: annotations to save
	    * participant_id: to indicate where to save the annotations
	    * start, end, duration: timing values of the example currently shown
	      (replaced by the values of the example that is returned)
	    * ann_completed: number of annotations completed
	    * current_index: current index

	    Return:
	    * annotations_df: updated annotations_df

	    * sentence: current sentence
	    * audio_path: current_audio path
	    * emotion: current emotion
	    * confidence: current confidence
	    * comments: current comments
	    * n_clicks: current number of clicks
	    * start: current start
	    * end: current end
	    * duration: current sentence duration

	    * ann_completed: updated number of annotations completed
	    * current_index: current index
	    """
	    advanced = False

	    if emotions == "Blank":
	        gr.Warning("Please fill out the emotion section. 'Blank' is not a valid emotion.")
	    elif confidence == "Blank":
	        gr.Warning("Please fill out the confidence section. 'Blank' is not a valid input.")
	    else:
	        annotations_df, ann_completed = save_annotation(annotations_df, file_list_df, emotions, confidence, comments, n_clicks, participant_id, ann_completed, current_index)
	        # Advance only while a following row exists, so current_index always
	        # stays a valid row of file_list_df for the next save.
	        if current_index < len(file_list_df) - 1:
	            current_index += 1
	            advanced = True
	        else:
	            gr.Warning("This is the last example, well done!")

	    print(f'current_index {current_index}')

	    example = load_example(annotations_df, file_list_df, current_index)
	    if advanced:
	        sentence, audio_path, emotions, confidence, comments, n_clicks, start, end, duration = example
	    else:
	        # Same example as before: keep what the user has entered so far and
	        # only take the example's own data from the reload.
	        sentence, audio_path, _, _, _, _, start, end, duration = example

	    return annotations_df, sentence, audio_path, emotions, confidence, comments, n_clicks, start, end, duration, ann_completed, current_index

	def previous_example(annotations_df, file_list_df, emotion, confidence, comments, n_clicks, participant_id, ann_completed, current_index):
	    """Move to the previous example.

	    The current annotation is saved first unless its emotion is still
	    'Blank'. The index is decreased by one when it is greater than zero.

	    Parameters:
	    * annotations_df: current annotation dataframe
	    * file_list_df: all files to annotate
	    * emotion, confidence, comments, n_clicks: annotations to save
	    * participant_id: to indicate where to save the annotations
	    * ann_completed: number of annotations completed
	    * current_index: current index

	    Return (as one flat tuple):
	    * annotations_df: updated annotations_df

	    * sentence: current sentence
	    * audio_path: current_audio path
	    * ann['emotion']: current emotion
	    * ann['confidence']: current confidence
	    * ann['comments']: current comments
	    * ann['n_clicks']: current number of clicks
	    * start: current start
	    * end: current end
	    * duration: current sentence duration

	    * ann_completed: updated number of annotations completed
	    * current_index: current index
	    """
	    emotion_chosen = emotion != "Blank"
	    if emotion_chosen:
	        saved = save_annotation(annotations_df, file_list_df, emotion, confidence, comments, n_clicks, participant_id, ann_completed, current_index)
	        annotations_df, ann_completed = saved

	    # Step back one row, but never move before the first example.
	    current_index = current_index - 1 if current_index > 0 else current_index

	    example = load_example(annotations_df, file_list_df, current_index)
	    return (annotations_df, *example, ann_completed, current_index)


	def deactivate_participant_id(annotations_df, file_list_df, total, participant_id, lets_go, previous_button, next_button, sentence_text, audio_player, emotions, confidence, comments, n_clicks, ann_completed, current_index):
	    """Validate the participant ID, load that participant's data and build the annotation widgets.

	    For a known ID the group's file list is read from persistent storage, the
	    first example is loaded through load_first_example, and new Gradio
	    components are created: a non-interactive ID textbox and start button,
	    plus the transcription, emotion, confidence, comments and navigation
	    widgets filled with the loaded values.

	    Parameters:
	    * annotations_df, ann_completed, current_index: passed on to load_first_example
	    * participant_id: ID typed in by the participant
	    * file_list_df, total, lets_go, previous_button, next_button, sentence_text,
	      audio_player, emotions, confidence, comments, n_clicks: current values;
	      they are not read, only replaced in the returned tuple

	    Return (in this order):
	    * annotations_df, file_list_df
	    * the participant ID textbox (twice), the start button
	    * total: number of files to annotate
	    * previous button, next button, transcription textbox
	    * audio path, emotion radio, confidence radio, comments textbox
	    * n_clicks, start, end, duration, ann_completed, current_index

	    Raises:
	    * gr.Error: if participant_id is not a key of possible_ids
	    """
	    if participant_id not in possible_ids:
	        raise gr.Error("Please insert a valid participant ID")

	    group = possible_ids[participant_id]
	    file_list_path = os.path.join(persistent_storage, 'files_to_annotate_2round', f'group_{group}_v2.csv')
	    file_list_df = pd.read_csv(file_list_path, keep_default_na=False)
	    total = len(file_list_df)

	    first_example = load_first_example(annotations_df, file_list_df, participant_id, ann_completed, current_index)
	    (annotations_df, sentence, audio_player, emotions, confidence, comments,
	     n_clicks, start, end, duration, ann_completed, current_index) = first_example

	    # Lock the ID widgets so the participant cannot be changed afterwards.
	    id_box = gr.Textbox(label='What is your participant ID?', value=participant_id, interactive=False)
	    go_button = gr.Button("Participant selected!", interactive=False)

	    # Annotation widgets, pre-filled with the loaded example.
	    transcription_box = gr.Textbox(label="Transcription", interactive=False, value=sentence)
	    emotion_radio = gr.Radio(
	        ["Blank", "Happy", "Sad", "Angry", "Neutral"],
	        label="Predominant Emotion (Check the sidebar for major subclasses)",
	        value=emotions,
	        visible=True,
	    )
	    confidence_radio = gr.Radio(
	        ["Blank","Very Uncertain", "Somewhat Uncertain", "Neutral", "Somewhat confident", "Very confident"],
	        label="How confident are you that the annotated emotion is present in the recording?",
	        visible=True,
	        value=confidence,
	    )
	    comments_box = gr.Textbox(label="Comments", visible=True, value=comments)
	    back_button = gr.Button("Previous Example", visible=True)
	    forward_button = gr.Button("Next Example", visible=True)

	    return (annotations_df, file_list_df, id_box, id_box, go_button, total,
	            back_button, forward_button, transcription_box, audio_player,
	            emotion_radio, confidence_radio, comments_box, n_clicks,
	            start, end, duration, ann_completed, current_index)