EmotionAnnotation / load_and_save.py
fpessanha's picture
Fix: Push new IDs
6e901b3
import gradio as gr
import pandas as pd
import os
import gradio as gr
from pathlib import Path
from huggingface_hub import login
from mutagen.mp3 import MP3
from mutagen.wave import WAVE
import json
from text_explanations import *
from utils import *
from datetime import datetime
possible_ids = {'Tiger-001': 0, 'Falcon-002': 0,
'Elephant-003': 1, 'Panther-004': 1,
'Zebra-005': 2, 'Wolf-006': 2,
'Koala-007': 3, 'Otter-008': 3,
'Leopard-009': 4, 'Panda-010': 4,
'Cheetah-011': 5, 'Gorilla-012': 5,
'Dolphin-013' : 6, 'Lynx-014': 6,
'Moose-015': 7, 'Raccoon-016': 7,
'Rabbit-017': 0, 'Eagle-018': 8, 'Jaguar-019': 8}
persistent_storage = Path('/data')
password_files = os.getenv("password_files")
def load_first_example(annotations_df, file_list_df, id, completed, index):
""" Loads and first example and updates index
Parameters:
* annotations_df: annotation file
* file_list_df: files to annotate
* id: participant ID
* completed: number of examples annotated
* index: current index (in the files to annotate list)
return:
* annotations_df: dataframe with current annotations
* load_example: current example to annotate
* completed: updated number of completed annotations
* index: updated current index
"""
path_ann = f'{persistent_storage}/{id}_annotations.csv'
if os.path.exists(path_ann):
annotations_df = pd.read_csv(path_ann, keep_default_na=False)
index = min(len(file_list_df) - 1, len(annotations_df))
completed = len(annotations_df) # update how many examples were completed
else:
# Initialize an empty DataFrame to store annotations
annotations_df = pd.DataFrame(columns=['sample_id', 'sentence', 'emotion', 'confidence', 'comments', 'n_clicks'])
return annotations_df, *load_example(annotations_df, file_list_df, index), completed, index
def load_example(annotations_df, file_list_df, index):
"""Loads the example in row #index from dataframe file_list.
If there are any annotations it will give those values to the annotation dataframe
Parameters:
* annotations_df: dataframe with current annotations
* index: current index
Returns:
* sentence: current sentence
* audio_path: current_audio path
* ann['emotion']: current emotion
* ann['confidence']: current confidence
* ann['comments']: current comments
* ann['n_clicks']: current number of clicks
* start: current start
* end: current end
* duration: current sentence duration
"""
if index < len(file_list_df):
row = file_list_df.iloc[index]
audio_path = os.path.join(persistent_storage, 'files_to_annotate_2round', row["sample_id"].split('-')[0], row["sample_id"] + '.wav')
sentence = row["sentence"]
# If the user already made an annotation for this example, gradio will return said annotation
ann = (
annotations_df.iloc[index].to_dict() if index < len(annotations_df) else {"sample_id": row["sample_id"], "emotion": 'Blank', "confidence": 'Blank',
"comments": '', "n_clicks": 0}
)
start = row['start']
end = row['end']
duration = get_audio_duration(audio_path)
print(f'start/end/duration (load example) - {start} {end} {duration}')
else:
index -= 1
row = file_list_df.iloc[index]
audio_path = os.path.join(persistent_storage, 'files_to_annotate_2round', row["sample_id"].split('-')[0], row["sample_id"] + '.wav')
sentence = row["sentence"]
# If the user already made an annotation for this example, gradio will return said annotation
ann = (
annotations_df.iloc[index].to_dict() if index < len(annotations_df) else {"sample_id": row["sample_id"], "emotion": 'Blank', "confidence": 'Blank',
"comments": '', "n_clicks": 0}
)
start = row['start']
end = row['end']
duration = get_audio_duration(audio_path)
print(f'start/end/duration (load example) - {start} {end} {duration}')
gr.Warning("This is the last example, well done!")
return sentence, audio_path, ann['emotion'], ann['confidence'], ann["comments"], ann['n_clicks'], start, end, duration
def save_annotation(annotations_df, file_list_df, emotions, confidence, comments, n_clicks, participant_id, ann_completed, current_index):
"""Save the annotation for the current example.
Parameters:
* annotations_df: dataframe with all annotations so far
* file_list_df: list of files to annotate
* emotions, confidence, comments, n_clicks: annotations to save
* participant_id: to indicate where to save the annotations
* ann_completed: number of annotations completed
* current_index: current index
Return:
* annotations_df: updated annotations_df
* ann_completed: updated number of annotations completed
"""
row = file_list_df.iloc[current_index]
sample_id = row["sample_id"]
sentence = row["sentence"]
# Update or append annotation
if sample_id in annotations_df["sample_id"].values:
annotations_df.loc[annotations_df["sample_id"] == sample_id, ["emotion", "confidence", "comments", "n_clicks"]] = \
[emotions, confidence, comments, n_clicks]
else:
annotations_df.loc[len(annotations_df)] = [sample_id, sentence, emotions, confidence, comments, n_clicks]
ann_completed += 1
annotations_df.to_csv(f"{persistent_storage}/{participant_id}_annotations.csv", index=False) # Save to a CSV file
timestamp = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
annotations_df.to_csv(f"{persistent_storage}/temp/{participant_id}_annotations_{timestamp}.csv", index=False) # Save to a CSV file
return annotations_df, ann_completed
def next_example(annotations_df, file_list_df, emotions, confidence, comments, n_clicks, participant_id, start, end, duration, ann_completed, current_index):
"""Move to the next example.
Parameters:
* annotations_df: current annotation dataframe
* file_list_df: all files to annotate
* emotions, confidence, comments, n_clicks: annotations to save
* participant_id: to indicate where to save the annotations
* ann_completed: number of annotations completed
* current_index: current index
Return:
* annotations_df: updated annotations_df
* sentence: current sentence
* audio_path: current_audio path
* ann['emotion']: current emotion
* ann['confidence']: current confidence
* ann['comments']: current comments
* ann['n_clicks']: current number of clicks
* start: current start
* end: current end
* duration: current sentence duration
* ann_completed: updated number of annotations completed
* current_index: current index
"""
if emotions == "Blank":
gr.Warning("Please fill out the emotion section. 'Blank' is not a valid emotion.")
elif confidence == "Blank":
gr.Warning("Please fill out the confidence section. 'Blank' is not a valid input.")
else:
annotations_df, ann_completed = save_annotation(annotations_df, file_list_df, emotions, confidence, comments, n_clicks, participant_id, ann_completed, current_index)
if current_index < len(file_list_df):
current_index += 1
sentence, audio_path, emotion, confidence, comments, n_clicks, start, end, duration = load_example(annotations_df, file_list_df, current_index)
else:
gr.Warning("This is the last example, well done!")
print(f'current_index {current_index}')
return annotations_df, sentence, audio_path, emotion, confidence, comments, n_clicks, start, end, duration, ann_completed, current_index
def previous_example(annotations_df, file_list_df, emotion, confidence, comments, n_clicks, participant_id, ann_completed, current_index):
"""Move to the previous example.
Parameters:
* annotations_df: current annotation dataframe
* file_list_df: all files to annotate
* emotions, confidence, comments, n_clicks: annotations to save
* participant_id: to indicate where to save the annotations
* ann_completed: number of annotations completed
* current_index: current index
Return:
* annotations_df: updated annotations_df
* sentence: current sentence
* audio_path: current_audio path
* ann['emotion']: current emotion
* ann['confidence']: current confidence
* ann['comments']: current comments
* ann['n_clicks']: current number of clicks
* start: current start
* end: current end
* duration: current sentence duration
* ann_completed: updated number of annotations completed
* current_index: current index
"""
if emotion != "Blank":
annotations_df, ann_completed = save_annotation(annotations_df, file_list_df, emotion, confidence, comments, n_clicks, participant_id, ann_completed, current_index)
if current_index > 0:
current_index -= 1
return annotations_df, *load_example(annotations_df, file_list_df, current_index), ann_completed, current_index
def deactivate_participant_id(annotations_df, file_list_df, total, participant_id, lets_go, previous_button, next_button, sentence_text, audio_player, emotions, confidence, comments, n_clicks, ann_completed, current_index):
if participant_id in possible_ids.keys():
file_list_df = pd.read_csv(os.path.join(persistent_storage, 'files_to_annotate_2round', f'group_{possible_ids[participant_id]}_v2.csv'), keep_default_na=False)
total = len(file_list_df)
annotations_df, sentence, audio_player, emotions, confidence, comments, n_clicks, start, end, duration, ann_completed, current_index = load_first_example(annotations_df, file_list_df, participant_id, ann_completed, current_index)
participant_id = gr.Textbox(label='What is your participant ID?', value = participant_id, interactive = False)
lets_go = gr.Button("Participant selected!", interactive = False)
sentence_text = gr.Textbox(label="Transcription", interactive=False, value = sentence)
emotions = gr.Radio(["Blank", "Happy", "Sad", "Angry", "Neutral"], label="Predominant Emotion (Check the sidebar for major subclasses)", value = emotions, visible = True)
confidence = gr.Radio(["Blank","Very Uncertain", "Somewhat Uncertain", "Neutral", "Somewhat confident", "Very confident"], label="How confident are you that the annotated emotion is present in the recording?", visible = True, value = confidence)
comments = gr.Textbox(label="Comments", visible =True, value = comments)
previous_button = gr.Button("Previous Example", visible = True)
next_button = gr.Button("Next Example",visible = True)
return annotations_df, file_list_df, participant_id, participant_id, lets_go, total, previous_button, next_button, sentence_text, audio_player, emotions, confidence, comments, n_clicks, start, end, duration, ann_completed, current_index
else:
raise gr.Error("Please insert a valid participant ID")