Spaces:

fpessanha
/

EmotionAnnotation

Runtime error

File size: 11,467 Bytes

import gradio as gr
import pandas as pd
import os
import gradio as gr
from pathlib import Path
from huggingface_hub import login
from mutagen.mp3 import MP3
from mutagen.wave import WAVE
import json
from text_explanations import *
from utils import *
from datetime import datetime

possible_ids = {'Tiger-001': 0, 'Falcon-002': 0, 
                'Elephant-003': 1, 'Panther-004': 1,
                'Zebra-005': 2, 'Wolf-006': 2,
                'Koala-007': 3, 'Otter-008': 3,
                'Leopard-009': 4, 'Panda-010': 4,
                'Cheetah-011': 5, 'Gorilla-012': 5,
                'Dolphin-013' : 6, 'Lynx-014': 6,
                'Moose-015': 7, 'Raccoon-016': 7,
                'Rabbit-017': 0, 'Eagle-018': 8, 'Jaguar-019': 8}

persistent_storage = Path('/data')
password_files = os.getenv("password_files")

def load_first_example(annotations_df, file_list_df, id, completed, index):
    """ Loads and first example and updates index
    
    Parameters:
    * annotations_df: annotation file
    * file_list_df: files to annotate
    * id: participant ID
    * completed: number of examples annotated
    * index: current index (in the files to annotate list)

    return:
    * annotations_df: dataframe with current annotations
    * load_example: current example to annotate
    * completed: updated number of completed annotations
    * index: updated current index

    """
    path_ann = f'{persistent_storage}/{id}_annotations.csv'

    if os.path.exists(path_ann):
        annotations_df = pd.read_csv(path_ann, keep_default_na=False)
        index = min(len(file_list_df) - 1, len(annotations_df))
        completed = len(annotations_df) # update how many examples were completed

    else: 
        # Initialize an empty DataFrame to store annotations
        annotations_df = pd.DataFrame(columns=['sample_id', 'sentence', 'emotion', 'confidence', 'comments', 'n_clicks'])

    return annotations_df, *load_example(annotations_df, file_list_df, index), completed, index


def load_example(annotations_df, file_list_df, index):
    """Loads the example in row #index from dataframe file_list. 
    If there are any annotations it will give those values to the annotation dataframe
    
    Parameters:
    * annotations_df: dataframe with current annotations
    * index: current index
    
    
    Returns:
    * sentence: current sentence
    * audio_path: current_audio path
    * ann['emotion']: current emotion
    * ann['confidence']: current confidence
    * ann['comments']: current comments
    * ann['n_clicks']: current number of clicks
    * start: current start
    * end: current end
    * duration: current sentence duration
    
    """
    if index < len(file_list_df):
        row = file_list_df.iloc[index]
        audio_path = os.path.join(persistent_storage, 'files_to_annotate_2round', row["sample_id"].split('-')[0], row["sample_id"] + '.wav')
        sentence = row["sentence"]

        # If the user already made an annotation for this example, gradio will return said annotation
        ann = (
            annotations_df.iloc[index].to_dict() if index < len(annotations_df) else {"sample_id": row["sample_id"], "emotion": 'Blank', "confidence": 'Blank',
                                                                                "comments": '', "n_clicks": 0}
        )

        start = row['start']
        end = row['end']
        duration = get_audio_duration(audio_path)
        print(f'start/end/duration (load example) - {start} {end} {duration}')
    else:
        index -= 1
        row = file_list_df.iloc[index]
        audio_path = os.path.join(persistent_storage, 'files_to_annotate_2round', row["sample_id"].split('-')[0], row["sample_id"] + '.wav')
        sentence = row["sentence"]

        # If the user already made an annotation for this example, gradio will return said annotation
        ann = (
            annotations_df.iloc[index].to_dict() if index < len(annotations_df) else {"sample_id": row["sample_id"], "emotion": 'Blank', "confidence": 'Blank',
                                                                                "comments": '', "n_clicks": 0}
        )

        start = row['start']
        end = row['end']
        duration = get_audio_duration(audio_path)
        print(f'start/end/duration (load example) - {start} {end} {duration}')

        gr.Warning("This is the last example, well done!")
    return sentence, audio_path, ann['emotion'], ann['confidence'], ann["comments"], ann['n_clicks'], start, end, duration


def save_annotation(annotations_df, file_list_df, emotions, confidence, comments, n_clicks, participant_id, ann_completed, current_index):
    """Save the annotation for the current example.
    
    Parameters:
    * annotations_df: dataframe with all annotations so far
    * file_list_df: list of files to annotate
    * emotions, confidence, comments, n_clicks: annotations to save
    * participant_id: to indicate where to save the annotations
    * ann_completed: number of annotations completed
    * current_index: current index
    
    Return:
    * annotations_df: updated annotations_df
    * ann_completed: updated number of annotations completed
    """

    row = file_list_df.iloc[current_index]
    sample_id = row["sample_id"]
    sentence = row["sentence"]

    # Update or append annotation
    if sample_id in annotations_df["sample_id"].values:
        annotations_df.loc[annotations_df["sample_id"] == sample_id, ["emotion", "confidence", "comments", "n_clicks"]] = \
            [emotions, confidence, comments, n_clicks]
    else:
        annotations_df.loc[len(annotations_df)] = [sample_id, sentence, emotions, confidence, comments, n_clicks]
        ann_completed += 1
    annotations_df.to_csv(f"{persistent_storage}/{participant_id}_annotations.csv", index=False)  # Save to a CSV file

    timestamp = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')

    annotations_df.to_csv(f"{persistent_storage}/temp/{participant_id}_annotations_{timestamp}.csv", index=False)  # Save to a CSV file
    
    return annotations_df, ann_completed

def next_example(annotations_df, file_list_df, emotions, confidence, comments, n_clicks, participant_id, start, end, duration, ann_completed, current_index):
    """Move to the next example.

    Parameters:
    * annotations_df: current annotation dataframe
    * file_list_df: all files to annotate
    * emotions, confidence, comments, n_clicks: annotations to save
    * participant_id: to indicate where to save the annotations
    * ann_completed: number of annotations completed
    * current_index: current index

    Return:
    * annotations_df: updated annotations_df
    
    
    * sentence: current sentence
    * audio_path: current_audio path
    * ann['emotion']: current emotion
    * ann['confidence']: current confidence
    * ann['comments']: current comments
    * ann['n_clicks']: current number of clicks
    * start: current start
    * end: current end
    * duration: current sentence duration

    * ann_completed: updated number of annotations completed
    * current_index: current index

    """

    if emotions == "Blank":
        gr.Warning("Please fill out the emotion section. 'Blank' is not a valid emotion.")
    elif confidence == "Blank":
        gr.Warning("Please fill out the confidence section. 'Blank' is not a valid input.")

    else:  
        annotations_df, ann_completed = save_annotation(annotations_df, file_list_df, emotions, confidence, comments, n_clicks, participant_id, ann_completed, current_index)
        if current_index < len(file_list_df):
            current_index += 1
            sentence, audio_path, emotion, confidence, comments, n_clicks, start, end, duration = load_example(annotations_df, file_list_df, current_index)
        
        else:
           gr.Warning("This is the last example, well done!")
    print(f'current_index {current_index}')
    
    return annotations_df, sentence, audio_path, emotion, confidence, comments, n_clicks, start, end, duration, ann_completed, current_index

def previous_example(annotations_df, file_list_df, emotion, confidence, comments, n_clicks, participant_id,  ann_completed, current_index):

    """Move to the previous example.

    Parameters:
    * annotations_df: current annotation dataframe
    * file_list_df: all files to annotate
    * emotions, confidence, comments, n_clicks: annotations to save
    * participant_id: to indicate where to save the annotations
    * ann_completed: number of annotations completed
    * current_index: current index

    Return:
    * annotations_df: updated annotations_df
    
    
    * sentence: current sentence
    * audio_path: current_audio path
    * ann['emotion']: current emotion
    * ann['confidence']: current confidence
    * ann['comments']: current comments
    * ann['n_clicks']: current number of clicks
    * start: current start
    * end: current end
    * duration: current sentence duration

    * ann_completed: updated number of annotations completed
    * current_index: current index
    """

    if emotion != "Blank":
        annotations_df, ann_completed = save_annotation(annotations_df, file_list_df, emotion, confidence, comments, n_clicks, participant_id,  ann_completed, current_index)
        
    if current_index > 0:
        current_index -= 1
            
    return annotations_df, *load_example(annotations_df, file_list_df, current_index), ann_completed, current_index


def deactivate_participant_id(annotations_df, file_list_df, total, participant_id, lets_go, previous_button, next_button, sentence_text, audio_player, emotions, confidence, comments, n_clicks, ann_completed, current_index):


    if participant_id in possible_ids.keys():
        file_list_df = pd.read_csv(os.path.join(persistent_storage, 'files_to_annotate_2round', f'group_{possible_ids[participant_id]}_v2.csv'), keep_default_na=False)

        total = len(file_list_df)
    

        annotations_df, sentence, audio_player, emotions, confidence, comments, n_clicks, start, end, duration, ann_completed, current_index = load_first_example(annotations_df, file_list_df, participant_id, ann_completed, current_index)
  
        participant_id = gr.Textbox(label='What is your participant ID?', value = participant_id, interactive = False)
        lets_go = gr.Button("Participant selected!", interactive = False)
        
        sentence_text = gr.Textbox(label="Transcription", interactive=False, value = sentence)
        emotions = gr.Radio(["Blank", "Happy", "Sad", "Angry", "Neutral"], label="Predominant Emotion (Check the sidebar for major subclasses)", value =  emotions, visible = True)
        confidence = gr.Radio(["Blank","Very Uncertain", "Somewhat Uncertain", "Neutral", "Somewhat confident", "Very confident"], label="How confident are you that the annotated emotion is present in the recording?", visible = True, value = confidence)
        comments = gr.Textbox(label="Comments", visible =True, value = comments)
        previous_button = gr.Button("Previous Example", visible = True)
        next_button = gr.Button("Next Example",visible = True)
        
        return annotations_df, file_list_df, participant_id, participant_id, lets_go, total, previous_button, next_button, sentence_text, audio_player, emotions, confidence, comments, n_clicks, start, end, duration, ann_completed, current_index

    else:
        raise gr.Error("Please insert a valid participant ID")