# Callbacks and shared state for a Gradio app used to annotate the emotion
# of audio samples, one participant at a time.
import gradio as gr
import pandas as pd
import os
from pathlib import Path
from huggingface_hub import login
from mutagen.mp3 import MP3
from mutagen.wave import WAVE
import json
from text_explanations import *
from utils import *
# Kept after the wildcard imports so `datetime` is always the class.
from datetime import datetime

# Maps each valid participant ID to the number of the file group it annotates.
possible_ids = {'Tiger-001': 0, 'Falcon-002': 0,
                'Elephant-003': 1, 'Panther-004': 1,
                'Zebra-005': 2, 'Wolf-006': 2,
                'Koala-007': 3, 'Otter-008': 3,
                'Leopard-009': 4, 'Panda-010': 4,
                'Cheetah-011': 5, 'Gorilla-012': 5,
                'Dolphin-013' : 6, 'Lynx-014': 6,
                'Moose-015': 7, 'Raccoon-016': 7,
                'Rabbit-017': 0, 'Eagle-018': 8,
                'Jaguar-019': 8}

persistent_storage = Path('/data')
password_files = os.getenv("password_files")


def _audio_path_for(sample_id):
    """Return the .wav path of a sample inside the persistent storage."""
    return os.path.join(persistent_storage, 'files_to_annotate_2round',
                        sample_id.split('-')[0], sample_id + '.wav')


def load_first_example(annotations_df, file_list_df, id, completed, index):
    """Loads the first example to show and updates the index.

    If the participant already has an annotation file, it is read back and
    the session resumes right after the last saved annotation.

    Parameters:
    * annotations_df: annotation file
    * file_list_df: files to annotate
    * id: participant ID
    * completed: number of examples annotated
    * index: current index (in the files to annotate list)

    return:
    * annotations_df: dataframe with current annotations
    * load_example: current example to annotate (all values returned by
      load_example, unpacked in place)
    * completed: updated number of completed annotations
    * index: updated current index
    """
    path_ann = f'{persistent_storage}/{id}_annotations.csv'

    if os.path.exists(path_ann):
        annotations_df = pd.read_csv(path_ann, keep_default_na=False)
        # Resume on the first non-annotated row, capped at the last row.
        index = min(len(file_list_df) - 1, len(annotations_df))
        completed = len(annotations_df)  # update how many examples were completed
    else:
        # Initialize an empty DataFrame to store annotations
        annotations_df = pd.DataFrame(columns=['sample_id', 'sentence', 'emotion',
                                               'confidence', 'comments', 'n_clicks'])

    return annotations_df, *load_example(annotations_df, file_list_df, index), completed, index


def load_example(annotations_df, file_list_df, index):
    """Loads the example in row #index from dataframe file_list.
    If there are any annotations it will give those values to the annotation dataframe

    An index past the end of the list is clamped to the last row and a
    "last example" warning is shown.

    Parameters:
    * annotations_df: dataframe with current annotations
    * file_list_df: all files to annotate
    * index: current index

    Returns:
    * sentence: current sentence
    * audio_path: current_audio path
    * ann['emotion']: current emotion
    * ann['confidence']: current confidence
    * ann['comments']: current comments
    * ann['n_clicks']: current number of clicks
    * start: current start
    * end: current end
    * duration: current sentence duration
    """
    past_the_end = index >= len(file_list_df)
    if past_the_end:
        index = len(file_list_df) - 1

    row = file_list_df.iloc[index]
    audio_path = _audio_path_for(row["sample_id"])
    sentence = row["sentence"]

    # If the user already made an annotation for this example, gradio will
    # return said annotation; otherwise start from blank values.
    if index < len(annotations_df):
        ann = annotations_df.iloc[index].to_dict()
    else:
        ann = {"sample_id": row["sample_id"], "emotion": 'Blank',
               "confidence": 'Blank', "comments": '', "n_clicks": 0}

    start = row['start']
    end = row['end']
    duration = get_audio_duration(audio_path)
    print(f'start/end/duration (load example) - {start} {end} {duration}')

    if past_the_end:
        gr.Warning("This is the last example, well done!")

    return (sentence, audio_path, ann['emotion'], ann['confidence'],
            ann["comments"], ann['n_clicks'], start, end, duration)


def save_annotation(annotations_df, file_list_df, emotions, confidence, comments,
                    n_clicks, participant_id, ann_completed, current_index):
    """Save the annotation for the current example.

    The full annotation table is written twice: to the participant's main
    CSV and to a timestamped copy under the `temp` folder.

    Parameters:
    * annotations_df: dataframe with all annotations so far
    * file_list_df: list of files to annotate
    * emotions, confidence, comments, n_clicks: annotations to save
    * participant_id: to indicate where to save the annotations
    * ann_completed: number of annotations completed
    * current_index: current index

    Return:
    * annotations_df: updated annotations_df
    * ann_completed: updated number of annotations completed
    """
    row = file_list_df.iloc[current_index]
    sample_id = row["sample_id"]
    sentence = row["sentence"]

    # Update or append annotation
    if sample_id in annotations_df["sample_id"].values:
        annotations_df.loc[annotations_df["sample_id"] == sample_id,
                           ["emotion", "confidence", "comments", "n_clicks"]] = \
            [emotions, confidence, comments, n_clicks]
    else:
        annotations_df.loc[len(annotations_df)] = [sample_id, sentence, emotions,
                                                   confidence, comments, n_clicks]
        ann_completed += 1

    # Save to a CSV file
    annotations_df.to_csv(f"{persistent_storage}/{participant_id}_annotations.csv", index=False)

    # Save a timestamped copy as well; to_csv cannot create the folder itself.
    os.makedirs(f"{persistent_storage}/temp", exist_ok=True)
    timestamp = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
    annotations_df.to_csv(f"{persistent_storage}/temp/{participant_id}_annotations_{timestamp}.csv", index=False)

    return annotations_df, ann_completed


def next_example(annotations_df, file_list_df, emotions, confidence, comments, n_clicks,
                 participant_id, start, end, duration, ann_completed, current_index):
    """Move to the next example.

    The current annotation is saved first. If the emotion or the confidence
    is still 'Blank', nothing is saved, a warning is shown and the current
    example is returned unchanged. On the last example the index does not
    advance, so it always stays a valid row of file_list_df.

    Parameters:
    * annotations_df: current annotation dataframe
    * file_list_df: all files to annotate
    * emotions, confidence, comments, n_clicks: annotations to save
    * participant_id: to indicate where to save the annotations
    * start, end, duration: timing values of the example currently shown
    * ann_completed: number of annotations completed
    * current_index: current index

    Return:
    * annotations_df: updated annotations_df
    * sentence: current sentence
    * audio_path: current_audio path
    * ann['emotion']: current emotion
    * ann['confidence']: current confidence
    * ann['comments']: current comments
    * ann['n_clicks']: current number of clicks
    * start: current start
    * end: current end
    * duration: current sentence duration
    * ann_completed: updated number of annotations completed
    * current_index: current index
    """
    if emotions == "Blank" or confidence == "Blank":
        if emotions == "Blank":
            gr.Warning("Please fill out the emotion section. 'Blank' is not a valid emotion.")
        else:
            gr.Warning("Please fill out the confidence section. 'Blank' is not a valid input.")

        # Stay on the same example and hand back exactly what the user has
        # entered so far, so nothing typed is lost.
        row = file_list_df.iloc[current_index]
        print(f'current_index {current_index}')
        return (annotations_df, row["sentence"], _audio_path_for(row["sample_id"]),
                emotions, confidence, comments, n_clicks,
                start, end, duration, ann_completed, current_index)

    annotations_df, ann_completed = save_annotation(
        annotations_df, file_list_df, emotions, confidence, comments,
        n_clicks, participant_id, ann_completed, current_index)

    if current_index < len(file_list_df) - 1:
        current_index += 1
    else:
        # Already on the last row: keep the index in range and tell the user.
        gr.Warning("This is the last example, well done!")

    (sentence, audio_path, emotion, confidence, comments,
     n_clicks, start, end, duration) = load_example(annotations_df, file_list_df, current_index)

    print(f'current_index {current_index}')
    return (annotations_df, sentence, audio_path, emotion, confidence, comments,
            n_clicks, start, end, duration, ann_completed, current_index)


def previous_example(annotations_df, file_list_df, emotion, confidence, comments,
                     n_clicks, participant_id, ann_completed, current_index):
    """Move to the previous example.

    The current annotation is saved whenever the emotion is not 'Blank'
    (the confidence is not checked here). The index never goes below 0.

    Parameters:
    * annotations_df: current annotation dataframe
    * file_list_df: all files to annotate
    * emotion, confidence, comments, n_clicks: annotations to save
    * participant_id: to indicate where to save the annotations
    * ann_completed: number of annotations completed
    * current_index: current index

    Return:
    * annotations_df: updated annotations_df
    * sentence: current sentence
    * audio_path: current_audio path
    * ann['emotion']: current emotion
    * ann['confidence']: current confidence
    * ann['comments']: current comments
    * ann['n_clicks']: current number of clicks
    * start: current start
    * end: current end
    * duration: current sentence duration
    * ann_completed: updated number of annotations completed
    * current_index: current index
    """
    if emotion != "Blank":
        annotations_df, ann_completed = save_annotation(
            annotations_df, file_list_df, emotion, confidence, comments,
            n_clicks, participant_id, ann_completed, current_index)

    if current_index > 0:
        current_index -= 1

    return annotations_df, *load_example(annotations_df, file_list_df, current_index), ann_completed, current_index


def deactivate_participant_id(annotations_df, file_list_df, total, participant_id, lets_go,
                              previous_button, next_button, sentence_text, audio_player,
                              emotions, confidence, comments, n_clicks, ann_completed,
                              current_index):
    """Lock in the participant ID and reveal the annotation widgets.

    For a known participant ID, reads that participant's group file list,
    loads the first example to annotate (resuming saved work if any) and
    builds the updated Gradio components: the ID box and start button become
    non-interactive, and the annotation inputs and navigation buttons are
    made visible.

    Parameters:
    * annotations_df: current annotation dataframe
    * file_list_df: replaced by the participant's file list
    * total: replaced by the number of files to annotate
    * participant_id: ID typed by the participant
    * lets_go, previous_button, next_button, sentence_text, audio_player,
      emotions, confidence, comments, n_clicks: current component values,
      all replaced in the returned tuple
    * ann_completed: number of annotations completed
    * current_index: current index

    Return:
    * annotations_df, file_list_df, the locked ID textbox (twice), lets_go,
      total, previous_button, next_button, sentence_text, audio path,
      emotions, confidence, comments, n_clicks, start, end, duration,
      ann_completed, current_index

    Raises:
    * gr.Error: if participant_id is not one of possible_ids
    """
    if participant_id not in possible_ids:
        raise gr.Error("Please insert a valid participant ID")

    file_list_df = pd.read_csv(
        os.path.join(persistent_storage, 'files_to_annotate_2round',
                     f'group_{possible_ids[participant_id]}_v2.csv'),
        keep_default_na=False)
    total = len(file_list_df)

    (annotations_df, sentence, audio_player, emotions, confidence, comments,
     n_clicks, start, end, duration, ann_completed, current_index) = load_first_example(
        annotations_df, file_list_df, participant_id, ann_completed, current_index)

    participant_id = gr.Textbox(label='What is your participant ID?', value = participant_id, interactive = False)
    lets_go = gr.Button("Participant selected!", interactive = False)

    sentence_text = gr.Textbox(label="Transcription", interactive=False, value = sentence)
    emotions = gr.Radio(["Blank", "Happy", "Sad", "Angry", "Neutral"],
                        label="Predominant Emotion (Check the sidebar for major subclasses)",
                        value = emotions, visible = True)
    confidence = gr.Radio(["Blank","Very Uncertain", "Somewhat Uncertain", "Neutral",
                           "Somewhat confident", "Very confident"],
                          label="How confident are you that the annotated emotion is present in the recording?",
                          visible = True, value = confidence)
    comments = gr.Textbox(label="Comments", visible =True, value = comments)
    previous_button = gr.Button("Previous Example", visible = True)
    next_button = gr.Button("Next Example",visible = True)

    # NOTE(review): the ID textbox is returned twice on purpose to keep the
    # original output arity; presumably it is wired to two outputs - confirm
    # against the event registration.
    return (annotations_df, file_list_df, participant_id, participant_id, lets_go, total,
            previous_button, next_button, sentence_text, audio_player, emotions,
            confidence, comments, n_clicks, start, end, duration, ann_completed,
            current_index)