import gradio as gr
import pandas as pd
import os
from pathlib import Path
from huggingface_hub import login
from mutagen.mp3 import MP3
from mutagen.wave import WAVE
import json
from text_explanations import *
from utils import *
from datetime import datetime

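# Each participant ID maps to the index of the annotation group (group_<n>_v2.csv) whose files they annotate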
possible_ids = {'Tiger-001': 0, 'Falcon-002': 0, 
                'Elephant-003': 1, 'Panther-004': 1,
                'Zebra-005': 2, 'Wolf-006': 2,
                'Koala-007': 3, 'Otter-008': 3,
                'Leopard-009': 4, 'Panda-010': 4,
                'Cheetah-011': 5, 'Gorilla-012': 5,
                'Dolphin-013' : 6, 'Lynx-014': 6,
                'Moose-015': 7, 'Raccoon-016': 7,
                'Rabbit-017': 0, 'Eagle-018': 8, 'Jaguar-019': 8}

persistent_storage = Path('/data')
password_files = os.getenv("password_files")

def load_first_example(annotations_df, file_list_df, id, completed, index):
    """ Loads and first example and updates index
    
    Parameters:
    * annotations_df: annotation file
    * file_list_df: files to annotate
    * id: participant ID
    * completed: number of examples annotated
    * index: current index (in the files to annotate list)

    return:
    * annotations_df: dataframe with current annotations
    * load_example: current example to annotate
    * completed: updated number of completed annotations
    * index: updated current index

    """
    path_ann = f'{persistent_storage}/{id}_annotations.csv'

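    # Resume from this participant's existing annotations file, if there is one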
    if os.path.exists(path_ann):
        annotations_df = pd.read_csv(path_ann, keep_default_na=False)
        index = min(len(file_list_df) - 1, len(annotations_df))
        completed = len(annotations_df) # update how many examples were completed

    else: 
        # Initialize an empty DataFrame to store annotations
        annotations_df = pd.DataFrame(columns=['sample_id', 'sentence', 'emotion', 'confidence', 'comments', 'n_clicks'])

    return annotations_df, *load_example(annotations_df, file_list_df, index), completed, index


def load_example(annotations_df, file_list_df, index):
    """Loads the example in row #index from dataframe file_list. 
    If there are any annotations it will give those values to the annotation dataframe
    
    Parameters:
    * annotations_df: dataframe with current annotations
    * index: current index
    
    
    Returns:
    * sentence: current sentence
    * audio_path: current_audio path
    * ann['emotion']: current emotion
    * ann['confidence']: current confidence
    * ann['comments']: current comments
    * ann['n_clicks']: current number of clicks
    * start: current start
    * end: current end
    * duration: current sentence duration
    
    """
    # Clamp to the last row when the index runs past the end of the list
    last_example = index >= len(file_list_df)
    if last_example:
        index -= 1

    row = file_list_df.iloc[index]
    audio_path = os.path.join(persistent_storage, 'files_to_annotate_2round', row["sample_id"].split('-')[0], row["sample_id"] + '.wav')
    sentence = row["sentence"]

    # If the user already made an annotation for this example, gradio will return said annotation
    ann = (
        annotations_df.iloc[index].to_dict() if index < len(annotations_df)
        else {"sample_id": row["sample_id"], "emotion": 'Blank', "confidence": 'Blank',
              "comments": '', "n_clicks": 0}
    )

    start = row['start']
    end = row['end']
    duration = get_audio_duration(audio_path)
    print(f'start/end/duration (load example) - {start} {end} {duration}')

    if last_example:
        gr.Warning("This is the last example, well done!")

    return sentence, audio_path, ann['emotion'], ann['confidence'], ann["comments"], ann['n_clicks'], start, end, duration


def save_annotation(annotations_df, file_list_df, emotions, confidence, comments, n_clicks, participant_id, ann_completed, current_index):
    """Save the annotation for the current example.
    
    Parameters:
    * annotations_df: dataframe with all annotations so far
    * file_list_df: list of files to annotate
    * emotions, confidence, comments, n_clicks: annotations to save
    * participant_id: to indicate where to save the annotations
    * ann_completed: number of annotations completed
    * current_index: current index
    
    Returns:
    * annotations_df: updated annotations_df
    * ann_completed: updated number of annotations completed
    """

    row = file_list_df.iloc[current_index]
    sample_id = row["sample_id"]
    sentence = row["sentence"]

    # Update or append annotation
    if sample_id in annotations_df["sample_id"].values:
        annotations_df.loc[annotations_df["sample_id"] == sample_id, ["emotion", "confidence", "comments", "n_clicks"]] = \
            [emotions, confidence, comments, n_clicks]
    else:
        annotations_df.loc[len(annotations_df)] = [sample_id, sentence, emotions, confidence, comments, n_clicks]
        ann_completed += 1
    annotations_df.to_csv(f"{persistent_storage}/{participant_id}_annotations.csv", index=False)  # Save to a CSV file

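    # Also write a timestamped backup copy of the annotations to the temp folder in persistent storage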
    timestamp = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')

    annotations_df.to_csv(f"{persistent_storage}/temp/{participant_id}_annotations_{timestamp}.csv", index=False)  # Save to a CSV file
    
    return annotations_df, ann_completed

def next_example(annotations_df, file_list_df, emotions, confidence, comments, n_clicks, participant_id, start, end, duration, ann_completed, current_index):
    """Move to the next example.

    Parameters:
    * annotations_df: current annotation dataframe
    * file_list_df: all files to annotate
    * emotions, confidence, comments, n_clicks: annotations to save
    * participant_id: to indicate where to save the annotations
    * ann_completed: number of annotations completed
    * current_index: current index

    Return:
    * annotations_df: updated annotations_df
    
    
    * sentence: current sentence
    * audio_path: current_audio path
    * ann['emotion']: current emotion
    * ann['confidence']: current confidence
    * ann['comments']: current comments
    * ann['n_clicks']: current number of clicks
    * start: current start
    * end: current end
    * duration: current sentence duration

    * ann_completed: updated number of annotations completed
    * current_index: current index

    """

    if emotions == "Blank" or confidence == "Blank":
        if emotions == "Blank":
            gr.Warning("Please fill out the emotion section. 'Blank' is not a valid emotion.")
        else:
            gr.Warning("Please fill out the confidence section. 'Blank' is not a valid input.")
        # Do not save or advance; keep the current example and the values already on screen
        row = file_list_df.iloc[min(current_index, len(file_list_df) - 1)]
        sentence = row["sentence"]
        audio_path = os.path.join(persistent_storage, 'files_to_annotate_2round', row["sample_id"].split('-')[0], row["sample_id"] + '.wav')
        emotion = emotions
    else:
        if current_index < len(file_list_df):
            annotations_df, ann_completed = save_annotation(annotations_df, file_list_df, emotions, confidence, comments, n_clicks, participant_id, ann_completed, current_index)
            current_index += 1
        # load_example clamps the index and shows the "last example" warning when past the end
        sentence, audio_path, emotion, confidence, comments, n_clicks, start, end, duration = load_example(annotations_df, file_list_df, current_index)
    print(f'current_index {current_index}')

    return annotations_df, sentence, audio_path, emotion, confidence, comments, n_clicks, start, end, duration, ann_completed, current_index

def previous_example(annotations_df, file_list_df, emotion, confidence, comments, n_clicks, participant_id, ann_completed, current_index):
    """Move to the previous example.

    Parameters:
    * annotations_df: current annotation dataframe
    * file_list_df: all files to annotate
    * emotion, confidence, comments, n_clicks: annotations to save
    * participant_id: to indicate where to save the annotations
    * ann_completed: number of annotations completed
    * current_index: current index

    Returns:
    * annotations_df: updated annotations_df
    * sentence: current sentence
    * audio_path: current audio path
    * emotion: current emotion
    * confidence: current confidence
    * comments: current comments
    * n_clicks: current number of clicks
    * start: current start time
    * end: current end time
    * duration: current audio duration
    * ann_completed: updated number of annotations completed
    * current_index: updated current index
    """

    if emotion != "Blank":
        annotations_df, ann_completed = save_annotation(annotations_df, file_list_df, emotion, confidence, comments, n_clicks, participant_id,  ann_completed, current_index)
        
    if current_index > 0:
        current_index -= 1
            
    return annotations_df, *load_example(annotations_df, file_list_df, current_index), ann_completed, current_index


def deactivate_participant_id(annotations_df, file_list_df, total, participant_id, lets_go, previous_button, next_button, sentence_text, audio_player, emotions, confidence, comments, n_clicks, ann_completed, current_index):
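    """Validates the participant ID and initialises the annotation session.

    Loads the participant's group file list, loads the first example, locks the
    participant-ID textbox and start button, and reveals the annotation widgets.
    Raises gr.Error if the ID is not in possible_ids.
    """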
    if participant_id in possible_ids.keys():
        file_list_df = pd.read_csv(os.path.join(persistent_storage, 'files_to_annotate_2round', f'group_{possible_ids[participant_id]}_v2.csv'), keep_default_na=False)

        total = len(file_list_df)
    

        annotations_df, sentence, audio_player, emotions, confidence, comments, n_clicks, start, end, duration, ann_completed, current_index = load_first_example(annotations_df, file_list_df, participant_id, ann_completed, current_index)
  
        participant_id = gr.Textbox(label='What is your participant ID?', value = participant_id, interactive = False)
        lets_go = gr.Button("Participant selected!", interactive = False)
        
        sentence_text = gr.Textbox(label="Transcription", interactive=False, value = sentence)
        emotions = gr.Radio(["Blank", "Happy", "Sad", "Angry", "Neutral"], label="Predominant Emotion (Check the sidebar for major subclasses)", value =  emotions, visible = True)
        confidence = gr.Radio(["Blank","Very Uncertain", "Somewhat Uncertain", "Neutral", "Somewhat confident", "Very confident"], label="How confident are you that the annotated emotion is present in the recording?", visible = True, value = confidence)
        comments = gr.Textbox(label="Comments", visible =True, value = comments)
        previous_button = gr.Button("Previous Example", visible = True)
        next_button = gr.Button("Next Example",visible = True)
        
        return annotations_df, file_list_df, participant_id, participant_id, lets_go, total, previous_button, next_button, sentence_text, audio_player, emotions, confidence, comments, n_clicks, start, end, duration, ann_completed, current_index

    else:
        raise gr.Error("Please insert a valid participant ID")