AjithKSenthil committed on
Commit
840f6e0
1 Parent(s): 8b16ee5

Upload ChatAttachmentAnalysis.py

Browse files
Files changed (1) hide show
  1. ChatAttachmentAnalysis.py +16 -1
ChatAttachmentAnalysis.py CHANGED
@@ -15,7 +15,8 @@ df['embedding'] = df['embedding'].apply(lambda x: [float(num) for num in x.strip
15
 
16
  # Split the data into features (X) and labels (y)
17
  X = list(df.embedding.values)
18
- y = ['avoide', 'avoida', 'avoidb', 'avoidc', 'avoidd', 'anxietye', 'anxietya', 'anxietyb', 'anxietyc', 'anxietyd']
 
19
 
20
  # Split data into training and testing sets
21
  X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
@@ -34,3 +35,17 @@ mae = mean_absolute_error(y_test, preds)
34
  print(f"Chat transcript embeddings performance: mse={mse:.2f}, mae={mae:.2f}")
35
 
36
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
 
16
  # Split the data into features (X) and labels (y)
17
  X = list(df.embedding.values)
18
+ y = df[['avoide', 'avoida', 'avoidb', 'avoidc', 'avoidd', 'anxietye', 'anxietya', 'anxietyb', 'anxietyc', 'anxietyd']].values
19
+
20
 
21
  # Split data into training and testing sets
22
  X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
 
35
  print(f"Chat transcript embeddings performance: mse={mse:.2f}, mae={mae:.2f}")
36
 
37
 
38
+ # Mean Squared Error (MSE) is the average of the squared differences between the predicted and the actual values.
39
+ # In the context of this task, a lower MSE means that our model's predicted attachment scores are closer to the true scores.
40
+ # An MSE of 1.32 suggests that the average squared difference between the predicted and actual scores is 1.32.
41
+ # If our scores really are normalized between 0 and 1, this error is very high: a squared error above 1 is only possible
42
+ # when predictions fall outside that range, so either the predictions are far off or the scores are not actually on a 0-1 scale.
43
+
44
+ # Mean Absolute Error (MAE) is another measure of error in our predictions.
45
+ # It's the average absolute difference between the predicted and actual scores.
46
+ # An MAE of 0.96 suggests that, on average, our predicted attachment scores are off by 0.96 from the true scores.
47
+ # If our scores are normalized between 0 and 1, this error is also extremely high (nearly the full width of the scale),
48
+ # indicating that the model's predictions are not accurate, or that the scores are not actually on a 0-1 scale.
49
+
50
+ # Both MSE and MAE are error metrics that we want to minimize. Lower values for both indicate better model performance.
51
+ # In general, the lower these values, the better the model's predictions are.