AjithKSenthil committed on
Commit
cab07a0
1 Parent(s): d2e169c

Upload DataVisualization.py

Browse files
Files changed (1) hide show
  1. DataVisualization.py +57 -0
DataVisualization.py ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # DataVisualization.py
2
+ # Purpose: Script to create visualizations for chat data and machine learning model results.
3
+
4
+ import pandas as pd
5
+ import matplotlib.pyplot as plt
6
+ import seaborn as sns
7
+
8
+ # Load data (executed at import time: pd.read_csv runs as soon as this module is imported or run)
9
+ # Expects a CSV with the model's predictions and actual scores; plot_feature_importances also reads df.columns[:-1] as feature names
10
+ datafile_path = "data/model_predictions.csv"
11
+ df = pd.read_csv(datafile_path)
12
+
13
+ # Visualization Functions
14
+
15
def plot_feature_importances(model, feature_names=None, top_n=10):
    """
    Plots the largest feature importances of a trained model as a
    horizontal bar chart.

    Args:
        model: Fitted estimator exposing a ``feature_importances_`` attribute.
        feature_names: Optional labels, one per importance value. When
            omitted, the module-level ``df.columns[:-1]`` is used, which
            matches the previous behavior.
        top_n: Number of highest-importance features to display.

    Raises:
        ValueError: If the number of labels differs from the number of
            importance values.
    """
    importances = model.feature_importances_

    # Fall back to the module-level frame only when the caller gives no names;
    # that fallback is only correct if the CSV columns mirror the model inputs.
    if feature_names is None:
        feature_names = df.columns[:-1]
    feature_names = list(feature_names)

    if len(feature_names) != len(importances):
        raise ValueError(
            f"Got {len(feature_names)} feature names for "
            f"{len(importances)} feature importances; pass matching "
            "'feature_names' explicitly."
        )

    feat_importances = pd.Series(importances, index=feature_names)

    # Open a dedicated figure so the bars never land on a previous plot
    # (same figure size as the other plotting helpers in this file).
    plt.figure(figsize=(10, 6))
    feat_importances.nlargest(top_n).plot(kind='barh')
    plt.title('Feature Importances')
    plt.show()
23
+
24
def plot_actual_vs_predicted(y_actual, y_pred, title='Actual vs Predicted'):
    """
    Scatter plot for actual vs predicted values, with a dashed red
    identity line (perfect prediction) for reference.

    Args:
        y_actual: Numeric array-like (list, NumPy array, pandas Series) of
            observed values.
        y_pred: Numeric array-like of predicted values, paired with
            ``y_actual`` by position.
        title: Title of the figure.

    Raises:
        ValueError: If the two inputs do not contain the same number of values.
    """
    # Local import keeps this block self-contained; NumPy is always present
    # because pandas depends on it.
    import numpy as np

    # Plain flat arrays: lists work, (n, 1) model outputs are flattened, and
    # pandas index alignment cannot silently re-pair or drop points.
    actual = np.asarray(y_actual, dtype=float).ravel()
    predicted = np.asarray(y_pred, dtype=float).ravel()
    if actual.shape != predicted.shape:
        raise ValueError(
            f"y_actual has {actual.size} values but y_pred has {predicted.size}."
        )

    plt.figure(figsize=(10, 6))
    sns.scatterplot(x=actual, y=predicted, alpha=0.6)

    # Draw the identity line across the combined range so it covers every
    # point, not just the span of the actual values. Skipped when there is
    # nothing finite to span.
    finite = np.concatenate([actual, predicted])
    finite = finite[np.isfinite(finite)]
    if finite.size:
        low, high = finite.min(), finite.max()
        plt.plot([low, high], [low, high], '--r')

    plt.xlabel('Actual')
    plt.ylabel('Predicted')
    plt.title(title)
    plt.show()
35
+
36
def plot_error_distribution(y_actual, y_pred, title='Error Distribution'):
    """
    Histogram (with KDE overlay) for prediction errors, computed as
    ``y_actual - y_pred``.

    Args:
        y_actual: Numeric array-like (list, NumPy array, pandas Series) of
            observed values.
        y_pred: Numeric array-like of predicted values, paired with
            ``y_actual`` by position.
        title: Title of the figure.

    Raises:
        ValueError: If the two inputs do not contain the same number of values.
    """
    # Local import keeps this block self-contained; NumPy is always present
    # because pandas depends on it.
    import numpy as np

    # Subtract positionally. Subtracting two pandas Series aligns on index,
    # which yields NaN or mispaired errors when the indexes differ (e.g. a
    # shuffled test split versus freshly generated predictions).
    actual = np.asarray(y_actual, dtype=float).ravel()
    predicted = np.asarray(y_pred, dtype=float).ravel()
    if actual.shape != predicted.shape:
        raise ValueError(
            f"y_actual has {actual.size} values but y_pred has {predicted.size}."
        )
    errors = actual - predicted

    plt.figure(figsize=(10, 6))
    sns.histplot(errors, bins=20, kde=True)
    plt.xlabel('Prediction Error')
    plt.title(title)
    plt.show()
46
+
47
+ # Example Usage
48
+ # These are just examples. Replace 'your_model' with your actual trained model
49
+ # and 'y_actual', 'y_pred' with your actual data.
50
+
51
+ # plot_feature_importances(your_model)
52
+ # plot_actual_vs_predicted(df['ActualScore'], df['PredictedScore'])
53
+ # plot_error_distribution(df['ActualScore'], df['PredictedScore'])
54
+
55
+ # Note to Users:
56
+ # - Adjust the data paths, column names, and model variables as per your data and model.
57
+ # - Feel free to add more visualization functions based on your specific needs.