Yi-666 committed on
Commit
cc59fa1
·
verified ·
1 Parent(s): 59beba1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +68 -66
app.py CHANGED
@@ -2,6 +2,10 @@ import streamlit as st
2
  import numpy as np
3
  import matplotlib.pyplot as plt
4
  from sklearn.metrics import precision_recall_curve, auc
 
 
 
 
5
 
6
  # Sidebar navigation
7
  st.sidebar.title("App Navigation")
@@ -13,79 +17,77 @@ if page == "Sentiment Analysis":
13
  st.title("Twitter Sentiment Analysis App")
14
 
15
  # Load sentiment analysis pipeline
16
- from transformers import pipeline
17
- sentiment_pipe = pipeline("text-classification", model="cardiffnlp/twitter-roberta-base-sentiment-latest")
18
 
19
  # Input box for user to enter a tweet
20
  user_input = st.text_input("Enter a tweet to analyze:")
21
 
22
  if user_input:
23
- # Run sentiment analysis
24
- result = sentiment_pipe(user_input)
25
- st.write("Sentiment Analysis Result:", result)
 
 
 
 
 
 
26
 
27
  # Model Evaluation Page
28
  elif page == "Model Evaluation":
29
  st.title("Model Precision-Recall Evaluation")
30
 
31
- # Initialize default values for y_true and y_score
32
- y_true, y_score = None, None
33
-
34
- # Option to use actual model predictions
35
- st.write("### Evaluate Using Actual Model")
36
- model_type = st.selectbox("Choose model type:", ["Scikit-learn", "Transformers"])
37
-
38
- if model_type == "Scikit-learn":
39
- st.write("### Scikit-learn Model")
40
-
41
- from sklearn.ensemble import RandomForestClassifier
42
- from sklearn.datasets import make_classification
43
-
44
- # Create and train a Scikit-learn model
45
- X_train, y_train = make_classification(n_samples=1000, n_features=20, random_state=42)
46
- X_test, y_test = make_classification(n_samples=200, n_features=20, random_state=42)
47
- model = RandomForestClassifier()
48
- model.fit(X_train, y_train)
49
-
50
- # Generate predictions
51
- y_score = model.predict_proba(X_test)[:, 1] # Predicted probabilities for the positive class
52
- y_true = y_test # True labels
53
-
54
- elif model_type == "Transformers":
55
- st.write("### Transformers Model")
56
-
57
- from transformers import pipeline
58
-
59
- # Load a Transformers model
60
- model = pipeline("text-classification", model="cardiffnlp/twitter-roberta-base-sentiment-latest")
61
-
62
- # Simulate a batch of tweets for evaluation
63
- tweets = ["I love this!", "This is bad...", "I'm not sure about this."]
64
- results = model(tweets)
65
-
66
- # Simulated true labels (manually defined for simplicity)
67
- y_true = [1, 0, 1] # Manually defined true labels
68
- y_score = [res["score"] for res in results] # Extract probabilities
69
-
70
- # Validate data and calculate Precision-Recall curve
71
- if y_true is not None and y_score is not None and len(y_true) > 0 and len(y_score) > 0:
72
- try:
73
- # Calculate Precision, Recall, and AUC
74
- precision, recall, _ = precision_recall_curve(y_true, y_score)
75
- pr_auc = auc(recall, precision)
76
-
77
- # Plot the PR curve
78
- fig, ax = plt.subplots()
79
- ax.plot(recall, precision, label=f"PR Curve (AUC = {pr_auc:.2f})")
80
- ax.set_xlabel("Recall")
81
- ax.set_ylabel("Precision")
82
- ax.set_title("Precision-Recall Curve")
83
- ax.legend(loc="best")
84
- ax.grid()
85
-
86
- # Display the plot
87
- st.pyplot(fig)
88
- except Exception as e:
89
- st.error(f"An error occurred while generating the PR curve: {e}")
90
- else:
91
- st.info("Please select a model and ensure it generates valid data.")
 
2
  import numpy as np
3
  import matplotlib.pyplot as plt
4
  from sklearn.metrics import precision_recall_curve, auc
5
+ from datasets import load_dataset
6
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification
7
+ import torch
8
+ from tqdm import tqdm
9
 
10
  # Sidebar navigation
11
  st.sidebar.title("App Navigation")
 
17
  st.title("Twitter Sentiment Analysis App")
18
 
19
  # Load sentiment analysis pipeline
20
+ tokenizer = AutoTokenizer.from_pretrained("cardiffnlp/twitter-roberta-base-sentiment-latest")
21
+ model = AutoModelForSequenceClassification.from_pretrained("cardiffnlp/twitter-roberta-base-sentiment-latest")
22
 
23
  # Input box for user to enter a tweet
24
  user_input = st.text_input("Enter a tweet to analyze:")
25
 
26
  if user_input:
27
+ # Tokenize and predict
28
+ inputs = tokenizer(user_input, return_tensors="pt", truncation=True, padding=True)
29
+ with torch.no_grad():
30
+ outputs = model(**inputs)
31
+ probs = torch.softmax(outputs.logits, dim=-1)
32
+
33
+ sentiment = "POSITIVE" if probs[0][1] > probs[0][0] else "NEGATIVE"
34
+ st.write(f"Sentiment: {sentiment}")
35
+ st.write(f"Scores: {probs[0].numpy()}")
36
 
37
  # Model Evaluation Page
38
  elif page == "Model Evaluation":
39
  st.title("Model Precision-Recall Evaluation")
40
 
41
+ # Load tweet_eval dataset
42
+ dataset_name = "cardiffnlp/tweet_eval"
43
+ task = st.selectbox("Choose a dataset task:", ["emoji", "sentiment"])
44
+ split = st.selectbox("Choose data split:", ["train", "validation", "test"])
45
+
46
+ # Load dataset
47
+ with st.spinner("Loading dataset..."):
48
+ dataset = load_dataset(dataset_name, task, split=split)
49
+
50
+ st.write(f"Loaded {len(dataset)} samples from {dataset_name} ({task}/{split}).")
51
+
52
+ # Load model
53
+ model_name = f"cardiffnlp/twitter-roberta-base-{task}"
54
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
55
+ model = AutoModelForSequenceClassification.from_pretrained(model_name)
56
+
57
+ # Batch predict on dataset
58
+ batch_size = 16
59
+ predicted_probs = []
60
+ true_labels = dataset["label"]
61
+ texts = dataset["text"]
62
+
63
+ with st.spinner("Running model predictions..."):
64
+ for i in tqdm(range(0, len(texts), batch_size)):
65
+ batch = texts[i:i + batch_size]
66
+ inputs = tokenizer(batch, padding=True, truncation=True, return_tensors="pt")
67
+ with torch.no_grad():
68
+ outputs = model(**inputs)
69
+ probs = torch.softmax(outputs.logits, dim=-1)
70
+ predicted_probs.extend(probs.cpu().numpy())
71
+
72
+ # Select a class for PR Curve
73
+ num_classes = model.config.num_labels
74
+ class_to_evaluate = st.selectbox("Choose a class to evaluate:", list(range(num_classes)))
75
+
76
+ # Calculate Precision-Recall Curve
77
+ y_true = [1 if label == class_to_evaluate else 0 for label in true_labels]
78
+ y_score = [probs[class_to_evaluate] for probs in predicted_probs]
79
+
80
+ precision, recall, _ = precision_recall_curve(y_true, y_score)
81
+ pr_auc = auc(recall, precision)
82
+
83
+ # Plot PR Curve
84
+ fig, ax = plt.subplots()
85
+ ax.plot(recall, precision, label=f"PR Curve (AUC = {pr_auc:.2f})")
86
+ ax.set_xlabel("Recall")
87
+ ax.set_ylabel("Precision")
88
+ ax.set_title(f"Precision-Recall Curve for Class {class_to_evaluate}")
89
+ ax.legend(loc="best")
90
+ ax.grid()
91
+
92
+ st.pyplot(fig)
93
+ st.success(f"Precision-Recall AUC: {pr_auc:.2f}")