awacke1 committed on
Commit
af02f65
·
verified ·
1 Parent(s): b03f8f9

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +116 -0
app.py ADDED
@@ -0,0 +1,116 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import numpy as np
4
+ from sklearn.feature_selection import SelectKBest, f_classif
5
+ from sklearn.preprocessing import StandardScaler
6
+ from sklearn.ensemble import IsolationForest
7
+ from sklearn.decomposition import PCA
8
+ import nltk
9
+ from nltk.sentiment import SentimentIntensityAnalyzer
10
+ from nltk.tokenize import word_tokenize
11
+ from nltk.corpus import stopwords
12
+ from collections import Counter
13
+ import matplotlib.pyplot as plt
14
+
15
# Download the NLTK data the NLP demo relies on:
#   - vader_lexicon: used by SentimentIntensityAnalyzer
#   - punkt / punkt_tab: sentence-tokenizer models used by word_tokenize
#     (recent NLTK releases load 'punkt_tab'; 'punkt' is kept so older
#     releases keep working)
#   - stopwords: used for keyword filtering
for _resource in ('vader_lexicon', 'punkt', 'punkt_tab', 'stopwords'):
    nltk.download(_resource)
19
+
20
def main():
    """Draw the app title and run whichever demo is picked in the sidebar."""
    st.title("AI in Data Science Demo")

    # Sidebar for navigation
    demo_names = ["Feature Engineering", "Anomaly Detection", "NLP Analysis"]
    selected = st.sidebar.selectbox("Choose a demo", demo_names)

    # Anything that is not one of the first two choices falls back to the
    # NLP demo, mirroring a plain if / elif / else chain.
    handlers = {
        "Feature Engineering": feature_engineering_demo,
        "Anomaly Detection": anomaly_detection_demo,
    }
    handlers.get(selected, nlp_demo)()
32
+
33
def feature_engineering_demo():
    """Render the feature-selection and PCA demo page.

    Generates a random feature matrix with random binary labels, keeps the
    top-scoring columns with ``SelectKBest(f_classif)``, projects the
    standardized matrix onto two principal components, and writes the
    resulting tables plus a PCA scatter plot to the Streamlit page.
    """
    st.header("Automated Feature Engineering and Selection")

    # Sizes are named once so the data shape and the column labels below
    # cannot drift apart.
    n_samples, n_features, n_selected = 100, 5, 3

    # Generate sample data
    X = np.random.rand(n_samples, n_features)
    y = np.random.randint(0, 2, n_samples)

    # Feature selection
    selector = SelectKBest(f_classif, k=n_selected)
    X_new = selector.fit_transform(X, y)

    # PCA (on standardized features)
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)
    pca = PCA(n_components=2)
    X_pca = pca.fit_transform(X_scaled)

    # Display results
    st.subheader("Original Features")
    original_cols = [f"Feature {i+1}" for i in range(n_features)]
    st.write(pd.DataFrame(X, columns=original_cols).head())

    st.subheader(f"Selected Top {n_selected} Features")
    selected_cols = [f"Selected Feature {i+1}" for i in range(n_selected)]
    st.write(pd.DataFrame(X_new, columns=selected_cols).head())

    st.subheader("PCA Transformation")
    st.write(pd.DataFrame(X_pca, columns=["PC1", "PC2"]).head())

    # Visualization
    fig, ax = plt.subplots()
    ax.scatter(X_pca[:, 0], X_pca[:, 1], c=y)
    ax.set_xlabel("First Principal Component")
    ax.set_ylabel("Second Principal Component")
    ax.set_title("PCA of Dataset")
    st.pyplot(fig)
    # pyplot keeps every figure it creates registered until it is closed;
    # Streamlit reruns this function on each interaction, so close the
    # figure once it has been rendered to avoid accumulating them.
    plt.close(fig)
67
+
68
def anomaly_detection_demo():
    """Render the Isolation Forest anomaly-detection demo page.

    Builds 100 seeded 2-D normal samples, shifts the last five by (4, 4) so
    they stand apart, fits an ``IsolationForest`` and plots the points
    coloured by whether the model flagged them as anomalies.
    """
    st.header("Anomaly Detection")

    # Generate sample data with anomalies
    np.random.seed(42)
    X = np.random.randn(100, 2)
    X[-5:] = X[-5:] + [4, 4]  # Add some anomalies

    # Fit Isolation Forest
    clf = IsolationForest(contamination=0.1, random_state=42)
    y_pred = clf.fit_predict(X)

    # fit_predict labels outliers -1 and inliers +1. Passed straight to
    # viridis, outliers would take the low (purple) end and inliers the
    # yellow end -- the opposite of the caption below. Colour by an explicit
    # anomaly flag (1 = anomaly) so flagged points really are yellow.
    is_anomaly = (y_pred == -1).astype(int)

    # Visualization
    fig, ax = plt.subplots()
    ax.scatter(X[:, 0], X[:, 1], c=is_anomaly, cmap='viridis')
    ax.set_title("Anomaly Detection using Isolation Forest")
    ax.set_xlabel("Feature 1")
    ax.set_ylabel("Feature 2")
    st.pyplot(fig)
    # Close the figure after rendering; otherwise pyplot keeps it alive and
    # figures accumulate across Streamlit reruns.
    plt.close(fig)

    st.write("Points in yellow are detected as anomalies.")
89
+
90
def nlp_demo():
    """Render the NLP page: VADER sentiment scores and top keywords.

    Reads free text from a text area; when it is non-empty, shows the
    positive / neutral / negative polarity scores and a table of the five
    most frequent alphanumeric, non-stopword tokens.
    """
    st.header("NLP Analysis")

    # Sample text input
    user_text = st.text_area(
        "Enter text for analysis",
        "I love using AI for data analysis. It's exciting and powerful!",
    )

    # Nothing to analyse for empty input.
    if not user_text:
        return

    # Sentiment Analysis
    analyzer = SentimentIntensityAnalyzer()
    scores = analyzer.polarity_scores(user_text)

    st.subheader("Sentiment Analysis")
    for label, key in (("Positive", "pos"), ("Neutral", "neu"), ("Negative", "neg")):
        st.write(f"{label}: {scores[key]:.2f}")

    # Simple keyword extraction: lowercase tokens, drop punctuation-bearing
    # tokens and English stopwords, then count what is left.
    english_stopwords = set(stopwords.words('english'))
    counts = Counter(
        token
        for token in word_tokenize(user_text.lower())
        if token.isalnum() and token not in english_stopwords
    )
    top_keywords = counts.most_common(5)

    st.subheader("Top Keywords")
    st.write(pd.DataFrame(top_keywords, columns=["Keyword", "Frequency"]))
114
+
115
# Run the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()