Spaces:

awacke1
/

DataScienceDataEngineeringChallenge

Sleeping

App Files Files Community

awacke1 committed on Aug 27, 2024

Commit

af02f65

verified ·

1 Parent(s): b03f8f9

Create app.py

Browse files

Files changed (1) hide show

app.py +116 -0

app.py ADDED Viewed

	@@ -0,0 +1,116 @@

+import streamlit as st
+import pandas as pd
+import numpy as np
+from sklearn.feature_selection import SelectKBest, f_classif
+from sklearn.preprocessing import StandardScaler
+from sklearn.ensemble import IsolationForest
+from sklearn.decomposition import PCA
+import nltk
+from nltk.sentiment import SentimentIntensityAnalyzer
+from nltk.tokenize import word_tokenize
+from nltk.corpus import stopwords
+from collections import Counter
+import matplotlib.pyplot as plt
+# Download necessary NLTK data
+nltk.download('vader_lexicon')
+nltk.download('punkt')
+nltk.download('stopwords')
+def main():
+    st.title("AI in Data Science Demo")
+    # Sidebar for navigation
+    page = st.sidebar.selectbox("Choose a demo", ["Feature Engineering", "Anomaly Detection", "NLP Analysis"])
+    if page == "Feature Engineering":
+        feature_engineering_demo()
+    elif page == "Anomaly Detection":
+        anomaly_detection_demo()
+    else:
+        nlp_demo()
+def feature_engineering_demo():
+    st.header("Automated Feature Engineering and Selection")
+    # Generate sample data
+    X = np.random.rand(100, 5)
+    y = np.random.randint(0, 2, 100)
+    # Feature selection
+    selector = SelectKBest(f_classif, k=3)
+    X_new = selector.fit_transform(X, y)
+    # PCA
+    scaler = StandardScaler()
+    X_scaled = scaler.fit_transform(X)
+    pca = PCA(n_components=2)
+    X_pca = pca.fit_transform(X_scaled)
+    # Display results
+    st.subheader("Original Features")
+    st.write(pd.DataFrame(X, columns=[f"Feature {i+1}" for i in range(5)]).head())
+    st.subheader("Selected Top 3 Features")
+    st.write(pd.DataFrame(X_new, columns=[f"Selected Feature {i+1}" for i in range(3)]).head())
+    st.subheader("PCA Transformation")
+    st.write(pd.DataFrame(X_pca, columns=["PC1", "PC2"]).head())
+    # Visualization
+    fig, ax = plt.subplots()
+    ax.scatter(X_pca[:, 0], X_pca[:, 1], c=y)
+    ax.set_xlabel("First Principal Component")
+    ax.set_ylabel("Second Principal Component")
+    ax.set_title("PCA of Dataset")
+    st.pyplot(fig)
+def anomaly_detection_demo():
+    st.header("Anomaly Detection")
+    # Generate sample data with anomalies
+    np.random.seed(42)
+    X = np.random.randn(100, 2)
+    X[-5:] = X[-5:] + [4, 4]  # Add some anomalies
+    # Fit Isolation Forest
+    clf = IsolationForest(contamination=0.1, random_state=42)
+    y_pred = clf.fit_predict(X)
+    # Visualization
+    fig, ax = plt.subplots()
+    ax.scatter(X[:, 0], X[:, 1], c=y_pred, cmap='viridis')
+    ax.set_title("Anomaly Detection using Isolation Forest")
+    ax.set_xlabel("Feature 1")
+    ax.set_ylabel("Feature 2")
+    st.pyplot(fig)
+    st.write("Points in yellow are detected as anomalies.")
+def nlp_demo():
+    st.header("NLP Analysis")
+    # Sample text input
+    text = st.text_area("Enter text for analysis", "I love using AI for data analysis. It's exciting and powerful!")
+    if text:
+        # Sentiment Analysis
+        sia = SentimentIntensityAnalyzer()
+        sentiment = sia.polarity_scores(text)
+        st.subheader("Sentiment Analysis")
+        st.write(f"Positive: {sentiment['pos']:.2f}")
+        st.write(f"Neutral: {sentiment['neu']:.2f}")
+        st.write(f"Negative: {sentiment['neg']:.2f}")
+        # Simple keyword extraction
+        tokens = word_tokenize(text.lower())
+        stop_words = set(stopwords.words('english'))
+        keywords = [word for word in tokens if word.isalnum() and word not in stop_words]
+        keyword_freq = Counter(keywords).most_common(5)
+        st.subheader("Top Keywords")
+        st.write(pd.DataFrame(keyword_freq, columns=["Keyword", "Frequency"]))
+# Script entry point: build the page only when this file is executed as a
+# script, not when it is imported as a module.
+if __name__ == "__main__":
+    main()