Spaces:

Alamgirapi
/

NoCodeTextClassifier

Sleeping

App Files Files Community

Alamgirapi committed on Aug 6, 2025

Commit

4dcb991

verified ·

1 Parent(s): e6c2921

Update app.py

Browse files

Files changed (1) hide show

app.py +294 -213

app.py CHANGED Viewed

@@ -7,6 +7,7 @@ import pickle
 import io
 import traceback
 import sys
 from datetime import datetime
 # Import ML libraries with error handling
@@ -29,7 +30,7 @@ except ImportError as e:
     st.info("Please ensure NoCodeTextClassifier package is installed")
 # Set page config
-st.set_page_config(page_title="Debug Text Classification", page_icon="🔍", layout="wide")
 # Debug section
 st.sidebar.header("🔍 Debug Information")
@@ -41,118 +42,107 @@ def debug_log(message, level="INFO"):
         timestamp = datetime.now().strftime("%H:%M:%S")
         st.sidebar.write(f"**{timestamp} [{level}]:** {message}")
-def detailed_error_info(e):
-    """Get detailed error information"""
-    error_type = type(e).__name__
-    error_message = str(e)
-    error_traceback = traceback.format_exc()
-    return {
-        'type': error_type,
-        'message': error_message,
-        'traceback': error_traceback
-    }
-def inspect_uploaded_file(uploaded_file):
-    """Inspect uploaded file properties"""
-    debug_log("🔍 Inspecting uploaded file...")
-    try:
-        file_info = {
-            'name': uploaded_file.name,
-            'type': uploaded_file.type,
-            'size': uploaded_file.size,
-            'file_id': getattr(uploaded_file, 'file_id', 'Not available')
-        }
-        debug_log(f"File name: {file_info['name']}")
-        debug_log(f"File type: {file_info['type']}")
-        debug_log(f"File size: {file_info['size']} bytes")
-        debug_log(f"File ID: {file_info['file_id']}")
-        # Try to read first few bytes
-        uploaded_file.seek(0)
-        first_bytes = uploaded_file.read(100)
-        debug_log(f"First 100 bytes type: {type(first_bytes)}")
-        debug_log(f"First 100 bytes preview: {first_bytes[:50]}...")
-        # Reset file pointer
-        uploaded_file.seek(0)
-        return file_info
-    except Exception as e:
-        error_info = detailed_error_info(e)
-        debug_log(f"❌ Error inspecting file: {error_info['type']}: {error_info['message']}", "ERROR")
-        st.sidebar.error(f"File inspection error: {error_info['message']}")
-        return None
-def safe_read_csv_debug(uploaded_file, encoding_options=['utf-8', 'latin1', 'iso-8859-1', 'cp1252']):
-    """Safely read CSV with extensive debugging"""
-    debug_log("🔄 Starting CSV read process...")
-    # Inspect file first
-    file_info = inspect_uploaded_file(uploaded_file)
-    if file_info is None:
-        return None
-    # Try different reading methods
-    methods = [
-        ("Direct pandas read", lambda f: pd.read_csv(f)),
-        ("BytesIO method", lambda f: pd.read_csv(io.BytesIO(f.read()))),
-        ("StringIO method", lambda f: pd.read_csv(io.StringIO(f.read().decode('utf-8')))),
-    ]
-    for method_name, method_func in methods:
-        debug_log(f"🔄 Trying method: {method_name}")
-        for encoding in encoding_options:
-            try:
-                debug_log(f"  - Attempting encoding: {encoding}")
-                uploaded_file.seek(0)
-                if method_name == "Direct pandas read":
-                    df = pd.read_csv(uploaded_file, encoding=encoding)
-                elif method_name == "BytesIO method":
-                    uploaded_file.seek(0)
-                    content = uploaded_file.read()
-                    df = pd.read_csv(io.BytesIO(content), encoding=encoding)
-                elif method_name == "StringIO method":
-                    uploaded_file.seek(0)
-                    content = uploaded_file.read()
-                    if isinstance(content, bytes):
-                        content = content.decode(encoding)
-                    df = pd.read_csv(io.StringIO(content))
-                debug_log(f"✅ Success with {method_name} + {encoding}")
-                debug_log(f"DataFrame shape: {df.shape}")
-                debug_log(f"Columns: {list(df.columns)}")
-                st.success(f"File loaded successfully using {method_name} with {encoding} encoding")
-                return df
-            except UnicodeDecodeError as e:
-                debug_log(f"  - Unicode error with {encoding}: {str(e)}", "WARNING")
-                continue
-            except Exception as e:
-                error_info = detailed_error_info(e)
-                debug_log(f"  - Error with {method_name} + {encoding}: {error_info['type']}: {error_info['message']}", "ERROR")
-                # Show detailed error for 403 or permission errors
-                if "403" in str(e) or "permission" in str(e).lower():
-                    st.error("🚨 PERMISSION ERROR DETECTED!")
-                    st.error(f"Method: {method_name}, Encoding: {encoding}")
-                    st.error(f"Error type: {error_info['type']}")
-                    st.error(f"Error message: {error_info['message']}")
-                    st.code(error_info['traceback'])
-                continue
-    debug_log("❌ All reading methods failed", "ERROR")
-    st.error("All CSV reading methods failed. Check debug log for details.")
-    return None
-# Utility functions with debugging
 def save_artifacts(obj, folder_name, file_name):
     """Save artifacts with debugging"""
     debug_log(f"💾 Saving {file_name} to {folder_name}")
@@ -167,9 +157,8 @@ def save_artifacts(obj, folder_name, file_name):
         return True
     except Exception as e:
-        error_info = detailed_error_info(e)
-        debug_log(f"❌ Error saving {file_name}: {error_info['message']}", "ERROR")
-        st.error(f"Save error: {error_info['message']}")
         return False
 def load_artifacts(folder_name, file_name):
@@ -189,9 +178,7 @@ def load_artifacts(folder_name, file_name):
         return obj
     except Exception as e:
-        error_info = detailed_error_info(e)
-        debug_log(f"❌ Error loading {file_name}: {error_info['message']}", "ERROR")
-        st.error(f"Load error: {error_info['message']}")
         return None
 def load_model(model_name):
@@ -221,11 +208,9 @@ def predict_text(model_name, text, vectorizer_type="tfidf"):
         debug_log("🧹 Cleaning text...")
         text_cleaner = TextCleaner()
         clean_text = text_cleaner.clean_text(text)
-        debug_log(f"Cleaned text preview: {clean_text[:50]}...")
         debug_log("🔢 Vectorizing text...")
         text_vector = vectorizer.transform([clean_text])
-        debug_log(f"Vector shape: {text_vector.shape}")
         debug_log("🎯 Making prediction...")
         prediction = model.predict(text_vector)
@@ -234,7 +219,6 @@ def predict_text(model_name, text, vectorizer_type="tfidf"):
         if hasattr(model, 'predict_proba'):
             try:
                 prediction_proba = model.predict_proba(text_vector)[0]
-                debug_log(f"Prediction probabilities: {prediction_proba}")
             except:
                 debug_log("No prediction probabilities available", "WARNING")
@@ -244,142 +228,239 @@ def predict_text(model_name, text, vectorizer_type="tfidf"):
         return predicted_label, prediction_proba
     except Exception as e:
-        error_info = detailed_error_info(e)
-        debug_log(f"❌ Prediction error: {error_info['message']}", "ERROR")
-        st.error(f"Prediction error: {error_info['message']}")
-        if debug_mode:
-            st.code(error_info['traceback'])
         return None, None
 # Main App
-st.title('🔍 Debug Text Classification App')
-st.write('Debug version to identify and fix issues')
-# Environment info
 if debug_mode:
     st.sidebar.subheader("🖥️ Environment Info")
     st.sidebar.write(f"Python version: {sys.version}")
     st.sidebar.write(f"Streamlit version: {st.__version__}")
-    st.sidebar.write(f"Pandas version: {pd.__version__}")
-    st.sidebar.write(f"Current working directory: {os.getcwd()}")
-    # Check directory permissions
-    try:
-        test_dir = "test_permissions"
-        os.makedirs(test_dir, exist_ok=True)
-        test_file = os.path.join(test_dir, "test.txt")
-        with open(test_file, 'w') as f:
-            f.write("test")
-        os.remove(test_file)
-        os.rmdir(test_dir)
-        st.sidebar.success("✅ File system permissions OK")
-    except Exception as e:
-        st.sidebar.error(f"❌ File system permission issue: {e}")
-# Sidebar navigation
-section = st.sidebar.radio("Choose Section", ["File Upload Debug", "Data Analysis", "Train Model", "Predictions"])
-# Session state initialization
-if 'vectorizer_type' not in st.session_state:
-    st.session_state.vectorizer_type = "tfidf"
 if 'train_df' not in st.session_state:
     st.session_state.train_df = None
-# File Upload Debug Section
-if section == "File Upload Debug":
-    st.subheader("🔍 File Upload Debugging")
-    st.info("This section helps debug file upload issues. Upload your file and see detailed error information.")
-    train_data = st.file_uploader("Upload training data (DEBUG MODE)", type=["csv"], key="debug_upload")
-    if train_data is not None:
-        st.write("### File Upload Detected!")
-        # Show raw file info
-        st.write("**Raw File Information:**")
-        st.json({
-            "name": train_data.name,
-            "type": train_data.type if hasattr(train_data, 'type') else "Unknown",
-            "size": train_data.size if hasattr(train_data, 'size') else "Unknown"
-        })
-        # Try to read the file
-        st.write("### Attempting to Read File...")
-        with st.spinner("Reading file with debug mode..."):
-            df = safe_read_csv_debug(train_data)
-        if df is not None:
-            st.success("🎉 File successfully loaded!")
-            st.write("**Data Preview:**")
-            st.dataframe(df.head())
-            st.write(f"**Shape:** {df.shape}")
-            st.write(f"**Columns:** {list(df.columns)}")
-            st.write(f"**Data Types:**")
-            st.write(df.dtypes)
-            # Store in session state
-            st.session_state.train_df = df
-        else:
-            st.error("❌ Failed to load file. Check the debug log for details.")
-            # Additional troubleshooting
-            st.write("### 🔧 Troubleshooting Steps:")
-            st.write("1. Check if your file is a valid CSV")
-            st.write("2. Try saving your CSV with different encoding (UTF-8 recommended)")
-            st.write("3. Check if file size is within limits")
-            st.write("4. Ensure no special characters in filename")
-            st.write("5. Try uploading from a different location")
-# Other sections (simplified for debugging)
 elif section == "Data Analysis":
-    st.subheader("📊 Data Analysis")
     if st.session_state.train_df is not None:
         df = st.session_state.train_df
-        st.write("Using loaded data from debug session:")
-        st.dataframe(df.head())
-        # Basic analysis without custom modules if they fail
-        st.write(f"**Shape:** {df.shape}")
-        st.write(f"**Columns:** {list(df.columns)}")
-        st.write(f"**Missing values:**")
-        st.write(df.isnull().sum())
     else:
-        st.warning("No data loaded. Please use 'File Upload Debug' section first.")
 elif section == "Train Model":
-    st.subheader("🤖 Train Model")
-    st.info("Use this section after successfully loading data in debug mode.")
     if st.session_state.train_df is not None:
-        st.success("Data available for training!")
-        # Add your training logic here
     else:
-        st.warning("No data loaded. Please use 'File Upload Debug' section first.")
 elif section == "Predictions":
-    st.subheader("🔮 Predictions")
-    st.info("Use this section after training a model.")
-    # Check for trained models
-    if os.path.exists("models"):
-        models = [f for f in os.listdir("models") if f.endswith('.pkl')]
-        if models:
-            st.write(f"Available models: {models}")
         else:
-            st.info("No trained models found.")
     else:
-        st.info("Models directory not found.")
-# Debug summary
-if debug_mode:
-    st.sidebar.markdown("---")
-    st.sidebar.subheader("📋 Debug Summary")
-    if st.session_state.train_df is not None:
-        st.sidebar.success("✅ Data loaded successfully")
-    else:
-        st.sidebar.warning("⚠️ No data loaded")

 import io
 import traceback
 import sys
+import base64
 from datetime import datetime
 # Import ML libraries with error handling
     st.info("Please ensure NoCodeTextClassifier package is installed")
 # Set page config
+st.set_page_config(page_title="Fixed Text Classification", page_icon="🔧", layout="wide")
 # Debug section
 st.sidebar.header("🔍 Debug Information")
         timestamp = datetime.now().strftime("%H:%M:%S")
         st.sidebar.write(f"**{timestamp} [{level}]:** {message}")
+# Alternative file upload methods
+def alternative_file_upload():
+    """Render fallback widgets for loading a CSV when the file uploader fails.
+
+    Intended as a workaround for 403 errors on the standard upload. Three
+    options are offered in order: pasting raw CSV text, pasting
+    base64-encoded CSV bytes, and loading a small built-in sample dataset.
+
+    Returns:
+        A ``(df, method)`` tuple. ``df`` is the parsed DataFrame and
+        ``method`` is one of ``"text_area"``, ``"base64"`` or ``"sample"``.
+        ``(None, None)`` is returned when no option was triggered on this
+        run, or when parsing/decoding raised an exception.
+    """
+    st.subheader("🔧 Alternative File Upload Methods")
+    # Method 1: Text area paste
+    st.markdown("### Method 1: Copy-Paste CSV Content")
+    st.info("Copy your CSV content and paste it in the text area below")
+    csv_content = st.text_area(
+        "Paste your CSV content here:",
+        height=200,
+        placeholder="name,age,city\nJohn,25,New York\nJane,30,London"
+    )
+    # Short-circuit `and`: st.button is only called once the text area is non-empty.
+    if csv_content and st.button("Load from Text Area", type="primary"):
+        try:
+            df = pd.read_csv(io.StringIO(csv_content))
+            st.success("✅ CSV loaded from text area!")
+            return df, "text_area"
+        except Exception as e:
+            st.error(f"Error parsing CSV: {e}")
+            # Returning here skips methods 2 and 3 for the rest of this run.
+            return None, None
+    # Method 2: Base64 upload (for advanced users)
+    st.markdown("### Method 2: Base64 Upload")
+    with st.expander("For Advanced Users - Base64 Upload"):
+        st.info("Convert your CSV to base64 and paste here")
+        st.code("""
+# Python code to convert CSV to base64:
+import base64
+with open('your_file.csv', 'rb') as f:
+    encoded = base64.b64encode(f.read()).decode()
+    print(encoded)
+        """)
+        base64_content = st.text_area("Paste base64 encoded CSV:", height=100)
+        # As above, the button is only called when something has been pasted.
+        if base64_content and st.button("Load from Base64"):
+            try:
+                # b64decode yields bytes, so they are wrapped in BytesIO for pandas.
+                decoded = base64.b64decode(base64_content)
+                df = pd.read_csv(io.BytesIO(decoded))
+                st.success("✅ CSV loaded from base64!")
+                return df, "base64"
+            except Exception as e:
+                # Also catches CSV parse errors from read_csv, not only
+                # base64 decoding failures, despite the message wording.
+                st.error(f"Error decoding base64: {e}")
+                return None, None
+    # Method 3: Sample data
+    st.markdown("### Method 3: Use Sample Data")
+    if st.button("Load Sample Text Classification Data"):
+        # Create sample data: eight short reviews with alternating
+        # 'positive' / 'negative' labels.
+        sample_data = {
+            'text': [
+                'I love this product, it works great!',
+                'This is terrible, waste of money',
+                'Good quality and fast delivery',
+                'Not satisfied with the purchase',
+                'Excellent service and support',
+                'Poor quality, arrived damaged',
+                'Amazing product, highly recommend',
+                'Disappointed with the results'
+            ],
+            'label': ['positive', 'negative', 'positive', 'negative',
+                     'positive', 'negative', 'positive', 'negative']
+        }
+        df = pd.DataFrame(sample_data)
+        st.success("✅ Sample data loaded!")
+        return df, "sample"
+    # No method was triggered on this run.
+    return None, None
+def safe_file_uploader_with_fallback():
+    """Show the standard CSV uploader and fall back to the alternative methods.
+
+    Returns:
+        ``(df, "standard")`` when a file was uploaded and ``pd.read_csv``
+        parsed it. Otherwise returns whatever ``alternative_file_upload()``
+        returns; that path is taken both when no file has been uploaded and
+        when reading the uploaded file raised an exception.
+    """
+    st.markdown("### 📁 Upload Your CSV File")
+    # Try standard uploader first
+    uploaded_file = st.file_uploader(
+        "Choose a CSV file",
+        type=['csv'],
+        help="If upload fails with 403 error, use alternative methods below"
+    )
+    if uploaded_file is not None:
+        try:
+            debug_log("📁 File uploaded successfully via standard method")
+            df = pd.read_csv(uploaded_file)
+            st.success("✅ File uploaded successfully!")
+            return df, "standard"
+        except Exception as e:
+            # No return here: after reporting the error, execution falls
+            # through to the alternative methods below.
+            st.error(f"Error reading uploaded file: {e}")
+            debug_log(f"❌ Standard upload failed: {e}", "ERROR")
+    # If standard upload fails or no file uploaded, show alternatives
+    st.markdown("---")
+    st.markdown("### 🔄 Alternative Upload Methods")
+    st.warning("If you're getting a 403 error, try one of these alternative methods:")
+    return alternative_file_upload()
+# Utility functions (same as before but with debug)
 def save_artifacts(obj, folder_name, file_name):
     """Save artifacts with debugging"""
     debug_log(f"💾 Saving {file_name} to {folder_name}")
         return True
     except Exception as e:
+        debug_log(f"❌ Error saving {file_name}: {str(e)}", "ERROR")
+        st.error(f"Save error: {str(e)}")
         return False
 def load_artifacts(folder_name, file_name):
         return obj
     except Exception as e:
+        debug_log(f"❌ Error loading {file_name}: {str(e)}", "ERROR")
         return None
 def load_model(model_name):
         debug_log("🧹 Cleaning text...")
         text_cleaner = TextCleaner()
         clean_text = text_cleaner.clean_text(text)
         debug_log("🔢 Vectorizing text...")
         text_vector = vectorizer.transform([clean_text])
         debug_log("🎯 Making prediction...")
         prediction = model.predict(text_vector)
         if hasattr(model, 'predict_proba'):
             try:
                 prediction_proba = model.predict_proba(text_vector)[0]
             except:
                 debug_log("No prediction probabilities available", "WARNING")
         return predicted_label, prediction_proba
     except Exception as e:
+        debug_log(f"❌ Prediction error: {str(e)}", "ERROR")
+        st.error(f"Prediction error: {str(e)}")
         return None, None
 # Main App
+st.title('🔧 Fixed Text Classification App')
+st.write('Workaround version to bypass 403 upload errors')
+# Show environment info in sidebar if debug mode
 if debug_mode:
     st.sidebar.subheader("🖥️ Environment Info")
     st.sidebar.write(f"Python version: {sys.version}")
     st.sidebar.write(f"Streamlit version: {st.__version__}")
+    st.sidebar.write(f"Current directory: {os.getcwd()}")
+# Navigation
+section = st.sidebar.radio("Choose Section", [
+    "Upload Data", "Data Analysis", "Train Model", "Predictions"
+])
+# Session state
 if 'train_df' not in st.session_state:
     st.session_state.train_df = None
+if 'upload_method' not in st.session_state:
+    st.session_state.upload_method = None
+if 'vectorizer_type' not in st.session_state:
+    st.session_state.vectorizer_type = "tfidf"
+# Upload Data Section
+if section == "Upload Data":
+    st.subheader("📁 Upload Your Dataset")
+    df, method = safe_file_uploader_with_fallback()
+    if df is not None:
+        st.session_state.train_df = df
+        st.session_state.upload_method = method
+        st.write("### 📊 Data Preview")
+        st.dataframe(df.head())
+        st.write("### 📈 Basic Info")
+        col1, col2, col3 = st.columns(3)
+        with col1:
+            st.metric("Rows", df.shape[0])
+        with col2:
+            st.metric("Columns", df.shape[1])
+        with col3:
+            st.metric("Missing Values", df.isnull().sum().sum())
+        st.write("### 🏷️ Select Columns")
+        columns = df.columns.tolist()
+        col1, col2 = st.columns(2)
+        with col1:
+            text_column = st.selectbox("Select text column:", columns)
+        with col2:
+            target_column = st.selectbox("Select target/label column:", columns)
+        if text_column and target_column:
+            st.session_state.text_column = text_column
+            st.session_state.target_column = target_column
+            # Show sample data
+            st.write("### 📝 Sample Data")
+            sample_df = df[[text_column, target_column]].head()
+            st.dataframe(sample_df)
+            # Show target distribution
+            st.write("### 🎯 Target Distribution")
+            target_counts = df[target_column].value_counts()
+            st.bar_chart(target_counts)
+            st.success("✅ Data ready for processing!")
+# Data Analysis Section
 elif section == "Data Analysis":
     if st.session_state.train_df is not None:
         df = st.session_state.train_df
+        text_col = st.session_state.get('text_column')
+        target_col = st.session_state.get('target_column')
+        if text_col and target_col:
+            st.subheader("📊 Data Analysis")
+            try:
+                # Process data using custom classes
+                info = Informations(df, text_col, target_col)
+                df['clean_text'] = info.clean_text()
+                df['text_length'] = info.text_length()
+                # Update session state
+                st.session_state.train_df = df
+                # Show analysis
+                st.write("**Data Shape:**", info.shape())
+                st.write("**Class Distribution:**", info.class_imbalanced())
+                st.write("**Missing Values:**", info.missing_values())
+                # Text length analysis
+                st.write("**Text Length Analysis:**")
+                st.write(info.analysis_text_length('text_length'))
+                # Visualizations
+                vis = Visualizations(df, text_col, target_col)
+                col1, col2 = st.columns(2)
+                with col1:
+                    st.write("**Class Distribution:**")
+                    vis.class_distribution()
+                with col2:
+                    st.write("**Text Length Distribution:**")
+                    vis.text_length_distribution()
+            except Exception as e:
+                st.error(f"Error in analysis: {e}")
+                debug_log(f"Analysis error: {e}", "ERROR")
+        else:
+            st.warning("Please select text and target columns in the Upload Data section.")
     else:
+        st.warning("Please upload data first.")
+# Train Model Section
 elif section == "Train Model":
     if st.session_state.train_df is not None:
+        df = st.session_state.train_df
+        text_col = st.session_state.get('text_column')
+        target_col = st.session_state.get('target_column')
+        if text_col and target_col and 'clean_text' in df.columns:
+            st.subheader("🤖 Train Model")
+            col1, col2 = st.columns(2)
+            with col1:
+                model_choice = st.selectbox("Choose Model:", [
+                    "Logistic Regression", "Decision Tree", "Random Forest",
+                    "Linear SVC", "SVC", "Multinomial Naive Bayes"
+                ])
+            with col2:
+                vectorizer_choice = st.selectbox("Choose Vectorizer:",
+                    ["Tfidf Vectorizer", "Count Vectorizer"])
+            if st.button("🚀 Train Model", type="primary"):
+                with st.spinner("Training model..."):
+                    try:
+                        # Prepare data
+                        if vectorizer_choice == "Tfidf Vectorizer":
+                            vectorizer = TfidfVectorizer(max_features=10000)
+                            st.session_state.vectorizer_type = "tfidf"
+                        else:
+                            vectorizer = CountVectorizer(max_features=10000)
+                            st.session_state.vectorizer_type = "count"
+                        # Label encoding
+                        label_encoder = LabelEncoder()
+                        y = label_encoder.fit_transform(df[target_col])
+                        X = vectorizer.fit_transform(df['clean_text'])
+                        # Split data
+                        X_train, X_test, y_train, y_test = process.split_data(X, y)
+                        # Save artifacts
+                        save_artifacts(vectorizer, "artifacts", f"{st.session_state.vectorizer_type}_vectorizer.pkl")
+                        save_artifacts(label_encoder, "artifacts", "encoder.pkl")
+                        # Train model
+                        models = Models(X_train=X_train, X_test=X_test, y_train=y_train, y_test=y_test)
+                        if model_choice == "Logistic Regression":
+                            models.LogisticRegression()
+                        elif model_choice == "Decision Tree":
+                            models.DecisionTree()
+                        elif model_choice == "Random Forest":
+                            models.RandomForestClassifier()
+                        elif model_choice == "Linear SVC":
+                            models.LinearSVC()
+                        elif model_choice == "SVC":
+                            models.SVC()
+                        elif model_choice == "Multinomial Naive Bayes":
+                            models.MultinomialNB()
+                        st.success("🎉 Model trained successfully!")
+                    except Exception as e:
+                        st.error(f"Training error: {e}")
+                        debug_log(f"Training error: {e}", "ERROR")
+        else:
+            st.warning("Please complete data analysis first to process the text data.")
     else:
+        st.warning("Please upload data first.")
+# Predictions Section
 elif section == "Predictions":
+    st.subheader("🔮 Make Predictions")
+    # Check for models
+    if os.path.exists("models") and os.listdir("models"):
+        available_models = [f for f in os.listdir("models") if f.endswith('.pkl')]
+        if available_models:
+            selected_model = st.selectbox("Choose trained model:", available_models)
+            # Single prediction
+            st.write("### Single Text Prediction")
+            text_input = st.text_area("Enter text to classify:", height=100)
+            if st.button("🎯 Predict") and text_input:
+                prediction, probabilities = predict_text(
+                    selected_model,
+                    text_input,
+                    st.session_state.get('vectorizer_type', 'tfidf')
+                )
+                if prediction is not None:
+                    st.success(f"**Prediction:** {prediction}")
+                    if probabilities is not None:
+                        encoder = load_artifacts("artifacts", "encoder.pkl")
+                        if encoder is not None:
+                            prob_df = pd.DataFrame({
+                                'Class': encoder.classes_,
+                                'Probability': probabilities
+                            }).sort_values('Probability', ascending=False)
+                            st.bar_chart(prob_df.set_index('Class'))
         else:
+            st.info("No trained models found. Train a model first.")
     else:
+        st.info("No models directory found. Train a model first.")
+# Show upload method used in sidebar
+if st.session_state.upload_method:
+    st.sidebar.success(f"✅ Data loaded via: {st.session_state.upload_method}")