Spaces:

MINHCT
/

Classification

Sleeping

App Files Files Community

MINHCT committed on Apr 30

Commit

c77a94b

•

1 Parent(s): c2af0b2

Update app.py

Browse files

Files changed (1) hide show

app.py +6 -114

app.py CHANGED Viewed

@@ -92,14 +92,12 @@ def process_api(text):
     SVM_Predicted = SVM_model.predict(processed_text).tolist() # SVC Model
     Seq_Predicted = Seq_model.predict(padded_sequence)
     predicted_label_index = np.argmax(Seq_Predicted)
-    print(int(predicted_label_index))
     # ----------- Proba -----------
     Logistic_Predicted_proba = logistic_model.predict_proba(processed_text)
-    #print(float(np.max(Logistic_Predicted_proba)))
     svm_new_probs = SVM_model.decision_function(processed_text)
     svm_probs = svm_model.predict_proba(svm_new_probs)
-    #print(float(np.max(svm_probs)))
     # ----------- Debug Logs -----------
     logistic_debug = decodedLabel(int(Logistic_Predicted[0]))
@@ -114,122 +112,16 @@ def process_api(text):
             'predicted_label_svm': decodedLabel(int(SVM_Predicted[0])),
             'probability_svm': f"{int(float(np.max(svm_probs))*10000//100)}%",
-            'LSTM': decodedLabel(int(predicted_label_index)),
             'Article_Content': text
         }
 # Init web crawling, process article content by Model and return result as JSON
-def categorize(url):
-    try:
-        article_content = crawURL(url)
-        result = process_api(article_content)
-        return result
-    except Exception as error:
-        if hasattr(error, 'message'):
-            return {"error_message": error.message}
-        else:
-            return {"error_message": error}
-# Main App
-st.title('Instant Category Classification')
-st.write("Unsure what category a CNN article belongs to? Our clever tool can help! Paste the URL below and press Enter. We'll sort it into one of our 5 categories in a flash! ⚡️")
-# Define category information (modify content and bullet points as needed)
-categories = {
-    "Business": [
-        "Analyze market trends and investment opportunities.",
-        "Gain insights into company performance and industry news.",
-        "Stay informed about economic developments and regulations."
-    ],
-    "Health": [
-        "Discover healthy recipes and exercise tips.",
-        "Learn about the latest medical research and advancements.",
-        "Find resources for managing chronic conditions and improving well-being."
-    ],
-    "Sport": [
-        "Follow your favorite sports teams and athletes.",
-        "Explore news and analysis from various sports categories.",
-        "Stay updated on upcoming games and competitions."
-    ],
-    "Politics": [
-        "Get informed about current political events and policies.",
-        "Understand different perspectives on political issues.",
-        "Engage in discussions and debates about politics."
-    ],
-    "Entertainment": [
-        "Find recommendations for movies, TV shows, and music.",
-        "Explore reviews and insights from entertainment critics.",
-        "Stay updated on celebrity news and cultural trends."
-    ]
-}
-# Define model information (modify descriptions as needed)
-models = {
-  "Logistic Regression": "A widely used statistical method for classification problems. It excels at identifying linear relationships between features and the target variable.",
-  "SVC (Support Vector Classifier)": "A powerful machine learning model that seeks to find a hyperplane that best separates data points of different classes. It's effective for high-dimensional data and can handle some non-linear relationships.",
-  "LSTM (Long Short-Term Memory)": "A type of recurrent neural network (RNN) particularly well-suited for sequential data like text or time series. LSTMs can effectively capture long-term dependencies within the data.",
-  "BERT (Bidirectional Encoder Representations from Transformers)": "A powerful pre-trained model based on the Transformer architecture. It excels at understanding the nuances of language and can be fine-tuned for various NLP tasks like text classification."
-}
-# Create expanders containing list of categories can be classified
-with st.expander("Category List"):
-  # Title for each category
-  st.subheader("Available Categories:")
-  for category in categories.keys():
-    st.write(f"- {category}")
-  # Content for each category (separated by a horizontal line)
-  st.write("---")
-  for category, content in categories.items():
-    st.subheader(category)
-    for item in content:
-      st.write(f"- {item}")
-# Create expanders containing list of models used in this project
-with st.expander("Available Models"):
-  st.subheader("List of Models:")
-  for model_name in models.keys():
-    st.write(f"- {model_name}")
-  st.write("---")
-  for model_name, description in models.items():
-    st.subheader(model_name)
-    st.write(description)
-# Explain to user why this project is only worked for CNN domain
-with st.expander("Tips", expanded=True):
-    st.write(
-        '''
-            This project works best with CNN articles right now.
-            Our web crawler is like a special tool for CNN's website.
-            It can't quite understand other websites because they're built differently
-        '''
-    )
-st.divider() # 👈 Draws a horizontal rule
-st.title('Dive in! See what category your CNN story belongs to 😉.')
-# Paste URL Input
-url = st.text_input("Find your favorite CNN story! Paste the URL and press ENTER 🔍.", placeholder='Ex: https://edition.cnn.com/2012/01/31/health/frank-njenga-mental-health/index.html')
-if url:
-    st.divider() # 👈 Draws a horizontal rule
-    result = categorize(url)
-    article_content = result.get('Article_Content')
-    st.title('Article Content Fetched')
-    st.text_area("", value=article_content, height=400) # render the article content as textarea element
-    st.divider()  # 👈 Draws a horizontal rule
-    st.title('Predicted Results')
-    st.json({
-        "Logistic": {
-            "predicted_label": result.get("predicted_label_logistic"),
-            "probability": result.get("probability_logistic")
-        },
-        "SVC": {
-            "predicted_label": result.get("predicted_label_svm"),
-            "probability": result.get("probability_svm")
         },
-        "LSTM": result.get("LSTM")
     })
 st.divider()  # 👈 Draws a horizontal rule

     SVM_Predicted = SVM_model.predict(processed_text).tolist() # SVC Model
     Seq_Predicted = Seq_model.predict(padded_sequence)
     predicted_label_index = np.argmax(Seq_Predicted)
     # ----------- Proba -----------
     Logistic_Predicted_proba = logistic_model.predict_proba(processed_text)
     svm_new_probs = SVM_model.decision_function(processed_text)
     svm_probs = svm_model.predict_proba(svm_new_probs)
+    predicted_label_index = np.argmax(Seq_Predicted)
     # ----------- Debug Logs -----------
     logistic_debug = decodedLabel(int(Logistic_Predicted[0]))
             'predicted_label_svm': decodedLabel(int(SVM_Predicted[0])),
             'probability_svm': f"{int(float(np.max(svm_probs))*10000//100)}%",
+            'predicted_label_lstm': int(predicted_label_index),
+            'probability_lstm': f"{int(float(np.max(Seq_Predicted))*10000//100)}%",
             'Article_Content': text
         }
 # Init web crawling, process article content by Model and return result as JSON
+lstm")
         },
     })
 st.divider()  # 👈 Draws a horizontal rule