Spaces:

MINHCT
/

Classification

Running

App Files Files Community

MINHCT committed on Apr 30

Commit

b21cabd

•

1 Parent(s): b74f1f5

Update app.py

Browse files

Files changed (1) hide show

app.py +115 -7

app.py CHANGED Viewed

@@ -92,12 +92,14 @@ def process_api(text):
     SVM_Predicted = SVM_model.predict(processed_text).tolist() # SVC Model
     Seq_Predicted = Seq_model.predict(padded_sequence)
     predicted_label_index = np.argmax(Seq_Predicted)
     # ----------- Proba -----------
     Logistic_Predicted_proba = logistic_model.predict_proba(processed_text)
     svm_new_probs = SVM_model.decision_function(processed_text)
     svm_probs = svm_model.predict_proba(svm_new_probs)
-    predicted_label_index = np.argmax(Seq_Predicted)
     # ----------- Debug Logs -----------
     logistic_debug = decodedLabel(int(Logistic_Predicted[0]))
@@ -112,16 +114,122 @@ def process_api(text):
             'predicted_label_svm': decodedLabel(int(SVM_Predicted[0])),
             'probability_svm': f"{int(float(np.max(svm_probs))*10000//100)}%",
-            'predicted_label_lstm': int(predicted_label_index),
-            'probability_lstm': f"{int(float(np.max(Seq_Predicted))*10000//100)}%",
             'Article_Content': text
         }
 # Init web crawling, process article content by Model and return result as JSON
-lstm")
         },
     })
 st.divider()  # 👈 Draws a horizontal rule
@@ -147,7 +255,7 @@ st.divider()  # 👈 Draws a horizontal rule
 # Get the current year
 current_year = date.today().year
 # Format the copyright statement with dynamic year
-copyright_text = f"Copyright © {current_year}"
 st.title(copyright_text)
 author_names = ["Trần Thanh Phước (Mentor)", "Lương Ngọc Phương (Member)", "Trịnh Cẩm Minh (Member)"]
 st.write("Meet the minds behind the work!")

     SVM_Predicted = SVM_model.predict(processed_text).tolist() # SVC Model
     Seq_Predicted = Seq_model.predict(padded_sequence)
     predicted_label_index = np.argmax(Seq_Predicted)
+    print(int(predicted_label_index))
     # ----------- Proba -----------
     Logistic_Predicted_proba = logistic_model.predict_proba(processed_text)
+    #print(float(np.max(Logistic_Predicted_proba)))
     svm_new_probs = SVM_model.decision_function(processed_text)
     svm_probs = svm_model.predict_proba(svm_new_probs)
+    #print(float(np.max(svm_probs)))
     # ----------- Debug Logs -----------
     logistic_debug = decodedLabel(int(Logistic_Predicted[0]))
             'predicted_label_svm': decodedLabel(int(SVM_Predicted[0])),
             'probability_svm': f"{int(float(np.max(svm_probs))*10000//100)}%",
+            'LSTM': decodedLabel(int(predicted_label_index)),
             'Article_Content': text
         }
 # Init web crawling, process article content by Model and return result as JSON
+def categorize(url):
+    """Crawl the article at ``url`` and classify its content.
+
+    Args:
+        url: Address of the article, passed straight to ``crawURL``.
+
+    Returns:
+        Whatever ``process_api`` returns for the crawled content. If any
+        step raises, a dict of the form ``{"error_message": <str>}`` is
+        returned instead of propagating the exception.
+    """
+    try:
+        article_content = crawURL(url)
+        return process_api(article_content)
+    except Exception as error:
+        # Always hand back a plain string: an exception object is not a
+        # JSON-serializable value. Prefer a custom ``message`` attribute when
+        # the exception has one, and fall back to the exception class name
+        # when the exception carries no text at all.
+        message = getattr(error, 'message', None) or str(error) or type(error).__name__
+        return {"error_message": str(message)}
+# Main App
+st.title('Instant Category Classification')
+st.write("Unsure what category a CNN article belongs to? Our clever tool can help! Paste the URL below and press Enter. We'll sort it into one of our 5 categories in a flash! ⚡️")
+# Define category information (modify content and bullet points as needed)
+categories = {
+    "Business": [
+        "Analyze market trends and investment opportunities.",
+        "Gain insights into company performance and industry news.",
+        "Stay informed about economic developments and regulations."
+    ],
+    "Health": [
+        "Discover healthy recipes and exercise tips.",
+        "Learn about the latest medical research and advancements.",
+        "Find resources for managing chronic conditions and improving well-being."
+    ],
+    "Sport": [
+        "Follow your favorite sports teams and athletes.",
+        "Explore news and analysis from various sports categories.",
+        "Stay updated on upcoming games and competitions."
+    ],
+    "Politics": [
+        "Get informed about current political events and policies.",
+        "Understand different perspectives on political issues.",
+        "Engage in discussions and debates about politics."
+    ],
+    "Entertainment": [
+        "Find recommendations for movies, TV shows, and music.",
+        "Explore reviews and insights from entertainment critics.",
+        "Stay updated on celebrity news and cultural trends."
+    ]
+}
+# Define model information (modify descriptions as needed)
+models = {
+  "Logistic Regression": "A widely used statistical method for classification problems. It excels at identifying linear relationships between features and the target variable.",
+  "SVC (Support Vector Classifier)": "A powerful machine learning model that seeks to find a hyperplane that best separates data points of different classes. It's effective for high-dimensional data and can handle some non-linear relationships.",
+  "LSTM (Long Short-Term Memory)": "A type of recurrent neural network (RNN) particularly well-suited for sequential data like text or time series. LSTMs can effectively capture long-term dependencies within the data.",
+  "BERT (Bidirectional Encoder Representations from Transformers)": "A powerful pre-trained model based on the Transformer architecture. It excels at understanding the nuances of language and can be fine-tuned for various NLP tasks like text classification."
+}
+# Create an expander listing the categories that can be classified
+with st.expander("Category List"):
+    # Overview: one bullet per category name
+    st.subheader("Available Categories:")
+    for category in categories:
+        st.write(f"- {category}")
+    # Details: each category's bullet points, set off by a horizontal line
+    st.write("---")
+    for category, content in categories.items():
+        st.subheader(category)
+        for item in content:
+            st.write(f"- {item}")
+# Create an expander listing the models used in this project
+with st.expander("Available Models"):
+    # Overview: one bullet per model name
+    st.subheader("List of Models:")
+    for model_name in models:
+        st.write(f"- {model_name}")
+    # Details: each model's description, set off by a horizontal line
+    st.write("---")
+    for model_name, description in models.items():
+        st.subheader(model_name)
+        st.write(description)
+# Explain to the user why this project only works for the CNN domain
+with st.expander("Tips", expanded=True):
+    st.write(
+        '''
+            This project works best with CNN articles right now.
+            Our web crawler is like a special tool for CNN's website.
+            It can't quite understand other websites because they're built differently
+        '''
+    )
+st.divider() # 👈 Draws a horizontal rule
+st.title('Dive in! See what category your CNN story belongs to 😉.')
+# Paste URL Input
+url = st.text_input("Find your favorite CNN story! Paste the URL and press ENTER 🔍.", placeholder='Ex: https://edition.cnn.com/2012/01/31/health/frank-njenga-mental-health/index.html')
+if url:
+    st.divider() # 👈 Draws a horizontal rule
+    result = categorize(url)
+    if "error_message" in result:
+        # categorize() reports a failure as {"error_message": ...} instead of
+        # raising; show that to the user rather than an empty text area and
+        # a JSON payload whose values are all null.
+        st.error(f"Could not classify this URL: {result['error_message']}")
+    else:
+        article_content = result.get('Article_Content')
+        st.title('Article Content Fetched')
+        st.text_area("", value=article_content, height=400) # render the article content as textarea element
+        st.divider()  # 👈 Draws a horizontal rule
+        st.title('Predicted Results')
+        # One entry per model; the LSTM entry carries only the decoded label
+        st.json({
+            "Logistic": {
+                "predicted_label": result.get("predicted_label_logistic"),
+                "probability": result.get("probability_logistic")
+            },
+            "SVC": {
+                "predicted_label": result.get("predicted_label_svm"),
+                "probability": result.get("probability_svm")
+            },
+            "LSTM": result.get("LSTM")
+        })
 st.divider()  # 👈 Draws a horizontal rule
 # Get the current year
 current_year = date.today().year
 # Format the copyright statement with dynamic year
+copyright_text = f"Copyright © {current_year}"
 st.title(copyright_text)
 author_names = ["Trần Thanh Phước (Mentor)", "Lương Ngọc Phương (Member)", "Trịnh Cẩm Minh (Member)"]
 st.write("Meet the minds behind the work!")