Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -3,20 +3,17 @@ import streamlit as st
|
|
3 |
import json
|
4 |
import requests
|
5 |
from bs4 import BeautifulSoup
|
6 |
-
#from . import SVM_Linear_Model
|
7 |
-
#import Logistic_Model
|
8 |
-
#from . import vectorizer
|
9 |
-
# from . import tokenizer
|
10 |
|
11 |
# load all the models and vectorizer (global vocabulary)
|
12 |
-
# Seq_model = load_model(
|
13 |
-
|
14 |
logistic_model = joblib.load("Logistic_Model.joblib") # Logistic
|
15 |
-
vectorizer = joblib.load("vectorizer.joblib") # global vocabulary
|
16 |
-
|
17 |
|
|
|
18 |
def crawURL(url):
|
19 |
-
# Fetch the
|
20 |
response = requests.get(url)
|
21 |
# Parse the sitemap HTML
|
22 |
soup = BeautifulSoup(response.content, 'html.parser')
|
@@ -61,21 +58,23 @@ def crawURL(url):
|
|
61 |
print(f"Failed to crawl page: {url}, Error: {str(e)}")
|
62 |
return null
|
63 |
|
|
|
64 |
def process_api(text):
|
65 |
# Vectorize the text data
|
66 |
processed_text = vectorizer.transform([text])
|
67 |
# sequence = tokenizer.texts_to_sequences([text])
|
68 |
# padded_sequence = pad_sequences(sequence, maxlen=1000, padding='post')
|
|
|
69 |
# Get the predicted result from models
|
70 |
-
# Seq_Predicted = Seq_model.predict(padded_sequence)
|
71 |
-
# SVM_Predicted = SVM_model.predict(processed_text).tolist()
|
72 |
Logistic_Predicted = logistic_model.predict(processed_text).tolist()
|
|
|
|
|
73 |
|
74 |
# predicted_label_index = np.argmax(Seq_Predicted)
|
75 |
return {
|
76 |
-
'
|
77 |
-
# 'SVM_Predicted': int(SVM_Predicted[0]),
|
78 |
'Logistic_Predicted': int(Logistic_Predicted[0])
|
|
|
79 |
}
|
80 |
|
81 |
# Using Model to handle and return Category Route
|
@@ -93,10 +92,9 @@ def categorize(url):
|
|
93 |
return {"error_message": error.message}
|
94 |
else:
|
95 |
return {"error_message": error}
|
96 |
-
|
97 |
|
|
|
98 |
url = st.text_input("enter your CNN's URL here")
|
99 |
-
|
100 |
if url:
|
101 |
result = categorize(url)
|
102 |
st.json(result)
|
|
|
3 |
import json
|
4 |
import requests
|
5 |
from bs4 import BeautifulSoup
|
|
|
|
|
|
|
|
|
6 |
|
7 |
# Load the persisted models and preprocessing artefacts once, at import time.
# Seq_model = load_model("LSTM.h5")  # Sequential (LSTM) model, currently disabled
SVM_Linear_model = joblib.load("SVM_Linear_Kernel")  # SVM
logistic_model = joblib.load("Logistic_Model.joblib")  # Logistic
# Global vocabulary shared by the Logistic and SVC models.
vectorizer = joblib.load("vectorizer.joblib")
# Tokenizer for the LSTM path; only referenced by the disabled LSTM code.
tokenizer = joblib.load("tokenizer.joblib")
|
13 |
|
14 |
+
# Web Crawler function
|
15 |
def crawURL(url):
|
16 |
+
# Fetch the URL content
|
17 |
response = requests.get(url)
|
18 |
# Parse the sitemap HTML
|
19 |
soup = BeautifulSoup(response.content, 'html.parser')
|
|
|
58 |
print(f"Failed to crawl page: {url}, Error: {str(e)}")
|
59 |
return null
|
60 |
|
61 |
+
# Predict for text category using Models
|
62 |
def process_api(text):
    """Predict the category of an article with the SVM and Logistic models.

    Args:
        text: Article text to classify.

    Returns:
        A dict with the first predicted label of each model cast to ``int``
        (``'SVM_Predicted'``, ``'Logistic_Predicted'``) and the input text
        echoed back under ``'Article_Content'``.
    """
    # Vectorize the text data. The text is wrapped in a one-element list
    # because the vectorizer is called on a batch (presumably a fitted
    # scikit-learn vectorizer -- confirm against the training code).
    processed_text = vectorizer.transform([text])

    # LSTM path (disabled):
    # sequence = tokenizer.texts_to_sequences([text])
    # padded_sequence = pad_sequences(sequence, maxlen=1000, padding='post')

    # Get the predicted result from models
    Logistic_Predicted = logistic_model.predict(processed_text).tolist()
    # The SVM is bound at module level as ``SVM_Linear_model``; the previous
    # reference to ``SVM_model`` was an undefined name.
    SVM_Predicted = SVM_Linear_model.predict(processed_text).tolist()
    # Seq_Predicted = Seq_model.predict(padded_sequence)

    # predicted_label_index = np.argmax(Seq_Predicted)
    return {
        'SVM_Predicted': int(SVM_Predicted[0]),
        # A comma was missing after this entry, which made the dict literal
        # a SyntaxError.
        'Logistic_Predicted': int(Logistic_Predicted[0]),
        'Article_Content': text,
    }
|
79 |
|
80 |
# Using Model to handle and return Category Route
|
|
|
92 |
return {"error_message": error.message}
|
93 |
else:
|
94 |
return {"error_message": error}
|
|
|
95 |
|
96 |
+
# Main App
|
97 |
# Ask the user for an article URL; Streamlit re-runs the script on every
# input change, so nothing is rendered until a non-empty URL is entered.
url = st.text_input("enter your CNN's URL here")
if url:
    # Categorize the URL and show the resulting dict as JSON.
    result = categorize(url)
    st.json(result)
|