|
|
|
from threading import Thread |
|
from flask import Blueprint, jsonify, request |
|
from flask_cors import CORS |
|
import sys |
|
import os |
|
|
|
|
|
|
|
from concurrent.futures import ProcessPoolExecutor |
|
|
|
|
|
import fasttext |
|
|
|
|
|
|
|
sys.path.insert(0, os.path.abspath(".")) |
|
|
|
from app.config import Config |
|
from app.helpers import * |
|
from app.db.models import Tasks |
|
from app.database import db |
|
from app.threads.process_fsa_v2 import process_fsa_categories_v2 |
|
|
|
|
|
|
|
# Blueprint for the v2 FSA endpoints; every route registered on it is served
# under the /api/v2/fsa prefix.
fsa = Blueprint(
    "fsa_v2",
    __name__,
    url_prefix="/api/v2/fsa",
)

# Enable CORS on the blueprint's routes, with credentialed requests allowed.
CORS(fsa, supports_credentials=True)
|
|
|
|
|
|
|
class FSAThread_V2(Thread):
    """Thread that runs ``process_fsa_categories_v2`` on a payload.

    Args:
        data: Payload handed unchanged to ``process_fsa_categories_v2`` when
            the thread runs. Defaults to a fresh empty dict per instance.
    """

    def __init__(self, data=None) -> None:
        super().__init__()
        # A ``{}`` default would be one dict shared by every instance created
        # without an argument; build a new one per instance instead.
        self.data = {} if data is None else data

    def run(self) -> None:
        """Process the stored payload; invoked by ``Thread.start()``."""
        process_fsa_categories_v2(self.data)
|
|
|
|
|
|
|
# Upper bound on the number of worker processes in the shared pool below.
max_processes = 4

# Module-level process pool; process_csv submits process_fsa_categories_v2
# jobs to it.
process_executor = ProcessPoolExecutor(
    max_workers=max_processes,
)
|
|
|
|
|
def update_db(table_idx, remarks=None):
    """Call ``Tasks.update_by_id`` inside a Flask application context.

    process_csv passes this function to the background job through the
    ``"update_db"`` key of its payload.

    Args:
        table_idx: Id forwarded to ``Tasks.update_by_id``.
        remarks: Optional second argument forwarded to ``Tasks.update_by_id``.
    """
    # Imported at call time rather than at module import
    # (presumably to avoid a circular import with app.api - TODO confirm).
    from app.api import app as flask_app

    with flask_app.app_context():
        Tasks.update_by_id(table_idx, remarks)
        db.session.close()
|
|
|
|
|
|
|
# Model files for the levels that use one model each (L0-L3), in prediction
# order. Every one of these levels passes the same threshold to
# get_return_labels.
_SHARED_LEVEL_MODEL_PATHS = (
    'app/models/L0/L0_model.bin',
    'app/models/L1/L1_model.bin',
    'app/models/L2/L2_model.bin',
    'app/models/L3/L3_model.bin',
)
_SHARED_LEVEL_THRESHOLD = 0.95

# The L4 model is chosen by the predicted L0 label. Each entry is
# (model file, name used in the load-error message, get_return_labels threshold).
_L4_MODELS_BY_L0_LABEL = {
    "administrative": (
        'app/models/L4/administrative/L4_Admin_model.bin', "Administrative", 0.75,
    ),
    "beverage": (
        'app/models/L4/beverage/L4_beverage_model.bin', "Beverage", 0.66,
    ),
    "food": (
        'app/models/L4/food/L4_food_model.bin', "Food", 0.85,
    ),
    "operationals": (
        'app/models/L4/operationals/L4_operationals_model.bin', "Operationals", 0.8,
    ),
}


def _load_fasttext_model(model_path):
    """Return the fastText model stored at *model_path*, or None if loading fails."""
    try:
        return fasttext.load_model(model_path)
    except Exception:
        # Unlike a bare ``except:``, this lets SystemExit/KeyboardInterrupt through.
        return None


def _level_outcome(label, accuracy, threshold):
    """Bundle one level's raw prediction with the values from get_return_labels."""
    return_label, return_score, label_status = get_return_labels(label, accuracy, threshold)
    return {
        "label": label,
        "accuracy": accuracy,
        "return_label": return_label,
        "return_score": return_score,
        "status": label_status,
    }


@fsa.route("/single-product", methods=["POST"])
def predict_categories():
    """Predict the L0-L4 categories for a single product name.

    Expects a JSON object with a ``product_name`` key. Levels are predicted in
    order; from L1 onwards the model input is the previous level's label, a
    space, and the preprocessed product name. The L4 model depends on the
    predicted L0 label.

    Returns:
        A ``(response, status)`` pair:
        * 200 with ``classification_results``, ``scores``, ``remarks``,
          ``all_classification_results`` and ``all_scores`` for L0-L4.
        * 422 when the body is not a non-empty JSON object, the product name
          is missing, the L0 label has no L4 model, or L4 yields no label.
        * 500 when a model cannot be loaded or L0-L3 yields no label.
    """
    # silent=True yields None for a missing/invalid JSON body instead of
    # aborting the request, so the 422 below is what the client receives.
    body = request.get_json(silent=True)
    if not isinstance(body, dict) or not body:
        return jsonify({"message": "Cannot decode JSON from the body"}), 422

    product_name = body.get("product_name")
    if not product_name:
        return jsonify({"message": "Product name is missing"}), 422

    product_name = preprocess(product_name)
    Logger.info(message="Processing FSA categorical data for " + product_name)

    outcomes = []  # one entry per level, index == level number
    model_input = product_name

    # NOTE: each model is loaded from disk on every request.
    for level, model_path in enumerate(_SHARED_LEVEL_MODEL_PATHS):
        model = _load_fasttext_model(model_path)
        if model is None:
            return jsonify({"message": f"Can't load the L{level} model"}), 500

        label, accuracy = get_label_and_accuracy(model, model_input)
        print(f"L{level}", label, accuracy)

        # Check the label before deriving the returned values from it.
        if not label:
            return jsonify({"message": f"Error predicting L{level} Category"}), 500

        outcomes.append(_level_outcome(label, accuracy, _SHARED_LEVEL_THRESHOLD))
        model_input = label + " " + product_name

    l4_config = _L4_MODELS_BY_L0_LABEL.get(outcomes[0]["label"])
    if l4_config is None:
        return jsonify({"message": "Error prediction of L4 Category"}), 422

    l4_model_path, l4_model_name, l4_threshold = l4_config
    model = _load_fasttext_model(l4_model_path)
    if model is None:
        return jsonify({"message": f"Can't load the L4 ({l4_model_name}) model"}), 500

    # model_input is now "<L3 label> <product name>".
    label, accuracy = get_label_and_accuracy(model, model_input)
    print("L4", label, accuracy)

    if not label:
        return jsonify({"message": "Error predicting L4 Category"}), 422

    outcomes.append(_level_outcome(label, accuracy, l4_threshold))

    Logger.info(message="Done processing FSA categorical data for " + product_name)

    return jsonify({
        "classification_results": {
            f"l{level}": outcome["return_label"] for level, outcome in enumerate(outcomes)
        },
        "scores": {
            f"l{level}": outcome["return_score"] for level, outcome in enumerate(outcomes)
        },
        "remarks": {
            f"l{level}": outcome["status"] for level, outcome in enumerate(outcomes)
        },
        "all_classification_results": {
            f"L{level}": outcome["label"] for level, outcome in enumerate(outcomes)
        },
        "all_scores": {
            f"L{level}": outcome["accuracy"] for level, outcome in enumerate(outcomes)
        },
    }), 200
|
|
|
|
|
|
|
|
|
|
|
|
|
@fsa.route("/process-csv", methods=["POST"])
def process_csv():
    """Download an uploaded CSV from S3 and queue it for FSA categorisation.

    Expects a JSON object with ``uploaded_file_name`` and, optionally,
    ``original_file_name`` (which falls back to the uploaded name).

    Returns:
        A ``(response, status)`` pair:
        * 200 once a task row is created and the job is submitted to the
          process pool.
        * 422 when the body is not a non-empty JSON object, the file name is
          missing, the S3 download reports a ``ClientError``, or the file has
          no ``product_name`` column.
    """
    # silent=True yields None for a missing/invalid JSON body instead of
    # aborting the request, so the 422 below is what the client receives.
    body = request.get_json(silent=True)
    if not isinstance(body, dict) or not body:
        return jsonify({"message": "Cannot decode JSON from the body"}), 422

    file_name = body.get("uploaded_file_name")
    if not file_name:
        return jsonify({"message": "File name is missing"}), 422

    original_file_name = body.get("original_file_name") or file_name

    # NOTE(review): file_name comes straight from the request and is
    # interpolated into the local name and the S3 key below - confirm it is
    # validated upstream (e.g. no path separators).
    input_file = {
        "name": f"fsa_input_{file_name}",
        "path": f"FSA Categorization/input/{file_name}",
    }

    download_status = download_file_from_s3(
        file_name=input_file["name"], file_path=input_file["path"]
    )
    # download_file_from_s3 hands back the ClientError rather than raising it.
    if isinstance(download_status, botocore.exceptions.ClientError):
        return (
            jsonify({"message": f"Error downloading {input_file} from s3"}),
            422,
        )

    df = read_files(file_name=file_name)

    if "product_name" not in df.columns:
        # The file cannot be processed, so drop the downloaded copy.
        remove_files(f"fsa_input_{file_name}")
        return jsonify({"message": "Product name column is missing from the CSV"}), 422

    created_task = Tasks.create(file_name=file_name, original_file_name=original_file_name)

    data = {
        "file_name": file_name,
        "table_idx": created_task.id,
        "update_db": update_db,
    }

    # The task id has been read; close the session before responding.
    db.session.close()

    # Fire-and-forget: the request returns without waiting for the job.
    process_executor.submit(process_fsa_categories_v2, data)

    return jsonify({"message": f"{file_name} - File processing starting"}), 200