# mo01018's picture
# Update app.py
# a4ac5c1 verified
from flask import Flask, render_template, request
from joblib import load
import pandas as pd
import re
from customFunctions import *
import json
import datetime
import numpy as np
from huggingface_hub import hf_hub_download
import torch
import os
import logging
# Let pandas display up to 1000 characters per column before truncating.
pd.options.display.max_colwidth = 1000
# Patch torch.load so that everything is deserialized onto the CPU, whatever
# device the caller (or the saved object) asks for.
original_torch_load = torch.load


def cpu_load(*args, **kwargs):
    """Call the original ``torch.load`` with ``map_location`` forced to CPU.

    All positional and keyword arguments are forwarded unchanged, except that
    any ``map_location`` supplied by the caller is replaced. Overriding (rather
    than adding a second ``map_location=``) avoids the "multiple values for
    keyword argument" TypeError when the caller already passes one.

    Returns:
        Whatever the original ``torch.load`` returns.
    """
    cpu = torch.device('cpu')
    if len(args) > 1:
        # map_location is the second positional parameter of torch.load, so a
        # positional value has to be swapped in place.
        args = (args[0], cpu, *args[2:])
    else:
        kwargs['map_location'] = cpu
    return original_torch_load(*args, **kwargs)


torch.load = cpu_load
# Flask application instance; the @app.route views below are registered on it.
app = Flask(__name__)
# Logging setup: everything logged through the root logger at INFO or above is
# appended to a file under /tmp.
LOG_DIR = "/tmp/logs"  # Use a universally writable directory
os.makedirs(LOG_DIR, exist_ok=True)
LOG_FILE = os.path.join(LOG_DIR, "usage_log.jsonl")

# NOTE(review): this format puts a timestamp and level in front of every
# message, so lines in the .jsonl file are not bare JSON objects.
logging.basicConfig(
    format='%(asctime)s [%(levelname)s] %(message)s',
    level=logging.INFO,
    filename=LOG_FILE,
)
# Registry of the available pipelines. Each row is
# (id, display name, joblib filename); list order is kept exactly as written.
PIPELINES = [
    {'id': pid, 'name': label, 'filename': joblib_file}
    for pid, label, joblib_file in (
        (8, 'Embedded using BioWordVec', "pipeline_ex3_s4.joblib"),
        (1, 'Baseline', "pipeline_ex1_s1.joblib"),
        (2, 'Trained on a FeedForward NN', "pipeline_ex1_s2.joblib"),
        (3, 'Trained on a CRF', "pipeline_ex1_s3.joblib"),
        (4, 'Trained on a small dataset', "pipeline_ex2_s3.joblib"),
        (5, 'Trained on a large dataset', "pipeline_ex2_s2.joblib"),
        (6, 'Embedded using TFIDF', "pipeline_ex3_s2.joblib"),
        (7, 'Embedded using GloVe', "pipeline_ex3_s3.joblib"),
    )
]

# Same entries without the filename; this is what gets passed to the template.
pipeline_metadata = [
    {key: entry[key] for key in ('id', 'name')} for entry in PIPELINES
]
def load_pipeline_from_hub(filename):
    """Download *filename* from the coursework Hub repo and deserialize it.

    The Hub cache is pointed at /tmp/hf_cache, both through the HF_HUB_CACHE
    environment variable and the ``cache_dir`` argument of the download call.

    Returns:
        The object produced by ``load`` on the downloaded file.
    """
    hub_cache = "/tmp/hf_cache"
    os.environ["HF_HUB_CACHE"] = hub_cache
    downloaded_path = hf_hub_download(
        repo_id='hw01558/nlp-coursework-pipelines',
        filename=filename,
        cache_dir=hub_cache,
    )
    # NOTE(review): `load` (imported from joblib at the top of the file)
    # unpickles the file, which can execute arbitrary code -- only safe while
    # the Hub repository is trusted.
    return load(downloaded_path)
def get_pipeline_by_id(pipelines, pipeline_id):
    """Return the 'filename' of the first entry whose 'id' equals *pipeline_id*.

    Returns None when no entry in *pipelines* matches.
    """
    for entry in pipelines:
        if entry['id'] == pipeline_id:
            return entry['filename']
    return None
def get_name_by_id(pipelines, pipeline_id):
    """Return the 'name' of the first entry whose 'id' equals *pipeline_id*.

    Returns None when no entry in *pipelines* matches.
    """
    for entry in pipelines:
        if entry['id'] == pipeline_id:
            return entry['name']
    return None
def requestResults(text, pipeline):
    """Run ``pipeline.predict`` on *text* and return the first prediction.

    A NumPy array result is converted to a plain list before its first element
    is taken; any other result is indexed directly.
    """
    predicted = pipeline.predict(text)
    if not isinstance(predicted, np.ndarray):
        return predicted[0]
    return predicted.tolist()[0]
def log_interaction(user_input, model_name, predictions):
    """Log one prediction request as a JSON string through the root logger.

    Args:
        user_input: The raw text submitted by the user.
        model_name: Display name of the pipeline that produced the predictions.
        predictions: The prediction result; must be JSON-serializable.

    Logging is best-effort: any failure is printed and swallowed so that a
    logging problem never breaks the request being served.
    """
    log_entry = {
        # Timezone-aware UTC timestamp: datetime.utcnow() is deprecated and
        # returns a naive value that carries no UTC marker in its ISO string.
        "timestamp": datetime.datetime.now(datetime.timezone.utc).isoformat(),
        "model": model_name,
        "user_input": user_input,
        "predictions": predictions
    }
    try:
        logging.info(json.dumps(log_entry))
    except Exception as e:  # deliberate catch-all: logging must not crash the view
        print(f"[ERROR] Could not write log entry: {e}")
    else:
        print("[INFO] Logged interaction successfully.")
@app.route('/')
def index():
    """Render index.html, passing it the id/name list of the pipelines."""
    template_context = {'pipelines': pipeline_metadata}
    return render_template('index.html', **template_context)
@app.route('/', methods=['POST'])
def get_data():
    """Label the submitted text with the selected pipeline and render the result.

    Reads the ``search`` and ``pipeline_select`` form fields, tokenizes the
    text, loads the chosen pipeline, predicts one label per token, logs the
    interaction and renders ``index.html`` with the token -> label mapping.

    Returns:
        The rendered page, or a plain-text 400 response when
        ``pipeline_select`` is not an integer or does not match any entry in
        PIPELINES (previously this surfaced as a 500 error).
    """
    # The route is registered for POST only, so no request.method check is needed.
    text = request.form['search']
    # Tokens are runs of word characters or single non-space punctuation marks.
    tokens = re.findall(r"\w+|[^\w\s]", text)

    try:
        pipeline_id = int(request.form['pipeline_select'])
    except ValueError:
        return "Invalid pipeline selection.", 400
    filename = get_pipeline_by_id(PIPELINES, pipeline_id)
    if filename is None:
        # Reject unknown ids here instead of passing None to the Hub download.
        return "Unknown pipeline selection.", 400

    pipeline = load_pipeline_from_hub(filename)
    name = get_name_by_id(PIPELINES, pipeline_id)

    # The pipeline receives a one-element Series whose only item is the
    # Series of tokens.
    tokens_formatted = pd.Series([pd.Series(tokens)])
    labels = requestResults(tokens_formatted, pipeline)

    # NOTE(review): keyed by token text, so a token that occurs more than once
    # collapses into a single entry holding the label of its last occurrence.
    # Fixing this means changing the structure handed to the template.
    results = dict(zip(tokens, labels))
    log_interaction(text, name, results)
    return render_template('index.html', results=results, name=name, pipelines=pipeline_metadata)
if __name__ == '__main__':
app.run(host="0.0.0.0", port=7860)