Spaces:

mo01018
/

Deployment-Trial

Sleeping

App Files Files Community

Deployment-Trial / app.py

mo01018

Update app.py

a4ac5c1 verified about 2 months ago

raw

history blame contribute delete

3.61 kB

	from flask import Flask, render_template, request
	from joblib import load
	import pandas as pd
	import re
	from customFunctions import *
	import json
	import datetime
	import numpy as np
	from huggingface_hub import hf_hub_download
	import torch
	import os
	import logging

	# Raise pandas' display cap so long text columns are shown up to 1000
	# characters wide instead of being truncated in reprs.
	pd.options.display.max_colwidth = 1000

	# Patch torch.load so that everything loaded through it is mapped onto the
	# CPU, whatever device the caller asked for.
	original_torch_load = torch.load


	def cpu_load(*args, **kwargs):
	    """Call the original ``torch.load`` with ``map_location`` forced to CPU.

	    All positional and keyword arguments are forwarded unchanged, except
	    ``map_location``, which is always replaced by ``torch.device('cpu')``.

	    Returns:
	        Whatever the original ``torch.load`` returns for the given input.
	    """
	    cpu = torch.device('cpu')
	    if len(args) >= 2:
	        # map_location is torch.load's second positional parameter; override
	        # it in place so it is not passed twice.
	        args = (args[0], cpu, *args[2:])
	    else:
	        kwargs['map_location'] = cpu
	    return original_torch_load(*args, **kwargs)


	torch.load = cpu_load

	# Flask application object; the route handlers in this file register on it.
	app = Flask(__name__)

	# Logging setup: usage records are appended to a file under /tmp, which is
	# expected to be writable in any deployment environment.
	LOG_DIR = "/tmp/logs"
	os.makedirs(LOG_DIR, exist_ok=True)
	LOG_FILE = os.path.join(LOG_DIR, "usage_log.jsonl")

	# NOTE(review): the format below prefixes every message with a timestamp and
	# level, so lines in the .jsonl file are not bare JSON objects.
	logging.basicConfig(
	    format='%(asctime)s [%(levelname)s] %(message)s',
	    level=logging.INFO,
	    filename=LOG_FILE,
	)

	# Registry of selectable pipelines. Each row is (id, display name, joblib
	# filename); the row order here is the order of the resulting list.
	PIPELINES = [
	    {'id': pid, 'name': label, 'filename': fname}
	    for pid, label, fname in (
	        (8, 'Embedded using BioWordVec', "pipeline_ex3_s4.joblib"),
	        (1, 'Baseline', "pipeline_ex1_s1.joblib"),
	        (2, 'Trained on a FeedForward NN', "pipeline_ex1_s2.joblib"),
	        (3, 'Trained on a CRF', "pipeline_ex1_s3.joblib"),
	        (4, 'Trained on a small dataset', "pipeline_ex2_s3.joblib"),
	        (5, 'Trained on a large dataset', "pipeline_ex2_s2.joblib"),
	        (6, 'Embedded using TFIDF', "pipeline_ex3_s2.joblib"),
	        (7, 'Embedded using GloVe', "pipeline_ex3_s3.joblib"),
	    )
	]

	# Subset handed to the template: id and name only, no filenames.
	pipeline_metadata = [
	    {key: entry[key] for key in ('id', 'name')}
	    for entry in PIPELINES
	]

	def load_pipeline_from_hub(filename):
	    """Download a serialized pipeline from the Hugging Face Hub and load it.

	    Args:
	        filename: Name of the file inside the
	            ``hw01558/nlp-coursework-pipelines`` repository.

	    Returns:
	        The object deserialized by ``load`` from the downloaded file.

	    Raises:
	        ValueError: If ``filename`` is ``None`` or empty, which is what
	            ``get_pipeline_by_id`` yields for an unknown pipeline id.
	    """
	    if not filename:
	        raise ValueError("No pipeline filename given; cannot download from the Hub.")

	    cache_dir = "/tmp/hf_cache"
	    os.environ["HF_HUB_CACHE"] = cache_dir
	    repo_id = 'hw01558/nlp-coursework-pipelines'
	    local_path = hf_hub_download(repo_id=repo_id, filename=filename, cache_dir=cache_dir)
	    # SECURITY: joblib's load is pickle-based and can execute arbitrary code
	    # from the file. Only safe while the Hub repository above is trusted.
	    return load(local_path)

	def get_pipeline_by_id(pipelines, pipeline_id):
	    """Return the ``filename`` of the first entry whose ``id`` matches.

	    Args:
	        pipelines: Iterable of dicts with ``id`` and ``filename`` keys.
	        pipeline_id: Id to look up.

	    Returns:
	        The matching entry's filename, or ``None`` when no entry matches.
	    """
	    for entry in pipelines:
	        if entry['id'] == pipeline_id:
	            return entry['filename']
	    return None

	def get_name_by_id(pipelines, pipeline_id):
	    """Return the ``name`` of the first entry whose ``id`` matches.

	    Args:
	        pipelines: Iterable of dicts with ``id`` and ``name`` keys.
	        pipeline_id: Id to look up.

	    Returns:
	        The matching entry's display name, or ``None`` when no entry matches.
	    """
	    for entry in pipelines:
	        if entry['id'] == pipeline_id:
	            return entry['name']
	    return None

	def requestResults(text, pipeline):
	    """Run ``pipeline.predict`` on ``text`` and return the first prediction.

	    Args:
	        text: Input passed straight through to ``pipeline.predict``.
	        pipeline: Object exposing a ``predict`` method.

	    Returns:
	        Element 0 of the prediction output. NumPy arrays are converted to
	        plain Python lists before indexing.
	    """
	    predicted = pipeline.predict(text)
	    as_sequence = predicted.tolist() if isinstance(predicted, np.ndarray) else predicted
	    return as_sequence[0]

	def log_interaction(user_input, model_name, predictions):
	    """Write one usage record to the root logger as a JSON string.

	    Args:
	        user_input: Raw text submitted by the user.
	        model_name: Display name of the pipeline that produced the output.
	        predictions: JSON-serializable prediction data to record.

	    Logging is best-effort: any failure while serializing or writing the
	    entry is printed and swallowed so the request itself is not affected.
	    """
	    # datetime.utcnow() is deprecated since Python 3.12. Take an aware UTC
	    # "now" and drop tzinfo so the ISO string keeps its existing naive form.
	    utc_now = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)
	    log_entry = {
	        "timestamp": utc_now.isoformat(),
	        "model": model_name,
	        "user_input": user_input,
	        "predictions": predictions
	    }
	    try:
	        logging.info(json.dumps(log_entry))
	        print("[INFO] Logged interaction successfully.")
	    except Exception as e:
	        print(f"[ERROR] Could not write log entry: {e}")


	@app.route('/')
	def index():
	    """Render the landing page with the list of selectable pipelines."""
	    template_context = {'pipelines': pipeline_metadata}
	    return render_template('index.html', **template_context)

	@app.route('/', methods=['POST'])
	def get_data():
	    """Label the submitted text with the selected pipeline and render the page.

	    Reads two form fields: ``search`` (the text to label) and
	    ``pipeline_select`` (the id of an entry in ``PIPELINES``).

	    Returns:
	        The rendered ``index.html`` with ``results`` (token -> label),
	        ``name`` (pipeline display name) and ``pipelines``; or a plain-text
	        400 response when ``pipeline_select`` is not a known integer id.
	    """
	    text = request.form['search']
	    # Tokens are runs of word characters or single punctuation marks. The
	    # "|" must be an unescaped alternation: with "\|" the pattern only
	    # matched word + literal pipe + symbol, so normal text produced no tokens.
	    tokens = re.findall(r"\w+|[^\w\s]", text)

	    try:
	        pipeline_id = int(request.form['pipeline_select'])
	    except ValueError:
	        return "Invalid pipeline selection.", 400

	    filename = get_pipeline_by_id(PIPELINES, pipeline_id)
	    if filename is None:
	        return "Unknown pipeline selection.", 400
	    name = get_name_by_id(PIPELINES, pipeline_id)

	    if tokens:
	        # Input is a one-element Series wrapping the token Series.
	        # NOTE(review): presumably the shape the pipelines were fitted on — confirm.
	        tokens_formatted = pd.Series([pd.Series(tokens)])
	        pipeline = load_pipeline_from_hub(filename)
	        labels = requestResults(tokens_formatted, pipeline)
	        # NOTE(review): dict keys are unique, so a token that occurs more than
	        # once keeps only its last label. Changing this needs a template update.
	        results = dict(zip(tokens, labels))
	    else:
	        # Nothing to label (empty or whitespace-only input): skip the model.
	        results = {}

	    log_interaction(text, name, results)

	    return render_template('index.html', results=results, name=name, pipelines=pipeline_metadata)


	# Script entry point: when this file is executed directly, start Flask's
	# built-in server on all interfaces (0.0.0.0), port 7860.
	# NOTE(review): 7860 is presumably the port the hosting platform expects — confirm.
	if __name__ == '__main__':
	app.run(host="0.0.0.0", port=7860)