Spaces:
Sleeping
Sleeping
| from flask import Flask, render_template, request | |
| from joblib import load | |
| import pandas as pd | |
| import re | |
| from customFunctions import * | |
| import json | |
| import datetime | |
| import numpy as np | |
| from huggingface_hub import hf_hub_download | |
| import torch | |
| import os | |
| import logging | |
# Allow up to 1000 characters per cell when pandas objects are displayed.
pd.set_option('display.max_colwidth', 1000)

# Patch torch.load to always load on CPU
original_torch_load = torch.load


def cpu_load(*args, **kwargs):
    """Call the original ``torch.load`` with ``map_location`` forced to CPU.

    Any ``map_location`` supplied by the caller (positionally or by keyword)
    is overridden rather than combined with the forced value; combining them
    raised ``TypeError`` ("multiple values for keyword argument") before.

    Args:
        *args: Positional arguments forwarded to ``torch.load``.
        **kwargs: Keyword arguments forwarded to ``torch.load``.

    Returns:
        Whatever the original ``torch.load`` returns.
    """
    cpu = torch.device('cpu')
    if len(args) >= 2:
        # map_location is torch.load's second positional parameter.
        args = (args[0], cpu) + tuple(args[2:])
    else:
        kwargs['map_location'] = cpu
    return original_torch_load(*args, **kwargs)


# Installed globally so that every later torch.load call (including ones made
# by other libraries) goes through the CPU-only wrapper.
torch.load = cpu_load
# Flask application object; started by the __main__ guard at the end of the file.
app = Flask(import_name=__name__)
# Logging setup
# Records go to a file under /tmp (a universally writable directory).
LOG_DIR = "/tmp/logs"
os.makedirs(LOG_DIR, exist_ok=True)
LOG_FILE = os.path.join(LOG_DIR, "usage_log.jsonl")

# NOTE: this format prefixes every record with a timestamp and level, so the
# lines written to usage_log.jsonl are "<time> [LEVEL] <message>" rather than
# bare JSON objects.
_LOG_FORMAT = '%(asctime)s [%(levelname)s] %(message)s'

# basicConfig configures the root logger, so module-level logging.info(...)
# calls elsewhere in this file end up in LOG_FILE.
logging.basicConfig(filename=LOG_FILE, level=logging.INFO, format=_LOG_FORMAT)
# (id, display name, artifact filename) for every selectable pipeline.
# The list order is preserved as-is; ids are not sorted (id 8 comes first).
_PIPELINE_ROWS = (
    (8, 'Embedded using BioWordVec', "pipeline_ex3_s4.joblib"),
    (1, 'Baseline', "pipeline_ex1_s1.joblib"),
    (2, 'Trained on a FeedForward NN', "pipeline_ex1_s2.joblib"),
    (3, 'Trained on a CRF', "pipeline_ex1_s3.joblib"),
    (4, 'Trained on a small dataset', "pipeline_ex2_s3.joblib"),
    (5, 'Trained on a large dataset', "pipeline_ex2_s2.joblib"),
    (6, 'Embedded using TFIDF', "pipeline_ex3_s2.joblib"),
    (7, 'Embedded using GloVe', "pipeline_ex3_s3.joblib"),
)

# Full registry: one dict per pipeline with 'id', 'name' and 'filename' keys.
PIPELINES = [
    {'id': pid, 'name': label, 'filename': artifact}
    for pid, label, artifact in _PIPELINE_ROWS
]

# Subset handed to the template: same entries without the 'filename' key.
pipeline_metadata = [
    {key: entry[key] for key in ('id', 'name')} for entry in PIPELINES
]
def load_pipeline_from_hub(filename):
    """Download *filename* from the pipelines repo and deserialize it.

    Args:
        filename: Name of the artifact inside the
            ``hw01558/nlp-coursework-pipelines`` Hub repository.

    Returns:
        The object produced by ``joblib.load`` on the downloaded file.
    """
    hub_cache = "/tmp/hf_cache"
    # Set on every call; the explicit cache_dir argument below is what
    # hf_hub_download receives directly.
    os.environ["HF_HUB_CACHE"] = hub_cache
    downloaded_path = hf_hub_download(
        repo_id='hw01558/nlp-coursework-pipelines',
        filename=filename,
        cache_dir=hub_cache,
    )
    # SECURITY NOTE: joblib.load unpickles the file, which can execute
    # arbitrary code; only safe while the repository contents are trusted.
    return load(downloaded_path)
def get_pipeline_by_id(pipelines, pipeline_id):
    """Return the ``'filename'`` of the first entry whose ``'id'`` matches.

    Despite the name, this returns the artifact filename, not a loaded
    pipeline object.

    Args:
        pipelines: Iterable of dicts with ``'id'`` and ``'filename'`` keys.
        pipeline_id: Id to look for (compared with ``==``).

    Returns:
        The matching entry's filename, or ``None`` when no entry matches.
    """
    for entry in pipelines:
        if entry['id'] == pipeline_id:
            return entry['filename']
    return None
def get_name_by_id(pipelines, pipeline_id):
    """Return the display ``'name'`` of the first entry whose ``'id'`` matches.

    Args:
        pipelines: Iterable of dicts with ``'id'`` and ``'name'`` keys.
        pipeline_id: Id to look for (compared with ``==``).

    Returns:
        The matching entry's name, or ``None`` when no entry matches.
    """
    found = None
    for candidate in pipelines:
        if candidate['id'] == pipeline_id:
            found = candidate['name']
            break
    return found
def requestResults(text, pipeline):
    """Run ``pipeline.predict`` on *text* and return the first prediction.

    Args:
        text: Input passed unchanged to ``pipeline.predict``.
        pipeline: Object exposing a ``predict`` method.

    Returns:
        Element 0 of the prediction result. A NumPy array result is converted
        to nested Python lists first; any other result is indexed as-is.
    """
    predicted = pipeline.predict(text)
    as_sequence = predicted.tolist() if isinstance(predicted, np.ndarray) else predicted
    return as_sequence[0]
def log_interaction(user_input, model_name, predictions):
    """Log one usage record as a JSON string at INFO level on the root logger.

    Logging is best-effort: if the record cannot be serialized or written,
    an error line is printed and the exception is not propagated.

    Args:
        user_input: Raw text submitted by the user.
        model_name: Display name of the pipeline that produced the result.
        predictions: Prediction data to store; must be JSON-serializable for
            the record to be written.

    Returns:
        None.
    """
    # datetime.utcnow() is deprecated since Python 3.12. Take an aware UTC
    # time and drop tzinfo so the logged string keeps the same naive ISO
    # format as before (no "+00:00" suffix).
    now_utc = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)
    log_entry = {
        "timestamp": now_utc.isoformat(),
        "model": model_name,
        "user_input": user_input,
        "predictions": predictions,
    }
    try:
        logging.info(json.dumps(log_entry))
    except Exception as e:
        # Deliberately broad: a logging failure must never break the request.
        print(f"[ERROR] Could not write log entry: {e}")
    else:
        print("[INFO] Logged interaction successfully.")
def index():
    """Render ``index.html`` with the list of selectable pipelines.

    Returns:
        The rendered template, given ``pipelines=pipeline_metadata``.
    """
    # NOTE(review): no @app.route decorator is visible on this view in this
    # file; confirm how it is registered with the Flask app.
    template_context = {'pipelines': pipeline_metadata}
    return render_template('index.html', **template_context)
def get_data():
    """Tag the submitted text with the selected pipeline and render the result.

    Reads ``search`` (the text) and ``pipeline_select`` (a pipeline id) from
    the POSTed form, tokenizes the text into word and punctuation tokens,
    runs the chosen pipeline, logs the interaction and renders ``index.html``
    with the token-to-label mapping.

    Returns:
        The rendered ``index.html`` page. Non-POST requests get the plain
        page without results. An absent, non-integer or unknown
        ``pipeline_select`` yields a 400 response.
    """
    # NOTE(review): no @app.route decorator is visible on this view in this
    # file; confirm how (and for which methods) it is registered.
    if request.method != 'POST':
        # Previously this path fell through and returned None, which Flask
        # rejects as an invalid view response.
        return render_template('index.html', pipelines=pipeline_metadata)

    text = request.form['search']

    # type=int makes a missing or non-numeric value come back as None
    # instead of raising ValueError inside the view.
    pipeline_id = request.form.get('pipeline_select', type=int)
    filename = get_pipeline_by_id(PIPELINES, pipeline_id)
    if filename is None:
        return "Unknown pipeline selection.", 400
    name = get_name_by_id(PIPELINES, pipeline_id)

    # Words (\w+) and individual punctuation marks become separate tokens.
    tokens = re.findall(r"\w+|[^\w\s]", text)
    if not tokens:
        # Blank or whitespace-only input: nothing to tag, so skip the model
        # download and inference entirely.
        return render_template('index.html', results={}, name=name, pipelines=pipeline_metadata)

    # The token sequence is wrapped as a single-element Series of Series
    # before being handed to the pipeline.
    tokens_formatted = pd.Series([pd.Series(tokens)])
    pipeline = load_pipeline_from_hub(filename)
    labels = requestResults(tokens_formatted, pipeline)

    # NOTE(review): a dict keeps one entry per distinct token, so a token that
    # occurs several times in the input appears once, with the label of its
    # last occurrence. Kept as a dict because the template's expectations are
    # not visible here; confirm whether a list of pairs is wanted instead.
    results = dict(zip(tokens, labels))
    log_interaction(text, name, results)
    return render_template('index.html', results=results, name=name, pipelines=pipeline_metadata)
if __name__ == '__main__':
    # Run the Flask server only when executed as a script: listen on all
    # interfaces, port 7860.
    app.run(port=7860, host="0.0.0.0")