"""Flask front-end for an active-learning annotation loop.

Workflow:
1. ``/`` (``upload``): the user uploads a CSV of unlabeled rows, a CSV with a
   ``label`` column listing the allowed labels, and the number of rows to
   annotate per round.
2. ``/annotate``: the user labels the current batch; on submit, the next
   batch is chosen from the remaining pool by ``get_uncertain_sample``.

All state lives in module-level globals, so the app holds exactly one
annotation session per process.
"""

import csv
import io

import numpy as np
import pandas as pd
from datasets import Dataset, concatenate_datasets, load_dataset
from flask import (
    Flask,
    abort,
    make_response,
    redirect,
    render_template,
    request,
    send_file,
    url_for,
)

from active_learning import get_initial_sample, get_uncertain_sample

app = Flask(__name__)

# Global variables to store data and labels
uploaded_data = None          # batch currently shown for annotation (column dict)
labels = []                   # allowed label values read from the labels CSV
al_step = 0                   # number of completed annotation rounds
data_amount = 100             # legacy constant; no longer used for index math
dataset_labeled_prev = None   # every row labeled in earlier rounds (Dataset)
dataset_unlabeled = None      # rows not yet shown to the annotator (Dataset)
num_query = None              # rows requested per round, set on upload


@app.route("/", methods=["GET", "POST"])
def upload():
    """Render the upload form (GET) or start a new annotation session (POST).

    On POST the form field ``query_number`` sets how many rows are sampled
    per round.  If ``data_file`` is supplied it is read as CSV, an initial
    batch is drawn with ``get_initial_sample`` and the rest becomes the
    unlabeled pool.  If ``labels_file`` is supplied, its ``label`` column
    replaces the list of allowed labels.  The client is then redirected to
    the ``annotate`` view.

    Responds with HTTP 400 when ``query_number`` is missing, not an integer,
    or less than 1.
    """
    global uploaded_data, labels, dataset_unlabeled, al_step, num_query
    global dataset_labeled_prev

    if request.method != "POST":
        return render_template("upload.html")

    # Validate before touching any global so a bad request leaves the
    # running session untouched.
    try:
        requested = int(request.form.get("query_number", ""))
    except ValueError:
        abort(400, description="query_number must be an integer")
    if requested < 1:
        abort(400, description="query_number must be at least 1")
    num_query = requested

    # Upload the data file
    data_file = request.files.get("data_file")
    if data_file:
        pool = Dataset.from_pandas(pd.read_csv(data_file))

        # get_initial_sample()
        picked = get_initial_sample(pool, num_query)
        remaining = np.delete(np.arange(len(pool)), picked)

        # Indexing a Dataset with a sequence of row indices yields a plain
        # dict of columns; the current batch is kept in that form and the
        # pool is wrapped back into a Dataset.
        uploaded_data = pool[picked]
        dataset_unlabeled = Dataset.from_dict(pool[remaining])

        # A new data file starts a new session: discard progress from any
        # previous one so stale labeled rows are never concatenated in.
        al_step = 0
        dataset_labeled_prev = None

    # Upload the labels file
    labels_file = request.files.get("labels_file")
    if labels_file:
        labels = pd.read_csv(labels_file)["label"].tolist()

    return redirect(url_for("annotate"))


@app.route("/annotate", methods=["GET", "POST"])
def annotate():
    """Show the current batch (GET) or record labels and advance (POST).

    On POST the submitted ``annotations`` values are attached to the current
    batch as its ``label`` column, merged with previously labeled rows, and
    passed with the unlabeled pool to ``get_uncertain_sample`` to pick the
    next batch.  The picked rows are removed from the pool.

    Redirects to the upload page when no data has been uploaded yet.
    Responds with HTTP 400 when the number of submitted annotations does not
    match the number of rows in the current batch.
    """
    global uploaded_data, labels, dataset_unlabeled, al_step
    global dataset_labeled_prev, num_query

    # Without an upload there is nothing to annotate (and the globals below
    # would be unusable), so send the user back to the start.
    if uploaded_data is None or dataset_unlabeled is None:
        return redirect(url_for("upload"))

    al_process = "Run get_initial_sample()..."

    if request.method == "POST":
        annotated_data = request.form.getlist("annotations")
        df = pd.DataFrame(uploaded_data)
        if len(annotated_data) != len(df):
            abort(
                400,
                description=(
                    f"expected {len(df)} annotations, "
                    f"got {len(annotated_data)}"
                ),
            )

        # Only count the round once the submission is known to be valid.
        al_step += 1
        al_process = "Run get_uncertain_sample()..."

        df["label"] = annotated_data
        dataset_labeled = Dataset.from_pandas(df)
        print(f"AL STEP #{al_step}")
        print(dataset_labeled)

        if dataset_labeled_prev is not None:
            dataset_labeled = concatenate_datasets(
                [dataset_labeled_prev, dataset_labeled]
            )

        # get_uncertain_sample()
        # NOTE(review): behavior with an exhausted (empty) pool depends on
        # get_uncertain_sample, which is not visible here - confirm.
        uncertain_samples = get_uncertain_sample(
            dataset_labeled, dataset_unlabeled, num_query
        )
        dataset_labeled_prev = dataset_labeled

        # Derive the index range from the pool's real size rather than an
        # assumed total row count, so datasets of any length work.
        remaining = np.delete(
            np.arange(len(dataset_unlabeled)), uncertain_samples
        )
        uploaded_data = dataset_unlabeled[uncertain_samples]
        dataset_unlabeled = Dataset.from_dict(dataset_unlabeled[remaining])

    return render_template(
        "annotate.html",
        data=uploaded_data,
        labels=labels,
        al_step=al_step,
        n_unlabeled=len(dataset_unlabeled),
        al_process=al_process,
    )


if __name__ == "__main__":
    app.run(host="0.0.0.0", port=7860)