# Source: al-tools / app.py
# Last commit by Saripudin: "Update app.py" (94ed3a0)
from flask import Flask, render_template, request, redirect, url_for, send_file, make_response
import pandas as pd
import io
import csv
import numpy as np
from active_learning import get_initial_sample, get_uncertain_sample
from datasets import Dataset, concatenate_datasets, load_dataset
# Flask application object; the route decorators below register views on it.
app = Flask(__name__)
# Global variables to store data and labels.
# NOTE: all session state lives in these module-level globals, so it is shared
# by every client that talks to this process.
# Batch of rows currently presented for annotation (None until an upload).
uploaded_data = None
# Values of the "label" column of the uploaded labels CSV.
labels = []
# Number of annotation rounds submitted so far (incremented per POST to /annotate).
al_step = 0
# Total number of rows assumed for the uploaded dataset; 100 is hard-coded.
data_amount = 100
# Labeled rows accumulated over earlier annotation rounds (None before the first).
dataset_labeled_prev = None
# `num_query` (read from the 'query_number' form field) and `dataset_unlabeled`
# are not initialized here; upload() creates them as globals.
@app.route("/", methods=["GET", "POST"])
def upload():
    """Render the upload form and, on POST, start an active-learning session.

    On POST the view reads the ``query_number`` form field and the
    ``data_file`` / ``labels_file`` CSV uploads. When a data file is given it
    is loaded into a ``Dataset``, ``get_initial_sample`` selects the first
    batch, and the batch plus the remaining pool are stored in module
    globals. The step counter and the accumulated labeled set are reset so
    that a new upload does not continue a previous session.

    Returns:
        The rendered ``upload.html`` page (with status 400 when the
        submission cannot be used), or a redirect to the ``annotate`` view.
    """
    global uploaded_data, labels, dataset_unlabeled, al_step, num_query
    global data_amount, dataset_labeled_prev

    if request.method != "POST":
        return render_template("upload.html")

    # `type=int` returns None instead of raising when the field is missing
    # or is not a valid integer.
    requested = request.form.get("query_number", type=int)
    if requested is None or requested < 1:
        return render_template(
            "upload.html",
            error="query_number must be a positive integer",
        ), 400

    data_file = request.files.get("data_file")
    if not data_file and uploaded_data is None:
        # Without any data the annotate view has nothing to show.
        return render_template(
            "upload.html",
            error="a data file is required",
        ), 400

    num_query = requested

    # Upload the data file
    if data_file:
        df = pd.read_csv(data_file)
        unlabeled_data = Dataset.from_pandas(df)

        # get_initial_sample(): its result is used as row indices below
        uncertain_samples = get_initial_sample(unlabeled_data, num_query)
        all_list = np.arange(0, len(unlabeled_data))
        unlabeled_list = np.delete(all_list, uncertain_samples)

        uploaded_data = unlabeled_data[uncertain_samples]
        dataset_unlabeled = Dataset.from_dict(unlabeled_data[unlabeled_list])

        # A fresh dataset starts a fresh session.
        data_amount = len(unlabeled_data)
        al_step = 0
        dataset_labeled_prev = None

    # Upload the labels file
    labels_file = request.files.get("labels_file")
    if labels_file:
        df_labels = pd.read_csv(labels_file)
        labels = df_labels["label"].tolist()

    return redirect(url_for("annotate"))
@app.route("/annotate", methods=["GET", "POST"])
def annotate():
    """Show the current batch and, on POST, record labels and pick the next one.

    On POST the submitted ``annotations`` values become the ``label`` column
    of the current batch, the batch is appended to the labeled rows kept from
    earlier rounds, and ``get_uncertain_sample`` chooses the next batch from
    the unlabeled pool. The chosen rows are removed from the pool.

    Returns:
        The rendered ``annotate.html`` page, or a redirect to the upload
        form when no dataset has been uploaded yet.
    """
    global uploaded_data, labels, dataset_unlabeled, al_step, dataset_labeled_prev, num_query

    # Nothing to annotate until upload() has stored a batch.
    if uploaded_data is None:
        return redirect(url_for("upload"))

    al_process = "Run get_initial_sample()..."

    if request.method == "POST":
        al_step += 1
        al_process = "Run get_uncertain_sample()..."

        annotated_data = request.form.getlist("annotations")
        df = pd.DataFrame(uploaded_data)
        df["label"] = annotated_data
        dataset_labeled = Dataset.from_pandas(df)
        print(f"AL STEP #{al_step}")
        print(dataset_labeled)

        # Guard on the accumulated set itself rather than on the step
        # counter, so a round that failed midway cannot lead to
        # concatenating with None.
        if dataset_labeled_prev is not None:
            dataset_labeled = concatenate_datasets([dataset_labeled_prev, dataset_labeled])

        # get_uncertain_sample(): its result is used as row indices into the pool
        uncertain_samples = get_uncertain_sample(dataset_labeled, dataset_unlabeled, num_query)
        dataset_labeled_prev = dataset_labeled

        # Index over the pool's actual size instead of deriving it from a
        # hard-coded dataset size and the step counter.
        unlabeled_list = np.arange(0, len(dataset_unlabeled))
        unlabeled_list = np.delete(unlabeled_list, uncertain_samples)

        dataset_labeled_next = dataset_unlabeled[uncertain_samples]
        dataset_unlabeled = Dataset.from_dict(dataset_unlabeled[unlabeled_list])
        uploaded_data = dataset_labeled_next

    return render_template(
        "annotate.html",
        data=uploaded_data,
        labels=labels,
        al_step=al_step,
        n_unlabeled=len(dataset_unlabeled),
        al_process=al_process,
    )
# Start the Flask development server when this file is run as a script;
# it listens on all interfaces at port 7860.
if __name__ == "__main__":
    app.run(port=7860, host="0.0.0.0")