# Spaces: Runtime error
import gradio as gr | |
import argparse | |
import json | |
import logging | |
import os | |
import sys | |
import pathlib | |
import random | |
import shutil | |
import time | |
from typing import Any, Dict, List, Union | |
import numpy as np | |
import pandas as pd | |
from sklearn.feature_extraction.text import (CountVectorizer, TfidfTransformer, HashingVectorizer, | |
TfidfVectorizer) | |
from sklearn.linear_model import LogisticRegression | |
from sklearn.metrics import f1_score | |
from sklearn.model_selection import train_test_split | |
from tqdm import tqdm | |
from lr.hyperparameters import SEARCH_SPACE, RandomSearch, HyperparameterSearch | |
from shutil import rmtree | |
def load_model(serialization_dir):
    """Rebuild the logistic-regression classifier and text vectorizer from disk.

    Reads ``best_hyperparameters.json`` (and, unless the weighting scheme is
    ``hash``, ``vocab.json``) from ``serialization_dir``, and the arrays
    ``coef.npy``, ``intercept.npy`` and ``classes.npy`` (plus ``idf.npy`` when
    it exists) from its ``archive`` subdirectory.

    Args:
        serialization_dir: Directory containing the serialized model files.

    Returns:
        A ``(classifier, vectorizer)`` tuple. The classifier is a
        ``LogisticRegression`` built from the remaining hyperparameters with
        ``coef_``, ``intercept_`` and ``classes_`` assigned from the archive.
        The vectorizer is a ``TfidfVectorizer``, ``HashingVectorizer`` or
        ``CountVectorizer`` depending on the ``weight`` hyperparameter.

    Raises:
        FileNotFoundError: If a required JSON or ``.npy`` file is missing.
        KeyError: If ``stopwords``, ``weight``, ``ngram_range``, ``C`` or
            ``tol`` is absent from the hyperparameter file.
        ValueError: If ``ngram_range``, ``C`` or ``tol`` cannot be parsed as
            numbers.
    """
    hyperparameters_path = os.path.join(serialization_dir, "best_hyperparameters.json")
    with open(hyperparameters_path, 'r', encoding="utf-8") as f:
        hyperparameters = json.load(f)

    # The next three keys configure the vectorizer. They are popped so that
    # only LogisticRegression keyword arguments remain in the dict.
    stop_words = 'english' if hyperparameters.pop('stopwords') == 1 else None
    weight = hyperparameters.pop('weight')
    # ngram_range is stored as a whitespace-separated string of integers.
    # scikit-learn documents the parameter as a (min_n, max_n) tuple, so a
    # tuple is built here rather than a list.
    ngram_range = tuple(sorted(int(x) for x in hyperparameters.pop('ngram_range').split()))

    if weight == 'tf-idf':
        vect = TfidfVectorizer(stop_words=stop_words,
                               lowercase=True,
                               ngram_range=ngram_range)
    elif weight == 'hash':
        vect = HashingVectorizer(stop_words=stop_words,
                                 lowercase=True,
                                 ngram_range=ngram_range)
    else:
        # Any other weight falls back to raw counts; 'binary' switches the
        # counts to 0/1 indicators.
        vect = CountVectorizer(binary=(weight == 'binary'),
                               stop_words=stop_words,
                               lowercase=True,
                               ngram_range=ngram_range)

    # The hashing vectorizer gets no stored vocabulary; the others have theirs
    # restored from vocab.json.
    if weight != "hash":
        with open(os.path.join(serialization_dir, "vocab.json"), 'r', encoding="utf-8") as f:
            vect.vocabulary_ = json.load(f)

    hyperparameters['C'] = float(hyperparameters['C'])
    hyperparameters['tol'] = float(hyperparameters['tol'])
    classifier = LogisticRegression(**hyperparameters)

    archive_dir = os.path.join(serialization_dir, "archive")
    idf_path = os.path.join(archive_dir, "idf.npy")
    # idf weights are optional: they are restored only when the file exists.
    if os.path.exists(idf_path):
        vect.idf_ = np.load(idf_path)

    # Assign the fitted attributes directly instead of calling fit().
    classifier.coef_ = np.load(os.path.join(archive_dir, "coef.npy"))
    classifier.intercept_ = np.load(os.path.join(archive_dir, "intercept.npy"))
    classifier.classes_ = np.load(os.path.join(archive_dir, "classes.npy"))
    return classifier, vect
def score(x, clf, vectorizer):
    """Return the classifier's probability output for a single document.

    Args:
        x: The document to score. It is wrapped in a one-element list before
            being passed to the vectorizer.
        clf: Object exposing ``predict_proba``.
        vectorizer: Object exposing ``transform``.

    Returns:
        Whatever ``clf.predict_proba`` returns for the one-document batch.
    """
    # The vectorizer works on batches, so the single document becomes a
    # batch of one.
    features = vectorizer.transform([x])
    return clf.predict_proba(features)
# Load the classifier and vectorizer once at import time from the relative
# "model/" directory; the functions below read them as module-level globals.
clf, vectorizer = load_model("model/")
def start(text):
    """Score ``text`` with the module-level model and return a labelled result.

    Args:
        text: The document to score.

    Returns:
        A one-entry dict mapping ``"GPT-3 Filter Quality Score"`` to the
        probability at row 0, column 1 of the ``score`` output, rounded to
        two decimal places.
    """
    # Row 0 is the only document in the batch. Column 1 is the probability of
    # the second entry in the classifier's classes_.
    # NOTE(review): presumably that is the "high quality" class; confirm
    # against classes.npy.
    k = round(score(text, clf, vectorizer)[0][1], 2)
    return {"GPT-3 Filter Quality Score": k}