Spaces:

sponsored-bye
/

sponsoredbye

Running

sponsoredbye / functions /model_infer.py

Add new model version

01c2377 6 months ago

1.46 kB

	from keras.preprocessing.sequence import pad_sequences
	import numpy as np
	import re

	# import tensorflow as tf
	import os
	import requests
	from keras.models import load_model

	# Authorization header sent with every Hugging Face Inference API request
	# made by query_embeddings. The token is read from the HF_Token environment
	# variable at import time; a missing variable raises KeyError right here.
	headers = {"Authorization": f"Bearer {os.environ['HF_Token']}"}

	# Load the Keras model once, at import time, from a path relative to the
	# current working directory; predict_from_document reuses this instance.
	model = load_model("./model.keras")


	def query_embeddings(texts):
	    """Fetch embeddings for ``texts`` from the Hugging Face Inference API.

	    Posts ``texts`` to the feature-extraction pipeline of the
	    ``sentence-transformers/sentence-t5-base`` model, authenticating with the
	    module-level ``headers``, and returns the decoded JSON body.

	    Args:
	        texts: The input(s) to embed, forwarded unchanged as the ``inputs``
	            field of the request payload.

	    Returns:
	        The parsed JSON response (presumably one embedding vector per input
	        text -- confirm against the API documentation).

	    Raises:
	        requests.HTTPError: If the API answers with a 4xx/5xx status.
	        requests.Timeout: If connecting or reading exceeds the timeout.
	    """
	    model_id = "sentence-transformers/sentence-t5-base"
	    api_url = (
	        f"https://api-inference.huggingface.co/pipeline/feature-extraction/{model_id}"
	    )
	    # "wait_for_model" asks the API to hold the request until the model is
	    # loaded, so the read timeout below is deliberately generous.
	    payload = {"inputs": texts, "options": {"wait_for_model": True}}

	    # Without a timeout, requests may block forever on a stalled connection.
	    response = requests.post(
	        api_url,
	        headers=headers,
	        json=payload,
	        timeout=(10, 300),  # (connect, read) in seconds
	    )
	    # Fail loudly on an error status instead of handing an error payload to
	    # callers that expect embeddings.
	    response.raise_for_status()
	    return response.json()


	def preprocess(sentences):
	    """Embed ``sentences`` and pack them into one fixed-length model input.

	    The embeddings returned by ``query_embeddings`` are limited to the first
	    1682 entries when the document has more sentences than that, then passed
	    to ``pad_sequences`` as a single sequence padded at the end ("post") to
	    length 1682 with dtype ``float32``.

	    Args:
	        sentences: The document's sentences, in order.

	    Returns:
	        The array produced by ``pad_sequences`` for the single document.
	    """
	    max_len = 1682
	    vectors = query_embeddings(sentences)

	    # Cut over-long documents explicitly so the leading sentences are kept;
	    # pad_sequences on its own would truncate from the front by default.
	    clipped = vectors[:max_len] if len(sentences) > max_len else vectors

	    return pad_sequences([clipped], maxlen=max_len, dtype="float32", padding="post")


	def predict_from_document(sentences):
	    """Score every sentence of a document and turn the scores into 0/1 labels.

	    The threshold adapts to the model output: when the mean of the whole
	    prediction array is below 0.5 a fixed cut-off of 0.5 is used, otherwise
	    the cut-off is 1.20 times that mean. Note that the mean is taken over the
	    entire model output, not only the first ``len(sentences)`` scores.

	    Args:
	        sentences: The document's sentences, in order.

	    Returns:
	        A tuple ``(labels, scores)``: ``labels`` is an int array with 1 where
	        the score reaches the threshold, and ``scores`` is the flattened
	        prediction cut to the first ``len(sentences)`` values.
	    """
	    model_input = preprocess(sentences)
	    prediction = model.predict(model_input)

	    # Only the leading entries correspond to real sentences; the rest of the
	    # flattened output belongs to padding positions.
	    scores = prediction.flatten()[: len(sentences)]

	    mean_score = np.mean(prediction)
	    threshold = 0.5 if mean_score < 0.5 else mean_score * 1.20

	    labels = (scores >= threshold).astype(int)
	    return labels, scores