from flask import Flask, request, jsonify
from sentence_transformers import CrossEncoder
import py_vncorenlp  # assumed dependency providing the rdrsegmenter used below

app = Flask(__name__)

# Load your cross-encoder model
model_name = "truong1301/reranker_pho_BLAI"  # Replace with your actual model if different
cross_encoder = CrossEncoder(model_name, max_length=256, num_labels=1)

# Load the VnCoreNLP word segmenter (assumes the model files were downloaded
# beforehand, e.g. with py_vncorenlp.download_model(save_dir="/app/vncorenlp"))
rdrsegmenter = py_vncorenlp.VnCoreNLP(annotators=["wseg"], save_dir="/app/vncorenlp")
# Function to preprocess text with Vietnamese word segmentation
def preprocess_text(text):
    if not text:
        return text
    # word_segment returns a list of word-segmented sentences (one string per sentence)
    segmented_text = rdrsegmenter.word_segment(text)
    # Join the segmented sentences back into a single string
    return " ".join(segmented_text)
@app.route("/rerank", methods=["POST"])
def rerank():
    try:
        # Get JSON data from the request (query and list of documents)
        data = request.get_json()
        query = data.get("query", "")
        documents = data.get("documents", [])

        if not query or not documents:
            return jsonify({"error": "Missing query or documents"}), 400

        # Apply Vietnamese word segmentation before scoring
        segmented_query = preprocess_text(query)
        segmented_docs = [preprocess_text(doc) for doc in documents]

        # Create pairs of query and documents for reranking
        query_doc_pairs = [(segmented_query, doc) for doc in segmented_docs]

        # Get reranking scores from the cross-encoder
        scores = cross_encoder.predict(query_doc_pairs).tolist()

        # Combine the original documents with their scores and sort by score
        ranked_results = sorted(
            [{"document": doc, "score": score} for doc, score in zip(documents, scores)],
            key=lambda x: x["score"],
            reverse=True
        )

        return jsonify({"results": ranked_results})
    except Exception as e:
        return jsonify({"error": str(e)}), 500
@app.route("/", methods=["GET"])
def health_check():
    return jsonify({"status": "Server is running"}), 200
if __name__ == "__main__":
    app.run(host="0.0.0.0", port=7860)  # Default port for Hugging Face Spaces
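
For reference, a client call to the reranking endpoint could look like the minimal sketch below. Assumptions: the server is reachable at http://localhost:7860, the route is /rerank as defined above, and the query/documents values are made-up Vietnamese examples.

import requests

# Hypothetical client call; adjust the URL to your Space's public endpoint
response = requests.post(
    "http://localhost:7860/rerank",
    json={
        "query": "trường đại học ở Hà Nội",
        "documents": [
            "Đại học Bách khoa Hà Nội là trường kỹ thuật hàng đầu Việt Nam.",
            "Phở là một món ăn truyền thống của Việt Nam.",
        ],
    },
)
print(response.json())  # {"results": [{"document": ..., "score": ...}, ...]}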