# reranking / app.py
# ICTuniverse's picture
# Update app.py
# 32563c3 verified
from flask import Flask, request, jsonify
from sentence_transformers import CrossEncoder
# Application object; the route decorators further down register on it.
app = Flask(__name__)

# Identifier of the cross-encoder checkpoint used for reranking.
# Replace with your actual model if different.
model_name = "truong1301/reranker_pho_BLAI"

# Loaded once at import time so every request reuses the same model instance.
cross_encoder = CrossEncoder(
    model_name,
    num_labels=1,
    max_length=256,
)
def preprocess_text(text):
    """Word-segment Vietnamese text and return it as one space-joined string.

    Falsy input (``None`` or ``""``) is returned unchanged. Otherwise the text
    is passed to the module-level ``rdrsegmenter.word_segment`` and the
    per-sentence results are joined with single spaces.

    NOTE(review): ``rdrsegmenter`` is not defined in the visible part of this
    file. It must exist at module level (presumably a VnCoreNLP word
    segmenter -- confirm) before this function is called with non-empty text;
    otherwise a NameError is raised.

    Args:
        text: The raw text to segment, or a falsy value.

    Returns:
        The segmented text as a single string, or ``text`` itself when falsy.
    """
    if not text:
        return text

    sentences = rdrsegmenter.word_segment(text)

    # A segmenter may yield each sentence either as an already-joined string
    # or as a sequence of tokens. Joining a plain string with " " would insert
    # a space between every character, so only token sequences are joined.
    return " ".join(
        sentence if isinstance(sentence, str) else " ".join(sentence)
        for sentence in sentences
    )
@app.route("/rerank", methods=["POST"])
def rerank():
    """Score each document against the query and return them best-first.

    Expects a JSON object body with:
      - "query": a non-empty string
      - "documents": a non-empty list of strings

    Returns:
        200 with {"results": [{"document": ..., "score": ...}, ...]} sorted by
        descending score (ties keep their input order);
        400 with {"error": ...} when the payload is missing or malformed;
        500 with {"error": ...} when scoring itself fails.
    """
    # silent=True yields None instead of raising on a wrong Content-Type or
    # malformed JSON, so bad client input is reported as 400 rather than being
    # caught by a generic handler and surfaced as a 500.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({"error": "Missing query or documents"}), 400

    query = data.get("query", "")
    documents = data.get("documents", [])
    if not query or not documents:
        return jsonify({"error": "Missing query or documents"}), 400

    # Reject wrong types up front; e.g. a string for "documents" would
    # otherwise be iterated character by character and scored as such.
    well_typed = (
        isinstance(query, str)
        and isinstance(documents, list)
        and all(isinstance(doc, str) for doc in documents)
    )
    if not well_typed:
        return jsonify(
            {"error": "query must be a string and documents a list of strings"}
        ), 400

    # Keep the try body limited to the model call, the only step expected to
    # fail for reasons unrelated to request validation.
    try:
        query_doc_pairs = [(query, doc) for doc in documents]
        scores = cross_encoder.predict(query_doc_pairs).tolist()
    except Exception as exc:  # request boundary: report the failure as JSON
        app.logger.exception("Reranking failed")
        return jsonify({"error": str(exc)}), 500

    ranked_results = sorted(
        ({"document": doc, "score": score} for doc, score in zip(documents, scores)),
        key=lambda item: item["score"],
        reverse=True,
    )
    return jsonify({"results": ranked_results})
@app.route("/", methods=["GET"])
def health_check():
    """Liveness endpoint: reply with a fixed JSON status and HTTP 200."""
    status_payload = {"status": "Server is running"}
    return jsonify(status_payload), 200
if __name__ == "__main__":
    # Bind to all interfaces; 7860 is the default port for Hugging Face Spaces.
    app.run(port=7860, host="0.0.0.0")