from flask import Flask, request, jsonify
from sentence_transformers import CrossEncoder
import py_vncorenlp  # Vietnamese word segmentation (the original referenced rdrsegmenter without defining it)

app = Flask(__name__)

# Load your cross-encoder model
model_name = "truong1301/reranker_pho_BLAI"  # Replace with your actual model if different
cross_encoder = CrossEncoder(model_name, max_length=256, num_labels=1)

# Load the VnCoreNLP word segmenter used by preprocess_text below.
# Assumption: py_vncorenlp is one common way to obtain rdrsegmenter; adjust
# save_dir to wherever the VnCoreNLP model should live (an absolute path).
py_vncorenlp.download_model(save_dir="/app/vncorenlp")
rdrsegmenter = py_vncorenlp.VnCoreNLP(annotators=["wseg"], save_dir="/app/vncorenlp")

# Function to preprocess text with Vietnamese word segmentation
def preprocess_text(text):
    if not text:
        return text
    # word_segment returns one segmented string per sentence; join them into a single string
    segmented_sentences = rdrsegmenter.word_segment(text)
    return " ".join(segmented_sentences)

@app.route("/rerank", methods=["POST"])
def rerank():
    try:
        # Get JSON data from the request (query and list of documents)
        data = request.get_json()
        query = data.get("query", "")
        documents = data.get("documents", [])

        if not query or not documents:
            return jsonify({"error": "Missing query or documents"}), 400

        # Word-segment the query and documents before scoring
        # (PhoBERT-based models expect word-segmented input)
        segmented_query = preprocess_text(query)
        segmented_documents = [preprocess_text(doc) for doc in documents]

        # Create pairs of query and documents for reranking
        query_doc_pairs = [(segmented_query, doc) for doc in segmented_documents]

        # Get reranking scores from the cross-encoder
        scores = cross_encoder.predict(query_doc_pairs).tolist()

        # Combine the original documents with their scores and sort by descending score
        ranked_results = sorted(
            [{"document": doc, "score": score} for doc, score in zip(documents, scores)],
            key=lambda x: x["score"],
            reverse=True,
        )

        return jsonify({"results": ranked_results})
    except Exception as e:
        return jsonify({"error": str(e)}), 500

@app.route("/", methods=["GET"])
def health_check():
    return jsonify({"status": "Server is running"}), 200

if __name__ == "__main__":
    app.run(host="0.0.0.0", port=7860)  # Default port for Hugging Face Spaces
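
# Example client call against the /rerank endpoint defined above.
# A minimal sketch, assuming the server is running locally on port 7860;
# the query and documents below are illustrative only.
#
#   import requests
#
#   resp = requests.post(
#       "http://localhost:7860/rerank",
#       json={
#           "query": "Tỉnh nào có diện tích lớn nhất Việt Nam?",
#           "documents": [
#               "Nghệ An là tỉnh có diện tích lớn nhất Việt Nam.",
#               "Hà Nội là thủ đô của Việt Nam.",
#           ],
#       },
#   )
#   print(resp.json())  # {"results": [{"document": ..., "score": ...}, ...]} sorted by score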