import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# Load model and tokenizer once at module import so every request reuses them.
tokenizer = AutoTokenizer.from_pretrained("BAAI/bge-reranker-v2-m3")
model = AutoModelForSequenceClassification.from_pretrained("BAAI/bge-reranker-v2-m3")
model.eval()  # inference only — disable dropout so scores are deterministic


def rerank(query, docs_text):
    """Score each document against the query and return them ranked by relevance.

    Args:
        query: The search query string.
        docs_text: Newline-separated documents; blank lines are ignored.

    Returns:
        A JSON-serializable list of ``{"score": float, "document": str}``
        dicts, sorted by descending relevance score (rounded to 4 decimals).
    """
    # Drop blank/whitespace-only lines so the model never scores empty documents.
    docs = [line.strip() for line in docs_text.splitlines() if line.strip()]
    if not docs:
        # Nothing to rank — avoid passing an empty batch to the tokenizer.
        return []

    pairs = [(query, doc) for doc in docs]
    inputs = tokenizer(pairs, padding=True, truncation=True, return_tensors="pt")
    with torch.no_grad():
        # Cross-encoder head emits one logit per (query, doc) pair.
        scores = model(**inputs).logits.squeeze(-1)

    results = sorted(zip(docs, scores.tolist()), key=lambda x: x[1], reverse=True)
    # Return structured JSON array
    return [{"score": round(score, 4), "document": doc} for doc, score in results]


# Create API-ready Interface
iface = gr.Interface(
    fn=rerank,
    inputs=[
        gr.Textbox(label="Query", lines=1),
        gr.Textbox(label="Documents (one per line)", lines=10),
    ],
    outputs="json",
    title="BGE Reranker v2 M3",
    description="Rerank a list of documents based on a search query using BGE v2 M3.",
)

# Do NOT use `share=True`, do NOT set `ssr_mode`
iface.launch()