Mister2005 committed on
Commit
ce7bedc
·
verified ·
1 Parent(s): a577335

Upload 3 files

Browse files
Files changed (3) hide show
  1. Dockerfile +24 -0
  2. app.py +77 -0
  3. requirements.txt +5 -0
Dockerfile ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Container image for the cross-encoder reranking API (FastAPI served by uvicorn).
FROM python:3.10-slim

# Install dependencies first so this layer stays cached until
# requirements.txt changes.
WORKDIR /app
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Create a non-root user (good practice for HF Spaces)
RUN useradd -m -u 1000 user
USER user
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH

# The application runs from the user's home directory. The build context
# (including app.py) is copied here once, owned by the non-root user; no
# separate root-owned copy under /app is needed.
WORKDIR $HOME/app
COPY --chown=user . $HOME/app

# Port uvicorn listens on (see CMD).
EXPOSE 7860

CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
app.py ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, HTTPException
2
+ from pydantic import BaseModel
3
+ from sentence_transformers import CrossEncoder
4
+ from typing import List, Dict, Any, Union
5
+ import uvicorn
6
+ import os
7
+
8
# ASGI application object: the route decorators in this module register on it.
app = FastAPI(
    version="1.0.0",
    title="Cross-Encoder Reranking API",
    description="Reranking service using cross-encoder/ms-marco-MiniLM-L-6-v2",
)
13
+
14
# The model identifier can be overridden with the MODEL_NAME environment
# variable. The model is loaded a single time, when this module is imported.
# If loading fails the error is printed and `model` is left as None, so the
# module still imports and the endpoints can report the failure.
MODEL_NAME = os.environ.get("MODEL_NAME", "cross-encoder/ms-marco-MiniLM-L-6-v2")
try:
    model = CrossEncoder(MODEL_NAME)
    print(f"Loaded CrossEncoder model: {MODEL_NAME}")
except Exception as load_error:
    print(f"Error loading model: {load_error}")
    model = None
22
+
23
class RerankRequest(BaseModel):
    # Request body accepted by POST /rerank.

    # Query text that every document is scored against.
    query: str
    # Document texts to rerank, in the caller's order.
    documents: List[str]
26
+
27
class RerankResponse(BaseModel):
    # Response body returned by POST /rerank.

    # One score per submitted document, in the order the documents were sent.
    scores: List[float]
    # Positions of the submitted documents, highest score first.
    ranked_indices: List[int]
30
+
31
@app.get("/")
def root():
    """Describe the service: its name, the configured model and its status.

    The "status" field is "active" when the model was loaded at startup and
    "error" otherwise.
    """
    # Explicit None check, consistent with /health: a failed load leaves
    # `model` as None, so object truthiness should not decide the status.
    status = "active" if model is not None else "error"
    return {
        "message": "Cross-Encoder Reranking API",
        "model": MODEL_NAME,
        "status": status,
    }
38
+
39
@app.get("/health")
def health_check():
    # Health probe: "status" is always "healthy"; "model_loaded" reports
    # whether the module-level model is available (i.e. is not None).
    payload = {"status": "healthy"}
    payload["model_loaded"] = model is not None
    return payload
42
+
43
@app.post("/rerank", response_model=RerankResponse)
def rerank_documents(request: RerankRequest):
    """Score each document against the query and rank the documents.

    The response's `scores` holds one score per document, in the order the
    documents were sent. `ranked_indices` lists the document positions from
    the highest score to the lowest. An empty document list yields empty
    `scores` and `ranked_indices`.

    Responds with 503 when the model is not loaded and with 500 (carrying
    the error message as detail) when scoring fails.
    """
    # Explicit None check, consistent with /health: a failed load leaves
    # `model` as None.
    if model is None:
        raise HTTPException(status_code=503, detail="Model not loaded")

    # Nothing to score: answer without calling the model.
    if not request.documents:
        return RerankResponse(scores=[], ranked_indices=[])

    try:
        # One [query, document] pair per document, scored in a single call.
        pairs = [[request.query, doc] for doc in request.documents]
        scores = model.predict(pairs)

        # Convert numpy floats to plain Python floats when the result offers
        # tolist(); otherwise fall back to a plain list.
        scores_list = scores.tolist() if hasattr(scores, "tolist") else list(scores)

        # Document positions ordered by descending score. The sort is stable,
        # so equal scores keep their input order.
        ranked_indices = sorted(
            range(len(scores_list)),
            key=scores_list.__getitem__,
            reverse=True,
        )

        return RerankResponse(scores=scores_list, ranked_indices=ranked_indices)
    except Exception as e:
        # Request boundary: report any scoring failure as a 500 with the
        # message as detail, keeping the original exception as the cause.
        raise HTTPException(status_code=500, detail=str(e)) from e
75
+
76
# Direct execution (`python app.py`): serve the app on all interfaces, port 7860.
if __name__ == "__main__":
    uvicorn.run(app, port=7860, host="0.0.0.0")
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
# CPU-only PyTorch wheels are published on the PyTorch package index.
# Index options apply to the whole requirements file and belong on their own
# line, not after a package name. Using an *extra* index keeps PyPI available
# for the remaining packages.
--extra-index-url https://download.pytorch.org/whl/cpu
fastapi
uvicorn
sentence-transformers
pydantic
torch