mishig HF staff committed on
Commit
510fde2
•
1 Parent(s): 6949114

dockerify the app

Browse files
Files changed (4) hide show
  1. Dockerfile +11 -0
  2. README.md +1 -3
  3. app.py +21 -21
  4. requirements.txt +3 -1
Dockerfile ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.9
2
+
3
+ WORKDIR /code
4
+
5
+ COPY ./requirements.txt /code/requirements.txt
6
+
7
+ RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
8
+
9
+ COPY . .
10
+
11
+ CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
README.md CHANGED
@@ -3,9 +3,7 @@ title: Embeddings Similarity
3
  emoji: 📚
4
  colorFrom: purple
5
  colorTo: gray
6
- sdk: gradio
7
- sdk_version: 3.41.2
8
- app_file: app.py
9
  pinned: false
10
  ---
11
 
 
3
  emoji: 📚
4
  colorFrom: purple
5
  colorTo: gray
6
+ sdk: docker
 
 
7
  pinned: false
8
  ---
9
 
app.py CHANGED
@@ -2,10 +2,16 @@ from transformers import AutoTokenizer, AutoModel
2
  import torch
3
  import torch.nn.functional as F
4
  import hnswlib
5
- import gradio as gr
6
  import numpy as np
7
- import json
8
  import datetime
 
 
 
 
 
 
 
 
9
 
10
  seperator = "-HFSEP-"
11
  base_name="intfloat/e5-large-v2"
@@ -56,30 +62,24 @@ def create_hnsw_index(embeddings_np, space='ip', ef_construction=100, M=16):
56
  index.add_items(embeddings_np, ids)
57
  return index
58
 
59
- def gradio_function(query, paragraph_chunks, top_k):
60
- paragraph_chunks = paragraph_chunks.split(seperator) # Split the comma-separated values into a list
61
- paragraph_chunks = [item.strip() for item in paragraph_chunks] # Trim whitespace from each item
62
- print("Len of batches", len(paragraph_chunks))
 
 
 
 
 
 
63
 
64
  print("creating embeddings", current_timestamp())
65
- embeddings_np = get_embeddings([query]+paragraph_chunks)
66
  query_embedding, chunks_embeddings = embeddings_np[0], embeddings_np[1:]
67
 
68
  print("creating index", current_timestamp())
69
  search_index = create_hnsw_index(chunks_embeddings)
70
  print("searching index", current_timestamp())
71
- labels, _ = search_index.knn_query(query_embedding, k=min(int(top_k), len(chunks_embeddings)))
72
  labels = labels[0].tolist()
73
- return json.dumps(labels)
74
-
75
- interface = gr.Interface(
76
- fn=gradio_function,
77
- inputs=[
78
- gr.Textbox(placeholder="Enter a user query..."),
79
- gr.Textbox(placeholder="Enter comma-separated strings..."),
80
- gr.Number()
81
- ],
82
- outputs="text"
83
- )
84
-
85
- interface.launch()
 
2
  import torch
3
  import torch.nn.functional as F
4
  import hnswlib
 
5
  import numpy as np
 
6
  import datetime
7
+ from fastapi import FastAPI
8
+ from pydantic import BaseModel
9
+ from typing import List
10
+
11
+ if torch.cuda.is_available():
12
+ print("CUDA is available! Inference on GPU!")
13
+ else:
14
+ print("CUDA is not available. Inference on CPU.")
15
 
16
  seperator = "-HFSEP-"
17
  base_name="intfloat/e5-large-v2"
 
62
  index.add_items(embeddings_np, ids)
63
  return index
64
 
65
+ app = FastAPI()
66
+
67
+ class EmbeddingsSimilarityReq(BaseModel):
68
+ paragraphs: List[str]
69
+ query: str
70
+ top_k: int
71
+
72
+ @app.post("/")
73
+ async def find_similar_paragraphsitem(req: EmbeddingsSimilarityReq):
74
+ print("Len of batches", len(req.paragraphs))
75
 
76
  print("creating embeddings", current_timestamp())
77
+ embeddings_np = get_embeddings([req.query]+req.paragraphs)
78
  query_embedding, chunks_embeddings = embeddings_np[0], embeddings_np[1:]
79
 
80
  print("creating index", current_timestamp())
81
  search_index = create_hnsw_index(chunks_embeddings)
82
  print("searching index", current_timestamp())
83
+ labels, _ = search_index.knn_query(query_embedding, k=min(int(req.top_k), len(chunks_embeddings)))
84
  labels = labels[0].tolist()
85
+ return labels
 
 
 
 
 
 
 
 
 
 
 
 
requirements.txt CHANGED
@@ -1,4 +1,6 @@
1
  torch==2.0.1
2
  transformers
3
  gradio
4
- hnswlib
 
 
 
1
  torch==2.0.1
2
  transformers
3
  gradio
4
+ hnswlib
5
+ fastapi
6
+ uvicorn[standard]