Spaces:
Running
Running
chore: support `bge-m3` and `gte-multilingual-base` models
Browse files
- README.md +1 -1
- lightweight_embeddings/__init__.py +5 -3
- lightweight_embeddings/router.py +1 -0
- lightweight_embeddings/service.py +8 -2
README.md
CHANGED
@@ -21,7 +21,7 @@ header: mini
|
|
21 |
- **Reranking Support**: Includes powerful reranking capabilities for both text and image inputs.
|
22 |
- **Optimized for Speed**: Built with lightweight transformer models and efficient backends for rapid inference, even on low-resource systems.
|
23 |
- **Flexible Model Support**: Use a range of transformer models tailored to diverse use cases:
|
24 |
-
- Text models: `
|
25 |
- Image model: `siglip-base-patch16-256-multilingual`
|
26 |
- **Production-Ready**: Easily deploy anywhere with Docker for hassle-free setup.
|
27 |
- **Interactive Playground**: Test embeddings and reranking directly via a **Gradio-powered interface** alongside detailed REST API documentation.
|
|
|
21 |
- **Reranking Support**: Includes powerful reranking capabilities for both text and image inputs.
|
22 |
- **Optimized for Speed**: Built with lightweight transformer models and efficient backends for rapid inference, even on low-resource systems.
|
23 |
- **Flexible Model Support**: Use a range of transformer models tailored to diverse use cases:
|
24 |
+
- Text models: `snowflake-arctic-embed-l-v2.0`, `bge-m3`, `gte-multilingual-base`, `paraphrase-multilingual-MiniLM-L12-v2`, `paraphrase-multilingual-mpnet-base-v2`, `multilingual-e5-small`, `multilingual-e5-base`, `multilingual-e5-large`.
|
25 |
- Image model: `siglip-base-patch16-256-multilingual`
|
26 |
- **Production-Ready**: Easily deploy anywhere with Docker for hassle-free setup.
|
27 |
- **Interactive Playground**: Test embeddings and reranking directly via a **Gradio-powered interface** alongside detailed REST API documentation.
|
lightweight_embeddings/__init__.py
CHANGED
@@ -13,6 +13,7 @@ Supported text model IDs:
|
|
13 |
- "paraphrase-multilingual-MiniLM-L12-v2"
|
14 |
- "paraphrase-multilingual-mpnet-base-v2"
|
15 |
- "bge-m3"
|
|
|
16 |
|
17 |
Supported image model ID:
|
18 |
- "siglip-base-patch16-256-multilingual"
|
@@ -135,12 +136,13 @@ def create_main_interface():
|
|
135 |
# Available model options for the dropdown
|
136 |
model_options = [
|
137 |
"snowflake-arctic-embed-l-v2.0",
|
|
|
|
|
|
|
|
|
138 |
"multilingual-e5-small",
|
139 |
"multilingual-e5-base",
|
140 |
"multilingual-e5-large",
|
141 |
-
"paraphrase-multilingual-MiniLM-L12-v2",
|
142 |
-
"paraphrase-multilingual-mpnet-base-v2",
|
143 |
-
"bge-m3",
|
144 |
"siglip-base-patch16-256-multilingual",
|
145 |
]
|
146 |
|
|
|
13 |
- "paraphrase-multilingual-MiniLM-L12-v2"
|
14 |
- "paraphrase-multilingual-mpnet-base-v2"
|
15 |
- "bge-m3"
|
16 |
+
- "gte-multilingual-base"
|
17 |
|
18 |
Supported image model ID:
|
19 |
- "siglip-base-patch16-256-multilingual"
|
|
|
136 |
# Available model options for the dropdown
|
137 |
model_options = [
|
138 |
"snowflake-arctic-embed-l-v2.0",
|
139 |
+
"bge-m3",
|
140 |
+
"gte-multilingual-base",
|
141 |
+
"paraphrase-multilingual-MiniLM-L12-v2",
|
142 |
+
"paraphrase-multilingual-mpnet-base-v2",
|
143 |
"multilingual-e5-small",
|
144 |
"multilingual-e5-base",
|
145 |
"multilingual-e5-large",
|
|
|
|
|
|
|
146 |
"siglip-base-patch16-256-multilingual",
|
147 |
]
|
148 |
|
lightweight_embeddings/router.py
CHANGED
@@ -11,6 +11,7 @@ Supported Text Model IDs:
|
|
11 |
- "paraphrase-multilingual-MiniLM-L12-v2"
|
12 |
- "paraphrase-multilingual-mpnet-base-v2"
|
13 |
- "bge-m3"
|
|
|
14 |
|
15 |
Supported Image Model IDs:
|
16 |
- "siglip-base-patch16-256-multilingual"
|
|
|
11 |
- "paraphrase-multilingual-MiniLM-L12-v2"
|
12 |
- "paraphrase-multilingual-mpnet-base-v2"
|
13 |
- "bge-m3"
|
14 |
+
- "gte-multilingual-base"
|
15 |
|
16 |
Supported Image Model IDs:
|
17 |
- "siglip-base-patch16-256-multilingual"
|
lightweight_embeddings/service.py
CHANGED
@@ -18,6 +18,7 @@ Supported Text Model IDs:
|
|
18 |
- "paraphrase-multilingual-MiniLM-L12-v2"
|
19 |
- "paraphrase-multilingual-mpnet-base-v2"
|
20 |
- "bge-m3"
|
|
|
21 |
|
22 |
Supported Image Model IDs:
|
23 |
- "google/siglip-base-patch16-256-multilingual" (default, but extensible)
|
@@ -56,6 +57,7 @@ class TextModelType(str, Enum):
|
|
56 |
PARAPHRASE_MULTILINGUAL_MINILM_L12_V2 = "paraphrase-multilingual-MiniLM-L12-v2"
|
57 |
PARAPHRASE_MULTILINGUAL_MPNET_BASE_V2 = "paraphrase-multilingual-mpnet-base-v2"
|
58 |
BGE_M3 = "bge-m3"
|
|
|
59 |
|
60 |
|
61 |
class ImageModelType(str, Enum):
|
@@ -122,8 +124,12 @@ class ModelConfig:
|
|
122 |
onnx_file="onnx/model_quantized.onnx",
|
123 |
),
|
124 |
TextModelType.BGE_M3: ModelInfo(
|
125 |
-
model_id="
|
126 |
-
onnx_file="onnx/
|
|
|
|
|
|
|
|
|
127 |
),
|
128 |
}
|
129 |
return text_configs[self.text_model_type]
|
|
|
18 |
- "paraphrase-multilingual-MiniLM-L12-v2"
|
19 |
- "paraphrase-multilingual-mpnet-base-v2"
|
20 |
- "bge-m3"
|
21 |
+
- "gte-multilingual-base"
|
22 |
|
23 |
Supported Image Model IDs:
|
24 |
- "google/siglip-base-patch16-256-multilingual" (default, but extensible)
|
|
|
57 |
PARAPHRASE_MULTILINGUAL_MINILM_L12_V2 = "paraphrase-multilingual-MiniLM-L12-v2"
|
58 |
PARAPHRASE_MULTILINGUAL_MPNET_BASE_V2 = "paraphrase-multilingual-mpnet-base-v2"
|
59 |
BGE_M3 = "bge-m3"
|
60 |
+
GTE_MULTILINGUAL_BASE = "gte-multilingual-base"
|
61 |
|
62 |
|
63 |
class ImageModelType(str, Enum):
|
|
|
124 |
onnx_file="onnx/model_quantized.onnx",
|
125 |
),
|
126 |
TextModelType.BGE_M3: ModelInfo(
|
127 |
+
model_id="Xenova/bge-m3",
|
128 |
+
onnx_file="onnx/model_quantized.onnx",
|
129 |
+
),
|
130 |
+
TextModelType.GTE_MULTILINGUAL_BASE: ModelInfo(
|
131 |
+
model_id="onnx-community/gte-multilingual-base",
|
132 |
+
onnx_file="onnx/model_quantized.onnx",
|
133 |
),
|
134 |
}
|
135 |
return text_configs[self.text_model_type]
|