tasal9 committed on
Commit
bf7fc70
·
verified ·
1 Parent(s): 955f0af

Upload ZamAI Multilingual Embeddings model

Browse files
Files changed (4) hide show
  1. config.json +24 -0
  2. modeling.py +11 -0
  3. push_to_hf.py +515 -0
  4. upload_to_hf.py +58 -0
config.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "BertModel"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "classifier_dropout": null,
7
+ "hidden_act": "gelu",
8
+ "hidden_dropout_prob": 0.1,
9
+ "hidden_size": 384,
10
+ "initializer_range": 0.02,
11
+ "intermediate_size": 1536,
12
+ "layer_norm_eps": 1e-12,
13
+ "max_position_embeddings": 512,
14
+ "model_type": "bert",
15
+ "num_attention_heads": 12,
16
+ "num_hidden_layers": 12,
17
+ "pad_token_id": 0,
18
+ "position_embedding_type": "absolute",
19
+ "torch_dtype": "float32",
20
+ "transformers_version": "4.21.2",
21
+ "type_vocab_size": 2,
22
+ "use_cache": true,
23
+ "vocab_size": 250002
24
+ }
modeling.py ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import sys
2
+ from sentence_transformers import SentenceTransformer
3
+
4
+ # This is required for the Hugging Face model
5
def load_model():
    """Build and return the SentenceTransformer used for ZamAI multilingual embeddings."""
    checkpoint = 'sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2'
    return SentenceTransformer(checkpoint)
8
+
9
if __name__ == "__main__":
    # Manual smoke check: the confirmation is only printed once load_model()
    # has returned without raising.
    model = load_model()
    status_message = "ZamAI Multilingual Embeddings model loaded successfully!"
    print(status_message)
push_to_hf.py ADDED
@@ -0,0 +1,515 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ ZamAI Multilingual Embeddings - Hugging Face Hub Deployment Script
4
+ This script pushes the ZamAI multilingual embeddings model to Hugging Face Hub
5
+ """
6
+
7
+ import os
8
+ import subprocess
9
+ import sys
10
+ from pathlib import Path
11
+ from huggingface_hub import HfApi, login, create_repo
12
+
13
def check_huggingface_auth():
    """Report whether a Hugging Face account is currently authenticated.

    Calls ``HfApi().whoami()`` and prints the outcome.

    Returns:
        bool: True when ``whoami()`` succeeds, False when it raises.
    """
    try:
        user_info = HfApi().whoami()
        print(f"✅ Authenticated as: {user_info['name']}")
        return True
    except Exception as e:
        # Best-effort probe: every failure is reported as "not authenticated",
        # but the reason is printed instead of being silently discarded.
        print("❌ Not authenticated with Hugging Face")
        print(f"Reason: {e}")
        print("Please run: huggingface-cli login")
        return False
24
+
25
def create_model_card(output_path="README.md"):
    """Write the ZamAI model card (YAML front matter + Markdown) to disk.

    Args:
        output_path: Destination file. Defaults to ``README.md`` in the
            current working directory, which is what the Hub renders as the
            model card.

    The card is written as UTF-8 because it contains Pashto example
    sentences; any existing file at ``output_path`` is overwritten.
    """
    model_card_content = """---
license: apache-2.0
datasets:
- tasal9/Pashto_Dataset
language:
- ps
- en
- ar
- ur
- fa
library_name: sentence-transformers
tags:
- multilingual
- embeddings
- semantic-search
- pashto
- chromadb
- llamaindex
- cross-lingual
- afghanistan
- zamai
pipeline_tag: feature-extraction
model-index:
- name: Multilingual-ZamAI-Embeddings
  results: []
widget:
- source_sentence: "This is a sample sentence in English."
  sentences:
  - "This sentence is similar to the first one."
  - "دا جمله د لومړۍ جملې سره ورته ده."
  - "This sentence has nothing to do with the others."
  example_title: "English to multilingual similarity"
- source_sentence: "دا په پښتو کې یوه نمونه جمله ده."
  sentences:
  - "This is a sample sentence in English."
  - "دا جمله د لومړۍ جملې سره ورته ده."
  - "زه د پښتو ژبې زده کړه کوم."
  example_title: "Pashto to multilingual similarity"
---

# ZamAI Multilingual Embeddings

This model provides state-of-the-art multilingual sentence embeddings with a special focus on Pashto language support. Built on the foundation of `sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2`, this model enables semantic search, document retrieval, and cross-lingual understanding across 50+ languages.

## Model Details

- **Model Type**: Sentence Transformer (BERT-based)
- **Base Model**: [sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2](https://huggingface.co/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2)
- **Languages Supported**: 50+ including Pashto (ps), English (en), Arabic (ar), Urdu (ur), Farsi (fa), and more
- **Max Sequence Length**: 512 tokens
- **Output Dimensionality**: 384
- **License**: Apache 2.0

## Key Features

- **Cross-lingual Understanding**: Retrieve semantically similar content across different languages
- **Pashto Language Support**: Optimized for Pashto language processing and understanding
- **Vector Database Integration**: Ready-to-use with ChromaDB and LlamaIndex
- **High Performance**: Efficient processing suitable for real-time applications

## Usage

### Basic Usage with Sentence Transformers

```python
from sentence_transformers import SentenceTransformer
import numpy as np

# Load the model
model = SentenceTransformer('tasal9/Multilingual-ZamAI-Embeddings')

# English sentences
sentences_en = [
    "This is a sample sentence in English.",
    "This sentence is similar to the first one."
]

# Pashto sentences
sentences_ps = [
    "دا په پښتو کې یوه نمونه جمله ده.",
    "دا جمله د لومړۍ جملې سره ورته ده."
]

# Get embeddings
embeddings_en = model.encode(sentences_en)
embeddings_ps = model.encode(sentences_ps)

# Calculate cross-lingual similarity
from numpy import dot
from numpy.linalg import norm

def cosine_similarity(a, b):
    return dot(a, b) / (norm(a) * norm(b))

# Compare English and Pashto sentences
similarity = cosine_similarity(embeddings_en[0], embeddings_ps[0])
print(f"Cross-lingual similarity: {similarity:.4f}")
```

### Advanced Usage with ChromaDB and LlamaIndex

```python
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.vector_stores.chroma import ChromaVectorStore
from llama_index.core import StorageContext, VectorStoreIndex
import chromadb

# Initialize the embedding model
embed_model = HuggingFaceEmbedding(model_name="tasal9/Multilingual-ZamAI-Embeddings")

# Set up ChromaDB
chroma_client = chromadb.PersistentClient(path="./chroma_db")
collection = chroma_client.get_or_create_collection("multilingual_collection")
vector_store = ChromaVectorStore(chroma_collection=collection)
storage_context = StorageContext.from_defaults(vector_store=vector_store)

# Create index with your documents
# index = VectorStoreIndex.from_documents(documents, storage_context=storage_context, embed_model=embed_model)
# query_engine = index.as_query_engine()

# Query in any language
# result = query_engine.query("What is the capital of Afghanistan?")
# result_ps = query_engine.query("د افغانستان پلازمېنه څه ده؟")
```

## Performance

The model demonstrates excellent cross-lingual performance:

- **English-English**: High semantic similarity detection
- **Pashto-Pashto**: Native language understanding and similarity
- **Cross-lingual (English-Pashto)**: Strong cross-lingual semantic alignment
- **Multilingual**: Supports 50+ languages with consistent performance

## Applications

- **Semantic Search**: Find relevant documents across multiple languages
- **Cross-lingual Information Retrieval**: Retrieve Pashto content using English queries and vice versa
- **Document Similarity**: Compare documents in different languages
- **Question Answering**: Build multilingual QA systems
- **Content Recommendation**: Recommend similar content across languages

## Technical Details

- **Architecture**: BERT-based transformer model
- **Training Data**: Multilingual parallel and monolingual corpora
- **Optimization**: Optimized for semantic similarity tasks
- **Integration**: Compatible with Hugging Face Transformers, Sentence Transformers, LlamaIndex, and ChromaDB

## Citation

If you use this model in your research, please cite:

```bibtex
@misc{zamai-multilingual-embeddings-2024,
  title={ZamAI Multilingual Embeddings: Cross-lingual Sentence Transformers with Pashto Support},
  author={ZamAI Team},
  year={2024},
  url={https://huggingface.co/tasal9/Multilingual-ZamAI-Embeddings}
}
```

## License

This model is released under the Apache 2.0 License. See the [LICENSE](LICENSE) file for details.

## Contact

For questions or support, please open an issue on the [model repository](https://huggingface.co/tasal9/Multilingual-ZamAI-Embeddings) or contact the ZamAI team.
"""

    # Explicit UTF-8: the platform default encoding may not be able to
    # represent the Pashto sentences embedded in the card.
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(model_card_content)
    print(f"✅ Model card created: {output_path}")
201
+
202
def prepare_repository():
    """Generate the auxiliary files the Hub repository needs.

    Writes three files into the current working directory, overwriting any
    existing copies:

    * ``README.md`` - via ``create_model_card()``.
    * ``.gitattributes`` - routes binary/model artefacts through Git LFS.
    * ``LICENSE`` - the Apache License, Version 2.0.
    """
    print("🔧 Preparing repository...")

    # Create model card
    create_model_card()

    # Create additional files
    files_to_create = {
        ".gitattributes": """*.7z filter=lfs diff=lfs merge=lfs -text
*.arrow filter=lfs diff=lfs merge=lfs -text
*.bin filter=lfs diff=lfs merge=lfs -text
*.bz2 filter=lfs diff=lfs merge=lfs -text
*.ckpt filter=lfs diff=lfs merge=lfs -text
*.ftz filter=lfs diff=lfs merge=lfs -text
*.gz filter=lfs diff=lfs merge=lfs -text
*.h5 filter=lfs diff=lfs merge=lfs -text
*.joblib filter=lfs diff=lfs merge=lfs -text
*.lfs.* filter=lfs diff=lfs merge=lfs -text
*.mlmodel filter=lfs diff=lfs merge=lfs -text
*.model filter=lfs diff=lfs merge=lfs -text
*.msgpack filter=lfs diff=lfs merge=lfs -text
*.npy filter=lfs diff=lfs merge=lfs -text
*.npz filter=lfs diff=lfs merge=lfs -text
*.onnx filter=lfs diff=lfs merge=lfs -text
*.ot filter=lfs diff=lfs merge=lfs -text
*.parquet filter=lfs diff=lfs merge=lfs -text
*.pb filter=lfs diff=lfs merge=lfs -text
*.pickle filter=lfs diff=lfs merge=lfs -text
*.pkl filter=lfs diff=lfs merge=lfs -text
*.pt filter=lfs diff=lfs merge=lfs -text
*.pth filter=lfs diff=lfs merge=lfs -text
*.rar filter=lfs diff=lfs merge=lfs -text
*.safetensors filter=lfs diff=lfs merge=lfs -text
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
*.tar.* filter=lfs diff=lfs merge=lfs -text
*.tar filter=lfs diff=lfs merge=lfs -text
*.tflite filter=lfs diff=lfs merge=lfs -text
*.tgz filter=lfs diff=lfs merge=lfs -text
*.wasm filter=lfs diff=lfs merge=lfs -text
*.xz filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text
*.zst filter=lfs diff=lfs merge=lfs -text
*tfevents* filter=lfs diff=lfs merge=lfs -text
""",
        # NOTE(review): a licence file must be the canonical text, not a
        # paraphrase. The text below is intended to be the official
        # Apache-2.0 licence verbatim; diff it against
        # https://www.apache.org/licenses/LICENSE-2.0.txt before release.
        "LICENSE": """
                                 Apache License
                           Version 2.0, January 2004
                        http://www.apache.org/licenses/

   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

   1. Definitions.

      "License" shall mean the terms and conditions for use, reproduction,
      and distribution as defined by Sections 1 through 9 of this document.

      "Licensor" shall mean the copyright owner or entity authorized by
      the copyright owner that is granting the License.

      "Legal Entity" shall mean the union of the acting entity and all
      other entities that control, are controlled by, or are under common
      control with that entity. For the purposes of this definition,
      "control" means (i) the power, direct or indirect, to cause the
      direction or management of such entity, whether by contract or
      otherwise, or (ii) ownership of fifty percent (50%) or more of the
      outstanding shares, or (iii) beneficial ownership of such entity.

      "You" (or "Your") shall mean an individual or Legal Entity
      exercising permissions granted by this License.

      "Source" form shall mean the preferred form for making modifications,
      including but not limited to software source code, documentation
      source, and configuration files.

      "Object" form shall mean any form resulting from mechanical
      transformation or translation of a Source form, including but
      not limited to compiled object code, generated documentation,
      and conversions to other media types.

      "Work" shall mean the work of authorship, whether in Source or
      Object form, made available under the License, as indicated by a
      copyright notice that is included in or attached to the work
      (an example is provided in the Appendix below).

      "Derivative Works" shall mean any work, whether in Source or Object
      form, that is based on (or derived from) the Work and for which the
      editorial revisions, annotations, elaborations, or other modifications
      represent, as a whole, an original work of authorship. For the purposes
      of this License, Derivative Works shall not include works that remain
      separable from, or merely link (or bind by name) to the interfaces of,
      the Work and Derivative Works thereof.

      "Contribution" shall mean any work of authorship, including
      the original version of the Work and any modifications or additions
      to that Work or Derivative Works thereof, that is intentionally
      submitted to Licensor for inclusion in the Work by the copyright owner
      or by an individual or Legal Entity authorized to submit on behalf of
      the copyright owner. For the purposes of this definition, "submitted"
      means any form of electronic, verbal, or written communication sent
      to the Licensor or its representatives, including but not limited to
      communication on electronic mailing lists, source code control systems,
      and issue tracking systems that are managed by, or on behalf of, the
      Licensor for the purpose of discussing and improving the Work, but
      excluding communication that is conspicuously marked or otherwise
      designated in writing by the copyright owner as "Not a Contribution."

      "Contributor" shall mean Licensor and any individual or Legal Entity
      on behalf of whom a Contribution has been received by Licensor and
      subsequently incorporated within the Work.

   2. Grant of Copyright License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      copyright license to reproduce, prepare Derivative Works of,
      publicly display, publicly perform, sublicense, and distribute the
      Work and such Derivative Works in Source or Object form.

   3. Grant of Patent License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      (except as stated in this section) patent license to make, have made,
      use, offer to sell, sell, import, and otherwise transfer the Work,
      where such license applies only to those patent claims licensable
      by such Contributor that are necessarily infringed by their
      Contribution(s) alone or by combination of their Contribution(s)
      with the Work to which such Contribution(s) was submitted. If You
      institute patent litigation against any entity (including a
      cross-claim or counterclaim in a lawsuit) alleging that the Work
      or a Contribution incorporated within the Work constitutes direct
      or contributory patent infringement, then any patent licenses
      granted to You under this License for that Work shall terminate
      as of the date such litigation is filed.

   4. Redistribution. You may reproduce and distribute copies of the
      Work or Derivative Works thereof in any medium, with or without
      modifications, and in Source or Object form, provided that You
      meet the following conditions:

      (a) You must give any other recipients of the Work or
          Derivative Works a copy of this License; and

      (b) You must cause any modified files to carry prominent notices
          stating that You changed the files; and

      (c) You must retain, in the Source form of any Derivative Works
          that You distribute, all copyright, patent, trademark, and
          attribution notices from the Source form of the Work,
          excluding those notices that do not pertain to any part of
          the Derivative Works; and

      (d) If the Work includes a "NOTICE" text file as part of its
          distribution, then any Derivative Works that You distribute must
          include a readable copy of the attribution notices contained
          within such NOTICE file, excluding those notices that do not
          pertain to any part of the Derivative Works, in at least one
          of the following places: within a NOTICE text file distributed
          as part of the Derivative Works; within the Source form or
          documentation, if provided along with the Derivative Works; or,
          within a display generated by the Derivative Works, if and
          wherever such third-party notices normally appear. The contents
          of the NOTICE file are for informational purposes only and
          do not modify the License. You may add Your own attribution
          notices within Derivative Works that You distribute, alongside
          or as an addendum to the NOTICE text from the Work, provided
          that such additional attribution notices cannot be construed
          as modifying the License.

      You may add Your own copyright statement to Your modifications and
      may provide additional or different license terms and conditions
      for use, reproduction, or distribution of Your modifications, or
      for any such Derivative Works as a whole, provided Your use,
      reproduction, and distribution of the Work otherwise complies with
      the conditions stated in this License.

   5. Submission of Contributions. Unless You explicitly state otherwise,
      any Contribution intentionally submitted for inclusion in the Work
      by You to the Licensor shall be under the terms and conditions of
      this License, without any additional terms or conditions.
      Notwithstanding the above, nothing herein shall supersede or modify
      the terms of any separate license agreement you may have executed
      with Licensor regarding such Contributions.

   6. Trademarks. This License does not grant permission to use the trade
      names, trademarks, service marks, or product names of the Licensor,
      except as required for reasonable and customary use in describing the
      origin of the Work and reproducing the content of the NOTICE file.

   7. Disclaimer of Warranty. Unless required by applicable law or
      agreed to in writing, Licensor provides the Work (and each
      Contributor provides its Contributions) on an "AS IS" BASIS,
      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
      implied, including, without limitation, any warranties or conditions
      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
      PARTICULAR PURPOSE. You are solely responsible for determining the
      appropriateness of using or redistributing the Work and assume any
      risks associated with Your exercise of permissions under this License.

   8. Limitation of Liability. In no event and under no legal theory,
      whether in tort (including negligence), contract, or otherwise,
      unless required by applicable law (such as deliberate and grossly
      negligent acts) or agreed to in writing, shall any Contributor be
      liable to You for damages, including any direct, indirect, special,
      incidental, or consequential damages of any character arising as a
      result of this License or out of the use or inability to use the
      Work (including but not limited to damages for loss of goodwill,
      work stoppage, computer failure or malfunction, or any and all
      other commercial damages or losses), even if such Contributor
      has been advised of the possibility of such damages.

   9. Accepting Warranty or Additional Liability. While redistributing
      the Work or Derivative Works thereof, You may choose to offer,
      and charge a fee for, acceptance of support, warranty, indemnity,
      or other liability obligations and/or rights consistent with this
      License. However, in accepting such obligations, You may act only
      on Your own behalf and on Your sole responsibility, not on behalf
      of any other Contributor, and only if You agree to indemnify,
      defend, and hold each Contributor harmless for any liability
      incurred by, or claims asserted against, such Contributor by reason
      of your accepting any such warranty or additional liability.

   END OF TERMS AND CONDITIONS

   APPENDIX: How to apply the Apache License to your work.

      To apply the Apache License to your work, attach the following
      boilerplate notice, with the fields enclosed by brackets "[]"
      replaced with your own identifying information. (Don't include
      the brackets!)  The text should be enclosed in the appropriate
      comment syntax for the file format. We also recommend that a
      file or class name and description of purpose be included on the
      same "printed page" as the copyright notice for easier
      identification within third-party archives.

   Copyright [yyyy] [name of copyright owner]

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.
""",
    }

    for filename, content in files_to_create.items():
        with open(filename, "w", encoding="utf-8") as f:
            f.write(content)
        print(f"✅ Created: {filename}")
428
+
429
def push_to_huggingface(repo_name="tasal9/Multilingual-ZamAI-Embeddings"):
    """Push the project files in the current directory to the Hugging Face Hub.

    Steps: verify authentication, regenerate README/LICENSE/.gitattributes
    via ``prepare_repository()``, create the model repository if needed, then
    upload each file from a fixed list that exists locally (one commit per
    file).

    Args:
        repo_name: Target repository id in ``owner/name`` form.

    Returns:
        bool: True only when authentication succeeded and every file that
        exists locally was uploaded. False when authentication fails, when
        any upload fails, or when an unexpected error occurs.
    """
    # Check authentication
    if not check_huggingface_auth():
        print("\n🔑 Please authenticate with Hugging Face first:")
        print("huggingface-cli login")
        return False

    try:
        # Prepare repository
        prepare_repository()

        # Create repository on Hugging Face Hub
        api = HfApi()

        try:
            print(f"🚀 Creating repository: {repo_name}")
            create_repo(
                repo_id=repo_name,
                repo_type="model",
                exist_ok=True,
                private=False
            )
            print(f"✅ Repository created/verified: {repo_name}")
        except Exception as e:
            # Deliberately best-effort: keep going so the uploads below
            # surface the real problem if the repository is unusable.
            print(f"ℹ️ Repository may already exist: {e}")

        # Upload files
        print("📤 Uploading files to Hugging Face Hub...")

        files_to_upload = [
            "README.md",
            "requirements.txt",
            "setup.py",
            "demo.py",
            "simple_demo.py",
            "indexer.py",
            "modeling.py",
            "config.json",
            "LICENSE",
            ".gitattributes"
        ]

        failed_uploads = []
        for file_path in files_to_upload:
            if not os.path.exists(file_path):
                # Optional project files may be absent; say so rather than
                # skipping silently.
                print(f"ℹ️ Skipping missing file: {file_path}")
                continue
            try:
                api.upload_file(
                    path_or_fileobj=file_path,
                    path_in_repo=file_path,
                    repo_id=repo_name,
                    commit_message=f"Add {file_path}"
                )
            except Exception as e:
                print(f"⚠️ Warning uploading {file_path}: {e}")
                failed_uploads.append(file_path)
            else:
                print(f"✅ Uploaded: {file_path}")

        # Do not claim success when some files never reached the Hub.
        if failed_uploads:
            print(f"❌ Failed to upload: {', '.join(failed_uploads)}")
            return False

        print(f"\n🎉 Successfully pushed ZamAI Multilingual Embeddings to Hugging Face Hub!")
        print(f"🔗 Model URL: https://huggingface.co/{repo_name}")
        print(f"📖 Usage: model = SentenceTransformer('{repo_name}')")

        return True

    except Exception as e:
        print(f"❌ Error pushing to Hugging Face: {e}")
        return False
497
+
498
if __name__ == "__main__":
    print("🚀 ZamAI Multilingual Embeddings - Hugging Face Deployment")
    print("=" * 60)

    # setup.py serves as a marker that the script is being run from the
    # project root; uploads use paths relative to the working directory.
    if not os.path.exists("setup.py"):
        print("❌ Please run this script from the Multilingual-ZamAI-Embeddings directory")
        sys.exit(1)

    # Push to Hugging Face
    success = push_to_huggingface()

    if success:
        print("\n✨ Deployment completed successfully!")
        print("Your model is now available on Hugging Face Hub")
    else:
        print("\n❌ Deployment failed. Please check the errors above.")
        # Non-zero exit status so shells and CI can detect the failure.
        sys.exit(1)
upload_to_hf.py ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ ZamAI Model Upload Script
4
+ This script uploads the ZamAI Multilingual Embeddings model to Hugging Face Hub
5
+ """
6
+
7
+ import os
8
+ from huggingface_hub import HfApi, create_repo, upload_folder
9
+ from pathlib import Path
10
+
11
def upload_to_hf():
    """Upload the current directory to the ZamAI model repository on the Hub.

    Token resolution order:
        1. the contents of ``HF-Token.txt`` in the working directory, if the
           file exists and is non-empty;
        2. the ``HF_TOKEN`` environment variable;
        3. ``None``, which leaves it to ``huggingface_hub`` to use the
           credentials cached by ``huggingface-cli login``.

    Returns:
        bool: True when the repository was created/verified and the folder
        upload completed; False when any step raised.
    """
    # Read the HF token. A missing token file is not fatal: fall through to
    # the environment variable / cached login instead of crashing.
    token = None
    try:
        with open('HF-Token.txt', 'r', encoding="utf-8") as f:
            token = f.read().strip() or None
    except FileNotFoundError:
        token = None
    if token is None:
        token = os.environ.get("HF_TOKEN") or None

    # Initialize HF API
    api = HfApi(token=token)

    # Repository details
    repo_id = "tasal9/Multilingual-ZamAI-Embeddings"
    repo_type = "model"

    print(f"🚀 Uploading ZamAI Multilingual Embeddings to {repo_id}")

    try:
        # Create repository if it doesn't exist
        create_repo(
            repo_id=repo_id,
            token=token,
            repo_type=repo_type,
            exist_ok=True,
            private=False
        )
        print(f"✅ Repository {repo_id} is ready")

        # Upload the entire folder. HF-Token.txt is excluded so the secret
        # is never published alongside the model.
        api.upload_folder(
            folder_path=".",
            repo_id=repo_id,
            repo_type=repo_type,
            token=token,
            commit_message="Upload ZamAI Multilingual Embeddings model",
            ignore_patterns=[".git/", "__pycache__/", "*.pyc", "HF-Token.txt", "chroma_db/"]
        )

        print(f"🎉 Successfully uploaded to https://huggingface.co/{repo_id}")
        print("Your model is now live on Hugging Face Hub!")

    except Exception as e:
        print(f"❌ Error uploading to Hugging Face: {str(e)}")
        return False

    return True
56
+
57
if __name__ == "__main__":
    # Propagate the outcome as the process exit status so shells and CI can
    # tell a failed upload from a successful one.
    raise SystemExit(0 if upload_to_hf() else 1)