Sentence Similarity
English
txtai
davidmezzetti committed on
Commit
30c7935
1 Parent(s): 5e0e08e
Files changed (6) hide show
  1. .gitattributes +3 -0
  2. README.md +20 -0
  3. config.json +30 -0
  4. documents +3 -0
  5. embeddings +3 -0
  6. graph +3 -0
.gitattributes CHANGED
@@ -33,3 +33,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ documents filter=lfs diff=lfs merge=lfs -text
37
+ embeddings filter=lfs diff=lfs merge=lfs -text
38
+ graph filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ inference: false
3
+ language: en
4
+ license:
5
+ - cc-by-sa-3.0
6
+ - gfdl
7
+ library_name: txtai
8
+ tags:
9
+ - sentence-similarity
10
+ datasets:
11
+ - NeuML/wikipedia-20240101
12
+ ---
13
+
14
+ # Wikipedia txtai embeddings slim
15
+
16
+ This is a [txtai](https://github.com/neuml/txtai) embeddings index for the [English edition of Wikipedia](https://en.wikipedia.org/).
17
+
18
+ The slim version contains the `100K most popular` Wikipedia pages, ranked by page views. Graph indexing is also enabled for this embeddings index, so it can be used as a source for GraphRAG.
19
+
20
+ See the [txtai-wikipedia](https://hf.co/models/neuml/txtai-wikipedia) model page for additional information on this datasource.
config.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "autoid": "uuid5",
3
+ "path": "intfloat/e5-base",
4
+ "instructions": {
5
+ "query": "query: ",
6
+ "data": "passage: "
7
+ },
8
+ "content": true,
9
+ "faiss": {
10
+ "quantize": true
11
+ },
12
+ "graph": {
13
+ "approximate": false,
14
+ "topics": {},
15
+ "backend": "networkx"
16
+ },
17
+ "dimensions": 768,
18
+ "backend": "faiss",
19
+ "offset": 100000,
20
+ "build": {
21
+ "create": "2024-07-24T17:10:13Z",
22
+ "python": "3.8.19",
23
+ "settings": {
24
+ "components": "IVF1265,SQ8"
25
+ },
26
+ "system": "Linux (x86_64)",
27
+ "txtai": "7.3.0"
28
+ },
29
+ "update": "2024-07-24T17:10:13Z"
30
+ }
documents ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:112a7f34d271a030e0a87694d6521fa1589878f48cb49f8f678383ca6f733cf3
3
+ size 119484416
embeddings ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d9aba0c30f7e49dabde924812d35832ee354945e8785b29d2f4e3179b62031c8
3
+ size 81502528
graph ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a0ea98eebb9bb8eb04cda254958576118a86386067b50d9086253deb5c689f25
3
+ size 82165760