Raphael Sourty committed on
Commit
a05a32a
1 Parent(s): d522787
Files changed (1) hide show
  1. test.md +0 -61
test.md DELETED
@@ -1,61 +0,0 @@
1
- ---
2
- language:
3
- - en
4
- ---
5
-
6
-
7
- This model was trained with [Sparsembed](https://github.com/raphaelsty/sparsembed). You can find details on how to use it in the [Sparsembed](https://github.com/raphaelsty/sparsembed) repository.
8
-
9
- ```sh
10
- pip install sparsembed
11
- ```
12
-
13
- ```python
14
- from sparsembed import model, retrieve
15
- from transformers import AutoModelForMaskedLM, AutoTokenizer
16
-
17
- device = "cuda" # or "cpu"
18
-
19
- batch_size = 10
20
-
21
- # List documents to index:
22
- documents = [
23
- {'id': 0,
24
- 'title': 'Paris',
25
- 'url': 'https://en.wikipedia.org/wiki/Paris',
26
- 'text': 'Paris is the capital and most populous city of France.'},
27
- {'id': 1,
28
- 'title': 'Paris',
29
- 'url': 'https://en.wikipedia.org/wiki/Paris',
30
- 'text': "Since the 17th century, Paris has been one of Europe's major centres of science, and arts."},
31
- {'id': 2,
32
- 'title': 'Paris',
33
- 'url': 'https://en.wikipedia.org/wiki/Paris',
34
- 'text': 'The City of Paris is the centre and seat of government of the region and province of Île-de-France.'
35
- }]
36
-
37
- model = model.Splade(
38
- model=AutoModelForMaskedLM.from_pretrained("raphaelsty/splade_max").to(device),
39
- tokenizer=AutoTokenizer.from_pretrained("raphaelsty/splade_max"),
40
- device=device
41
- )
42
-
43
- retriever = retrieve.SpladeRetriever(
44
- key="id", # Key identifier of each document.
45
- on=["title", "text"], # Fields to search.
46
- model=model # Splade model.
47
- )
48
-
49
- retriever = retriever.add(
50
- documents=documents,
51
- batch_size=batch_size,
52
- k_tokens=256, # Number of activated tokens.
53
- )
54
-
55
- retriever(
56
- ["paris", "Toulouse"], # Queries
57
- k_tokens=20, # Maximum number of activated tokens.
58
- k=100, # Number of documents to retrieve.
59
- batch_size=batch_size
60
- )
61
- ```