LinWeizheDragon
committed on
Commit
•
2aee0f7
1
Parent(s):
7ac7d85
Update README.md
Browse files
README.md
CHANGED
@@ -55,6 +55,35 @@ This model can be used combined with language models to create a retrieval-augme
|
|
55 |
|
56 |
For details on training, indexing, and performing retrieval, please refer to the [FLMR repository](https://github.com/LinWeizheDragon/FLMR).
|
57 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
58 |
|
59 |
## Training Details
|
60 |
|
|
|
55 |
|
56 |
For details on training, indexing, and performing retrieval, please refer to the [FLMR repository](https://github.com/LinWeizheDragon/FLMR).
|
57 |
|
58 |
+
1. Install the [FLMR package](https://github.com/LinWeizheDragon/FLMR).
|
59 |
+
|
60 |
+
2. A simple example of how to use this model:
|
61 |
+
```python
|
62 |
+
from flmr import FLMRConfig, FLMRModelForRetrieval, FLMRQueryEncoderTokenizer, FLMRContextEncoderTokenizer
|
63 |
+
checkpoint_path = "LinWeizheDragon/ColBERT-v2"
|
64 |
+
query_tokenizer = FLMRQueryEncoderTokenizer.from_pretrained(checkpoint_path, subfolder="query_tokenizer")
|
65 |
+
context_tokenizer = FLMRContextEncoderTokenizer.from_pretrained(checkpoint_path, subfolder="context_tokenizer")
|
66 |
+
|
67 |
+
model = FLMRModelForRetrieval.from_pretrained(checkpoint_path,
|
68 |
+
query_tokenizer=query_tokenizer,
|
69 |
+
context_tokenizer=context_tokenizer,
|
70 |
+
)
|
71 |
+
|
72 |
+
Q_encoding = query_tokenizer(["What is the capital of France?", "What is the capital of China?"])
|
73 |
+
D_encoding = context_tokenizer(["Paris is the capital of France.", "Beijing is the capital of China.",
|
74 |
+
"Paris is the capital of France.", "Beijing is the capital of China."])
|
75 |
+
|
76 |
+
inputs = dict(
|
77 |
+
query_input_ids=Q_encoding['input_ids'],
|
78 |
+
query_attention_mask=Q_encoding['attention_mask'],
|
79 |
+
context_input_ids=D_encoding['input_ids'],
|
80 |
+
context_attention_mask=D_encoding['attention_mask'],
|
81 |
+
use_in_batch_negatives=True,
|
82 |
+
)
|
83 |
+
|
84 |
+
res = model.forward(**inputs)
|
85 |
+
```
|
86 |
+
|
87 |
|
88 |
## Training Details
|
89 |
|