liuqi6777 committed on
Commit
eb19bb7
1 Parent(s): 4ef2301

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +27 -25
README.md CHANGED
@@ -50,20 +50,21 @@ n_gpu: int = 1 # Set your number of available GPUs
50
  experiment: str = "" # Name of the folder where the logs and created indices will be stored
51
  index_name: str = "" # The name of your index, i.e. the name of your vector database
52
 
53
- with Run().context(RunConfig(nranks=n_gpu, experiment=experiment)):
54
- config = ColBERTConfig(
55
- doc_maxlen=8192 # Our model supports 8k context length for indexing long documents
56
- )
57
- indexer = Indexer(
58
- checkpoint="jinaai/jina-colbert-v1-en",
59
- config=config,
60
- )
61
- documents = [
62
- "ColBERT is an efficient and effective passage retrieval model.",
63
- "Jina-ColBERT is a ColBERT-style model but based on JinaBERT so it can support both 8k context length.",
64
- ...
65
- ]
66
- indexer.index(name=index_name, collection=documents)
 
67
  ```
68
 
69
  ### Searching
@@ -77,17 +78,18 @@ experiment: str = "" # Name of the folder where the logs and created indices wi
77
  index_name: str = "" # Name of your previously created index where the documents you want to search are stored.
78
  k: int = 10 # how many results you want to retrieve
79
 
80
- with Run().context(RunConfig(nranks=n_gpu, experiment=experiment)):
81
- config = ColBERTConfig(
82
- query_maxlen=128 # Although the model supports 8k context length, we suggest not to use a very long query, as it may cause significant computational complexity and CUDA memory usage.
83
- )
84
- searcher = Searcher(
85
- index=index_name,
86
- config=config
87
- ) # You don't need to specify the checkpoint again, the model name is stored in the index.
88
- query = "How to use ColBERT for indexing long documents?"
89
- results = searcher.search(query, k=k)
90
- # results: tuple of tuples of length k containing ((passage_id, passage_rank, passage_score), ...)
 
91
  ```
92
 
93
  ## Evaluation Results
 
50
  experiment: str = "" # Name of the folder where the logs and created indices will be stored
51
  index_name: str = "" # The name of your index, i.e. the name of your vector database
52
 
53
+ if __name__ == "__main__":
54
+ with Run().context(RunConfig(nranks=n_gpu, experiment=experiment)):
55
+ config = ColBERTConfig(
56
+ doc_maxlen=8192 # Our model supports 8k context length for indexing long documents
57
+ )
58
+ indexer = Indexer(
59
+ checkpoint="jinaai/jina-colbert-v1-en",
60
+ config=config,
61
+ )
62
+ documents = [
63
+ "ColBERT is an efficient and effective passage retrieval model.",
64
+ "Jina-ColBERT is a ColBERT-style model but based on JinaBERT so it can support both 8k context length.",
65
+ # Add more documents here to ensure the clustering works correctly
66
+ ]
67
+ indexer.index(name=index_name, collection=documents)
68
  ```
69
 
70
  ### Searching
 
78
  index_name: str = "" # Name of your previously created index where the documents you want to search are stored.
79
  k: int = 10 # how many results you want to retrieve
80
 
81
+ if __name__ == "__main__":
82
+ with Run().context(RunConfig(nranks=n_gpu, experiment=experiment)):
83
+ config = ColBERTConfig(
84
+ query_maxlen=128 # Although the model supports 8k context length, we suggest not to use a very long query, as it may cause significant computational complexity and CUDA memory usage.
85
+ )
86
+ searcher = Searcher(
87
+ index=index_name,
88
+ config=config
89
+ ) # You don't need to specify the checkpoint again, the model name is stored in the index.
90
+ query = "How to use ColBERT for indexing long documents?"
91
+ results = searcher.search(query, k=k)
92
+ # results: tuple of tuples of length k containing ((passage_id, passage_rank, passage_score), ...)
93
  ```
94
 
95
  ## Evaluation Results