diff --git a/.gitattributes b/.gitattributes index c7d9f3332a950355d5a77d85000f05e6f45435ea..0dc9038fae7661e5ff199e4da1913f1afedb9792 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1,34 +1,29 @@ -*.7z filter=lfs diff=lfs merge=lfs -text -*.arrow filter=lfs diff=lfs merge=lfs -text -*.bin filter=lfs diff=lfs merge=lfs -text -*.bz2 filter=lfs diff=lfs merge=lfs -text -*.ckpt filter=lfs diff=lfs merge=lfs -text -*.ftz filter=lfs diff=lfs merge=lfs -text -*.gz filter=lfs diff=lfs merge=lfs -text -*.h5 filter=lfs diff=lfs merge=lfs -text -*.joblib filter=lfs diff=lfs merge=lfs -text -*.lfs.* filter=lfs diff=lfs merge=lfs -text -*.mlmodel filter=lfs diff=lfs merge=lfs -text -*.model filter=lfs diff=lfs merge=lfs -text -*.msgpack filter=lfs diff=lfs merge=lfs -text -*.npy filter=lfs diff=lfs merge=lfs -text -*.npz filter=lfs diff=lfs merge=lfs -text -*.onnx filter=lfs diff=lfs merge=lfs -text -*.ot filter=lfs diff=lfs merge=lfs -text -*.parquet filter=lfs diff=lfs merge=lfs -text -*.pb filter=lfs diff=lfs merge=lfs -text -*.pickle filter=lfs diff=lfs merge=lfs -text -*.pkl filter=lfs diff=lfs merge=lfs -text -*.pt filter=lfs diff=lfs merge=lfs -text -*.pth filter=lfs diff=lfs merge=lfs -text -*.rar filter=lfs diff=lfs merge=lfs -text -*.safetensors filter=lfs diff=lfs merge=lfs -text -saved_model/**/* filter=lfs diff=lfs merge=lfs -text -*.tar.* filter=lfs diff=lfs merge=lfs -text -*.tflite filter=lfs diff=lfs merge=lfs -text -*.tgz filter=lfs diff=lfs merge=lfs -text -*.wasm filter=lfs diff=lfs merge=lfs -text -*.xz filter=lfs diff=lfs merge=lfs -text -*.zip filter=lfs diff=lfs merge=lfs -text -*.zst filter=lfs diff=lfs merge=lfs -text -*tfevents* filter=lfs diff=lfs merge=lfs -text +index/**/* filter=lfs diff=lfs merge=lfs -text +data/data-00002-of-00028.arrow filter=lfs diff=lfs merge=lfs -text +data/data-00004-of-00028.arrow filter=lfs diff=lfs merge=lfs -text +data/data-00017-of-00028.arrow filter=lfs diff=lfs merge=lfs -text +data/data-00005-of-00028.arrow filter=lfs diff=lfs merge=lfs -text +data/data-00018-of-00028.arrow filter=lfs diff=lfs merge=lfs -text +data/data-00020-of-00028.arrow filter=lfs diff=lfs merge=lfs -text +data/data-00003-of-00028.arrow filter=lfs diff=lfs merge=lfs -text +data/data-00011-of-00028.arrow filter=lfs diff=lfs merge=lfs -text +data/data-00026-of-00028.arrow filter=lfs diff=lfs merge=lfs -text +data/data-00013-of-00028.arrow filter=lfs diff=lfs merge=lfs -text +data/data-00000-of-00028.arrow filter=lfs diff=lfs merge=lfs -text +data/data-00012-of-00028.arrow filter=lfs diff=lfs merge=lfs -text +data/data-00021-of-00028.arrow filter=lfs diff=lfs merge=lfs -text +data/data-00001-of-00028.arrow filter=lfs diff=lfs merge=lfs -text +data/data-00014-of-00028.arrow filter=lfs diff=lfs merge=lfs -text +data/data-00008-of-00028.arrow filter=lfs diff=lfs merge=lfs -text +data/data-00025-of-00028.arrow filter=lfs diff=lfs merge=lfs -text +data/data-00006-of-00028.arrow filter=lfs diff=lfs merge=lfs -text +data/data-00019-of-00028.arrow filter=lfs diff=lfs merge=lfs -text +data/data-00023-of-00028.arrow filter=lfs diff=lfs merge=lfs -text +data/data-00027-of-00028.arrow filter=lfs diff=lfs merge=lfs -text +data/data-00022-of-00028.arrow filter=lfs diff=lfs merge=lfs -text +data/data-00024-of-00028.arrow filter=lfs diff=lfs merge=lfs -text +data/data-00007-of-00028.arrow filter=lfs diff=lfs merge=lfs -text +data/data-00010-of-00028.arrow filter=lfs diff=lfs merge=lfs -text +data/data-00009-of-00028.arrow filter=lfs diff=lfs merge=lfs -text +data/data-00015-of-00028.arrow filter=lfs diff=lfs merge=lfs -text +data/data-00016-of-00028.arrow filter=lfs diff=lfs merge=lfs -text diff --git a/README.md b/README.md index e9246f11eead2f4a4621de86b5bb083d7c754816..2db223743939f9d3bcbafc3b6cefaace2afac41c 100644 --- a/README.md +++ b/README.md @@ -1,12 +1,13 @@ --- -title: Imdb Search -emoji: ๐Ÿ“Š -colorFrom: gray +title: IMDB search +emoji: ๐Ÿ  +colorFrom: blue colorTo: blue sdk: gradio -sdk_version: 3.18.0 +sdk_version: 3.12.0 app_file: app.py pinned: false +license: apache-2.0 --- -Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference +Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference \ No newline at end of file diff --git a/app.py b/app.py new file mode 100644 index 0000000000000000000000000000000000000000..d2310fb7d9179175dd945f83d8f2530d804727d5 --- /dev/null +++ b/app.py @@ -0,0 +1,86 @@ +import gradio as gr +from datasets import load_from_disk +from pyserini.search.lucene import LuceneSearcher + +searcher = LuceneSearcher("index") +ds = load_from_disk("data") +PAGINATION_VISIBLE = True +NUM_PAGES = 10 # STATIC. THIS CAN'T CHANGE BECAUSE GRADIO CAN'T DYNAMICALLY CREATE COMPONENTS. +RESULTS_PER_PAGE = 5 + +TEXT_FIELD = "text" +METADATA_FIELD = "docid" + +def result_html(result, meta): + return ( + f"
{meta}

" + f"
{result[:250]}...

{result[250:]}




" + ) + +def format_results(results): + return "\n".join([result_html(result, meta) for result,meta in zip(results[TEXT_FIELD], results[METADATA_FIELD])]) + +def page_0(query): + hits = searcher.search(query, k=NUM_PAGES*RESULTS_PER_PAGE) + ix = [int(hit.docid) for hit in hits] + results = ds.select(ix).shard(num_shards=NUM_PAGES, index=0, contiguous=True) # no need to shard. split ix in batches instead. (would make sense if results was cacheable) + results = format_results(results) + return results, [ix] + +def page_i(i, ix): + ix = ix[0] + results = ds.select(ix).shard(num_shards=NUM_PAGES, index=i, contiguous=True) + results = format_results(results) + return results, [ix] + +with gr.Blocks(css="#b {min-width:15px;background:transparent;border:white;box-shadow:none;}") as demo: # + with gr.Row(): + gr.Markdown(value="""##

IMDB search

""") + with gr.Row(): + with gr.Column(scale=1): + result_list = gr.Dataframe(type="array", visible=False, col_count=1) + with gr.Column(scale=13): + query = gr.Textbox(lines=1, max_lines=1, placeholder="Searchโ€ฆ", label="") + with gr.Column(scale=1): + with gr.Row(scale=1): + pass + with gr.Row(scale=1): + submit_btn = gr.Button("๐Ÿ”", elem_id="b").style(full_width=False) + with gr.Row(scale=1): + pass + + with gr.Row(): + with gr.Column(scale=1): + pass + with gr.Column(scale=13): + c = gr.HTML(label="Results") + with gr.Row(): + # left = gr.Button(value="โ—€", elem_id="b", visible=False).style(full_width=True) + page_1 = gr.Button(value="1", elem_id="b", visible=PAGINATION_VISIBLE).style(full_width=True) + page_2 = gr.Button(value="2", elem_id="b", visible=PAGINATION_VISIBLE).style(full_width=True) + page_3 = gr.Button(value="3", elem_id="b", visible=PAGINATION_VISIBLE).style(full_width=True) + page_4 = gr.Button(value="4", elem_id="b", visible=PAGINATION_VISIBLE).style(full_width=True) + page_5 = gr.Button(value="5", elem_id="b", visible=PAGINATION_VISIBLE).style(full_width=True) + page_6 = gr.Button(value="6", elem_id="b", visible=PAGINATION_VISIBLE).style(full_width=True) + page_7 = gr.Button(value="7", elem_id="b", visible=PAGINATION_VISIBLE).style(full_width=True) + page_8 = gr.Button(value="8", elem_id="b", visible=PAGINATION_VISIBLE).style(full_width=True) + page_9 = gr.Button(value="9", elem_id="b", visible=PAGINATION_VISIBLE).style(full_width=True) + page_10 = gr.Button(value="10", elem_id="b", visible=PAGINATION_VISIBLE).style(full_width=True) + # right = gr.Button(value="โ–ถ", elem_id="b", visible=False).style(full_width=True) + with gr.Column(scale=1): + pass + query.submit(fn=page_0, inputs=[query], outputs=[c, result_list]) + submit_btn.click(page_0, inputs=[query], outputs=[c, result_list]) + with gr.Box(visible=False): + nums = [gr.Number(i, visible=False, precision=0) for i in range(NUM_PAGES)] + page_1.click(fn=page_i, inputs=[nums[0], result_list], outputs=[c, result_list]) + page_2.click(fn=page_i, inputs=[nums[1], result_list], outputs=[c, result_list]) + page_3.click(fn=page_i, inputs=[nums[2], result_list], outputs=[c, result_list]) + page_4.click(fn=page_i, inputs=[nums[3], result_list], outputs=[c, result_list]) + page_5.click(fn=page_i, inputs=[nums[4], result_list], outputs=[c, result_list]) + page_6.click(fn=page_i, inputs=[nums[5], result_list], outputs=[c, result_list]) + page_7.click(fn=page_i, inputs=[nums[6], result_list], outputs=[c, result_list]) + page_8.click(fn=page_i, inputs=[nums[7], result_list], outputs=[c, result_list]) + page_9.click(fn=page_i, inputs=[nums[8], result_list], outputs=[c, result_list]) + page_10.click(fn=page_i, inputs=[nums[9], result_list], outputs=[c, result_list]) +demo.launch(enable_queue=True, debug=True) \ No newline at end of file diff --git a/data/.gitkeep b/data/.gitkeep new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/data/data-00000-of-00028.arrow b/data/data-00000-of-00028.arrow new file mode 100644 index 0000000000000000000000000000000000000000..5fb849922bd9d7460264e083001722a4607a6d44 --- /dev/null +++ b/data/data-00000-of-00028.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a1ee6ae93ee94def8066f7d4b10fe58625bd3b814f0816f8952c590a1f0104e +size 1182752 diff --git a/data/data-00001-of-00028.arrow b/data/data-00001-of-00028.arrow new file mode 100644 index 0000000000000000000000000000000000000000..f6e4287ce500205bab30dc63b9fc31b9b2d96c40 --- /dev/null +++ b/data/data-00001-of-00028.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:028e08406f19c4eaa635529ba64f49c30116869a34bca79de0df283c21e821fe +size 1129936 diff --git a/data/data-00002-of-00028.arrow b/data/data-00002-of-00028.arrow new file mode 100644 index 0000000000000000000000000000000000000000..c290d28570c732b888680b748002ed6cb82ac189 --- /dev/null +++ b/data/data-00002-of-00028.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e73a2a71712511a2a01c93ec7ff2649aeed8749b89183a7e86001a24eb7aa0c5 +size 1145352 diff --git a/data/data-00003-of-00028.arrow b/data/data-00003-of-00028.arrow new file mode 100644 index 0000000000000000000000000000000000000000..ce2eee2420809197e5e595bab136d91e6beb0e08 --- /dev/null +++ b/data/data-00003-of-00028.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23b7329ac2b5fdbeab6363b67a724fdfd6577d33d323ee5d540aa96fe6d24aca +size 1172640 diff --git a/data/data-00004-of-00028.arrow b/data/data-00004-of-00028.arrow new file mode 100644 index 0000000000000000000000000000000000000000..d68a4cc9afb114ed8d4b19a047986646475e236d --- /dev/null +++ b/data/data-00004-of-00028.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ead5dad1bb9b3f4765647049278426cc51b6cac2bcba2bee89ac65d28f5a7733 +size 1187064 diff --git a/data/data-00005-of-00028.arrow b/data/data-00005-of-00028.arrow new file mode 100644 index 0000000000000000000000000000000000000000..0a149eae068f9924d82ccb364422211fa0df440c --- /dev/null +++ b/data/data-00005-of-00028.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e7cc11f82748aeb6662bf8ab014d77f2d759540e8e425c7f181d47615eaff24 +size 1193288 diff --git a/data/data-00006-of-00028.arrow b/data/data-00006-of-00028.arrow new file mode 100644 index 0000000000000000000000000000000000000000..6982f37a270964f4b999f091ba8014e825ac7e82 --- /dev/null +++ b/data/data-00006-of-00028.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c0a1f1deaf509314696c51512fac74ee3d619b3f6b2a306151af3fbc3f4574a +size 1257400 diff --git a/data/data-00007-of-00028.arrow b/data/data-00007-of-00028.arrow new file mode 100644 index 0000000000000000000000000000000000000000..3dddedfa2a95f0406c976bb08eb9695f0670030b --- /dev/null +++ b/data/data-00007-of-00028.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c007f5c47c8884828b69caccdd086b7d4c6a95d7851acc1141af050cf1b45099 +size 1117368 diff --git a/data/data-00008-of-00028.arrow b/data/data-00008-of-00028.arrow new file mode 100644 index 0000000000000000000000000000000000000000..951dbbc11906184db6e701fb6fb01ef5eeb8332b --- /dev/null +++ b/data/data-00008-of-00028.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dabe3091ad434f0709df48bce3680887092cac3ff8eacdc3395cb1790e840336 +size 1279104 diff --git a/data/data-00009-of-00028.arrow b/data/data-00009-of-00028.arrow new file mode 100644 index 0000000000000000000000000000000000000000..ab66d623615f230180b09c92b94446b6960e399e --- /dev/null +++ b/data/data-00009-of-00028.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ed50832ee22d82458a49a317a47e02ae223b0ee3dbac09142ca7f6bfd56b699 +size 1198560 diff --git a/data/data-00010-of-00028.arrow b/data/data-00010-of-00028.arrow new file mode 100644 index 0000000000000000000000000000000000000000..8a8a8525bd8b71fa4224d406641a9ea85a010336 --- /dev/null +++ b/data/data-00010-of-00028.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ac291600dfd053ac310bfc0d5bb24b6b062fa0b464f2f1299e5fe6480af2a11 +size 1132048 diff --git a/data/data-00011-of-00028.arrow b/data/data-00011-of-00028.arrow new file mode 100644 index 0000000000000000000000000000000000000000..d24f6d8f5ab93a3a29743022d9e32f09e1e07152 --- /dev/null +++ b/data/data-00011-of-00028.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8831cbe3be650a23eab12b371a6fc228a1862ebbc271622ee76bcf6dac35c526 +size 1132416 diff --git a/data/data-00012-of-00028.arrow b/data/data-00012-of-00028.arrow new file mode 100644 index 0000000000000000000000000000000000000000..e770dfcfad2164eb6daa0c170cd790ded554cf9c --- /dev/null +++ b/data/data-00012-of-00028.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01bce3dd4da0ea2395774a0f8d23279faf7d2ab22f0a7a8bdd88114654503ea6 +size 1131856 diff --git a/data/data-00013-of-00028.arrow b/data/data-00013-of-00028.arrow new file mode 100644 index 0000000000000000000000000000000000000000..f5a9a624c6ad46306b336c5467c9924721b929a5 --- /dev/null +++ b/data/data-00013-of-00028.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:68863d00d7ec4e521574713ff19f1ccf19ea7a0391a0afbcb6aa81525f30e45b +size 1192864 diff --git a/data/data-00014-of-00028.arrow b/data/data-00014-of-00028.arrow new file mode 100644 index 0000000000000000000000000000000000000000..1dbb82997fdd87efe8cddd03a64479764aab7405 --- /dev/null +++ b/data/data-00014-of-00028.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6778d98b72dd72e10fef944fc49bf39fa6611461f8b1501115bc9c36bdf7a52d +size 1179232 diff --git a/data/data-00015-of-00028.arrow b/data/data-00015-of-00028.arrow new file mode 100644 index 0000000000000000000000000000000000000000..3ef7b05ad2c0f418bf040c56ae252f9157089456 --- /dev/null +++ b/data/data-00015-of-00028.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd66a20d13aefd53eefaa8927d85f93ba85037570329ca12963500592a0e7e7e +size 1280600 diff --git a/data/data-00016-of-00028.arrow b/data/data-00016-of-00028.arrow new file mode 100644 index 0000000000000000000000000000000000000000..2a692edc7a344811e4cc6da6059a7f50cdf20c29 --- /dev/null +++ b/data/data-00016-of-00028.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04ea574871964d888a3748eb4d70d0682388d4b8eb0acc0e358f19e790ba96b6 +size 1186768 diff --git a/data/data-00017-of-00028.arrow b/data/data-00017-of-00028.arrow new file mode 100644 index 0000000000000000000000000000000000000000..8dea0753bb5802dc697817b9c5b1e5c5217a4006 --- /dev/null +++ b/data/data-00017-of-00028.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ddee884b136a935cfc33c82fc1f7562c9f5ce33d46cbf90e9654f0903461916 +size 1220096 diff --git a/data/data-00018-of-00028.arrow b/data/data-00018-of-00028.arrow new file mode 100644 index 0000000000000000000000000000000000000000..97a3271403851f93743ce20ba6f033f6997799e0 --- /dev/null +++ b/data/data-00018-of-00028.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83a6e8e03ad7a0573392aa1e0e29477315b01b5a0715efb18206dba9d845e19a +size 1259104 diff --git a/data/data-00019-of-00028.arrow b/data/data-00019-of-00028.arrow new file mode 100644 index 0000000000000000000000000000000000000000..3bb79dbb2a422e35ffb75282f146af8c8e1c7f9f --- /dev/null +++ b/data/data-00019-of-00028.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:96a5ff6305022fda1c2b77f7e637c472a7749483b1f088b59d7fd2c04a3861e0 +size 1154104 diff --git a/data/data-00020-of-00028.arrow b/data/data-00020-of-00028.arrow new file mode 100644 index 0000000000000000000000000000000000000000..f2b3e92cb16ee90addc6725360ef229c09319672 --- /dev/null +++ b/data/data-00020-of-00028.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ecfa7b0da3fb75228140f7f6d3c57d00d1b4c4e2b34cf2b4f97920603c0e6dbe +size 1214392 diff --git a/data/data-00021-of-00028.arrow b/data/data-00021-of-00028.arrow new file mode 100644 index 0000000000000000000000000000000000000000..6ca5651dadf7f6f5fb0eb4e8bae350ba976a16c3 --- /dev/null +++ b/data/data-00021-of-00028.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:68053c7a9f52161746926cde2477c1dd84ea6f923e254c59abc2eea6d89044c2 +size 1236032 diff --git a/data/data-00022-of-00028.arrow b/data/data-00022-of-00028.arrow new file mode 100644 index 0000000000000000000000000000000000000000..e56d1bcaf2affdc76541a6ad8c012e3130790ae9 --- /dev/null +++ b/data/data-00022-of-00028.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b4b091d072623a846983444ee45e556fe8e60c71d391f9bbc9d1fe035182d4c +size 1226720 diff --git a/data/data-00023-of-00028.arrow b/data/data-00023-of-00028.arrow new file mode 100644 index 0000000000000000000000000000000000000000..f74882f34025903a8987076ca2511ffd4328c4d9 --- /dev/null +++ b/data/data-00023-of-00028.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c66848c7358c3e42fb1e4bac54345756f833f082b27ca9df91182ac0df7690a +size 1196744 diff --git a/data/data-00024-of-00028.arrow b/data/data-00024-of-00028.arrow new file mode 100644 index 0000000000000000000000000000000000000000..80fcad2ed52d7e8b0de58f72e56a3a26dfee044e --- /dev/null +++ b/data/data-00024-of-00028.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2314af74807fbf3c35e2e7620de090db31b78626557a1d048ce29ab083a817d3 +size 1223576 diff --git a/data/data-00025-of-00028.arrow b/data/data-00025-of-00028.arrow new file mode 100644 index 0000000000000000000000000000000000000000..c7a7d8a0729acb218dde82bc9b2d1f8bb80342a2 --- /dev/null +++ b/data/data-00025-of-00028.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e017a0828dc714499bf0e5eccc6a2063614aadf7b72d5f6204e94cb21644f7b +size 1173912 diff --git a/data/data-00026-of-00028.arrow b/data/data-00026-of-00028.arrow new file mode 100644 index 0000000000000000000000000000000000000000..88f769a94b5c974d222cb9bcd4db09111b048c1f --- /dev/null +++ b/data/data-00026-of-00028.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:178b06fdfd6bcf84656715c7280a9a8f8ed72cb904f7e2288f933fcaa68d7c85 +size 1254640 diff --git a/data/data-00027-of-00028.arrow b/data/data-00027-of-00028.arrow new file mode 100644 index 0000000000000000000000000000000000000000..5101969ca2b07fd05114e2eb63212d5961e326b5 --- /dev/null +++ b/data/data-00027-of-00028.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7da5a5081a8cd37ae5025739b4aadc93aaec9e11a05046e3a062cbdf16acb6f9 +size 1196600 diff --git a/data/dataset_info.json b/data/dataset_info.json new file mode 100644 index 0000000000000000000000000000000000000000..c92279c0d515fe452a64ea31245ef8d731eaa49e --- /dev/null +++ b/data/dataset_info.json @@ -0,0 +1,55 @@ +{ + "builder_name": "imdb", + "citation": "@InProceedings{maas-EtAl:2011:ACL-HLT2011,\n author = {Maas, Andrew L. and Daly, Raymond E. and Pham, Peter T. and Huang, Dan and Ng, Andrew Y. and Potts, Christopher},\n title = {Learning Word Vectors for Sentiment Analysis},\n booktitle = {Proceedings of the 49th Annual Meeting of the Association for Computational Linguistics: Human Language Technologies},\n month = {June},\n year = {2011},\n address = {Portland, Oregon, USA},\n publisher = {Association for Computational Linguistics},\n pages = {142--150},\n url = {http://www.aclweb.org/anthology/P11-1015}\n}\n", + "config_name": "plain_text", + "dataset_size": 133190302, + "description": "Large Movie Review Dataset.\nThis is a dataset for binary sentiment classification containing substantially more data than previous benchmark datasets. We provide a set of 25,000 highly polar movie reviews for training, and 25,000 for testing. There is additional unlabeled data for use as well.", + "download_checksums": { + "http://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz": { + "num_bytes": 84125825, + "checksum": "c40f74a18d3b61f90feba1e17730e0d38e8b97c05fde7008942e91923d1658fe" + } + }, + "download_size": 84125825, + "features": { + "text": { + "dtype": "string", + "_type": "Value" + }, + "docid": { + "dtype": "int64", + "_type": "Value" + } + }, + "homepage": "http://ai.stanford.edu/~amaas/data/sentiment/", + "license": "", + "size_in_bytes": 217316127, + "splits": { + "train": { + "name": "train", + "num_bytes": 33432823, + "num_examples": 25000, + "dataset_name": "imdb" + }, + "test": { + "name": "test", + "num_bytes": 32650685, + "num_examples": 25000, + "dataset_name": "imdb" + }, + "unsupervised": { + "name": "unsupervised", + "num_bytes": 67106794, + "num_examples": 50000, + "dataset_name": "imdb" + } + }, + "task_templates": [], + "version": { + "version_str": "1.0.0", + "description": "", + "major": 1, + "minor": 0, + "patch": 0 + } +} \ No newline at end of file diff --git a/data/state.json b/data/state.json new file mode 100644 index 0000000000000000000000000000000000000000..46f26077dd452d875b641072afad6c4b9a8af401 --- /dev/null +++ b/data/state.json @@ -0,0 +1,94 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00028.arrow" + }, + { + "filename": "data-00001-of-00028.arrow" + }, + { + "filename": "data-00002-of-00028.arrow" + }, + { + "filename": "data-00003-of-00028.arrow" + }, + { + "filename": "data-00004-of-00028.arrow" + }, + { + "filename": "data-00005-of-00028.arrow" + }, + { + "filename": "data-00006-of-00028.arrow" + }, + { + "filename": "data-00007-of-00028.arrow" + }, + { + "filename": "data-00008-of-00028.arrow" + }, + { + "filename": "data-00009-of-00028.arrow" + }, + { + "filename": "data-00010-of-00028.arrow" + }, + { + "filename": "data-00011-of-00028.arrow" + }, + { + "filename": "data-00012-of-00028.arrow" + }, + { + "filename": "data-00013-of-00028.arrow" + }, + { + "filename": "data-00014-of-00028.arrow" + }, + { + "filename": "data-00015-of-00028.arrow" + }, + { + "filename": "data-00016-of-00028.arrow" + }, + { + "filename": "data-00017-of-00028.arrow" + }, + { + "filename": "data-00018-of-00028.arrow" + }, + { + "filename": "data-00019-of-00028.arrow" + }, + { + "filename": "data-00020-of-00028.arrow" + }, + { + "filename": "data-00021-of-00028.arrow" + }, + { + "filename": "data-00022-of-00028.arrow" + }, + { + "filename": "data-00023-of-00028.arrow" + }, + { + "filename": "data-00024-of-00028.arrow" + }, + { + "filename": "data-00025-of-00028.arrow" + }, + { + "filename": "data-00026-of-00028.arrow" + }, + { + "filename": "data-00027-of-00028.arrow" + } + ], + "_fingerprint": "1d74567875c58748", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": "train" +} \ No newline at end of file diff --git a/index/.gitkeep b/index/.gitkeep new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/index/_0.fdm b/index/_0.fdm new file mode 100644 index 0000000000000000000000000000000000000000..6914eebbb632abd9d6b60d62e99b335746630e83 Binary files /dev/null and b/index/_0.fdm differ diff --git a/index/_0.fdt b/index/_0.fdt new file mode 100644 index 0000000000000000000000000000000000000000..dd0a06268ea4e79c580b0f727594985e7bc7af3d Binary files /dev/null and b/index/_0.fdt differ diff --git a/index/_0.fdx b/index/_0.fdx new file mode 100644 index 0000000000000000000000000000000000000000..bccbbcb96cddf4ebc1d186070c5710410092108a Binary files /dev/null and b/index/_0.fdx differ diff --git a/index/_0.fnm b/index/_0.fnm new file mode 100644 index 0000000000000000000000000000000000000000..3c6fc60ad6a188f52d8c5e80f7ce4730e6358676 Binary files /dev/null and b/index/_0.fnm differ diff --git a/index/_0.nvd b/index/_0.nvd new file mode 100644 index 0000000000000000000000000000000000000000..6060486fe3160619aafe48ee052de1f6bc16a83e Binary files /dev/null and b/index/_0.nvd differ diff --git a/index/_0.nvm b/index/_0.nvm new file mode 100644 index 0000000000000000000000000000000000000000..8a229209c85ac46fce08e4c517057e848a9d71e7 Binary files /dev/null and b/index/_0.nvm differ diff --git a/index/_0.si b/index/_0.si new file mode 100644 index 0000000000000000000000000000000000000000..fc7e977e8a2af5140a6a18d27e2edf62bcc40142 Binary files /dev/null and b/index/_0.si differ diff --git a/index/_0_Lucene90_0.doc b/index/_0_Lucene90_0.doc new file mode 100644 index 0000000000000000000000000000000000000000..3cba5a85c7e8d96ae0d94d334dec359a4679edea Binary files /dev/null and b/index/_0_Lucene90_0.doc differ diff --git a/index/_0_Lucene90_0.dvd b/index/_0_Lucene90_0.dvd new file mode 100644 index 0000000000000000000000000000000000000000..3fa922bcf9fc1414e05fed36034a3a7e0e43bf7a Binary files /dev/null and b/index/_0_Lucene90_0.dvd differ diff --git a/index/_0_Lucene90_0.dvm b/index/_0_Lucene90_0.dvm new file mode 100644 index 0000000000000000000000000000000000000000..f9e164850676f0a48df4abe940a95a2c58a1f1ee Binary files /dev/null and b/index/_0_Lucene90_0.dvm differ diff --git a/index/_0_Lucene90_0.pos b/index/_0_Lucene90_0.pos new file mode 100644 index 0000000000000000000000000000000000000000..9bf938350d212e4d531a901c7af93b71bb3ad4f0 --- /dev/null +++ b/index/_0_Lucene90_0.pos @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5fb16246848a389dd1739783c966b0d1a8ebcb591f41b5c61d31f80d59bf1660 +size 1260420 diff --git a/index/_0_Lucene90_0.tim b/index/_0_Lucene90_0.tim new file mode 100644 index 0000000000000000000000000000000000000000..f33d09a27e676c2704953a30606ab880c991cdbc Binary files /dev/null and b/index/_0_Lucene90_0.tim differ diff --git a/index/_0_Lucene90_0.tip b/index/_0_Lucene90_0.tip new file mode 100644 index 0000000000000000000000000000000000000000..37c619c5405fb3dede05b7577c5522cd1f76a253 Binary files /dev/null and b/index/_0_Lucene90_0.tip differ diff --git a/index/_0_Lucene90_0.tmd b/index/_0_Lucene90_0.tmd new file mode 100644 index 0000000000000000000000000000000000000000..4695dbaf948755c32a3566203cb31df459ec970d Binary files /dev/null and b/index/_0_Lucene90_0.tmd differ diff --git a/index/_1.fdm b/index/_1.fdm new file mode 100644 index 0000000000000000000000000000000000000000..4d6c67a7034fc77868bb4c9a1c8dbc639de42db0 Binary files /dev/null and b/index/_1.fdm differ diff --git a/index/_1.fdt b/index/_1.fdt new file mode 100644 index 0000000000000000000000000000000000000000..c712ab23927acf9fa91b90718d45396c0dc0da85 Binary files /dev/null and b/index/_1.fdt differ diff --git a/index/_1.fdx b/index/_1.fdx new file mode 100644 index 0000000000000000000000000000000000000000..f3940a8bc02e5cf2be0550f9482a61bbecd8a2fb Binary files /dev/null and b/index/_1.fdx differ diff --git a/index/_1.fnm b/index/_1.fnm new file mode 100644 index 0000000000000000000000000000000000000000..4b292759797dfbf3d7cb1a2ae7bf7dcfc4c56960 Binary files /dev/null and b/index/_1.fnm differ diff --git a/index/_1.nvd b/index/_1.nvd new file mode 100644 index 0000000000000000000000000000000000000000..b221d7f963ab57d2d81432bef8d6f222a11ce847 Binary files /dev/null and b/index/_1.nvd differ diff --git a/index/_1.nvm b/index/_1.nvm new file mode 100644 index 0000000000000000000000000000000000000000..2080056b0b4c74480ecfad974eecc803758232da Binary files /dev/null and b/index/_1.nvm differ diff --git a/index/_1.si b/index/_1.si new file mode 100644 index 0000000000000000000000000000000000000000..2550c32fa603ca6a7bcef5f3f1a04b253a120043 Binary files /dev/null and b/index/_1.si differ diff --git a/index/_1_Lucene90_0.doc b/index/_1_Lucene90_0.doc new file mode 100644 index 0000000000000000000000000000000000000000..58488c2cba5db62553271bb318404bb0ff553c3b Binary files /dev/null and b/index/_1_Lucene90_0.doc differ diff --git a/index/_1_Lucene90_0.dvd b/index/_1_Lucene90_0.dvd new file mode 100644 index 0000000000000000000000000000000000000000..539fdccdbf5717a3e46aa6fe4bf842d71face595 Binary files /dev/null and b/index/_1_Lucene90_0.dvd differ diff --git a/index/_1_Lucene90_0.dvm b/index/_1_Lucene90_0.dvm new file mode 100644 index 0000000000000000000000000000000000000000..0ca3daffd9549a8d505852e3b56f885e4edece6a Binary files /dev/null and b/index/_1_Lucene90_0.dvm differ diff --git a/index/_1_Lucene90_0.pos b/index/_1_Lucene90_0.pos new file mode 100644 index 0000000000000000000000000000000000000000..93180b7185f728ca373c5ec3dd978ff35c3ed374 --- /dev/null +++ b/index/_1_Lucene90_0.pos @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a729ee38cc02a7c3c0b0a53d322ac14c63579a972c5cd54b6acf255033eaa82 +size 1261949 diff --git a/index/_1_Lucene90_0.tim b/index/_1_Lucene90_0.tim new file mode 100644 index 0000000000000000000000000000000000000000..7abf8fd18b35c23fbde15d67ea890df590ce9aff Binary files /dev/null and b/index/_1_Lucene90_0.tim differ diff --git a/index/_1_Lucene90_0.tip b/index/_1_Lucene90_0.tip new file mode 100644 index 0000000000000000000000000000000000000000..4ab5585a6ccbf87ccaa7a278fbfc98d3352e7c0a Binary files /dev/null and b/index/_1_Lucene90_0.tip differ diff --git a/index/_1_Lucene90_0.tmd b/index/_1_Lucene90_0.tmd new file mode 100644 index 0000000000000000000000000000000000000000..8a8416271d625f733fc605a370e5f42328eb9332 Binary files /dev/null and b/index/_1_Lucene90_0.tmd differ diff --git a/index/_2.fdm b/index/_2.fdm new file mode 100644 index 0000000000000000000000000000000000000000..7e66b611ce46e57cb193d517feeeff366c6e16ea Binary files /dev/null and b/index/_2.fdm differ diff --git a/index/_2.fdt b/index/_2.fdt new file mode 100644 index 0000000000000000000000000000000000000000..95cb73b8d5c16a116b69a6d4f093c6c5a9d22410 Binary files /dev/null and b/index/_2.fdt differ diff --git a/index/_2.fdx b/index/_2.fdx new file mode 100644 index 0000000000000000000000000000000000000000..402c20b3f5b5b619dbb2b47b28c91cb817881194 Binary files /dev/null and b/index/_2.fdx differ diff --git a/index/_2.fnm b/index/_2.fnm new file mode 100644 index 0000000000000000000000000000000000000000..670441ac0a17443f1009e25fb42c026b772bfbd1 Binary files /dev/null and b/index/_2.fnm differ diff --git a/index/_2.nvd b/index/_2.nvd new file mode 100644 index 0000000000000000000000000000000000000000..9a99a264b9cde86d4d58b4cae8de8b44a2c93b4f Binary files /dev/null and b/index/_2.nvd differ diff --git a/index/_2.nvm b/index/_2.nvm new file mode 100644 index 0000000000000000000000000000000000000000..491b58005aabec153da47d3c233730e88993dd0d Binary files /dev/null and b/index/_2.nvm differ diff --git a/index/_2.si b/index/_2.si new file mode 100644 index 0000000000000000000000000000000000000000..ab7225f1f76018b4925ebf3d0b8192e57a878735 Binary files /dev/null and b/index/_2.si differ diff --git a/index/_2_Lucene90_0.doc b/index/_2_Lucene90_0.doc new file mode 100644 index 0000000000000000000000000000000000000000..e49a77317f851eb2024fd5cbe9a6454da3428a52 Binary files /dev/null and b/index/_2_Lucene90_0.doc differ diff --git a/index/_2_Lucene90_0.dvd b/index/_2_Lucene90_0.dvd new file mode 100644 index 0000000000000000000000000000000000000000..6989f53ed4c44cfef6390f9d80251b8662900f1b Binary files /dev/null and b/index/_2_Lucene90_0.dvd differ diff --git a/index/_2_Lucene90_0.dvm b/index/_2_Lucene90_0.dvm new file mode 100644 index 0000000000000000000000000000000000000000..038531da477ef42be99ebbf1e369228eaf34490d Binary files /dev/null and b/index/_2_Lucene90_0.dvm differ diff --git a/index/_2_Lucene90_0.pos b/index/_2_Lucene90_0.pos new file mode 100644 index 0000000000000000000000000000000000000000..e28cfc8bf58528cfa32e84946abbfc6f719bc1a9 --- /dev/null +++ b/index/_2_Lucene90_0.pos @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fdffa10f2a060bb5587affbe1b0ba6f1f1e7cf8ba3fcf8edea41fadec6331d58 +size 1237880 diff --git a/index/_2_Lucene90_0.tim b/index/_2_Lucene90_0.tim new file mode 100644 index 0000000000000000000000000000000000000000..1ecd345e64dfde8508dd672c1182600b11b1d2c4 Binary files /dev/null and b/index/_2_Lucene90_0.tim differ diff --git a/index/_2_Lucene90_0.tip b/index/_2_Lucene90_0.tip new file mode 100644 index 0000000000000000000000000000000000000000..7718032e418a575c68a7333a1c27f51c102920a2 Binary files /dev/null and b/index/_2_Lucene90_0.tip differ diff --git a/index/_2_Lucene90_0.tmd b/index/_2_Lucene90_0.tmd new file mode 100644 index 0000000000000000000000000000000000000000..fdff527458ed3a050fd9b0acdf66d83482f8e4f0 Binary files /dev/null and b/index/_2_Lucene90_0.tmd differ diff --git a/index/_3.fdm b/index/_3.fdm new file mode 100644 index 0000000000000000000000000000000000000000..1602dfb9174a12d58a25b692558702cbc3ee0183 Binary files /dev/null and b/index/_3.fdm differ diff --git a/index/_3.fdt b/index/_3.fdt new file mode 100644 index 0000000000000000000000000000000000000000..1448d8bedcb5065f677d7e6e7d2daba9cd7edb0f Binary files /dev/null and b/index/_3.fdt differ diff --git a/index/_3.fdx b/index/_3.fdx new file mode 100644 index 0000000000000000000000000000000000000000..2bbba27cd7ffca377d80aebd14b4913627bd796f Binary files /dev/null and b/index/_3.fdx differ diff --git a/index/_3.fnm b/index/_3.fnm new file mode 100644 index 0000000000000000000000000000000000000000..4777b66370b0ea2b660487eee164c66a0739be99 Binary files /dev/null and b/index/_3.fnm differ diff --git a/index/_3.nvd b/index/_3.nvd new file mode 100644 index 0000000000000000000000000000000000000000..9ed7b526ad2c7c9911d9ab5ece29865dec69c672 Binary files /dev/null and b/index/_3.nvd differ diff --git a/index/_3.nvm b/index/_3.nvm new file mode 100644 index 0000000000000000000000000000000000000000..dc73f50999c097fb7a2d2c460c48a613c8d02974 Binary files /dev/null and b/index/_3.nvm differ diff --git a/index/_3.si b/index/_3.si new file mode 100644 index 0000000000000000000000000000000000000000..b36713763b065eed0e7c9d4726117ba18243356f Binary files /dev/null and b/index/_3.si differ diff --git a/index/_3_Lucene90_0.doc b/index/_3_Lucene90_0.doc new file mode 100644 index 0000000000000000000000000000000000000000..b14a42be51b2c14a3b127a52721817d928f55540 Binary files /dev/null and b/index/_3_Lucene90_0.doc differ diff --git a/index/_3_Lucene90_0.dvd b/index/_3_Lucene90_0.dvd new file mode 100644 index 0000000000000000000000000000000000000000..a90b66d7060aa95b4bb84455ca27a54e41563f59 Binary files /dev/null and b/index/_3_Lucene90_0.dvd differ diff --git a/index/_3_Lucene90_0.dvm b/index/_3_Lucene90_0.dvm new file mode 100644 index 0000000000000000000000000000000000000000..bd2a52eb3e9a767a4ba5fea506fd5d07af2af66c Binary files /dev/null and b/index/_3_Lucene90_0.dvm differ diff --git a/index/_3_Lucene90_0.pos b/index/_3_Lucene90_0.pos new file mode 100644 index 0000000000000000000000000000000000000000..40993d0b69af01c6fac697abd9dcf1387bb3a026 --- /dev/null +++ b/index/_3_Lucene90_0.pos @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3564818c1abb80ee5cbe082ddef795a99b6d34134d471d9ddeafeaeb6975c652 +size 1258948 diff --git a/index/_3_Lucene90_0.tim b/index/_3_Lucene90_0.tim new file mode 100644 index 0000000000000000000000000000000000000000..b1e1d40999d8e6f77b1fa5762d5044f8378a3b8d Binary files /dev/null and b/index/_3_Lucene90_0.tim differ diff --git a/index/_3_Lucene90_0.tip b/index/_3_Lucene90_0.tip new file mode 100644 index 0000000000000000000000000000000000000000..7ea3f6ef6b8c2b5d0ff529737a1b434d0f3b2a5e Binary files /dev/null and b/index/_3_Lucene90_0.tip differ diff --git a/index/_3_Lucene90_0.tmd b/index/_3_Lucene90_0.tmd new file mode 100644 index 0000000000000000000000000000000000000000..f3ed28053d7a7c7187d9cb0cba035cf624d0d791 Binary files /dev/null and b/index/_3_Lucene90_0.tmd differ diff --git a/index/segments_1 b/index/segments_1 new file mode 100644 index 0000000000000000000000000000000000000000..1d5d2ac87b3ee4960b1c182a75b1f1d8d5c3f807 Binary files /dev/null and b/index/segments_1 differ diff --git a/index/write.lock b/index/write.lock new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/packages.txt b/packages.txt new file mode 100644 index 0000000000000000000000000000000000000000..7083f85c3741aaa661aabe2d5048ef5ebdb13b71 --- /dev/null +++ b/packages.txt @@ -0,0 +1 @@ +openjdk-11-jdk diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..29ed591f65d9215d9a4078df34c760758143bd82 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,4 @@ +pyserini +datasets +faiss-cpu +torch \ No newline at end of file