diff --git a/app.py b/app.py
new file mode 100644
index 0000000000000000000000000000000000000000..45c4b1fb016007531581c3bae81f47d1ae27c1d0
--- /dev/null
+++ b/app.py
@@ -0,0 +1,108 @@
+"""Streamlit front end for Gaia Search.
+
+Reads a query from the sidebar, looks it up in the Lucene index stored in
+``index/`` and renders the matching dataset rows in an HTML component.
+"""
+
+import html
+import os
+
+import streamlit as st
+import streamlit.components.v1 as components
+from datasets import load_dataset
+from pyserini.search.lucene import LuceneSearcher
+
+
+st.set_page_config(page_title="Gaia Search", layout="wide")
+
+# Write a Streamlit config file that selects the light theme.
+config_dir = os.path.join(os.getcwd(), ".streamlit")
+os.makedirs(config_dir, exist_ok=True)
+with open(os.path.join(config_dir, "config.toml"), "w", encoding="utf-8") as file:
+    file.write('[theme]\nbase="light"')
+
+
+st.sidebar.markdown(
+    """
+
+Gaia Search 🌖🌏
+
+A search engine for the LAION large scale image caption corpora
+
+""",
+    unsafe_allow_html=True,
+)
+
+st.sidebar.markdown(
+    """
+
+GitHub | Project Report
+
+""",
+    unsafe_allow_html=True,
+)
+
+query = st.sidebar.text_input(label="Search query", value="")
+
+footer = """
+
+"""
+st.sidebar.markdown(footer, unsafe_allow_html=True)
+
+
+# NOTE(review): Streamlit re-executes this script on each interaction, so the
+# index and dataset are reopened every time; consider caching these two objects.
+searcher = LuceneSearcher("index")
+ds = load_dataset("imdb", split="train")
+
+
+def search(query):
+    """Return the top matches for *query* as an HTML fragment.
+
+    The ten best Lucene hits are mapped back to dataset rows through their
+    integer ``docid``; each row's text is HTML-escaped and followed by a
+    ``<br>`` so it can be embedded directly in the results markup.
+    """
+    hits = searcher.search(query, k=10)
+    results = ds.select([int(hit.docid) for hit in hits])
+    # A Dataset cannot be concatenated with a string, so build the fragment
+    # from the row texts instead.
+    # NOTE(review): assumes a "text" column, as in the imdb dataset -- confirm.
+    return "".join(f"{html.escape(text)}<br>" for text in results["text"])
+
+
+if st.sidebar.button("Search"):
+    results = search(query)
+    rendered_results = f"""
+
+        {results}
+
+    """
+    st.markdown(
+        """
+
+        """,
+        unsafe_allow_html=True,
+    )
+    st.markdown(
+        """
+
+        """,
+        unsafe_allow_html=True,
+    )
+    st.markdown(
+        f"""
+
+        Gaia Search 🌖🌏
+
+        """,
+        unsafe_allow_html=True,
+    )
+    components.html(
+        """
+
+        """
+        + rendered_results,
+        height=800,
+        scrolling=True,
+    )
diff --git a/index/.gitkeep b/index/.gitkeep
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/index/_c.fdm b/index/_c.fdm
new file mode 100644
index 0000000000000000000000000000000000000000..10b8ac6adbb0b3e4985c39826ef97cd0e54ed4ea
Binary files /dev/null and b/index/_c.fdm differ
diff --git a/index/_c.fdt b/index/_c.fdt
new file mode 100644
index 0000000000000000000000000000000000000000..e49bfc54ed689018520b8a95488144cc37c809d1
Binary files /dev/null and b/index/_c.fdt differ
diff --git a/index/_c.fdx b/index/_c.fdx
new file mode 100644
index 0000000000000000000000000000000000000000..6a5117cf644f557f657af78c40ac895bf372a1d5
Binary files /dev/null and b/index/_c.fdx differ
diff --git a/index/_c.fnm b/index/_c.fnm
new file mode 100644
index 0000000000000000000000000000000000000000..56d4d988faa2d929462e8d90bdf22efe14726d96
Binary files /dev/null and b/index/_c.fnm differ
diff --git a/index/_c.nvd b/index/_c.nvd
new file mode 100644
index 0000000000000000000000000000000000000000..3e65f3f2687952a41dea0f7f82dc9afdfb93bb7d
Binary files /dev/null and b/index/_c.nvd differ
diff --git a/index/_c.nvm b/index/_c.nvm
new file mode 100644
index 0000000000000000000000000000000000000000..978fae64be3faaf75727138fd7af3f8eacfc4533
Binary files /dev/null and b/index/_c.nvm differ
diff --git a/index/_c.si b/index/_c.si
new file mode 100644
index 0000000000000000000000000000000000000000..3b0036eb79312f07d58280d5099f477ff86071c3
Binary files /dev/null and b/index/_c.si differ
diff --git a/index/_c_Lucene90_0.doc b/index/_c_Lucene90_0.doc
new file mode 100644
index 0000000000000000000000000000000000000000..5415752e4ac1b1f430fb128662810693f903830d
Binary files /dev/null and b/index/_c_Lucene90_0.doc differ
diff --git a/index/_c_Lucene90_0.dvd b/index/_c_Lucene90_0.dvd
new file mode 100644
index 0000000000000000000000000000000000000000..93ac0aad32dfb61863a7ba8bc42ced4b3a135000 Binary files /dev/null and b/index/_c_Lucene90_0.dvd differ diff --git a/index/_c_Lucene90_0.dvm b/index/_c_Lucene90_0.dvm new file mode 100644 index 0000000000000000000000000000000000000000..2acc288ba50759bcb48e219b5d66cb810d75fbb1 Binary files /dev/null and b/index/_c_Lucene90_0.dvm differ diff --git a/index/_c_Lucene90_0.pos b/index/_c_Lucene90_0.pos new file mode 100644 index 0000000000000000000000000000000000000000..40685acedc3745d6cc7d726c773abc557da5a69a --- /dev/null +++ b/index/_c_Lucene90_0.pos @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4ac8c1c910a978617aa54300a6c2421ab1295ad8fd3a28da9eca9ffec36948d +size 1240149 diff --git a/index/_c_Lucene90_0.tim b/index/_c_Lucene90_0.tim new file mode 100644 index 0000000000000000000000000000000000000000..891d5adfd52c2ba8a9b8a9876f2074135b3f9620 Binary files /dev/null and b/index/_c_Lucene90_0.tim differ diff --git a/index/_c_Lucene90_0.tip b/index/_c_Lucene90_0.tip new file mode 100644 index 0000000000000000000000000000000000000000..f1dbd1530a175d89e5fb4c0b67116bdc1817b7a4 Binary files /dev/null and b/index/_c_Lucene90_0.tip differ diff --git a/index/_c_Lucene90_0.tmd b/index/_c_Lucene90_0.tmd new file mode 100644 index 0000000000000000000000000000000000000000..fee1dfc6376c303e5864f8aca905970e8bc66d41 Binary files /dev/null and b/index/_c_Lucene90_0.tmd differ diff --git a/index/_d.fdm b/index/_d.fdm new file mode 100644 index 0000000000000000000000000000000000000000..9c575771c7a6404658315019e45ced9e2514bff8 Binary files /dev/null and b/index/_d.fdm differ diff --git a/index/_d.fdt b/index/_d.fdt new file mode 100644 index 0000000000000000000000000000000000000000..5206c2a8662eb51c91787a12397a33c12b00e906 Binary files /dev/null and b/index/_d.fdt differ diff --git a/index/_d.fdx b/index/_d.fdx new file mode 100644 index 0000000000000000000000000000000000000000..1d8f2cf1ab35be1234ac27a2fcb770cc4a333dd9 
Binary files /dev/null and b/index/_d.fdx differ diff --git a/index/_d.fnm b/index/_d.fnm new file mode 100644 index 0000000000000000000000000000000000000000..351b0a6eeddccf1b8acd8b740ce525fad8ceb83e Binary files /dev/null and b/index/_d.fnm differ diff --git a/index/_d.nvd b/index/_d.nvd new file mode 100644 index 0000000000000000000000000000000000000000..661b3295974d300f895c3ad74d1162813e4b023b Binary files /dev/null and b/index/_d.nvd differ diff --git a/index/_d.nvm b/index/_d.nvm new file mode 100644 index 0000000000000000000000000000000000000000..b115c745767bcbe106596aa124f99c63a52646a1 Binary files /dev/null and b/index/_d.nvm differ diff --git a/index/_d.si b/index/_d.si new file mode 100644 index 0000000000000000000000000000000000000000..f29513b61c310bd51edc94f2413758f6af6d8ff2 Binary files /dev/null and b/index/_d.si differ diff --git a/index/_d_Lucene90_0.doc b/index/_d_Lucene90_0.doc new file mode 100644 index 0000000000000000000000000000000000000000..7d396ea61e3ff79dd7e755af8be8355ec8230e36 Binary files /dev/null and b/index/_d_Lucene90_0.doc differ diff --git a/index/_d_Lucene90_0.dvd b/index/_d_Lucene90_0.dvd new file mode 100644 index 0000000000000000000000000000000000000000..d4a7cbbf7e5cba08fa75d0df43f5fadf5d7654d8 Binary files /dev/null and b/index/_d_Lucene90_0.dvd differ diff --git a/index/_d_Lucene90_0.dvm b/index/_d_Lucene90_0.dvm new file mode 100644 index 0000000000000000000000000000000000000000..15d150d265cf77c2a500d90fa19932ba8d1340cb Binary files /dev/null and b/index/_d_Lucene90_0.dvm differ diff --git a/index/_d_Lucene90_0.pos b/index/_d_Lucene90_0.pos new file mode 100644 index 0000000000000000000000000000000000000000..64f55aac98e7346ce1845fb3d5d26ad92837ffbc --- /dev/null +++ b/index/_d_Lucene90_0.pos @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf1185b23dfe7d554487554e5b731375c22821508fcf8781aec15d0899984efa +size 1262499 diff --git a/index/_d_Lucene90_0.tim b/index/_d_Lucene90_0.tim new file mode 100644 
index 0000000000000000000000000000000000000000..6433aaf0b1f1fc2e83e3be404d8d814921d18d79 Binary files /dev/null and b/index/_d_Lucene90_0.tim differ diff --git a/index/_d_Lucene90_0.tip b/index/_d_Lucene90_0.tip new file mode 100644 index 0000000000000000000000000000000000000000..3e30a5ec74b6e0f1af1019750cf9669938d144fe Binary files /dev/null and b/index/_d_Lucene90_0.tip differ diff --git a/index/_d_Lucene90_0.tmd b/index/_d_Lucene90_0.tmd new file mode 100644 index 0000000000000000000000000000000000000000..bfb5eee9660fb3c8da2b99034b0f17511cbfc251 Binary files /dev/null and b/index/_d_Lucene90_0.tmd differ diff --git a/index/_e.fdm b/index/_e.fdm new file mode 100644 index 0000000000000000000000000000000000000000..454082aa384cfb8086ce2995dc0b15c4ad2ce4bb Binary files /dev/null and b/index/_e.fdm differ diff --git a/index/_e.fdt b/index/_e.fdt new file mode 100644 index 0000000000000000000000000000000000000000..7ca4d2995f1475584639fed152d0f3b05b3f0cfe Binary files /dev/null and b/index/_e.fdt differ diff --git a/index/_e.fdx b/index/_e.fdx new file mode 100644 index 0000000000000000000000000000000000000000..1a8f1e31889530040936442a1374bc34fef044ce Binary files /dev/null and b/index/_e.fdx differ diff --git a/index/_e.fnm b/index/_e.fnm new file mode 100644 index 0000000000000000000000000000000000000000..0ae8e27d7b8442884ed7a9aa8f1314d0b76c4580 Binary files /dev/null and b/index/_e.fnm differ diff --git a/index/_e.nvd b/index/_e.nvd new file mode 100644 index 0000000000000000000000000000000000000000..46d107aee53a07b000a3b6465c659bbbd52e8784 Binary files /dev/null and b/index/_e.nvd differ diff --git a/index/_e.nvm b/index/_e.nvm new file mode 100644 index 0000000000000000000000000000000000000000..d87516ca46d5757777ff0e898deb51f538432fb2 Binary files /dev/null and b/index/_e.nvm differ diff --git a/index/_e.si b/index/_e.si new file mode 100644 index 0000000000000000000000000000000000000000..6a0e026ce203bf654493183bf067a569299dcd06 Binary files /dev/null and 
b/index/_e.si differ diff --git a/index/_e_Lucene90_0.doc b/index/_e_Lucene90_0.doc new file mode 100644 index 0000000000000000000000000000000000000000..be62c65b8cd41158b36e002b180845376063fd01 Binary files /dev/null and b/index/_e_Lucene90_0.doc differ diff --git a/index/_e_Lucene90_0.dvd b/index/_e_Lucene90_0.dvd new file mode 100644 index 0000000000000000000000000000000000000000..f5c5094cd6b4e72ecbcb5994beb8603fcd1c4cf9 Binary files /dev/null and b/index/_e_Lucene90_0.dvd differ diff --git a/index/_e_Lucene90_0.dvm b/index/_e_Lucene90_0.dvm new file mode 100644 index 0000000000000000000000000000000000000000..d4616fe3b49677c175d5ea4e8c090b986451d261 Binary files /dev/null and b/index/_e_Lucene90_0.dvm differ diff --git a/index/_e_Lucene90_0.pos b/index/_e_Lucene90_0.pos new file mode 100644 index 0000000000000000000000000000000000000000..ba777a257c6b66fb680290ffb4d8ca8eada22531 --- /dev/null +++ b/index/_e_Lucene90_0.pos @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ccaee5e5633b5c35ba7045c5e12e692587380d32c3020b81bac24a28da69ca2 +size 1238216 diff --git a/index/_e_Lucene90_0.tim b/index/_e_Lucene90_0.tim new file mode 100644 index 0000000000000000000000000000000000000000..fd06a9963205ee25f363ea89841f5f946510a06b Binary files /dev/null and b/index/_e_Lucene90_0.tim differ diff --git a/index/_e_Lucene90_0.tip b/index/_e_Lucene90_0.tip new file mode 100644 index 0000000000000000000000000000000000000000..3ee5b6de12031e8106e3d84edd2d8ad3f7ee0916 Binary files /dev/null and b/index/_e_Lucene90_0.tip differ diff --git a/index/_e_Lucene90_0.tmd b/index/_e_Lucene90_0.tmd new file mode 100644 index 0000000000000000000000000000000000000000..41f93e8dff6770fd61e7e136d0e4d2995f7d609a Binary files /dev/null and b/index/_e_Lucene90_0.tmd differ diff --git a/index/_f.fdm b/index/_f.fdm new file mode 100644 index 0000000000000000000000000000000000000000..61b9beb4afe5313305a2310a5e1f4c27e29551ac Binary files /dev/null and b/index/_f.fdm differ diff --git 
a/index/_f.fdt b/index/_f.fdt new file mode 100644 index 0000000000000000000000000000000000000000..ac01d8eabd06489e5d77169dd76ffa44bfc3baa3 Binary files /dev/null and b/index/_f.fdt differ diff --git a/index/_f.fdx b/index/_f.fdx new file mode 100644 index 0000000000000000000000000000000000000000..8862acba6cf02065a84921ecb552aeb75bb529a1 Binary files /dev/null and b/index/_f.fdx differ diff --git a/index/_f.fnm b/index/_f.fnm new file mode 100644 index 0000000000000000000000000000000000000000..3145f0ef9470db6ad05b0f4bafb528bf7f936cdf Binary files /dev/null and b/index/_f.fnm differ diff --git a/index/_f.nvd b/index/_f.nvd new file mode 100644 index 0000000000000000000000000000000000000000..3661480f2f798b9fc7390230c99a6dc0c1718781 Binary files /dev/null and b/index/_f.nvd differ diff --git a/index/_f.nvm b/index/_f.nvm new file mode 100644 index 0000000000000000000000000000000000000000..f532b7f80e4115225c1cd3c4b1e5ab8edecc5b28 Binary files /dev/null and b/index/_f.nvm differ diff --git a/index/_f.si b/index/_f.si new file mode 100644 index 0000000000000000000000000000000000000000..afc11e8ee19889f601d88489a67f808c022303ff Binary files /dev/null and b/index/_f.si differ diff --git a/index/_f_Lucene90_0.doc b/index/_f_Lucene90_0.doc new file mode 100644 index 0000000000000000000000000000000000000000..7ae37621d4fc93fa85bf5a6b1e6c1f63d833e35c Binary files /dev/null and b/index/_f_Lucene90_0.doc differ diff --git a/index/_f_Lucene90_0.dvd b/index/_f_Lucene90_0.dvd new file mode 100644 index 0000000000000000000000000000000000000000..5a947e5ac8ad16994d49ba38e4016abaa29ff611 Binary files /dev/null and b/index/_f_Lucene90_0.dvd differ diff --git a/index/_f_Lucene90_0.dvm b/index/_f_Lucene90_0.dvm new file mode 100644 index 0000000000000000000000000000000000000000..d80bee5604226dbab2ade0c7ac5e2e6011f495f3 Binary files /dev/null and b/index/_f_Lucene90_0.dvm differ diff --git a/index/_f_Lucene90_0.pos b/index/_f_Lucene90_0.pos new file mode 100644 index 
0000000000000000000000000000000000000000..4c264eaac83ef60c64ca947d54515f2b36041f9e --- /dev/null +++ b/index/_f_Lucene90_0.pos @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e48a050c215696f91bd8560cdd2ff18a40980ae17d782d160ec7ac18852d4258 +size 1277898 diff --git a/index/_f_Lucene90_0.tim b/index/_f_Lucene90_0.tim new file mode 100644 index 0000000000000000000000000000000000000000..837aa1d6a84d635012ac75302bc6887c7b6ca284 Binary files /dev/null and b/index/_f_Lucene90_0.tim differ diff --git a/index/_f_Lucene90_0.tip b/index/_f_Lucene90_0.tip new file mode 100644 index 0000000000000000000000000000000000000000..e507d49f4e6b51c1265aa6f9b84ff901ec02960e Binary files /dev/null and b/index/_f_Lucene90_0.tip differ diff --git a/index/_f_Lucene90_0.tmd b/index/_f_Lucene90_0.tmd new file mode 100644 index 0000000000000000000000000000000000000000..e41cc7a2aa0a8fc14838ddcd1090808115d94d1a Binary files /dev/null and b/index/_f_Lucene90_0.tmd differ diff --git a/index/segments_4 b/index/segments_4 new file mode 100644 index 0000000000000000000000000000000000000000..38729e3ae6f7b3a294aa545b615275ffa8f697d1 Binary files /dev/null and b/index/segments_4 differ diff --git a/index/write.lock b/index/write.lock new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/packages.txt b/packages.txt new file mode 100644 index 0000000000000000000000000000000000000000..7083f85c3741aaa661aabe2d5048ef5ebdb13b71 --- /dev/null +++ b/packages.txt @@ -0,0 +1 @@ +openjdk-11-jdk