# Streamlit front end for "Gaia Search": a sidebar query box plus a "Search"
# button. On click, the query is run through `searcher`, the matching rows are
# selected from `ds`, and the result text is shown in an embedded HTML
# component.
import os

import streamlit as st
import streamlit.components.v1 as components
from datasets import load_dataset

# NOTE(review): `LuceneSearcher` is used below but is not imported anywhere in
# the visible source, so the script raises NameError as written. It is
# presumably `pyserini.search.lucene.LuceneSearcher` -- confirm and add the
# import.

# NOTE(review): every markup string below is passed with unsafe_allow_html=True
# yet contains no HTML tags; the markup was presumably lost before this file
# reached review -- confirm against the deployed version. The literals are
# kept exactly as found.

st.set_page_config(page_title="Gaia Search", layout="wide")

# Write a light-theme Streamlit config into ./.streamlit (relative to the
# current working directory), creating the directory when it is missing.
config_dir = os.path.join(os.getcwd(), ".streamlit")
os.makedirs(config_dir, exist_ok=True)
with open(os.path.join(config_dir, "config.toml"), "w", encoding="utf-8") as file:
    file.write('[theme]\nbase="light"')

# Sidebar: title and tagline.
st.sidebar.markdown(
    """

Gaia Search 🌖🌏

A search engine for the LAION large scale image caption corpora

""",
    unsafe_allow_html=True,
)

# Sidebar: project links.
st.sidebar.markdown(
    """

GitHub | Project Report

""",
    unsafe_allow_html=True,
)

query = st.sidebar.text_input(label="Search query", value="")

footer = """ """
st.sidebar.markdown(footer, unsafe_allow_html=True)

searcher = LuceneSearcher("index")
ds = load_dataset("imdb", split="train")


def search(query):
    """Return the top 10 hits for ``query`` as display text.

    Each hit's ``docid`` is converted to ``int`` and passed to ``ds.select``
    (assumes docids are row positions in ``ds`` -- TODO confirm against how
    the index was built).

    Args:
        query: Query string handed to ``searcher.search``.

    Returns:
        ``str`` of the selected rows followed by two newlines.
    """
    hits = searcher.search(query, k=10)
    results = ds.select([int(hit.docid) for hit in hits])
    # The original concatenated `results` directly with a str, which raises
    # TypeError for a non-str object; convert explicitly instead.
    return f"{results}\n\n"


if st.sidebar.button("Search"):
    results = search(query)
    rendered_results = f"""

{results}

"""
    st.markdown(
        """ """,
        unsafe_allow_html=True,
    )
    st.markdown(
        """ """,
        unsafe_allow_html=True,
    )
    # No placeholders in this literal, so the f-prefix was dropped.
    st.markdown(
        """

Gaia Search 🌖🌏

""",
        unsafe_allow_html=True,
    )
    # Kept inside the `if`: `rendered_results` only exists after a click.
    components.html(
        """ """ + rendered_results,
        height=800,
        scrolling=True,
    )