# Streamlit front-end for searching Hugging Face Hub models.
#
# The sidebar selects the search backend, the result limit, the sort order and
# the library/task filters; the main page shows a search box and the paginated
# list of hits returned by the selected backend.
import math
import webbrowser

import streamlit as st
from huggingface_hub import ModelSearchArguments
from numerize.numerize import numerize

from st_utils import bm25_search, semantic_search, hf_api, paginator

# Upper bound used in the "showing X of Y results" pagination label.
MAX_SHOWN_RESULTS = 100

# Sidebar option value -> label shown to the user.
BACKEND_LABELS = {
    "hfapi": "Keyword search",
    "bm25": "BM25 search",
    "semantic": "Semantic Search",
}
SORT_LABELS = {
    None: "Relevance",
    "downloads": "Most downloads",
    "likes": "Most likes",
    "lastModified": "Recently updated",
}

# Sidebar option value -> search function; all three are called with
# (query, limit, sort_by, filters).
SEARCH_BACKENDS = {
    "hfapi": hf_api,
    "semantic": semantic_search,
    "bm25": bm25_search,
}


def _format_count(value):
    """Return a compact label for a count, or "N/A" when it is None or NaN."""
    if value is None or math.isnan(value):
        return "N/A"
    return numerize(value)


def _render_hit(index, hit):
    """Render one search hit: metrics row, link button, tags, README, separator.

    Args:
        index: Position yielded by the paginator; used to build a unique
            widget key for the button.
        hit: Dict with the keys "modelId", "tags", "downloads", "likes" and
            "readme".
    """
    model_url = f"https://huggingface.co/{hit['modelId']}"

    col1, col2, col3 = st.columns([5, 1, 1])
    col1.metric("Model", hit["modelId"])
    col2.metric("N° downloads", _format_count(hit["downloads"]))
    col3.metric("N° likes", _format_count(hit["likes"]))

    # The URL is bound as a default argument so each button keeps its own link.
    # NOTE(review): webbrowser.open runs in the Python process serving the app,
    # so the tab opens on that machine -- confirm this is intended when the app
    # is hosted remotely.
    st.button(
        "View model on 🤗",
        on_click=lambda url=model_url: webbrowser.open(url, new=2),
        key=f"{index}-{hit['modelId']}",
    )

    tag_separator = "  •  "
    st.write(f"**Tags:** {tag_separator.join(hit['tags'])}")

    if hit["readme"]:
        with st.expander("See README"):
            st.write(hit["readme"])

    # TODO: embed huggingface spaces
    # import streamlit.components.v1 as components
    # components.html(
    #     f"""
    #
    #     """,
    #     height=400,
    # )
    st.markdown("---")


st.set_page_config(
    page_title="HF Search Engine",
    page_icon="🔎",
    layout="wide",
    initial_sidebar_state="auto",
)

### SIDEBAR
search_backend = st.sidebar.selectbox(
    "Search method",
    ["semantic", "bm25", "hfapi"],
    format_func=lambda key: BACKEND_LABELS[key],
)
limit_results = int(st.sidebar.number_input("Limit results", min_value=0, value=10))
sort_by = st.sidebar.selectbox(
    "Sort by",
    [None, "downloads", "likes", "lastModified"],
    format_func=lambda key: SORT_LABELS[key],
)

st.sidebar.markdown("# Filters")
args = ModelSearchArguments()

# Invert name -> value once so each option is displayed under its name,
# instead of rebuilding the mapping for every rendered option.
library_labels = {value: name for name, value in args.library.items()}
task_labels = {value: name for name, value in args.pipeline_tag.items()}

library = st.sidebar.multiselect(
    "Library",
    args.library.values(),
    format_func=lambda value: library_labels[value],
)
task = st.sidebar.multiselect(
    "Task",
    args.pipeline_tag.values(),
    format_func=lambda value: task_labels[value],
)

### MAIN PAGE
# NOTE(review): the HTML around the title was lost in this copy of the file;
# the centered <h1> below is a reconstruction -- confirm against the intended
# layout.
st.markdown(
    "<h1 style='text-align: center;'>🔎🤗 HF Search Engine</h1>",
    unsafe_allow_html=True,
)

# Search bar
search_query = st.text_input("Search for a model in HuggingFace")

if search_query:
    filters = {
        "library": library,
        "task": task,
    }
    res = SEARCH_BACKENDS[search_backend](search_query, limit_results, sort_by, filters)
    hits_count = res["count"]

    # Keep only the fields the page renders; "readme" may be absent from a hit.
    hit_list = [
        {
            "modelId": hit["modelId"],
            "tags": hit["tags"],
            "downloads": hit["downloads"],
            "likes": hit["likes"],
            "readme": hit.get("readme"),
        }
        for hit in res["hits"]
    ]

    if hit_list:
        st.write(f"Search results ({hits_count}):")
        shown_results = min(hits_count, MAX_SHOWN_RESULTS)
        for i, hit in paginator(
            f"Select results (showing {shown_results} of {hits_count} results)",
            hit_list,
        ):
            _render_hit(i, hit)
    else:
        st.write("No Search results, please try again with different keywords")

# NOTE(review): the footer originally appears to have wrapped "Nouamane" and
# "here" in links whose targets are not recoverable from this copy of the
# file -- restore the hrefs.
st.markdown(
    "<div style='text-align: center;'>"
    "Made with ❤️ By Nouamane - Checkout complete project here"
    "</div>",
    unsafe_allow_html=True,
)