File size: 2,747 Bytes
40c2a2b
 
 
 
 
fef8635
 
 
 
 
4a400da
fef8635
 
 
 
 
 
4a400da
fef8635
 
 
 
 
 
 
4a400da
fef8635
 
 
 
 
 
 
4a400da
fef8635
 
 
 
 
 
 
 
 
84c8245
 
fef8635
40c2a2b
fef8635
 
 
 
 
 
 
 
 
40c2a2b
4a400da
fef8635
 
 
 
 
 
 
 
 
 
4a400da
 
fef8635
 
40c2a2b
fef8635
4a400da
fef8635
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
import math
import re

import pandas as pd
import streamlit as st


class ModelFinder:
    """Streamlit page for searching a table of models.

    The table must provide the columns ``readme``, ``modelId``, ``author``
    and ``model_name`` (used for filtering) and ``pipeline_tag`` (shown next
    to every result).
    """

    def __init__(self, models_df, n_per_page=10):
        """Store the model table and create the query widgets.

        Args:
            models_df: DataFrame of models to search.
            n_per_page: Number of results rendered on one page.

        Raises:
            ValueError: If ``n_per_page`` is smaller than 1.
        """
        if n_per_page < 1:
            raise ValueError("n_per_page must be at least 1")
        # Assign state before building widgets so no method ever runs on a
        # partially initialised instance.
        self.models_df = models_df
        self.n_per_page = n_per_page
        self.setup_inputs()

    def setup_page(self):
        """Render the page title and the short usage notes."""
        st.title("Huggingface model explorer")
        # Count the table held by this instance rather than a module global.
        st.text(f"search {len(self.models_df)} models by name or readme")
        st.text(
            "note that there are many more models but here we only show those with readme"
        )

    def setup_inputs(self):
        """Create the four query text boxes and reserve a column for paging."""
        col1, col2, col3, col4, col5 = st.columns(5)
        self.query_input = col1.text_input("model name query", value="")
        self.author_query_input = col2.text_input("author query", value="")
        self.id_query_input = col3.text_input("modelId query", value="")
        self.readme_query_input = col4.text_input("readme query", value="")
        # The page selector is created later, once the result count is known.
        self.page = col5

    @staticmethod
    def _column_matches(column, pattern):
        """Return a boolean mask of the cells of ``column`` containing ``pattern``.

        Matching ignores case. ``pattern`` is used as a regular expression
        when it compiles and as a literal substring otherwise, so input such
        as ``c++`` or an unfinished ``(`` does not raise. Null cells never
        match.
        """
        try:
            re.compile(pattern)
            is_regex = True
        except re.error:
            is_regex = False
        return column.str.contains(pattern, case=False, na=False, regex=is_regex)

    def get_selected_models_df(self, query, readme_query, id_query, author_query):
        """Return the rows of the model table that satisfy every query.

        Args:
            query: Pattern searched in ``model_name``.
            readme_query: Pattern searched in ``readme``.
            id_query: Pattern searched in ``modelId``.
            author_query: Pattern searched in ``author``.

        Returns:
            The subset of ``self.models_df`` matching all non-empty queries;
            an empty query places no restriction on its column.
        """
        filters = (
            ("readme", readme_query),
            ("modelId", id_query),
            ("author", author_query),
            ("model_name", query),
        )
        mask = pd.Series(True, index=self.models_df.index, dtype=bool)
        for column_name, pattern in filters:
            if not pattern:
                # An empty pattern matches everything; skip the column scan.
                continue
            mask &= self._column_matches(self.models_df[column_name], pattern)
        return self.models_df[mask]

    def _last_page_index(self, n_results):
        """Return the highest zero-based page index for ``n_results`` rows."""
        # Pages start at 0, so the last one is (page count - 1); keep 0 as the
        # floor so an empty result still has a valid page to select.
        return max(math.ceil(n_results / self.n_per_page) - 1, 0)

    def show_paged_selected_model_info(self, selected_models_df):
        """Render one page of results, each inside its own expander.

        Args:
            selected_models_df: Rows to display; must contain ``modelId``,
                ``pipeline_tag`` and ``readme``.
        """
        page = self.page.number_input(
            "page", 0, self._last_page_index(len(selected_models_df))
        )
        first_row = page * self.n_per_page
        page_df = selected_models_df.iloc[first_row : first_row + self.n_per_page]
        st.write(f"found {len(selected_models_df)} models")
        shown_columns = ["modelId", "pipeline_tag", "readme"]
        for model_id, tag, readme in page_df[shown_columns].itertuples(index=False):
            model_url = f"http://huggingface.co/{model_id}"
            with st.expander(f"[{model_id}]({model_url}) ({tag})"):
                st.write(readme)

    def run(self):
        """Render the page: header, then the results for the current queries."""
        self.setup_page()
        selected_models_df = self.get_selected_models_df(
            self.query_input,
            self.readme_query_input,
            self.id_query_input,
            self.author_query_input,
        )
        self.show_paged_selected_model_info(selected_models_df)


def split_model_id(model_id):
    """Split a model id into an ``(author, model_name)`` pair.

    The text before the first ``/`` is the author and everything after it is
    the model name. An id without ``/`` has no author, so the author is the
    empty string and the whole id is the model name.
    """
    author, separator, model_name = model_id.partition("/")
    if not separator:
        return "", model_id
    return author, model_name


def prepare_models_df(path):
    """Load the models table and derive ``author`` and ``model_name`` columns.

    Args:
        path: Location of a parquet file providing at least the ``readme``
            and ``modelId`` columns.

    Returns:
        A DataFrame without the rows whose ``readme`` is missing, extended
        with ``author`` and ``model_name`` columns split from ``modelId``.
    """
    df = pd.read_parquet(path).dropna(subset=["readme"])
    pairs = [split_model_id(model_id) for model_id in df["modelId"]]
    # Build the columns from the pair list directly (instead of unpacking
    # zip(*pairs)) so a table with no remaining rows is handled as well.
    return df.assign(
        author=[author for author, _ in pairs],
        model_name=[model_name for _, model_name in pairs],
    )


# Script body: load the model table from the parquet file, then build the
# explorer around it and render the page.
model_path = "models_with_readmes.parquet"
models_df = prepare_models_df(path=model_path)

app = ModelFinder(models_df=models_df)
app.run()