Spaces:
Runtime error
Runtime error
ludekcizinsky
committed on
Commit
•
f47e717
1
Parent(s):
db4540e
feat(new models): added new models, refactor the ui
Browse files- app.py +23 -9
- homepage2vec/__pycache__/data_collection.cpython-310.pyc +0 -0
- models/.DS_Store +0 -0
- models/{gpt3.5 → finetuned/gpt3.5}/features.txt +0 -0
- models/{gpt3.5 → finetuned/gpt3.5}/model.pt +1 -1
- models/finetuned/gpt4/features.txt +8 -0
- models/finetuned/gpt4/model.pt +3 -0
- models/homepage2vec/features.txt +8 -0
- models/homepage2vec/model.pt +3 -0
app.py
CHANGED
@@ -5,10 +5,21 @@ import os
|
|
5 |
from homepage2vec.model import WebsiteClassifier as Homepage2Vec
|
6 |
|
7 |
EXAMPLES = [
|
8 |
-
|
9 |
-
["
|
10 |
-
["
|
11 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
12 |
]
|
13 |
|
14 |
|
@@ -24,8 +35,11 @@ def predict(model_choice : str, url : str) -> Dict[str, float]:
|
|
24 |
Dict[str, float]: The categories and their corresponding scores.
|
25 |
"""
|
26 |
|
27 |
-
|
28 |
-
|
|
|
|
|
|
|
29 |
|
30 |
# Initialise model
|
31 |
model = Homepage2Vec(model_dir=model_dir)
|
@@ -44,11 +58,11 @@ def predict(model_choice : str, url : str) -> Dict[str, float]:
|
|
44 |
|
45 |
iface = gr.Interface(
|
46 |
fn=predict,
|
47 |
-
inputs=[gr.Dropdown(choices=["gpt3.5", "gpt4"], label="Select Model"),
|
48 |
-
gr.Textbox(label="Enter Website URL", placeholder="
|
49 |
outputs=gr.Label(num_top_classes=14, label="Predicted Labels", show_label=True),
|
50 |
title="Homepage2Vec",
|
51 |
-
description="
|
52 |
examples=EXAMPLES,
|
53 |
live=False,
|
54 |
allow_flagging="never",
|
|
|
5 |
from homepage2vec.model import WebsiteClassifier as Homepage2Vec
|
6 |
|
7 |
EXAMPLES = [
|
8 |
+
# Personal site
|
9 |
+
["original", "tanjasenghaasdesigns.de"],
|
10 |
+
["finetuned-gpt4", "tanjasenghaasdesigns.de"],
|
11 |
+
|
12 |
+
# EPFL
|
13 |
+
["finetuned-gpt3.5", "epfl.ch"],
|
14 |
+
["finetuned-gpt4", "epfl.ch"],
|
15 |
+
|
16 |
+
# Czech Crunch - czech tech news
|
17 |
+
["original", "cc.cz"],
|
18 |
+
["finetuned-gpt4", "cc.cz"],
|
19 |
+
|
20 |
+
# Promaminky - czech site for moms
|
21 |
+
["original", "promaminky.cz"],
|
22 |
+
["finetuned-gpt3.5", "promaminky.cz"],
|
23 |
]
|
24 |
|
25 |
|
|
|
35 |
Dict[str, float]: The categories and their corresponding scores.
|
36 |
"""
|
37 |
|
38 |
+
if model_choice == "original":
|
39 |
+
model_dir = os.path.join("models", "homepage2vec")
|
40 |
+
else:
|
41 |
+
which_gpt = model_choice.split("-")[1]
|
42 |
+
model_dir = os.path.join("models", "finetuned", which_gpt)
|
43 |
|
44 |
# Initialise model
|
45 |
model = Homepage2Vec(model_dir=model_dir)
|
|
|
58 |
|
59 |
iface = gr.Interface(
|
60 |
fn=predict,
|
61 |
+
inputs=[gr.Dropdown(choices=["original", "finetuned-gpt3.5", "finetuned-gpt4"], label="Select Model", show_label=True, value="finetuned-gpt4"),
|
62 |
+
gr.Textbox(label="Enter Website's URL or domain", placeholder="e.g. ikea.com")],
|
63 |
outputs=gr.Label(num_top_classes=14, label="Predicted Labels", show_label=True),
|
64 |
title="Homepage2Vec",
|
65 |
+
description="Select a version of the Homepage2Vec model and enter a website's URL or domain to predict its categories. The original model was trained on 886K websites from Curlie directory. The finetuned models, in addition, were trained on GPT annotated websites. On average, the fintuned models should predict more labels than the original model while maintaining high accuracy.",
|
66 |
examples=EXAMPLES,
|
67 |
live=False,
|
68 |
allow_flagging="never",
|
homepage2vec/__pycache__/data_collection.cpython-310.pyc
CHANGED
Binary files a/homepage2vec/__pycache__/data_collection.cpython-310.pyc and b/homepage2vec/__pycache__/data_collection.cpython-310.pyc differ
|
|
models/.DS_Store
CHANGED
Binary files a/models/.DS_Store and b/models/.DS_Store differ
|
|
models/{gpt3.5 → finetuned/gpt3.5}/features.txt
RENAMED
File without changes
|
models/{gpt3.5 → finetuned/gpt3.5}/model.pt
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 19072308
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f3c325b49245d46326daa3c9ff6dd4e52b806cf88b8986e27598419764aed3ab
|
3 |
size 19072308
|
models/finetuned/gpt4/features.txt
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
f_tld 27
|
2 |
+
f_url 768
|
3 |
+
f_metatags 30
|
4 |
+
f_title 768
|
5 |
+
f_description 768
|
6 |
+
f_keywords 768
|
7 |
+
f_links_50 768
|
8 |
+
f_text_100 768
|
models/finetuned/gpt4/model.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5b0c10a4a65ad90f87e33f7c62364b4d44991e72819d186785beff84c24f0f22
|
3 |
+
size 19072308
|
models/homepage2vec/features.txt
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
f_tld 27
|
2 |
+
f_url 768
|
3 |
+
f_metatags 30
|
4 |
+
f_title 768
|
5 |
+
f_description 768
|
6 |
+
f_keywords 768
|
7 |
+
f_links_50 768
|
8 |
+
f_text_100 768
|
models/homepage2vec/model.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:634de448281c5026597c05b5132ac0dc802305689adc104bde71949186999215
|
3 |
+
size 19072357
|