Spaces:
Sleeping
Sleeping
Commit
·
2904d0e
1
Parent(s):
fef1ab3
added loading of my model
Browse files
- app.py +14 -1
- dataset.py +40 -0
app.py
CHANGED
@@ -1,3 +1,4 @@
|
|
|
|
1 |
import gradio as gr
|
2 |
import numpy as np
|
3 |
import random
|
@@ -7,13 +8,25 @@ from diffusers import DiffusionPipeline
|
|
7 |
import torch
|
8 |
|
9 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
10 |
-
model_repo_id = "stabilityai/sdxl-turbo"
|
11 |
|
12 |
if torch.cuda.is_available():
|
13 |
torch_dtype = torch.float16
|
14 |
else:
|
15 |
torch_dtype = torch.float32
|
16 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
17 |
pipe = DiffusionPipeline.from_pretrained(model_repo_id, torch_dtype=torch_dtype)
|
18 |
pipe = pipe.to(device)
|
19 |
|
|
|
1 |
+
from transformers import AutoModelForSequenceClassification
|
2 |
import gradio as gr
|
3 |
import numpy as np
|
4 |
import random
|
|
|
8 |
import torch
|
9 |
|
10 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
11 |
+
model_repo_id = "stabilityai/sdxl-turbo"
|
12 |
|
13 |
if torch.cuda.is_available():
|
14 |
torch_dtype = torch.float16
|
15 |
else:
|
16 |
torch_dtype = torch.float32
|
17 |
|
18 |
+
|
19 |
+
from article_classifier.dataset import labels, id2label, label2id, categorie2human
|
20 |
+
|
21 |
+
model_path = "distilbert/distilbert-base-cased" # todo, replace with hacker1337/article-classifier
|
22 |
+
model = AutoModelForSequenceClassification.from_pretrained(
|
23 |
+
model_path,
|
24 |
+
num_labels=len(id2label),
|
25 |
+
id2label=id2label,
|
26 |
+
label2id=label2id,
|
27 |
+
problem_type="multi_label_classification",
|
28 |
+
)
|
29 |
+
|
30 |
pipe = DiffusionPipeline.from_pretrained(model_repo_id, torch_dtype=torch_dtype)
|
31 |
pipe = pipe.to(device)
|
32 |
|
dataset.py
ADDED
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Short category codes used as classification labels; a code's position in
# this list is its integer class id.
labels = ["CV", "AI", "ML", "NE", "CL"]

# Class id -> category code, derived from the order of `labels`.
id2label = dict(enumerate(labels))

# Category code -> class id (the inverse of `id2label`).
label2id = {label: i for i, label in enumerate(labels)}

# Human-readable name for each category code in `labels`.
categorie2human = {
    "CV": "Computer Vision",
    "AI": "Artificial Intelligence",
    "ML": "Machine Learning",
    "NE": "Neural and Evolutionary Computing",
    "CL": "Computation and Language",
}
|
12 |
+
|
13 |
+
|
14 |
+
def load_arxiv_dataset():
    """Download the arXiv paper-abstracts CSV from Kaggle and load it.

    The Kaggle dataset ``spsayakpaul/arxiv-paper-abstracts`` is fetched with
    ``kagglehub``; its ``arxiv_data.csv`` file is then read through
    ``datasets.load_dataset`` as the ``train`` split. Each row's ``terms``
    string is parsed with ``ast.literal_eval`` (expected to yield a list of
    category terms -- confirm against the CSV contents).

    Returns:
        The dataset produced by mapping the ``terms`` parser over every row.
    """
    # Dependencies are imported at call time rather than at module import.
    import ast
    import os

    import kagglehub
    from datasets import load_dataset

    # Fetch the latest version of the Kaggle dataset and locate the CSV in it.
    download_dir = kagglehub.dataset_download("spsayakpaul/arxiv-paper-abstracts")
    csv_file = os.path.join(download_dir, "arxiv_data.csv")

    raw = load_dataset("csv", data_files=csv_file, encoding="utf-8", split="train")

    def _terms_to_list(row):
        # The CSV stores `terms` as a string literal; turn it into a Python object.
        row["terms"] = ast.literal_eval(row["terms"])
        return row

    return raw.map(_terms_to_list)
|