Hacker1337 committed on
Commit
2904d0e
·
1 Parent(s): fef1ab3

added loading of my model

Browse files
Files changed (2) hide show
  1. app.py +14 -1
  2. dataset.py +40 -0
app.py CHANGED
@@ -1,3 +1,4 @@
 
1
  import gradio as gr
2
  import numpy as np
3
  import random
@@ -7,13 +8,25 @@ from diffusers import DiffusionPipeline
7
  import torch
8
 
9
  device = "cuda" if torch.cuda.is_available() else "cpu"
10
- model_repo_id = "stabilityai/sdxl-turbo" # Replace to the model you would like to use
11
 
12
  if torch.cuda.is_available():
13
  torch_dtype = torch.float16
14
  else:
15
  torch_dtype = torch.float32
16
 
 
 
 
 
 
 
 
 
 
 
 
 
17
  pipe = DiffusionPipeline.from_pretrained(model_repo_id, torch_dtype=torch_dtype)
18
  pipe = pipe.to(device)
19
 
 
1
+ from transformers import AutoModelForSequenceClassification
2
  import gradio as gr
3
  import numpy as np
4
  import random
 
8
  import torch
9
 
10
  device = "cuda" if torch.cuda.is_available() else "cpu"
11
+ model_repo_id = "stabilityai/sdxl-turbo"
12
 
13
  if torch.cuda.is_available():
14
  torch_dtype = torch.float16
15
  else:
16
  torch_dtype = torch.float32
17
 
18
+
19
+ from article_classifier.dataset import labels, id2label, label2id, categorie2human
20
+
21
+ model_path = "distilbert/distilbert-base-cased" # todo, replace with hacker1337/article-classifier
22
+ model = AutoModelForSequenceClassification.from_pretrained(
23
+ model_path,
24
+ num_labels=len(id2label),
25
+ id2label=id2label,
26
+ label2id=label2id,
27
+ problem_type="multi_label_classification",
28
+ )
29
+
30
  pipe = DiffusionPipeline.from_pretrained(model_repo_id, torch_dtype=torch_dtype)
31
  pipe = pipe.to(device)
32
 
dataset.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Short category codes; their order here defines each class index.
labels = ["CV", "AI", "ML", "NE", "CL"]

# Index <-> code lookup tables, both derived from the order of `labels`.
id2label = dict(enumerate(labels))
label2id = {label: index for index, label in id2label.items()}

# Human-readable name for each category code.
# (The spelling "categorie2human" is kept: app.py imports it under this name.)
categorie2human = {
    "CV": "Computer Vision",
    "AI": "Artificial Intelligence",
    "ML": "Machine Learning",
    "NE": "Neural and Evolutionary Computing",
    "CL": "Computation and Language",
}
12
+
13
+
14
def load_arxiv_dataset():
    """Download the arXiv paper-abstracts CSV from Kaggle and load it.

    The file ``arxiv_data.csv`` is read with the ``datasets`` CSV loader.
    Each row's ``terms`` value is a string and is converted to the Python
    literal it spells out (a list, per the original "convert string to
    lists" note).

    Returns:
        The ``train`` split of the loaded CSV with ``terms`` parsed.
    """
    # Function-local imports: importing this module for its label tables
    # does not import kagglehub or datasets.
    import ast
    import os

    import kagglehub
    from datasets import load_dataset

    # Download the latest version of the Kaggle dataset; the returned path
    # is used below as the directory containing the CSV.
    path = kagglehub.dataset_download("spsayakpaul/arxiv-paper-abstracts")

    dataset = load_dataset(
        "csv",
        data_files=os.path.join(path, "arxiv_data.csv"),
        encoding="utf-8",
        split="train",
    )

    def parse_terms(example):
        # literal_eval only evaluates Python literals, so text from the CSV
        # is never executed as code.
        example["terms"] = ast.literal_eval(example["terms"])
        return example

    return dataset.map(parse_terms)