AndreyForty committed on
Commit
2f856a9
·
verified ·
1 Parent(s): 5676f70

Delete paper_classifier.py

Browse files
Files changed (1) hide show
  1. paper_classifier.py +0 -73
paper_classifier.py DELETED
@@ -1,73 +0,0 @@
1
- from __future__ import annotations
2
-
3
- from typing import Iterable
4
-
5
# Model / inference settings.
# NOTE(review): presumably a Hugging Face checkpoint id and the local
# directory holding the fine-tuned weights -- confirm against the loader.
BASE_MODEL_NAME = "distilbert/distilbert-base-cased"
DEFAULT_MODEL_DIR = "artifacts/distilbert-arxiv"
# NOTE(review): presumably the tokenizer truncation length -- confirm.
MAX_LENGTH = 256
# NOTE(review): presumably the cumulative-score cutoff handed to
# take_top_p -- confirm at the call site.
TOP_P_THRESHOLD = 0.95

# Top-level category names, in display order.
EXPECTED_ARXIV_CATEGORIES = [
    "Computer Science",
    "Physics",
    "Mathematics",
    "Statistics",
    "Quantitative Biology",
    "Quantitative Finance",
    "Economics",
    "Electrical Engineering and Systems Science",
]

# Sample inputs keyed by a short label; each entry holds a "title" and an
# "abstract" string (in that key order).
EXAMPLES = {
    "Graph Neural Networks": dict(
        title="Message Passing Neural Networks for Molecular Property Prediction",
        abstract=(
            "We introduce a graph-based neural architecture for supervised learning on "
            "molecular graphs. The model propagates messages between atoms, aggregates "
            "node states into a graph embedding, and predicts physical and chemical "
            "properties with competitive accuracy."
        ),
    ),
    "Physics": dict(
        title="Topological phase transitions in two-dimensional quantum materials",
        abstract=(
            "We study a lattice model with strong spin-orbit coupling and show how "
            "interactions modify the phase diagram. Using numerical simulations we "
            "characterize edge states, quantify transport signatures, and discuss "
            "observable consequences for low-temperature experiments."
        ),
    ),
    "Bioinformatics": dict(
        title="Transformer models for protein function annotation from sequence",
        abstract=(
            "We pretrain a transformer encoder on amino acid sequences and finetune it "
            "for protein function prediction. The approach improves annotation quality "
            "for underrepresented families and reveals biologically meaningful sequence "
            "patterns."
        ),
    ),
}
48
-
49
-
50
def format_input_text(title: str, abstract: str) -> str:
    """Combine a paper's title and abstract into a single text block.

    Both inputs are stripped of surrounding whitespace first. A non-empty
    title yields a ``Title:`` line followed by a ``Title summary:`` line that
    repeats the title; a non-empty abstract yields an ``Abstract:`` section.
    Sections are separated by one blank line and empty inputs are skipped,
    so two empty inputs produce an empty string.
    """
    clean_title = title.strip()
    clean_abstract = abstract.strip()

    # Build both candidate sections up front; an empty string marks "absent".
    sections = (
        f"Title: {clean_title}\nTitle summary: {clean_title}" if clean_title else "",
        f"Abstract: {clean_abstract}" if clean_abstract else "",
    )
    return "\n\n".join(section for section in sections if section)
61
-
62
-
63
def take_top_p(records: Iterable[dict[str, float]], threshold: float) -> list[dict[str, float]]:
    """Return the leading records whose summed ``"score"`` reaches *threshold*.

    Records are consumed in the order given (no sorting is done here) and
    each one is kept before the running total is checked, so the record that
    crosses the threshold is included and at least one record is returned
    whenever the input is non-empty. Iteration stops as soon as the total is
    greater than or equal to *threshold*; if it never gets there, every
    record is returned.
    """
    kept: list[dict[str, float]] = []
    running_total = 0.0

    for entry in records:
        kept.append(entry)
        running_total += entry["score"]
        if running_total >= threshold:
            # Enough mass collected -- leave the rest of the iterable untouched.
            return kept

    return kept