Fill-Mask
Transformers
PyTorch
esm
Inference Endpoints
File size: 3,453 Bytes
ffaff91
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
# Data Cleaning Parameters
# TCGA study abbreviations. Source: https://gdc.cancer.gov/resources-tcga-users/tcga-code-tables/tcga-study-abbreviations
# Study abbreviation -> full study name. Besides cancer types, the table also
# carries non-cancer entries such as CNTL ('Controls') and MISC ('Miscellaneous').
TCGA_CODES = {
    'LAML': 'Acute Myeloid Leukemia',
    'ACC': 'Adrenocortical carcinoma',
    'BLCA': 'Bladder Urothelial Carcinoma',
    'LGG': 'Brain Lower Grade Glioma',
    'BRCA': 'Breast invasive carcinoma',
    'CESC': 'Cervical squamous cell carcinoma and endocervical adenocarcinoma',
    'CHOL': 'Cholangiocarcinoma',
    'LCML': 'Chronic Myelogenous Leukemia',
    'COAD': 'Colon adenocarcinoma',
    'CNTL': 'Controls',
    'ESCA': 'Esophageal carcinoma',
    'FPPP': 'FFPE Pilot Phase II',
    'GBM': 'Glioblastoma multiforme',
    'HNSC': 'Head and Neck squamous cell carcinoma',
    'KICH': 'Kidney Chromophobe',
    'KIRC': 'Kidney renal clear cell carcinoma',
    'KIRP': 'Kidney renal papillary cell carcinoma',
    'LIHC': 'Liver hepatocellular carcinoma',
    'LUAD': 'Lung adenocarcinoma',
    'LUSC': 'Lung squamous cell carcinoma',
    'DLBC': 'Lymphoid Neoplasm Diffuse Large B-cell Lymphoma',
    'MESO': 'Mesothelioma',
    'MISC': 'Miscellaneous',
    'OV': 'Ovarian serous cystadenocarcinoma',
    'PAAD': 'Pancreatic adenocarcinoma',
    'PCPG': 'Pheochromocytoma and Paraganglioma',
    'PRAD': 'Prostate adenocarcinoma',
    'READ': 'Rectum adenocarcinoma',
    'SARC': 'Sarcoma',
    'SKCM': 'Skin Cutaneous Melanoma',
    'STAD': 'Stomach adenocarcinoma',
    'TGCT': 'Testicular Germ Cell Tumors',
    'THYM': 'Thymoma',
    'THCA': 'Thyroid carcinoma',
    'UCS': 'Uterine Carcinosarcoma',
    'UCEC': 'Uterine Corpus Endometrial Carcinoma',
    'UVM': 'Uveal Melanoma',
}

# Abbreviation -> full name for the FODB code set.
# NOTE(review): the expansion/source of "FODB" is not shown in this file - confirm.
# Some keys shared with TCGA_CODES carry different names here
# (e.g. 'ACC' is 'Adenoid cystic carcinoma', 'LGG' is 'Low-grade glioma').
FODB_CODES = {
    'ACC': 'Adenoid cystic carcinoma',
    'ALL': 'Acute Lymphoid Leukemia',
    'AML': 'Acute Myeloid Leukemia',
    'BALL': 'B-cell acute lymphoblastic leukemia',
    'BLCA': 'Bladder Urothelial Carcinoma',
    'BRCA': 'Breast invasive carcinoma',
    'CESC': 'Cervical squamous cell carcinoma and endocervical adenocarcinoma',
    'CHOL': 'Cholangiocarcinoma',
    'EPD': 'Ependymoma',
    'HGG': 'High-grade glioma',
    'HNSC': 'Head and Neck squamous cell carcinoma',
    'KIRC': 'Kidney renal clear cell carcinoma',
    'LGG': 'Low-grade glioma',
    'LUAD': 'Lung adenocarcinoma',
    'LUSC': 'Lung squamous cell carcinoma',
    'MEL': 'Melanoma',
    'MESO': 'Mesothelioma',
    'NBL': 'Neuroblastoma',
    'OS': 'Osteosarcoma',
    'OV': 'Ovarian serous cystadenocarcinoma',
    'PCPG': 'Pheochromocytoma and Paraganglioma',
    'PRAD': 'Prostate adenocarcinoma',
    'READ': 'Rectum adenocarcinoma',
    'RHB': 'Rhabdomyosarcoma',
    'SARC': 'Sarcoma',
    'STAD': 'Stomach adenocarcinoma',
    'TALL': 'T-cell acute lymphoblastic leukemia',
    'THYM': 'Thymoma',
    'UCEC': 'Uterine Corpus Endometrial Carcinoma',
    'UCS': 'Uterine Carcinosarcoma',
    'UVM': 'Uveal Melanoma',
    'WLM': 'Wilms tumor',
}

# One-letter codes of the 20 standard amino acids, as a set of
# single-character strings (built by splitting the string into its characters).
VALID_AAS = set('ARNDCEQGHILKMFPSTWYV')

# Single characters treated as delimiters: punctuation separators, slashes,
# and whitespace (tab, space, newline). How they are applied is decided by the
# code that consumes this set, which is not shown here.
DELIMITERS = {
    ',', ';', '|', ':', '-',
    '/', '\\',
    '\t', ' ', '\n',
}