Create model.yaml
Browse files- model.yaml +64 -0
model.yaml
ADDED
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
type: pytorch
|
3 |
+
args:
|
4 |
+
module_file: pretrained_model_reloaded_th.py
|
5 |
+
module_obj: model
|
6 |
+
weights:
|
7 |
+
md5: 4878981d84499eb575abd0f3b45570d3
|
8 |
+
url: https://zenodo.org/record/1466068/files/pretrained_model_reloaded_th.pth?download=1
|
9 |
+
default_dataloader:
|
10 |
+
defined_as: kipoiseq.dataloaders.SeqIntervalDl
|
11 |
+
default_args:
|
12 |
+
alphabet_axis: 0
|
13 |
+
auto_resize_len: 600
|
14 |
+
dtype: np.float32
|
15 |
+
dummy_axis: 2
|
16 |
+
dependencies:
|
17 |
+
conda:
|
18 |
+
- python=3.6
|
19 |
+
- h5py=2.10.0
|
20 |
+
- _pytorch_select=0.2=gpu_0
|
21 |
+
- pytorch=1.3.1=cuda100py36h53c1284_0
|
22 |
+
- pip=20.3.3
|
23 |
+
- pysam=0.15.3
|
24 |
+
- cython=0.29.23
|
25 |
+
pip:
|
26 |
+
- kipoiseq
|
27 |
+
info:
|
28 |
+
authors:
|
29 |
+
- github: davek44
|
30 |
+
name: David R. Kelley
|
31 |
+
cite_as: https://doi.org/10.1101/gr.200535.115
|
32 |
+
contributors:
|
33 |
+
- github: krrome
|
34 |
+
name: Roman Kreuzhuber
|
35 |
+
trained_on: "From 2,071,886 total sites, 71,886 randomly reserved for testing and 70,000 for validation, leaving 1,930,000 for training."
|
36 |
+
doc: "This is the Basset model published by David Kelley converted to pytorch by\
|
37 |
+
\ Roman Kreuzhuber. It categorically predicts probabilities of accessible genomic\
|
38 |
+
\ regions in 164 cell types (ENCODE project and Roadmap Epigenomics Consortium). Data was generated using DNase-seq. The sequence\
|
39 |
+
\ length the model uses as input is 600bp. The input tensor must have shape (N,\
|
40 |
+
\ 4, 600, 1) for N samples, 600bp window size and 4 nucleotides. Per sample, 164\
|
41 |
+
\ probabilities of accessible chromatin will be predicted. \n"
|
42 |
+
license: MIT
|
43 |
+
name: Basset
|
44 |
+
tags:
|
45 |
+
- DNA accessibility
|
46 |
+
version: 0.1.0
|
47 |
+
schema:
|
48 |
+
inputs:
|
49 |
+
associated_metadata: ranges
|
50 |
+
doc: DNA sequence
|
51 |
+
name: seq
|
52 |
+
shape: (4,600,1)
|
53 |
+
special_type: DNASeq
|
54 |
+
targets:
|
55 |
+
column_labels:
|
56 |
+
- target_labels.txt
|
57 |
+
doc: Probability of accessible chromatin in 164 cell types
|
58 |
+
name: DHS_probs
|
59 |
+
shape: (164, )
|
60 |
+
|
61 |
+
test:
|
62 |
+
expect:
|
63 |
+
url: https://s3.eu-central-1.amazonaws.com/kipoi-models/predictions/14f9bf4b49e21c7b31e8f6d6b9fc69ed88e25f43/Basset/predictions.h5
|
64 |
+
md5: 9df59f9899b27e65ab95426cb9557ad3
|