haimasree committed on
Commit
196368e
·
1 Parent(s): 1b7ca55

Create model.yaml

Browse files
Files changed (1) hide show
  1. model.yaml +64 -0
model.yaml ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ type: pytorch
3
+ args:
4
+ module_file: pretrained_model_reloaded_th.py
5
+ module_obj: model
6
+ weights:
7
+ md5: 4878981d84499eb575abd0f3b45570d3
8
+ url: https://zenodo.org/record/1466068/files/pretrained_model_reloaded_th.pth?download=1
9
+ default_dataloader:
10
+ defined_as: kipoiseq.dataloaders.SeqIntervalDl
11
+ default_args:
12
+ alphabet_axis: 0
13
+ auto_resize_len: 600
14
+ dtype: np.float32
15
+ dummy_axis: 2
16
+ dependencies:
17
+ conda:
18
+ - python=3.6
19
+ - h5py=2.10.0
20
+ - _pytorch_select=0.2=gpu_0
21
+ - pytorch=1.3.1=cuda100py36h53c1284_0
22
+ - pip=20.3.3
23
+ - pysam=0.15.3
24
+ - cython=0.29.23
25
+ pip:
26
+ - kipoiseq
27
+ info:
28
+ authors:
29
+ - github: davek44
30
+ name: David R. Kelley
31
+ cite_as: https://doi.org/10.1101/gr.200535.115
32
+ contributors:
33
+ - github: krrome
34
+ name: Roman Kreuzhuber
35
+ trained_on: "From 2,071,886 total sites, 71,886 randomly reserved for testing and 70,000 for validation, leaving 1,930,000 for training."
36
+ doc: "This is the Basset model published by David Kelley converted to pytorch by\
37
+ \ Roman Kreuzhuber. It categorically predicts probabilities of accessible genomic\
38
+ \ regions in 164 cell types (ENCODE project and Roadmap Epigenomics Consortium). Data was generated using DNase-seq. The sequence\
39
+ \ length the model uses as input is 600bp. The input tensor has to be of shape (N,\
40
+ \ 4, 600, 1) for N samples, 600bp window size and 4 nucleotides. Per sample, 164\
41
+ \ probabilities of accessible chromatin will be predicted. \n"
42
+ license: MIT
43
+ name: Basset
44
+ tags:
45
+ - DNA accessibility
46
+ version: 0.1.0
47
+ schema:
48
+ inputs:
49
+ associated_metadata: ranges
50
+ doc: DNA sequence
51
+ name: seq
52
+ shape: (4,600,1)
53
+ special_type: DNASeq
54
+ targets:
55
+ column_labels:
56
+ - target_labels.txt
57
+ doc: Probability of accessible chromatin in 164 cell types
58
+ name: DHS_probs
59
+ shape: (164, )
60
+
61
+ test:
62
+ expect:
63
+ url: https://s3.eu-central-1.amazonaws.com/kipoi-models/predictions/14f9bf4b49e21c7b31e8f6d6b9fc69ed88e25f43/Basset/predictions.h5
64
+ md5: 9df59f9899b27e65ab95426cb9557ad3