File size: 2,017 Bytes
196368e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64

# Model backend type and the arguments used to load it.
type: pytorch
args:
  # Presumably the Python file and the name of the object in it that hold the
  # model definition; verify against the Kipoi model.yaml documentation.
  module_file: 'pretrained_model_reloaded_th.py'
  module_obj: 'model'
  weights:
    # md5 checksum and download URL of the weights file (hosted on Zenodo).
    # Both are quoted so they always load as strings.
    md5: '4878981d84499eb575abd0f3b45570d3'
    url: 'https://zenodo.org/record/1466068/files/pretrained_model_reloaded_th.pth?download=1'
# Default dataloader for this model, referenced by its dotted path in kipoiseq.
default_dataloader:
  defined_as: kipoiseq.dataloaders.SeqIntervalDl
  # Arguments applied to the dataloader by default.
  default_args:
    # NOTE(review): alphabet_axis 0, a length of 600 and dummy_axis 2 appear
    # chosen to yield the (4,600,1) input declared under schema.inputs.shape;
    # confirm against the SeqIntervalDl documentation.
    alphabet_axis: 0
    auto_resize_len: 600
    dtype: np.float32
    dummy_axis: 2
# Packages required by the model, grouped by installer.
dependencies:
  # Conda packages; every entry carries a version pin.
  conda:
  - python=3.6
  - h5py=2.10.0
  # NOTE(review): the build strings on the next two entries (gpu_0,
  # cuda100py36...) look like they select a GPU/CUDA build of PyTorch for
  # Python 3.6; confirm that a GPU-only environment is intended.
  - _pytorch_select=0.2=gpu_0
  - pytorch=1.3.1=cuda100py36h53c1284_0
  - pip=20.3.3
  - pysam=0.15.3
  - cython=0.29.23
  # Pip packages; kipoiseq is listed without a version pin.
  pip:
  - kipoiseq
# Human-readable model metadata: authorship, citation, training-data summary,
# description, license, tags and version.
info:
  authors:
  - github: davek44
    name: David R. Kelley
  cite_as: https://doi.org/10.1101/gr.200535.115
  contributors:
  - github: krrome
    name: Roman Kreuzhuber
  # Folded scalar (>-): the lines below load as one single-line string with no
  # trailing newline.
  trained_on: >-
    From 2,071,886 total sites, 71,886 randomly reserved for testing and 70,000
    for validation, leaving 1,930,000 for training.
  # Folded scalar (>): line breaks below become spaces; the value ends with a
  # single newline.
  doc: >
    This is the Basset model published by David Kelley, converted to PyTorch by
    Roman Kreuzhuber. It categorically predicts probabilities of accessible
    genomic regions in 164 cell types (ENCODE project and Roadmap Epigenomics
    Consortium). Data was generated using DNase-seq. The sequence length the
    model uses as input is 600bp. The input tensor has to have shape
    (N, 4, 600, 1) for N samples, 600bp window size and 4 nucleotides. Per
    sample, 164 probabilities of accessible chromatin will be predicted.
  license: MIT
  name: Basset
  tags:
  - DNA accessibility
  # Quoted so the version can never be re-typed as a number by a parser.
  version: '0.1.0'
# Declared model inputs and prediction targets.
schema:
  inputs:
    associated_metadata: ranges
    doc: DNA sequence
    name: seq
    # Shape is given without the batch dimension; info.doc describes the full
    # input tensor as (N, 4, 600, 1) for N samples.
    shape: (4,600,1)
    special_type: DNASeq
  targets:
    # Presumably a file with one label per output column (164 in total);
    # verify against the file's contents.
    column_labels:
    - target_labels.txt
    doc: Probability of accessible chromatin in 164 cell types
    name: DHS_probs
    shape: (164, )

# Reference output for the model test: download URL and md5 checksum of an
# HDF5 predictions file. Both are quoted so they always load as strings.
test:
  expect:
    url: 'https://s3.eu-central-1.amazonaws.com/kipoi-models/predictions/14f9bf4b49e21c7b31e8f6d6b9fc69ed88e25f43/Basset/predictions.h5'
    md5: '9df59f9899b27e65ab95426cb9557ad3'