File size: 3,325 Bytes
90b68b7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
522c3e6
90b68b7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
522c3e6
5ff291c
90b68b7
 
5ff291c
90b68b7
 
 
 
5ff291c
90b68b7
 
 
 
 
522c3e6
90b68b7
b0adba5
71d7794
 
b7e7379
 
90b68b7
 
 
 
 
 
71d7794
90b68b7
 
 
 
 
 
522c3e6
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
import os
import pickle
import subprocess
import time
import uuid

import gdown
import gradio as gr
import numpy as np
import pyBigWig
import pysam
from scipy.sparse import csr_matrix

def atac_bwtonpz(atac_file):
    """Convert a bigWig track into a pickled dict of sparse per-chromosome signals.

    Each kept chromosome is expanded to a dense per-base float32 vector,
    truncated to a whole multiple of 1000 bases and stored as a
    ``scipy.sparse.csr_matrix``. The resulting dict is pickled to
    ``atac_file`` with every occurrence of ``'bigWig'`` replaced by
    ``'pickle'`` (so ``foo.bigWig`` becomes ``foo.pickle``).

    Despite the function name, the output is a pickle file, not an ``.npz``.

    Args:
        atac_file: Path to the input bigWig file.

    Returns:
        None. The result is written to disk.
    """
    signals = {}
    bw = pyBigWig.open(atac_file)
    try:
        for chrom, length in bw.chroms().items():
            # Keys are 'X' for chrX and the integer after the 3-character
            # prefix for numbered chromosomes; any other name (chrY, chrM,
            # scaffolds, names without a numeric suffix) is skipped.
            if chrom == 'chrX':
                chrom_key = 'X'
            else:
                try:
                    chrom_key = int(chrom[3:])
                except ValueError:
                    continue

            # Allocate float32 directly: the values end up as float32 anyway,
            # and this avoids a float64 buffer plus a second full-size copy.
            dense = np.zeros(length, dtype='float32')
            # Guard against intervals() yielding None for a chromosome that
            # has no entries, which would otherwise raise TypeError.
            for start, end, value in bw.intervals(chrom) or ():
                dense[start:end] = value

            # Drop the tail so the stored length is a multiple of 1000.
            seq_length = length // 1000 * 1000
            signals[chrom_key] = csr_matrix(dense[:seq_length])
    finally:
        # Always release the underlying file handle, even on failure.
        bw.close()

    with open(atac_file.replace('bigWig','pickle'),'wb') as f:
        pickle.dump(signals,f)


def process(bam_file,progress=gr.Progress()):
    """Download an ATAC-seq BAM file and convert it into a pickled signal file.

    Steps, in order:
      1. Validate the URL and clear previous non-``example*`` files in ``ATAC/``.
      2. Download the BAM (``wget`` when the URL contains ``dropbox``,
         ``gdown`` otherwise).
      3. Index the BAM with ``pysam``.
      4. Run ``bamCoverage`` to produce a bigWig.
      5. Convert the bigWig with ``atac_bwtonpz`` and remove the intermediates.

    Args:
        bam_file: URL entered by the user (Dropbox or Google Drive link).
        progress: Gradio progress tracker used to report the current step.

    Returns:
        Path of the generated pickle file, ``ATAC/<uuid>.pickle``.

    Raises:
        gr.Error: If no URL was given, the download failed, or the
            ``bamCoverage`` conversion failed.
    """
    # Validate first so an empty submission neither wipes earlier outputs
    # nor shows the user a blank error message.
    if not bam_file or not bam_file.strip():
        raise gr.Error('Please provide a Dropbox or Google Drive URL to a BAM file.')
    bam_file = bam_file.strip()

    # Make sure the working directory exists and drop stale outputs,
    # keeping only the bundled example files.
    os.makedirs('ATAC', exist_ok=True)
    for f in os.listdir('ATAC'):
        if not f.startswith('example'):
            os.remove(os.path.join('ATAC', f))

    fid = str(uuid.uuid4())
    print(bam_file,fid)

    bam_path = 'ATAC/' + fid + '.bam'
    bai_path = bam_path + '.bai'
    bigwig_path = 'ATAC/' + fid + '.bigWig'
    pickle_path = 'ATAC/' + fid + '.pickle'

    try:
        progress(0.2, desc="Downloading BAM file "+bam_file)
        if 'dropbox' in bam_file:
            # The URL is untrusted user input: pass it as a separate argv
            # element (no shell) and after '--' so it can never be parsed
            # as a shell command or as a wget option.
            try:
                subprocess.run(['wget', '-O', bam_path, '--', bam_file], check=True)
            except (OSError, subprocess.CalledProcessError) as err:
                raise gr.Error('Failed to download the BAM file from the given URL.') from err
        else:
            gdown.download(url=bam_file,fuzzy=True, output=bam_path)

        # Catch downloads that "succeeded" without producing a usable file.
        if not os.path.isfile(bam_path) or os.path.getsize(bam_path) == 0:
            raise gr.Error('Failed to download the BAM file from the given URL.')

        progress(0.4, desc="Indexing BAM file")
        time.sleep(0.1)
        pysam.index(bam_path)

        progress(0.6, desc="Converting BAM file to BigWig file (Please note that processing time may be lengthy)")
        time.sleep(0.1)
        try:
            subprocess.run(
                [
                    'bamCoverage', '--bam', bam_path, '-o', bigwig_path,
                    '--outFileFormat', 'bigwig',
                    '--normalizeUsing', 'RPGC',
                    '--effectiveGenomeSize', '2913022398',
                    '--Offset', '1',
                    '--binSize', '1',
                    '--numberOfProcessors', '6',
                    '--blackListFileName', 'data/black_list.bed',
                ],
                check=True,
            )
        except (OSError, subprocess.CalledProcessError) as err:
            raise gr.Error('Failed to convert the BAM file to bigWig with bamCoverage.') from err

        progress(0.8, desc="Processing bigWig file")
        time.sleep(0.1)
        atac_bwtonpz(bigwig_path)
    finally:
        # Remove the large intermediates whether or not the run succeeded.
        for tmp_path in (bai_path, bigwig_path, bam_path):
            if os.path.exists(tmp_path):
                os.remove(tmp_path)

    return pickle_path

# Custom CSS for list items; the rule must be closed with '}' to be well-formed.
css="li {font-size: 16px; --font: 'Quicksand', 'ui-sans-serif', 'system-ui', sans-serif}"

# UI layout: a URL textbox and submit button on the left, the downloadable
# result file on the right.
with gr.Blocks(theme=gr.themes.Soft(),css=css) as app:
    gr.HTML('<p>For faster inference without waiting in queue, you may duplicate the space. <a href="https://huggingface.co/spaces/drjieliu/epcot_app?duplicate=true">'
                 '<img style="display: inline; margin-top: 0em; margin-bottom: 0em" src="https://bit.ly/3gLdBN6" alt="Duplicate Space" /></a></p>')
    with gr.Row():
        with gr.Column():
            inp=gr.Textbox(
                label="URL to ATAC-seq BAM",
                info='Only Dropbox and Google Drive file links are accepted (set "Anyone with the link")',
                lines=1,
                placeholder='e.g. https://drive.google.com/file/d/xxxxx/view?usp=sharing'
            )
            btn2=gr.Button("Submit")
        with gr.Column():
            out = gr.File(label='Download the processed file')
        # Run the BAM -> pickle pipeline on submit and expose the result file.
        btn2.click(fn=process, inputs=inp, outputs=out)

# One job at a time, with at most 5 requests waiting. Calling .queue() already
# enables queuing, so the deprecated launch(enable_queue=True) flag is omitted.
app.queue(concurrency_count=1,max_size=5).launch(debug=True)