|
import os
import pickle
import subprocess
import time
import uuid

import gdown
import gradio as gr
import numpy as np
import pyBigWig
import pysam
from scipy.sparse import csr_matrix
|
|
|
def atac_bwtonpz(atac_file):
    """Convert a bigWig signal track into a pickled dict of sparse vectors.

    Every chromosome named ``chrX`` or ``chr<integer>`` is expanded to a
    per-base float32 array, truncated to a whole multiple of 1000 positions
    and stored as a ``csr_matrix``.  Chromosomes with any other name are
    skipped.  The resulting dict is keyed by the integer chromosome number
    (or the string ``'X'``) and is pickled next to the input file, with the
    file extension replaced by ``.pickle``.

    Args:
        atac_file: Path to the bigWig file to convert.
    """
    signals = {}
    bw = pyBigWig.open(atac_file)
    try:
        for chrom, length in bw.chroms().items():
            if chrom == 'chrX':
                chrom_key = 'X'
            else:
                try:
                    chrom_key = int(chrom[3:])
                except ValueError:
                    # Name is not "chr" + integer (e.g. chrY, chrM, scaffolds).
                    continue

            # Allocate float32 directly: the result is the same as filling a
            # float64 buffer and casting afterwards, but avoids holding both
            # copies of a chromosome-sized array in memory.
            dense = np.zeros(length, dtype='float32')

            # NOTE(review): pyBigWig is expected to return None instead of an
            # empty sequence when a chromosome has no intervals - confirm.
            for start, end, value in bw.intervals(chrom) or ():
                dense[start:end] = value

            seq_length = length // 1000 * 1000
            signals[chrom_key] = csr_matrix(dense[:seq_length])
    finally:
        # Release the file handle even when reading fails part-way.
        bw.close()

    # Swap only the extension; a plain str.replace would also rewrite any
    # 'bigWig' found in a directory name, and would overwrite the input when
    # the file name does not contain 'bigWig' at all.
    pickle_path = os.path.splitext(atac_file)[0] + '.pickle'
    with open(pickle_path, 'wb') as f:
        pickle.dump(signals, f)
|
|
|
|
|
def process(bam_file, progress=gr.Progress()):
    """Download a BAM file and turn it into a pickled per-chromosome signal.

    The ``ATAC`` working directory is created if missing; otherwise every file
    in it whose name does not start with ``example`` is removed.  The BAM file
    is then downloaded (``wget`` when the URL contains ``dropbox``, ``gdown``
    otherwise), indexed with ``pysam``, converted to bigWig with the
    ``bamCoverage`` command-line tool and finally passed to
    ``atac_bwtonpz``.  The downloaded BAM, its index and the bigWig are
    deleted afterwards, whether or not the conversion succeeded.

    Args:
        bam_file: URL of the BAM file, as typed by the user.
        progress: Gradio progress tracker used to report the current step.

    Returns:
        Path of the generated pickle file, ``ATAC/<uuid>.pickle``.

    Raises:
        gr.Error: If no URL was given, the download failed, or
            ``bamCoverage`` exited with a non-zero status.
    """
    if not os.path.exists('ATAC'):
        os.mkdir('ATAC')
    else:
        for name in os.listdir('ATAC/'):
            if not name.startswith('example'):
                os.remove(os.path.join('ATAC/', name))

    if not bam_file or not bam_file.strip():
        raise gr.Error('Please provide a URL to a BAM file.')
    bam_file = bam_file.strip()

    fid = str(uuid.uuid4())
    print(bam_file, fid)

    bam_path = 'ATAC/' + fid + '.bam'
    bigwig_path = 'ATAC/' + fid + '.bigWig'

    try:
        progress(0.2, desc="Downloading BAM file " + bam_file)
        if 'dropbox' in bam_file:
            # The URL is untrusted user input: pass it as a single argv entry
            # (no shell) and after "--" so it can be neither interpreted as
            # shell syntax nor as a wget option.
            download_failed = subprocess.run(
                ['wget', '-O', bam_path, '--', bam_file]
            ).returncode != 0
        else:
            download_failed = gdown.download(
                url=bam_file, fuzzy=True, output=bam_path
            ) is None
        # wget -O creates the output file even when the transfer fails, so an
        # empty file is treated as a failed download as well.
        if (download_failed or not os.path.isfile(bam_path)
                or os.path.getsize(bam_path) == 0):
            raise gr.Error('Failed to download the BAM file from ' + bam_file)

        progress(0.4, desc="Indexing BAM file")
        time.sleep(0.1)
        pysam.index(bam_path)

        progress(0.6, desc="Converting BAM file to BigWig file (Please note that processing time may be lengthy)")
        time.sleep(0.1)
        coverage = subprocess.run([
            'bamCoverage',
            '--bam', bam_path,
            '-o', bigwig_path,
            '--outFileFormat', 'bigwig',
            '--normalizeUsing', 'RPGC',
            '--effectiveGenomeSize', '2913022398',
            '--Offset', '1',
            '--binSize', '1',
            '--numberOfProcessors', '6',
            '--blackListFileName', 'data/black_list.bed',
        ])
        if coverage.returncode != 0:
            raise gr.Error('bamCoverage failed to convert the BAM file to bigWig.')

        progress(0.8, desc="Processing bigWig file")
        time.sleep(0.1)
        atac_bwtonpz(bigwig_path)
    finally:
        # Always drop the intermediates so a failed run does not leave large
        # files behind; any of them may be missing depending on where the
        # pipeline stopped.
        for leftover in (bam_path + '.bai', bigwig_path, bam_path):
            if os.path.exists(leftover):
                os.remove(leftover)

    return 'ATAC/' + fid + '.pickle'
|
|
|
# Extra CSS handed to gr.Blocks: sets the font size and the `--font` custom
# property on list items.  The rule must be closed with "}" so that it stays
# well-formed if any other CSS ends up after it.
css = "li {font-size: 16px; --font: 'Quicksand', 'ui-sans-serif', 'system-ui', sans-serif}"

# Page layout: a banner, then one row with the URL input and submit button in
# the left column and the downloadable result in the right column.
with gr.Blocks(theme=gr.themes.Soft(), css=css) as app:
    gr.HTML('<p>For faster inference without waiting in queue, you may duplicate the space. <a href="https://huggingface.co/spaces/drjieliu/epcot_app?duplicate=true">'
            '<img style="display: inline; margin-top: 0em; margin-bottom: 0em" src="https://bit.ly/3gLdBN6" alt="Duplicate Space" /></a></p>')
    with gr.Row():
        with gr.Column():
            inp = gr.Textbox(
                label="URL to ATAC-seq BAM",
                info='Only Dropbox and Google Drive file links are accepted (set "Anyone with the link")',
                lines=1,
                placeholder='e.g. https://drive.google.com/file/d/xxxxx/view?usp=sharing',
            )
            btn2 = gr.Button("Submit")
        with gr.Column():
            out = gr.File(label='Download the processed file')
    # Clicking "Submit" runs the conversion and offers the pickle for download.
    btn2.click(fn=process, inputs=inp, outputs=out)

# One job at a time, with at most five requests waiting.  Queueing is already
# enabled by .queue(), so the deprecated enable_queue launch flag is omitted.
app.queue(concurrency_count=1, max_size=5).launch(debug=True)