epcot_app / app.py
drjieliu's picture
Update app.py
71d7794
import os
import pickle
import subprocess
import time
import uuid

import gdown
import gradio as gr
import numpy as np
import pyBigWig
import pysam
from scipy.sparse import csr_matrix
def atac_bwtonpz(atac_file):
    """Convert a bigWig signal track into a pickled dict of sparse vectors.

    Every chromosome named ``chrX`` or ``chr<integer>`` is expanded to a
    per-base float32 signal, truncated to a multiple of 1000 positions and
    stored as a 1 x N ``csr_matrix``. Chromosomes whose suffix is not an
    integer (e.g. ``chrY``, ``chrM``, unplaced contigs) are skipped.

    The resulting ``{chromosome_key: csr_matrix}`` dict (keys are ``'X'`` or
    an ``int``) is pickled next to the input file, with the file extension
    replaced by ``.pickle``.

    Args:
        atac_file: Path to the bigWig file (e.g. ``ATAC/<id>.bigWig``).
    """
    signals = {}
    bw = pyBigWig.open(atac_file)
    try:
        for chrom, length in bw.chroms().items():
            if chrom == 'chrX':
                chrom_key = 'X'
            else:
                try:
                    chrom_key = int(chrom[3:])
                except ValueError:
                    # Not an autosome-style name; ignore this sequence.
                    continue
            # Allocate float32 directly instead of float64 + astype copy.
            temp = np.zeros(length, dtype='float32')
            # intervals() yields None (not an empty tuple) when the
            # chromosome has no signal, so fall back to an empty iterable.
            for start, end, value in bw.intervals(chrom) or ():
                temp[start:end] = value
            # Keep only whole 1000-position bins.
            seq_length = length // 1000 * 1000
            signals[chrom_key] = csr_matrix(temp[:seq_length])
    finally:
        # Release the file handle even if conversion fails midway.
        bw.close()
    # Swap only the extension; a plain str.replace would also rewrite any
    # 'bigWig' occurring in a directory name.
    out_path = os.path.splitext(atac_file)[0] + '.pickle'
    with open(out_path, 'wb') as f:
        pickle.dump(signals, f)
def process(bam_file, progress=gr.Progress()):
    """Download a BAM file and convert it into a pickled sparse signal file.

    Steps: clear previous non-example files from ``ATAC/``, download the BAM
    (``wget`` for Dropbox links, ``gdown`` otherwise), index it with pysam,
    run ``bamCoverage`` to produce a bigWig, and convert that bigWig with
    ``atac_bwtonpz``. Intermediate BAM / index / bigWig files are removed
    whether or not the conversion succeeds.

    Args:
        bam_file: URL of the BAM file entered by the user.
        progress: Gradio progress tracker used to report each stage.

    Returns:
        Path of the generated pickle file, ``ATAC/<uuid>.pickle``.

    Raises:
        gr.Error: If no URL is given, the download fails, or ``bamCoverage``
            cannot be run successfully.
    """
    if not os.path.exists('ATAC'):
        os.mkdir('ATAC')
    else:
        # Drop results of earlier runs, keeping the bundled example files.
        for f in os.listdir('ATAC/'):
            if not f.startswith('example'):
                os.remove(os.path.join('ATAC/', f))

    if bam_file is None or not bam_file.strip():
        raise gr.Error('Please provide a Dropbox or Google Drive link to a BAM file.')
    bam_file = bam_file.strip()

    fid = str(uuid.uuid4())
    bam_path = 'ATAC/' + fid + '.bam'
    bigwig_path = 'ATAC/' + fid + '.bigWig'
    print(bam_file, fid)

    try:
        progress(0.2, desc="Downloading BAM file " + bam_file)
        if 'dropbox' in bam_file:
            # The URL is untrusted user input: pass it as a single argv
            # entry (no shell) and after '--' so it cannot be read as an
            # option or as extra shell commands.
            fetched = subprocess.run(
                ['wget', '-O', bam_path, '--', bam_file]
            ).returncode == 0
        else:
            gdown.download(url=bam_file, fuzzy=True, output=bam_path)
            fetched = True
        # wget -O creates an empty file even when the transfer fails.
        if not (fetched and os.path.isfile(bam_path) and os.path.getsize(bam_path) > 0):
            raise gr.Error('Failed to download the BAM file from ' + bam_file)

        progress(0.4, desc="Indexing BAM file")
        time.sleep(0.1)
        pysam.index(bam_path)

        progress(0.6, desc="Converting BAM file to BigWig file (Please note that processing time may be lengthy)")
        time.sleep(0.1)
        try:
            subprocess.run(
                [
                    'bamCoverage', '--bam', bam_path, '-o', bigwig_path,
                    '--outFileFormat', 'bigwig', '--normalizeUsing', 'RPGC',
                    '--effectiveGenomeSize', '2913022398', '--Offset', '1',
                    '--binSize', '1', '--numberOfProcessors', '6',
                    '--blackListFileName', 'data/black_list.bed',
                ],
                check=True,
            )
        except (subprocess.CalledProcessError, OSError) as err:
            raise gr.Error('bamCoverage failed to convert the BAM file to bigWig.') from err

        progress(0.8, desc="Processing bigWig file")
        time.sleep(0.1)
        atac_bwtonpz(bigwig_path)
    finally:
        # Always clean up intermediates so a failed run leaves no large
        # files behind; any of them may be missing depending on the stage.
        for leftover in (bam_path + '.bai', bigwig_path, bam_path):
            try:
                os.remove(leftover)
            except FileNotFoundError:
                pass

    return 'ATAC/' + fid + '.pickle'
# Custom CSS for list items in the UI (rule is now properly closed with '}').
css = "li {font-size: 16px; --font: 'Quicksand', 'ui-sans-serif', 'system-ui', sans-serif}"

# Single-page UI: a URL textbox and submit button on the left, the
# downloadable result file on the right.
with gr.Blocks(theme=gr.themes.Soft(), css=css) as app:
    gr.HTML('<p>For faster inference without waiting in queue, you may duplicate the space. <a href="https://huggingface.co/spaces/drjieliu/epcot_app?duplicate=true">'
            '<img style="display: inline; margin-top: 0em; margin-bottom: 0em" src="https://bit.ly/3gLdBN6" alt="Duplicate Space" /></a></p>')
    with gr.Row():
        with gr.Column():
            inp = gr.Textbox(
                label="URL to ATAC-seq BAM",
                info='Only Dropbox and Google Drive file links are accepted (set "Anyone with the link")',
                lines=1,
                placeholder='e.g. https://drive.google.com/file/d/xxxxx/view?usp=sharing',
            )
            btn2 = gr.Button("Submit")
        with gr.Column():
            out = gr.File(label='Download the processed file')
    # Clicking "Submit" runs the BAM -> pickle pipeline on the entered URL.
    btn2.click(fn=process, inputs=inp, outputs=out)

# One job at a time, with at most five requests waiting in the queue.
app.queue(concurrency_count=1, max_size=5).launch(enable_queue=True, debug=True)