"""Gradio app: download an ATAC-seq BAM by URL and convert it to a pickle.

The pipeline is: download BAM -> index (pysam) -> coverage bigWig
(deepTools ``bamCoverage``) -> per-chromosome sparse signal dict (pickle).
"""

import contextlib
import os
import pickle
import subprocess
import time
import uuid

import gdown
import gradio as gr
import numpy as np
import pyBigWig
import pysam
from scipy.sparse import csr_matrix

# Working directory for downloaded and generated files.
ATAC_DIR = 'ATAC'


def atac_bwtonpz(atac_file):
    """Convert a bigWig signal track into a pickled dict of sparse rows.

    Only chromosomes named ``chrX`` or ``chr<integer>`` are kept; they are
    stored under the keys ``'X'`` and ``<integer>`` respectively. Each signal
    is truncated to a multiple of 1000 positions and stored as a float32
    ``csr_matrix``.

    Args:
        atac_file: Path to the input bigWig file.

    Returns:
        Path of the pickle that was written (the input path with its
        extension replaced by ``.pickle``).
    """
    signals = {}
    bw = pyBigWig.open(atac_file)
    try:
        for chrom, length in bw.chroms().items():
            if chrom == 'chrX':
                chrom_key = 'X'
            else:
                try:
                    chrom_key = int(chrom[3:])
                except ValueError:
                    # Not a numbered chromosome (e.g. chrY, chrM, scaffolds).
                    continue

            # Allocate directly as float32 instead of float64 + astype.
            dense = np.zeros(length, dtype='float32')
            # intervals() may hand back None when a chromosome has no
            # entries; treat that as "all zeros" rather than crashing.
            for start, end, value in bw.intervals(chrom) or ():
                dense[start:end] = value

            seq_length = length // 1000 * 1000
            signals[chrom_key] = csr_matrix(dense[:seq_length])
    finally:
        bw.close()

    # Swap only the extension; str.replace('bigWig', 'pickle') would also
    # rewrite directory names and, if the substring were absent, would make
    # the output path equal to the input path.
    pickle_path = os.path.splitext(atac_file)[0] + '.pickle'
    with open(pickle_path, 'wb') as f:
        pickle.dump(signals, f)
    return pickle_path


def _remove_if_present(path):
    """Delete *path*, tolerating the case where it was never created."""
    with contextlib.suppress(FileNotFoundError):
        os.remove(path)


def _run_tool(args, failure_message):
    """Run an external command without a shell; surface failure to the UI."""
    try:
        subprocess.run(args, check=True)
    except (OSError, subprocess.CalledProcessError) as err:
        raise gr.Error(failure_message) from err


def process(bam_file, progress=gr.Progress()):
    """Download a BAM file from a URL and turn it into a signal pickle.

    Args:
        bam_file: Dropbox or Google Drive URL of the BAM file.
        progress: Gradio progress tracker (injected by Gradio).

    Returns:
        Path of the generated ``.pickle`` file inside ``ATAC/``.

    Raises:
        gr.Error: If no URL was given, the download produced no data, or an
            external tool (``wget`` / ``bamCoverage``) failed.
    """
    bam_file = (bam_file or '').strip()
    if not bam_file:
        raise gr.Error('Please provide a URL to a BAM file.')

    # Drop results of earlier runs, keeping only the bundled example files.
    os.makedirs(ATAC_DIR, exist_ok=True)
    for name in os.listdir(ATAC_DIR):
        if not name.startswith('example'):
            os.remove(os.path.join(ATAC_DIR, name))

    fid = str(uuid.uuid4())
    print(bam_file, fid)

    base = f'{ATAC_DIR}/{fid}'
    bam_path = base + '.bam'
    bai_path = bam_path + '.bai'
    bigwig_path = base + '.bigWig'

    try:
        progress(0.2, desc="Downloading BAM file " + bam_file)
        if 'dropbox' in bam_file:
            # The URL is untrusted user input: pass it as a single argv
            # element (no shell) and after "--" so it cannot be parsed as a
            # wget option.
            _run_tool(
                ['wget', '-O', bam_path, '--', bam_file],
                'Failed to download the BAM file.',
            )
        else:
            gdown.download(url=bam_file, fuzzy=True, output=bam_path)

        # wget -O creates the output file even when the transfer fails, so
        # check for actual content as well as existence.
        if not os.path.isfile(bam_path) or os.path.getsize(bam_path) == 0:
            raise gr.Error('Failed to download the BAM file.')

        progress(0.4, desc="Indexing BAM file")
        time.sleep(0.1)  # presumably lets the progress update render
        pysam.index(bam_path)

        progress(0.6, desc="Converting BAM file to BigWig file (Please note that processing time may be lengthy)")
        time.sleep(0.1)
        # NOTE(review): effective genome size looks like a human-genome
        # value for deepTools RPGC normalisation; confirm against the
        # reference build in use.
        _run_tool(
            [
                'bamCoverage', '--bam', bam_path, '-o', bigwig_path,
                '--outFileFormat', 'bigwig', '--normalizeUsing', 'RPGC',
                '--effectiveGenomeSize', '2913022398', '--Offset', '1',
                '--binSize', '1', '--numberOfProcessors', '6',
                '--blackListFileName', 'data/black_list.bed',
            ],
            'bamCoverage failed to convert the BAM file.',
        )

        progress(0.8, desc="Processing bigWig file")
        time.sleep(0.1)
        return atac_bwtonpz(bigwig_path)
    finally:
        # Remove the large intermediates whether or not the run succeeded.
        for leftover in (bai_path, bigwig_path, bam_path):
            _remove_if_present(leftover)


# The closing brace was missing, leaving the rule unterminated.
css = "li {font-size: 16px; --font: 'Quicksand', 'ui-sans-serif', 'system-ui', sans-serif}"

with gr.Blocks(theme=gr.themes.Soft(), css=css) as app:
    # NOTE(review): this banner currently contains plain text only. If it was
    # meant to carry markup (e.g. a "Duplicate Space" link or badge), that
    # markup is not present in this file and needs to be restored.
    gr.HTML(
        '\n\nFor faster inference without waiting in queue, you may duplicate the space. '
        'Duplicate Space\n\n'
    )
    with gr.Row():
        with gr.Column():
            inp = gr.Textbox(
                label="URL to ATAC-seq BAM",
                info='Only Dropbox and Google Drive file links are accepted (set "Anyone with the link")',
                lines=1,
                placeholder='e.g. https://drive.google.com/file/d/xxxxx/view?usp=sharing',
            )
            btn2 = gr.Button("Submit")
        with gr.Column():
            out = gr.File(label='Download the processed file')
    btn2.click(fn=process, inputs=inp, outputs=out)

# NOTE(review): `concurrency_count` and `enable_queue` are version-specific
# Gradio arguments; confirm they are still accepted by the pinned Gradio
# release before upgrading.
app.queue(concurrency_count=1, max_size=5).launch(enable_queue=True, debug=True)