Spaces:
Runtime error
Runtime error
''' | |
File name: tile_WSI.py | |
Date created: March/2021 | |
Source: | |
Tiling code inspired from | |
https://github.com/openslide/openslide-python/blob/master/examples/deepzoom/deepzoom_tile.py | |
The code has been extensively modified | |
Objective: | |
Tile svs, jpg or dcm images, with the possibility of rejecting some tiles based on xml or jpg masks
Be careful: | |
Overload of the node - may have memory issue if node is shared with other jobs. | |
''' | |
from __future__ import print_function | |
import json | |
import openslide | |
from openslide import open_slide, ImageSlide | |
from openslide.deepzoom import DeepZoomGenerator | |
from optparse import OptionParser | |
import re | |
import shutil | |
from unicodedata import normalize | |
import numpy as np | |
import scipy.misc | |
import subprocess | |
from glob import glob | |
from multiprocessing import Process, JoinableQueue | |
import time | |
import os | |
import sys | |
try: | |
import pydicom as dicom | |
except ImportError: | |
import dicom | |
# from scipy.misc import imsave | |
from imageio import imwrite as imsave | |
# from scipy.misc import imread | |
from imageio import imread | |
# from scipy.misc import imresize | |
from xml.dom import minidom | |
from PIL import Image, ImageDraw, ImageCms | |
from skimage import color, io | |
from tqdm import tqdm | |
Image.MAX_IMAGE_PIXELS = None | |
VIEWER_SLIDE_NAME = 'slide' | |
class TileWorker(Process):
    """A child process that generates and writes tiles.

    Each worker opens its own handle on the slide and consumes job tuples
    from a shared joinable queue until it receives ``None``.  A job is
    ``(associated, level, address, outfile, format, outfile_bw,
    PercentMasked, SaveMasks, TileMask, Normalize)``.
    """

    def __init__(self, queue, slidepath, tile_size, overlap, limit_bounds, quality, _Bkg, _ROIpc):
        """Store the tiling parameters; the slide is opened later, in run().

        _Bkg is the maximum percentage of bright (background) pixels a tile
        may contain and _ROIpc the minimum percentage of the tile that must
        be covered by the mask for the tile to be written.
        """
        Process.__init__(self, name='TileWorker')
        self.daemon = True
        self._queue = queue
        self._slidepath = slidepath
        self._tile_size = tile_size
        self._overlap = overlap
        self._limit_bounds = limit_bounds
        self._quality = quality
        self._slide = None
        self._Bkg = _Bkg
        self._ROIpc = _ROIpc

    def RGB_to_lab(self, tile):
        """Convert an RGB tile to CIE-Lab with skimage."""
        print("RGB to Lab")
        Lab = color.rgb2lab(tile)
        return Lab

    def Lab_to_RGB(self, Lab):
        """Convert a Lab array back to an 8-bit RGB array."""
        print("Lab to RGB")
        newtile = (color.lab2rgb(Lab) * 255).astype(np.uint8)
        return newtile

    @staticmethod
    def _match_lab_statistics(Lab, NormVec):
        """Shift/scale each Lab channel in place to the target mean and std.

        NormVec holds the three target means followed by the three target
        standard deviations.  Channel 0 is clipped to [0, 100] and channels
        1 and 2 to [-128, 127].  Returns the modified array.
        """
        newMean = NormVec[0:3]
        newStd = NormVec[3:6]
        for i in range(3):
            channel = Lab[:, :, i]
            tile_mean = np.mean(channel)
            tile_std = np.std(channel)
            # A perfectly uniform channel has zero spread: map it straight
            # onto the target mean instead of dividing by zero.
            gain = newStd[i] / tile_std if tile_std > 0 else 0.0
            low, high = (0, 100) if i == 0 else (-128, 127)
            Lab[:, :, i] = np.clip((channel - tile_mean) * gain + newMean[i], low, high)
        return Lab

    def normalize_tile(self, tile, NormVec):
        """Return the tile as an RGB array with Lab statistics matched to NormVec."""
        Lab = self.RGB_to_lab(tile)
        Lab = self._match_lab_statistics(Lab, NormVec)
        return self.Lab_to_RGB(Lab)

    def run(self):
        """Consume jobs from the queue until the ``None`` sentinel arrives."""
        self._slide = open_slide(self._slidepath)
        last_associated = None
        dz = self._get_dz()
        while True:
            data = self._queue.get()
            if data is None:
                self._queue.task_done()
                break
            (associated, level, address, outfile, _format, outfile_bw,
             PercentMasked, SaveMasks, TileMask, Normalize) = data
            if last_associated != associated:
                dz = self._get_dz(associated)
                last_associated = associated
            try:
                self._process_tile(dz, level, address, outfile, outfile_bw,
                                   PercentMasked, SaveMasks, TileMask, Normalize)
            except Exception as e:
                print("image %s failed at dz.get_tile for level %f" % (self._slidepath, level))
                print(e)
            finally:
                # Exactly one task_done() per job, whatever happened above,
                # so that queue.join() in the parent cannot hang.
                self._queue.task_done()

    def _process_tile(self, dz, level, address, outfile, outfile_bw,
                      PercentMasked, SaveMasks, TileMask, Normalize):
        """Read one tile and write it (and optionally its mask) if it passes the filters."""
        tile = dz.get_tile(level, address)
        # Fraction of pixels whose gray level is >= 220, i.e. background.
        gray = tile.convert('L')
        bw = gray.point(lambda x: 0 if x < 220 else 1, 'F')
        arr = np.array(np.asarray(bw))
        avgBkg = np.average(arr)
        # Written as negated ">=" / "<=" so that NaN values reject the tile.
        if not (avgBkg <= (self._Bkg / 100.0)):
            return
        if not (PercentMasked >= (self._ROIpc / 100.0)):
            return
        if Normalize != '':
            print("normalize " + str(outfile))
            tile = Image.fromarray(self.normalize_tile(tile, Normalize).astype('uint8'))
        tile.save(outfile, quality=self._quality)
        if SaveMasks:
            self._save_tile_mask(TileMask, arr.shape, outfile_bw)

    def _save_tile_mask(self, TileMask, tile_shape, outfile_bw):
        """Write TileMask as a binary 3-channel image resized to tile_shape (rows, cols)."""
        mask = np.asarray(TileMask, dtype=float)
        if mask.ndim < 2 or mask.size == 0:
            # No mask was supplied for this tile (e.g. no xml file in use).
            return
        maxVal = float(mask.max())
        if maxVal > 0:
            scaled = (mask / maxVal * 255.0).astype('uint8')
        else:
            scaled = np.zeros(mask.shape, 'uint8')
        TileMaskO = np.stack([scaled, scaled, scaled], axis=-1)
        # PIL expects the target size as (width, height).
        TileMaskO = np.array(Image.fromarray(TileMaskO).resize((tile_shape[1], tile_shape[0])))
        TileMaskO[TileMaskO < 10] = 0
        TileMaskO[TileMaskO >= 10] = 255
        imsave(outfile_bw, TileMaskO)

    def _get_dz(self, associated=None):
        """Build a DeepZoomGenerator for the slide or for one associated image."""
        if associated is not None:
            image = ImageSlide(self._slide.associated_images[associated])
        else:
            image = self._slide
        return DeepZoomGenerator(image, self._tile_size, self._overlap, limit_bounds=self._limit_bounds)
class DeepZoomImageTiler(object):
    """Handles generation of tiles and metadata for a single image.

    Tiles are not written here: one job per tile is pushed on the queue that
    the worker processes consume.  When an xml (or jpg) mask is supplied,
    the fraction of each tile covered by the mask travels with the job.
    """

    def __init__(self, dz, basename, format, associated, queue, slide, basenameJPG, xmlfile, mask_type, xmlLabel, ROIpc, ImgExtension, SaveMasks, Mag, normalize, Fieldxml):
        """Store the generator, output names and masking options.

        mask_type 0 keeps the area outside of the mask; any other value keeps
        the area inside it.  Mag > 0 restricts tiling to that magnification.
        SaveMasks and normalize are forwarded unchanged with every tile job.
        """
        self._dz = dz
        self._basename = basename
        self._basenameJPG = basenameJPG
        self._format = format
        self._associated = associated
        self._queue = queue
        self._processed = 0
        self._slide = slide
        self._xmlfile = xmlfile
        self._mask_type = mask_type
        self._xmlLabel = xmlLabel
        self._ROIpc = ROIpc
        self._ImgExtension = ImgExtension
        self._SaveMasks = SaveMasks
        self._Mag = Mag
        self._normalize = normalize
        self._Fieldxml = Fieldxml

    def run(self):
        """Queue all the tiles, then write the .dzi descriptor."""
        self._write_tiles()
        self._write_dzi()

    def _write_tiles(self):
        """Queue one job per tile for every requested magnification level."""
        # Slide zoom levels and objective information.
        Factors = self._slide.level_downsamples
        try:
            Objective = float(self._slide.properties[openslide.PROPERTY_NAME_OBJECTIVE_POWER])
        except (KeyError, TypeError, ValueError):
            print(self._basename + " - No Obj information found")
            print(self._ImgExtension)
            if ("jpg" in self._ImgExtension) or ("dcm" in self._ImgExtension) or ("tif" in self._ImgExtension):
                # Plain images carry no objective power: tile them as 1x.
                Objective = 1.
                print("input is jpg - will be tiled as such with %f" % Objective)
            else:
                return
        # Magnification available at each level of the slide.
        Available = tuple(Objective / x for x in Factors)
        if len(Available) < 1:
            print(self._basename + " - Objective field empty!")
            return

        xml_valid = False
        mask = None
        Img_Fact = 1.0
        ImgID = os.path.basename(self._basename)
        xmldir = os.path.join(self._xmlfile, ImgID + '.xml')
        if (self._xmlfile != '') and (self._ImgExtension != 'jpg'):
            if self._ImgExtension == 'dcm':
                # dcm inputs use a jpg mask stored next to the expected xml.
                mask, xml_valid, Img_Fact = self.jpg_mask_read(xmldir)
            else:
                mask, xml_valid, Img_Fact = self.xml_read(xmldir, self._xmlLabel, self._Fieldxml)
            if not xml_valid:
                print("Error: xml %s file cannot be read properly - please check format" % xmldir)
                return

        for level in range(self._dz.level_count - 1, -1, -1):
            ThisMag = Available[0] / pow(2, self._dz.level_count - (level + 1))
            if self._Mag > 0 and ThisMag != self._Mag:
                continue
            # One output directory per magnification.
            tiledir = os.path.join("%s_files" % self._basename, str(ThisMag))
            if not os.path.exists(tiledir):
                os.makedirs(tiledir)
            cols, rows = self._dz.level_tiles[level]
            for row in range(rows):
                for col in range(cols):
                    tilename = os.path.join(tiledir, '%d_%d.%s' % (col, row, self._format))
                    tilename_bw = os.path.join(tiledir, '%d_%d_mask.%s' % (col, row, self._format))
                    if xml_valid:
                        TileMask, PercentMasked = self._tile_mask(mask, Img_Fact, level, col, row)
                    else:
                        PercentMasked = 1.0
                        TileMask = []
                    if not os.path.exists(tilename):
                        self._queue.put((self._associated, level, (col, row),
                                         tilename, self._format, tilename_bw, PercentMasked,
                                         self._SaveMasks, TileMask, self._normalize))
                    self._tile_done()

    def _tile_mask(self, mask, Img_Fact, level, col, row):
        """Return (mask patch, covered fraction) for one tile.

        The tile footprint is expressed in base-level pixels and divided by
        Img_Fact to index the mask array.
        """
        Dlocation, Dlevel, Dsize = self._dz.get_tile_coordinates(level, (col, row))
        scale = pow(2, (self._dz.level_count - 1 - level))
        Ddimension = tuple(scale * x for x in self._dz.get_tile_dimensions(level, (col, row)))
        startIndY = int(Dlocation[1] / Img_Fact)
        endIndY = int((Dlocation[1] + Ddimension[1]) / Img_Fact)
        startIndX = int(Dlocation[0] / Img_Fact)
        endIndX = int((Dlocation[0] + Ddimension[0]) / Img_Fact)
        TileMask = mask[startIndY:endIndY, startIndX:endIndX]
        PercentMasked = TileMask.mean()
        if self._mask_type == 0:
            # Keep the region outside of the mask.
            PercentMasked = 1.0 - PercentMasked
        return TileMask, PercentMasked

    def _tile_done(self):
        """Count one processed tile; emit a newline on stderr after the last one."""
        self._processed += 1
        if self._processed == self._dz.tile_count:
            print(file=sys.stderr)

    def _write_dzi(self):
        """Write the Deep Zoom descriptor next to the tile directory."""
        with open('%s.dzi' % self._basename, 'w') as fh:
            fh.write(self.get_dzi())

    def get_dzi(self):
        """Return the .dzi XML for this image."""
        return self._dz.get_dzi(self._format)

    def jpg_mask_read(self, xmldir):
        """Read the jpg mask that accompanies a dcm image.

        The mask path is xmldir with its last four characters replaced by
        "mask.jpg".  Returns ``(mask, valid, factor)``; the mask is rescaled
        to [0, 1] and the factor is 1.  On failure returns ``([], False, 1.0)``.
        """
        Img_Fact = 1
        maskpath = xmldir[:-4] + "mask.jpg"
        try:
            xmlcontent = imread(maskpath)
        except Exception as e:
            print("error reading mask image %s: %s" % (maskpath, e))
            return [], False, 1.0
        xmlcontent = xmlcontent - np.min(xmlcontent)
        peak = np.max(xmlcontent)
        if peak > 0:
            mask = xmlcontent / peak
        else:
            # Uniform image: nothing is selected, and no division by zero.
            mask = np.zeros(xmlcontent.shape, dtype=float)
        return mask, True, Img_Fact

    @staticmethod
    def _collect_regions(xmlcontent, Attribute_Name, Fieldxml, NewFact):
        """Extract the polygons of the matching annotations from a parsed xml.

        An annotation matches when Attribute_Name is empty, equals
        "non_selected_regions", or equals the Fieldxml attribute of the
        annotation's first <Attribute> element.  Vertex coordinates are
        divided by NewFact and rounded.  Returns ``(xy, xy_neg)``: two dicts
        mapping a region key to its vertex list, for regions whose
        NegativeROA is "0" and "1" respectively.
        """
        xy = {}
        xy_neg = {}
        NbRg = 0
        select_all = (Attribute_Name == []) or (Attribute_Name == '') or (Attribute_Name == "non_selected_regions")
        for labelID in xmlcontent.getElementsByTagName('Annotation'):
            if select_all:
                isLabelOK = True
            else:
                try:
                    labeltag = labelID.getElementsByTagName('Attribute')[0]
                    isLabelOK = (Attribute_Name == labeltag.attributes[Fieldxml].value)
                except (IndexError, KeyError):
                    # No <Attribute> element, or it lacks the requested field.
                    isLabelOK = False
            if not isLabelOK:
                continue
            for region in labelID.getElementsByTagName('Region'):
                vertices = region.getElementsByTagName('Vertex')
                NbRg += 1
                # The running counter keeps keys unique across annotations.
                regionID = region.attributes['Id'].value + str(NbRg)
                NegativeROA = region.attributes['NegativeROA'].value
                if len(vertices) == 0:
                    continue
                if NegativeROA == "0":
                    target = xy
                elif NegativeROA == "1":
                    target = xy_neg
                else:
                    continue
                target[regionID] = [
                    (int(round(float(vertex.attributes['X'].value) / NewFact)),
                     int(round(float(vertex.attributes['Y'].value) / NewFact)))
                    for vertex in vertices]
        return xy, xy_neg

    def xml_read(self, xmldir, Attribute_Name, Fieldxml):
        """Rasterise the regions of an annotation xml file into a mask.

        The mask is drawn at the base resolution divided by a factor chosen
        so that its longest side does not exceed 15000 pixels.  A jpeg copy
        of the mask is saved in the parent directory of the output basename.
        Returns ``(mask in [0, 1], valid, factor)``, or ``([], False, 1.0)``
        when the xml cannot be parsed.
        """
        # Size of the image at the highest resolution.
        ImgMaxSizeX_orig = float(self._dz.level_dimensions[-1][0])
        ImgMaxSizeY_orig = float(self._dz.level_dimensions[-1][1])
        longest = max(ImgMaxSizeX_orig, ImgMaxSizeY_orig)
        NewFact = longest / min(longest, 15000.0)
        try:
            xmlcontent = minidom.parse(xmldir)
        except Exception:
            print("error with minidom.parse(xmldir)")
            return [], False, 1.0
        xy, xy_neg = self._collect_regions(xmlcontent, Attribute_Name, Fieldxml, NewFact)

        img = Image.new('L', (int(ImgMaxSizeX_orig / NewFact), int(ImgMaxSizeY_orig / NewFact)), 0)
        draw = ImageDraw.Draw(img, 'L')
        for vertices in xy.values():
            draw.polygon(vertices, outline=255, fill=255)
        # Negative regions are drawn last so that they carve holes.
        for vertices in xy_neg.values():
            draw.polygon(vertices, outline=255, fill=0)
        mask = np.array(img)

        outdir = os.path.split(self._basename[:-1])[0]
        if outdir and not os.path.exists(outdir):
            # Tile directories are only created later, in _write_tiles.
            os.makedirs(outdir)
        stem = "mask_" + os.path.basename(self._basename) + "_" + Attribute_Name
        if Attribute_Name == "non_selected_regions":
            Image.fromarray(255 - mask).save(os.path.join(outdir, stem + ".jpeg"))
        elif self._mask_type == 0:
            Image.fromarray(255 - mask).save(os.path.join(outdir, stem + "_inv.jpeg"))
        else:
            Image.fromarray(mask).save(os.path.join(outdir, stem + ".jpeg"))
        return mask / 255.0, True, NewFact
class DeepZoomStaticTiler(object):
    """Handles generation of tiles and metadata for all images in a slide.

    Creating an instance opens the slide and immediately starts the worker
    processes; run() then queues the tiles and waits for the workers.
    """

    def __init__(self, slidepath, basename, format, tile_size, overlap,
                 limit_bounds, quality, workers, with_viewer, Bkg, basenameJPG, xmlfile, mask_type, ROIpc, oLabel, ImgExtension, SaveMasks, Mag, normalize, Fieldxml):
        """Open the slide, store the options and start ``workers`` TileWorker processes."""
        if with_viewer:
            # Check extra dependency before doing a bunch of work
            import jinja2  # noqa: F401
        self._slide = open_slide(slidepath)
        self._basename = basename
        self._basenameJPG = basenameJPG
        self._xmlfile = xmlfile
        self._mask_type = mask_type
        self._format = format
        self._tile_size = tile_size
        self._overlap = overlap
        self._limit_bounds = limit_bounds
        # Bounded queue: the producer blocks once 2 jobs per worker are pending.
        self._queue = JoinableQueue(2 * workers)
        self._workers = workers
        self._with_viewer = with_viewer
        self._Bkg = Bkg
        self._ROIpc = ROIpc
        self._dzi_data = {}
        self._xmlLabel = oLabel
        self._ImgExtension = ImgExtension
        self._SaveMasks = SaveMasks
        self._Mag = Mag
        self._normalize = normalize
        self._Fieldxml = Fieldxml
        for _i in range(workers):
            TileWorker(self._queue, slidepath, tile_size, overlap,
                       limit_bounds, quality, self._Bkg, self._ROIpc).start()

    def run(self):
        """Tile the slide (plus associated images and viewer files if requested)."""
        self._run_image()
        if self._with_viewer:
            for name in self._slide.associated_images:
                self._run_image(name)
            self._write_html()
            self._write_static()
        self._shutdown()

    def _run_image(self, associated=None):
        """Run a single image from self._slide."""
        if associated is None:
            image = self._slide
            if self._with_viewer:
                basename = os.path.join(self._basename, VIEWER_SLIDE_NAME)
            else:
                basename = self._basename
        else:
            image = ImageSlide(self._slide.associated_images[associated])
            basename = os.path.join(self._basename, self._slugify(associated))
        dz = DeepZoomGenerator(image, self._tile_size, self._overlap, limit_bounds=self._limit_bounds)
        tiler = DeepZoomImageTiler(dz, basename, self._format, associated, self._queue, self._slide,
                                   self._basenameJPG, self._xmlfile, self._mask_type, self._xmlLabel,
                                   self._ROIpc, self._ImgExtension, self._SaveMasks, self._Mag,
                                   self._normalize, self._Fieldxml)
        tiler.run()
        self._dzi_data[self._url_for(associated)] = tiler.get_dzi()

    def _url_for(self, associated):
        """Return the .dzi file name used for the slide or an associated image."""
        if associated is None:
            base = VIEWER_SLIDE_NAME
        else:
            base = self._slugify(associated)
        return '%s.dzi' % base

    def _write_html(self):
        """Render the viewer page (index.html) from the packaged jinja2 template."""
        import jinja2
        env = jinja2.Environment(loader=jinja2.PackageLoader(__name__), autoescape=True)
        template = env.get_template('slide-multipane.html')
        associated_urls = {n: self._url_for(n) for n in self._slide.associated_images}
        try:
            mpp_x = self._slide.properties[openslide.PROPERTY_NAME_MPP_X]
            mpp_y = self._slide.properties[openslide.PROPERTY_NAME_MPP_Y]
            mpp = (float(mpp_x) + float(mpp_y)) / 2
        except (KeyError, ValueError):
            mpp = 0
        # Embed the dzi metadata in the HTML to work around Chrome's
        # refusal to allow XmlHttpRequest from file:///, even when
        # the originating page is also a file:///
        data = template.render(slide_url=self._url_for(None), slide_mpp=mpp,
                               associated=associated_urls, properties=self._slide.properties,
                               dzi_data=json.dumps(self._dzi_data))
        with open(os.path.join(self._basename, 'index.html'), 'w') as fh:
            fh.write(data)

    def _write_static(self):
        """Copy the viewer's static files (and static/images) next to the output."""
        basesrc = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'static')
        basedst = os.path.join(self._basename, 'static')
        self._copydir(basesrc, basedst)
        self._copydir(os.path.join(basesrc, 'images'),
                      os.path.join(basedst, 'images'))

    def _copydir(self, src, dest):
        """Copy the regular files of src into dest (not recursive)."""
        if not os.path.exists(dest):
            os.makedirs(dest)
        for name in os.listdir(src):
            srcpath = os.path.join(src, name)
            if os.path.isfile(srcpath):
                shutil.copy(srcpath, os.path.join(dest, name))

    @classmethod
    def _slugify(cls, text):
        """Lower-case text, drop non-ASCII marks and replace other runs with '_'."""
        text = normalize('NFKD', text.lower()).encode('ascii', 'ignore').decode()
        return re.sub('[^a-z0-9]+', '_', text)

    def _shutdown(self):
        """Send one stop sentinel per worker and wait for the queue to drain."""
        for _i in range(self._workers):
            self._queue.put(None)
        self._queue.join()
def ImgWorker(queue):
    """Run shell commands taken from ``queue`` until a ``None`` sentinel is read.

    Every item, the sentinel included, is acknowledged with task_done() so
    that a producer blocked in queue.join() is released.
    """
    # iter(callable, sentinel) keeps calling queue.get() until it yields None.
    for cmd in iter(queue.get, None):
        child = subprocess.Popen(cmd, shell=True)
        child.wait()
        queue.task_done()
    # Acknowledge the sentinel itself.
    queue.task_done()
def xml_read_labels(xmldir, Fieldxml):
    """List the labels stored in an annotation xml file.

    Reads the ``Fieldxml`` attribute of every <Attribute> element found in
    the file at ``xmldir``.

    Returns ``(labels, valid)``.  ``labels`` is ``['']`` when the file holds
    no <Attribute> element, and ``([], False)`` is returned when the file
    cannot be opened or parsed.
    """
    try:
        xmlcontent = minidom.parse(xmldir)
    except Exception:
        # Unreadable or malformed file; report it without also swallowing
        # KeyboardInterrupt/SystemExit as a bare except would.
        print("error with minidom.parse(xmldir)")
        return [], False
    xml_labels = [xmllabel.attributes[Fieldxml].value
                  for xmllabel in xmlcontent.getElementsByTagName('Attribute')]
    if not xml_labels:
        xml_labels = ['']
    return xml_labels, True
if __name__ == '__main__': | |
parser = OptionParser(usage='Usage: %prog [options] <slide>') | |
parser.add_option('-L', '--ignore-bounds', dest='limit_bounds', | |
default=True, action='store_false', | |
help='display entire scan area') | |
parser.add_option('-e', '--overlap', metavar='PIXELS', dest='overlap', | |
type='int', default=1, | |
help='overlap of adjacent tiles [1]') | |
parser.add_option('-f', '--format', metavar='{jpeg|png}', dest='format', | |
default='jpeg', | |
help='image format for tiles [jpeg]') | |
parser.add_option('-j', '--jobs', metavar='COUNT', dest='workers', | |
type='int', default=4, | |
help='number of worker processes to start [4]') | |
parser.add_option('-o', '--output', metavar='NAME', dest='basename', | |
help='base name of output file') | |
parser.add_option('-Q', '--quality', metavar='QUALITY', dest='quality', | |
type='int', default=90, | |
help='JPEG compression quality [90]') | |
parser.add_option('-r', '--viewer', dest='with_viewer', | |
action='store_true', | |
help='generate directory tree with HTML viewer') | |
parser.add_option('-s', '--size', metavar='PIXELS', dest='tile_size', | |
type='int', default=254, | |
help='tile size [254]') | |
parser.add_option('-B', '--Background', metavar='PIXELS', dest='Bkg', | |
type='float', default=50, | |
help='Max background threshold [50]; percentager of background allowed') | |
parser.add_option('-x', '--xmlfile', metavar='NAME', dest='xmlfile', | |
help='xml file if needed') | |
parser.add_option('-F', '--Fieldxml', metavar='{Name|Value}', dest='Fieldxml', | |
default='Value', | |
help='which field of the xml file is the label saved') | |
parser.add_option('-m', '--mask_type', metavar='COUNT', dest='mask_type', | |
type='int', default=1, | |
help='if xml file is used, keep tile within the ROI (1) or outside of it (0)') | |
parser.add_option('-R', '--ROIpc', metavar='PIXELS', dest='ROIpc', | |
type='float', default=50, | |
help='To be used with xml file - minimum percentage of tile covered by ROI (white)') | |
parser.add_option('-l', '--oLabelref', metavar='NAME', dest='oLabelref', | |
help='To be used with xml file - Only tile for label which contains the characters in oLabel') | |
parser.add_option('-S', '--SaveMasks', metavar='NAME', dest='SaveMasks', | |
default=False, | |
help='set to yes if you want to save ALL masks for ALL tiles (will be saved in same directory with <mask> suffix)') | |
parser.add_option('-t', '--tmp_dcm', metavar='NAME', dest='tmp_dcm', | |
help='base name of output folder to save intermediate dcm images converted to jpg (we assume the patient ID is the folder name in which the dcm images are originally saved)') | |
parser.add_option('-M', '--Mag', metavar='PIXELS', dest='Mag', | |
type='float', default=-1, | |
help='Magnification at which tiling should be done (-1 of all)') | |
parser.add_option('-N', '--normalize', metavar='NAME', dest='normalize', | |
help='if normalization is needed, N list the mean and std for each channel. For example \'57,22,-8,20,10,5\' with the first 3 numbers being the targeted means, and then the targeted stds') | |
(opts, args) = parser.parse_args() | |
try: | |
slidepath = args[0] | |
except IndexError: | |
parser.error('Missing slide argument') | |
if opts.basename is None: | |
opts.basename = os.path.splitext(os.path.basename(slidepath))[0] | |
if opts.xmlfile is None: | |
opts.xmlfile = '' | |
try: | |
if opts.normalize is not None: | |
opts.normalize = [float(x) for x in opts.normalize.split(',')] | |
if len(opts.normalize) != 6: | |
opts.normalize = '' | |
parser.error("ERROR: NO NORMALIZATION APPLIED: input vector does not have the right length - 6 values expected") | |
else: | |
opts.normalize = '' | |
except: | |
opts.normalize = '' | |
parser.error("ERROR: NO NORMALIZATION APPLIED: input vector does not have the right format") | |
# Expand the slide argument (a path or a glob pattern such as
# "/data/RawImages/*/*svs") into the sorted list of images to tile.
files = sorted(glob(slidepath))

# Extension hint used below and handed to the tiler. For a wildcard pattern it
# is whatever follows the last '*' (e.g. "svs" or ".svs"). Without a wildcard,
# splitting on '*' would return the whole path, so take the real file
# extension instead; otherwise a directory name containing "dcm" would be
# mistaken for a DICOM input and the ".jpg" equality test could never match.
if '*' in slidepath:
    ImgExtension = slidepath.split('*')[-1]
else:
    ImgExtension = os.path.splitext(slidepath)[1]

# NOTE: an earlier variant started opts.max_number_processes ImgWorker
# processes fed by a JoinableQueue here; it is disabled and images are
# processed sequentially.

print(len(files), ' to process')
# Short pause before the tiling starts (presumably so the count above can be
# read - TODO confirm it is still wanted).
time.sleep(5)
def _run_tiler(image_path, output, label):
    """Tile one image with DeepZoomStaticTiler using the parsed options.

    image_path -- image handed to the tiler
    output     -- output base path handed to the tiler
    label      -- ROI label forwarded to the tiler ('' when none applies)

    Any exception raised by the tiler propagates to the caller.
    """
    DeepZoomStaticTiler(image_path, output, opts.format, opts.tile_size, opts.overlap, opts.limit_bounds, opts.quality, opts.workers, opts.with_viewer, opts.Bkg, opts.basenameJPG, opts.xmlfile, opts.mask_type, opts.ROIpc, label, ImgExtension, opts.SaveMasks, opts.Mag, opts.normalize, opts.Fieldxml).run()


def _run_tiler_guarded(image_path, output, label):
    """Same as _run_tiler, but print the failure and return instead of raising."""
    try:
        _run_tiler(image_path, output, label)
    except Exception as e:
        print("Failed to process file %s, error: %s" % (image_path, sys.exc_info()[0]))
        print(e)


def _ensure_dir(path):
    """Create directory `path` (and missing parents) unless it already exists."""
    if not os.path.exists(path):
        os.makedirs(path)


def _gray_to_rgb8(pixels):
    """Min-max rescale a pixel array to 0-255 and copy it into 3 uint8 channels.

    A constant image (max == min) would divide by zero; it is returned as an
    all-zero image instead.
    """
    lo = float(pixels.min())
    span = float(pixels.max()) - lo
    rgb = np.zeros((pixels.shape[0], pixels.shape[1], 3), 'uint8')
    if span > 0:
        gray = ((pixels[:, :].astype(float) - lo) / span * 255.0).astype(int)
        for channel in range(3):
            rgb[..., channel] = gray
    return rgb


# Main loop: tile every input image. Three modes, chosen per run:
#   * DICOM input      -> convert to an intermediate jpg, then tile it;
#   * xml annotations  -> tile once per selected ROI label (or inverted mask);
#   * otherwise        -> tile the whole image, skipping images already tiled.
for filename in tqdm(files):
    opts.basenameJPG = os.path.splitext(os.path.basename(filename))[0]

    if "dcm" in ImgExtension:
        print("convert %s dcm to jpg" % filename)
        if opts.tmp_dcm is None or not os.path.isdir(opts.tmp_dcm):
            parser.error('Missing output folder for dcm>jpg intermediate files')
        # The glob may also have picked up jpg files; they are not DICOM input.
        if filename[-3:] == 'jpg':
            continue
        # Prefer dcmread when the imported DICOM module provides it; the legacy
        # `dicom` fallback import only offers read_file.
        read_dcm = getattr(dicom, 'dcmread', None) or dicom.read_file
        image = _gray_to_rgb8(read_dcm(filename).pixel_array)
        # From here on, the image being tiled is the intermediate jpg.
        filename = os.path.join(opts.tmp_dcm, opts.basenameJPG + ".jpg")
        imsave(filename, image)
        output = os.path.join(opts.basename, opts.basenameJPG)
        _run_tiler_guarded(filename, output, '')

    elif opts.xmlfile != '':
        # opts.xmlfile is the directory holding one "<image name>.xml" per image.
        xmldir = os.path.join(opts.xmlfile, opts.basenameJPG + '.xml')
        if not os.path.isfile(xmldir):
            if ImgExtension in (".jpg", ".dcm"):
                print("Input image to be tiled is jpg or dcm and not svs - will be treated as such")
                output = os.path.join(opts.basename, opts.basenameJPG)
                _run_tiler_guarded(filename, output, '')
            else:
                print("No xml file found for slide %s.svs (expected: %s). Directory or xml file does not exist" % (opts.basenameJPG, xmldir) )
            continue

        if (opts.mask_type==1) or (opts.oLabelref!=''):
            # Either mask inside the ROI, or inverse mask with a reference label.
            xml_labels, xml_valid = xml_read_labels(xmldir, opts.Fieldxml)
            # Counts ROIs tiled for the reference label. It starts at 1 for a
            # non-inverted mask so the "invert everything" fallback below can
            # only trigger for inverse masks.
            Nbr_ROIs_ForNegLabel = 1 if opts.mask_type==1 else 0
            for oLabel in xml_labels:
                if not ((opts.oLabelref in oLabel) or (opts.oLabelref=='')):
                    continue
                Nbr_ROIs_ForNegLabel += 1
                # Inverted masks go to "<label>_inv", regular ones to "<label>".
                label_dir = oLabel+'_inv' if opts.mask_type==0 else oLabel
                _ensure_dir(os.path.join(opts.basename, label_dir))
                output = os.path.join(opts.basename, label_dir, opts.basenameJPG)
                # Not guarded (as in the original): a failure here aborts the run.
                _run_tiler(filename, output, oLabel)
            if Nbr_ROIs_ForNegLabel==0:
                # A reference label was given and an inverse mask requested, but
                # no ROI carries that label in this image --> take everything.
                print("label %s is not in that image; invert everything" % (opts.oLabelref))
                oLabel = opts.oLabelref
                _ensure_dir(os.path.join(opts.basename, oLabel+'_inv'))
                output = os.path.join(opts.basename, opts.oLabelref+'_inv', opts.basenameJPG)
                _run_tiler(filename, output, oLabel)
        else:
            # Background: everything outside the selected regions.
            oLabel = "non_selected_regions"
            _ensure_dir(os.path.join(opts.basename, oLabel))
            output = os.path.join(opts.basename, oLabel, opts.basenameJPG)
            _run_tiler_guarded(filename, output, oLabel)

    else:
        output = os.path.join(opts.basename, opts.basenameJPG)
        # An existing "<output>_files" directory means this image was tiled already.
        if os.path.exists(output + "_files"):
            print("Image %s already tiled" % opts.basenameJPG)
            continue
        _run_tiler_guarded(filename, output, '')

# NOTE: the matching shutdown of the disabled worker-queue variant
# (dz_queue.join() followed by one None per worker) used to live here.
print("End")