File size: 2,644 Bytes

0ca2a11

#!/usr/bin/env python
# coding: utf-8

# In[1]:


import os
import numpy as np
import shutil
import torch
import torch.nn
import torchvision.models as models
from torch.autograd import Variable 
import torch.cuda
import torchvision.transforms as transforms
from PIL import Image
import torch.nn as nn
import torch.nn.functional as F
from sklearn.datasets import make_blobs
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import pairwise_distances_argmin_min
from scipy.spatial.distance import pdist, squareform
from skimage import io, segmentation, morphology, exposure
from skimage.color import rgb2hsv
img_to_tensor = transforms.ToTensor()
import random
import tifffile as tif
# Phase 1: sort every source image into one of three folders based on its
# channel layout and, for 3-channel images, its mean HSV saturation/value.
path = '/data1/partitionA/CUHKSZ/histopath_2022/grand_competition/Train_Labeled/images/'
files = os.listdir(path)
binary_path = '0/'          # receives 2-D / single-channel images
gray_path = '1/'            # receives 3-channel images with low saturation and mid brightness
colored_path = 'colored/'   # receives every other image (incl. more than 3 channels)
for out_dir in (binary_path, colored_path, gray_path):
    os.makedirs(out_dir, exist_ok=True)

for img_name in files:
    img_path = os.path.join(path, img_name)
    # os.listdir also returns sub-directories, which cannot be read as images.
    if not os.path.isfile(img_path):
        continue

    # TIFF files are read with tifffile, everything else with skimage.io.
    if img_name.endswith(('.tif', '.tiff')):
        img_data = tif.imread(img_path)
    else:
        img_data = io.imread(img_path)

    n_dims = img_data.ndim
    if n_dims == 2 or (n_dims == 3 and img_data.shape[-1] == 1):
        target_dir = binary_path
    elif n_dims == 3 and img_data.shape[-1] > 3:
        target_dir = colored_path
    else:
        hsv_img = rgb2hsv(img_data)
        s = hsv_img[:, :, 1]
        v = hsv_img[:, :, 2]
        # Compute each mean once; they are used for both logging and the decision.
        s_mean = s.mean()
        v_mean = v.mean()
        print(img_name, s_mean, v_mean)
        # Saturated, very dark, or bright images go to the colored folder;
        # only unsaturated images of medium brightness count as gray.
        if s_mean > 0.1 or v_mean < 0.1 or v_mean > 0.6:
            target_dir = colored_path
        else:
            target_dir = gray_path

    shutil.copyfile(img_path, os.path.join(target_dir, img_name))



# In[3]:


#### Phase 2: clustering by cell size
# For every image previously copied to colored/, read its label image and
# route the source image to big_path or small_path depending on the area of
# the largest labelled region.
from skimage import measure
colored_path = 'colored/'
label_path = 'allimages/tif/'
big_path = '2/'
small_path = '3/'
# Largest-region area above which an image is placed in big_path.
BIG_CELL_AREA_THRESHOLD = 30000
files = os.listdir(colored_path)
os.makedirs(big_path, exist_ok=True)
os.makedirs(small_path, exist_ok=True)
for img_name in files:
    # splitext drops only the final extension, so names that contain extra
    # dots still map to the matching '<stem>.tif' label file.
    stem = os.path.splitext(img_name)[0]
    label = tif.imread(os.path.join(label_path, stem + '.tif'))
    props = measure.regionprops(label)
    num_pix = [region.area for region in props]
    # A label image without any region yields an empty list; treat it as
    # area 0 (-> small_path) instead of letting max() raise ValueError.
    max_area = max(num_pix, default=0)
    print(max_area)
    target_dir = big_path if max_area > BIG_CELL_AREA_THRESHOLD else small_path
    # `path` is the source image directory assigned earlier in this file; the
    # names listed from colored_path are looked up there for the copy.
    shutil.copyfile(os.path.join(path, img_name), os.path.join(target_dir, img_name))