Presets

from pathlib import Path
from wholeslidedata.iterators.batchiterator import create_batch_iterator
from pprint import pprint

Files

# Inline configuration for the 'files.yml' preset: one image file (.tif) and
# one annotation file (.xml), each referenced by an explicit path.
# NOTE(review): the '@replace(false)' key suffix is presumably a merge
# directive interpreted by the config loader -- confirm against the
# wholeslidedata docs.
user_config = {
    "wholeslidedata": {
        "default": {
            "image_sources@replace(false)": {"path": "/tmp/TCGA-21-5784-01Z-00-DX1.tif"},
            "annotation_sources@replace(false)": {"path": "/tmp/TCGA-21-5784-01Z-00-DX1.xml"},
            # Annotation label name -> integer value.
            "labels": {"stroma": 1, "tumor": 2, "lymphocytes": 3},
            # 4 samples per batch at spacing 1.0.
            # assumes 'shape' is (height, width, channels) -- TODO confirm
            "batch_shape": {
                "batch_size": 4,
                "spacing": 1.0,
                "shape": [512, 512, 3],
            },
        },
    },
}
# Build a training-mode batch iterator from the inline config combined with
# the 'files.yml' preset, then show which files ended up in its dataset.
# The context manager takes care of releasing the iterator on exit.
with create_batch_iterator(
    user_config,
    presets=('files.yml',),
    mode='training',
) as batch_iterator:
    print('Dataset:')
    pprint(batch_iterator.dataset)
Dataset:
{'AnyOneAssociater': {'annotations': {0: /tmp/TCGA-21-5784-01Z-00-DX1.xml},
                      'images': {0: /tmp/TCGA-21-5784-01Z-00-DX1.tif}}}

Folders

# Inline configuration for the 'folders.yml' preset: image and annotation
# files are given as a folder plus name filters rather than explicit paths.
# NOTE(review): the '@replace(false)' key suffix is presumably a merge
# directive interpreted by the config loader -- confirm against the
# wholeslidedata docs.
user_config = {
    "wholeslidedata": {
        "default": {
            # presumably keeps names matching 'filters' and drops names
            # matching 'excludes' -- verify exact matching rules
            "image_sources@replace(false)": {
                "folder": "/tmp/",
                "filters": [".tif"],
                "excludes": ["mask"],
            },
            "annotation_sources@replace(false)": {
                "folder": "/tmp/",
                "filters": [".xml"],
            },
            # Annotation label name -> integer value.
            "labels": {"stroma": 1, "tumor": 2, "lymphocytes": 3},
            # 4 samples per batch at spacing 1.0.
            # assumes 'shape' is (height, width, channels) -- TODO confirm
            "batch_shape": {
                "batch_size": 4,
                "spacing": 1.0,
                "shape": [512, 512, 3],
            },
        },
    },
}
# Build a training-mode batch iterator from the inline config combined with
# the 'folders.yml' preset, then show which files were picked up from the
# configured folders.
with create_batch_iterator(
    user_config,
    presets=('folders.yml',),
    mode='training',
) as batch_iterator:
    print('Dataset:')
    pprint(batch_iterator.dataset)
Dataset:
{'TCGA-21-5784-01Z-00-DX1': {'annotations': {0: /tmp/TCGA-21-5784-01Z-00-DX1.xml},
                             'images': {0: /tmp/TCGA-21-5784-01Z-00-DX1.tif}}}

One-hot-encoding

# This example reads its configuration from a YAML file on disk instead of
# an inline dict; the path is relative to the current working directory.
user_config = "./configs/user_config.yml"
!cat $user_config
wholeslidedata:
    default:
        yaml_source:
            training:
                - wsi: 
                    path: /tmp/TCGA-21-5784-01Z-00-DX1.tif
                  wsa: 
                    path: /tmp/TCGA-21-5784-01Z-00-DX1.xml       

        labels:
            stroma: 1
            tumor: 2
            lymphocytes: 3
            
        batch_shape:
            batch_size: 4
            spacing: 1.0
            shape: [512, 512, 3]
# Build a training-mode batch iterator from the YAML config combined with
# the 'onehotencoding.yml' preset and draw a single batch from it.
with create_batch_iterator(
    user_config,
    presets=('onehotencoding.yml',),
    mode='training',
) as batch_iterator:
    # Each item from the iterator unpacks into three parts.
    x_batch, y_batch, info = next(batch_iterator)
    # NOTE(review): the trailing axis of y_batch is presumably the one-hot
    # label axis produced by the preset -- confirm against the library docs.
    print(y_batch.shape)
(4, 512, 512, 3)