Source code for WORC.tools.Evaluate

#!/usr/bin/env python

# Copyright 2016-2019 Biomedical Imaging Group Rotterdam, Departments of
# Medical Informatics and Radiology, Erasmus MC, Rotterdam, The Netherlands
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import WORC.addexceptions as WORCexceptions
import fastr
from fastr.api import ResourceLimit
import os
import graphviz

# NOTE: Very important to give images and segmentations as dict with patient names!


class Evaluate(object):
    """Build a fastr network that evaluates the performance of an estimator.

    The object works in one of two modes, stored in ``self.mode``:

    * ``'StandAlone'`` -- no ``parent`` is given; a new fastr network is
      created and all required sources are created on that network.
    * ``'WORC'`` -- a ``parent`` object is given; the evaluation nodes and
      sinks are added to ``parent.network``.

    NOTE(review): ``create_network`` calls ``create_links_Standalone`` and
    ``create_links_Addon``, which are not defined in the part of the class
    visible here -- confirm they are provided elsewhere.
    """

    # URL template for the default sink locations; the first field is the
    # network name, the second the file prefix of the sink. The doubled
    # braces survive ``str.format`` as single-brace placeholders.
    _SINK_URL_TEMPLATE = (
        "vfs://output/{}/{}_{{sample_id}}_{{cardinality}}{{ext}}"
    )

    # (sink id, file prefix) pairs for every sink created in create_network.
    _DEFAULT_SINK_PREFIXES = (
        ('ROC_PNG', 'ROC'),
        ('ROC_Tex', 'ROC'),
        ('ROC_CSV', 'ROC'),
        ('SVM_Json', 'performance'),
        ('Barchart_PNG', 'Barchart'),
        ('Barchart_Tex', 'Barchart'),
        ('StatisticalTestFeatures_CSV', 'StatisticalTestFeatures'),
        ('RankedPercentages_Zip', 'RankedPercentages'),
        ('RankedPercentages_CSV', 'RankedPercentages'),
        ('RankedPosteriors_Zip', 'RankedPosteriors'),
        ('RankedPosteriors_CSV', 'RankedPosteriors'),
    )

    def __init__(self, label_type, ensemble=50, scores='percentages',
                 parent=None, features=None, fastr_plugin='LinearExecution',
                 name='Example'):
        """Build a network that evaluates the performance of an estimator.

        Parameters
        ----------
        label_type:
            Value fed to the ``label_type`` inputs of the evaluation nodes
            through a fastr constant.
        ensemble: default 50
            Value fed to the ``ensemble`` inputs of the evaluation nodes.
            Only used in stand-alone mode; with a ``parent`` the value is
            read from ``parent.configs[0]['Ensemble']['Use']``.
        scores: default 'percentages'
            Accepted for backward compatibility; not used by this class.
        parent: default None
            Object exposing ``network``, ``configs`` and ``sink_data``. If
            given, the evaluate network is added to ``parent.network``.
        features: default None
            Required when no ``parent`` is given. Its length determines how
            many ``Features_<idx>`` sources are created.
        fastr_plugin: default 'LinearExecution'
            Execution plugin passed to ``network.execute`` (stand-alone
            mode only).
        name: default 'Example'
            Suffix of the network id, ``'WORC_Evaluate_' + name``
            (stand-alone mode only).

        Raises
        ------
        WORCexceptions.WORCIOError
            If neither ``parent`` nor ``features`` is supplied.
        """
        # Validate before touching fastr, so that a failed construction does
        # not leave a half-initialised network behind.
        if parent is None and features is None:
            raise WORCexceptions.WORCIOError('Either features as input or a WORC network is required for the Evaluate network.')

        if parent is not None:
            self.parent = parent
            self.network = parent.network
            self.mode = 'WORC'
            self.name = parent.network.id
            self.ensemble = parent.configs[0]['Ensemble']['Use']
        else:
            self.mode = 'StandAlone'
            self.fastr_plugin = fastr_plugin
            self.name = 'WORC_Evaluate_' + name
            self.network = fastr.create_network(id=self.name)
            self.fastr_tmpdir = os.path.join(fastr.config.mounts['tmp'],
                                             self.name)
            self.ensemble = ensemble

        self.features = features
        self.label_type = label_type
        self.create_network()

    def create_network(self):
        """Add the evaluate nodes, sinks, constants and links to the network."""
        network = self.network

        # Create all nodes
        self.node_ROC = network.create_node(
            'worc/PlotROC:1.0', tool_version='1.0', id='plot_ROC',
            resources=ResourceLimit(memory='20G'))
        self.node_SVM = network.create_node(
            'worc/PlotSVM:1.0', tool_version='1.0', id='plot_SVM',
            resources=ResourceLimit(memory='20G'))
        self.node_Barchart = network.create_node(
            'worc/PlotBarchart:1.0', tool_version='1.0', id='plot_Barchart',
            resources=ResourceLimit(memory='4G'))
        self.node_STest = network.create_node(
            'worc/StatisticalTestFeatures:1.0', tool_version='1.0',
            id='statistical_test_features',
            resources=ResourceLimit(memory='4G'))
        self.node_Ranked_Percentages = network.create_node(
            'worc/PlotRankedScores:1.0', tool_version='1.0',
            id='plot_ranked_percentages',
            resources=ResourceLimit(memory='20G'))
        self.node_Ranked_Posteriors = network.create_node(
            'worc/PlotRankedScores:1.0', tool_version='1.0',
            id='plot_ranked_posteriors',
            resources=ResourceLimit(memory='20G'))

        # Create sinks
        self.sink_ROC_PNG = network.create_sink('PNGFile', id='ROC_PNG')
        self.sink_ROC_Tex = network.create_sink('TexFile', id='ROC_Tex')
        self.sink_ROC_CSV = network.create_sink('CSVFile', id='ROC_CSV')
        self.sink_SVM_Json = network.create_sink('JsonFile', id='SVM_Json')
        self.sink_Barchart_PNG = network.create_sink(
            'PNGFile', id='Barchart_PNG')
        self.sink_Barchart_Tex = network.create_sink(
            'TexFile', id='Barchart_Tex')
        self.sink_STest_CSV = network.create_sink(
            'CSVFile', id='StatisticalTestFeatures_CSV')
        self.sink_Ranked_Percentages_Zip = network.create_sink(
            'ZipFile', id='RankedPercentages_Zip')
        self.sink_Ranked_Percentages_CSV = network.create_sink(
            'CSVFile', id='RankedPercentages_CSV')
        self.sink_Ranked_Posteriors_Zip = network.create_sink(
            'ZipFile', id='RankedPosteriors_Zip')
        self.sink_Ranked_Posteriors_CSV = network.create_sink(
            'CSVFile', id='RankedPosteriors_CSV')

        # Create links to sinks
        self.sink_ROC_PNG.input = self.node_ROC.outputs['output_png']
        self.sink_ROC_Tex.input = self.node_ROC.outputs['output_tex']
        self.sink_ROC_CSV.input = self.node_ROC.outputs['output_csv']
        self.sink_SVM_Json.input = self.node_SVM.outputs['output_json']
        self.sink_Barchart_PNG.input = \
            self.node_Barchart.outputs['output_png']
        self.sink_Barchart_Tex.input = \
            self.node_Barchart.outputs['output_tex']
        self.sink_STest_CSV.input = self.node_STest.outputs['performance']
        self.sink_Ranked_Percentages_Zip.input = \
            self.node_Ranked_Percentages.outputs['output_zip']
        self.sink_Ranked_Percentages_CSV.input = \
            self.node_Ranked_Percentages.outputs['output_csv']
        self.sink_Ranked_Posteriors_Zip.input = \
            self.node_Ranked_Posteriors.outputs['output_zip']
        self.sink_Ranked_Posteriors_CSV.input = \
            self.node_Ranked_Posteriors.outputs['output_csv']

        # The two ranked-score nodes share one tool; a constant 'scores'
        # input selects which kind of score each of them plots.
        self.node_Ranked_Percentages.inputs['scores'] = ['percentages']
        self.node_Ranked_Posteriors.inputs['scores'] = ['posteriors']

        # Label type and ensemble are not supplied by a WORC network; they
        # are added as constants rather than as sources.
        self.source_LabelType = network.create_constant(
            'String', [self.label_type], id='LabelType')
        self.source_Ensemble = network.create_constant(
            'String', [self.ensemble], id='Ensemble')

        # Create sources if not supplied by a WORC network
        if self.mode == 'StandAlone':
            self.source_Estimator = network.create_source(
                'HDF5', id='Estimator')
            self.source_PatientInfo = network.create_source(
                'PatientInfoFile', id='PatientInfo')
            self.source_Images = network.create_source(
                'ITKImageFile', id='Images', node_group='patients')
            self.source_Segmentations = network.create_source(
                'ITKImageFile', id='Segmentations', node_group='patients')
            self.source_Config = network.create_source(
                'ParameterFile', id='Config')

            # One HDF5 source per entry in self.features
            self.labels = list()
            self.source_Features = list()
            for idx in range(len(self.features)):
                label = 'Features_' + str(idx)
                self.labels.append(label)
                self.source_Features.append(
                    network.create_source('HDF5', id=label,
                                          node_group='features'))

        # Create links to sources that are not supplied by a WORC network
        self.node_ROC.inputs['ensemble'] = self.source_Ensemble.output
        self.node_ROC.inputs['label_type'] = self.source_LabelType.output

        self.node_SVM.inputs['ensemble'] = self.source_Ensemble.output
        self.node_SVM.inputs['label_type'] = self.source_LabelType.output

        # The barchart tool takes the ensemble value on its 'estimators' input
        self.node_Barchart.inputs['estimators'] = self.source_Ensemble.output
        self.node_Barchart.inputs['label_type'] = \
            self.source_LabelType.output

        self.node_Ranked_Percentages.inputs['ensemble'] = \
            self.source_Ensemble.output
        self.node_Ranked_Percentages.inputs['label_type'] = \
            self.source_LabelType.output

        self.node_Ranked_Posteriors.inputs['ensemble'] = \
            self.source_Ensemble.output
        self.node_Ranked_Posteriors.inputs['label_type'] = \
            self.source_LabelType.output

        # Create links to the sources that could be in a WORC network
        if self.mode == 'StandAlone':
            self.create_links_Standalone()
        else:
            self.create_links_Addon()

    def set(self, estimator=None, pinfo=None, images=None,
            segmentations=None, config=None, features=None,
            sink_data=None):
        """Set the sources and sinks based on the provided attributes.

        In stand-alone mode ``self.source_data`` and ``self.sink_data`` are
        rebuilt from scratch; otherwise ``self.sink_data`` becomes the
        parent's ``sink_data`` dict, which is extended in place.

        Parameters
        ----------
        estimator, pinfo, images, segmentations, config:
            Stored under the 'Estimator', 'PatientInfo', 'Images',
            'Segmentations' and 'Config' source ids (stand-alone mode only).
        features:
            Iterable paired one-to-one with ``self.labels`` to fill the
            ``Features_<idx>`` sources (stand-alone mode only).
        sink_data: dict, optional
            Overrides for the output location of individual sinks, keyed by
            sink id. Sinks not listed get a default location below
            ``vfs://output/<network name>/``.
        """
        # A fresh dict per call instead of a shared mutable default argument.
        if sink_data is None:
            sink_data = {}

        if self.mode == 'StandAlone':
            self.source_data = dict()
            self.sink_data = dict()

            self.source_data['Estimator'] = estimator
            self.source_data['PatientInfo'] = pinfo
            self.source_data['Images'] = images
            self.source_data['Segmentations'] = segmentations
            self.source_data['Config'] = config
            self.source_data['LabelType'] = self.label_type
            self.source_data['Ensemble'] = self.ensemble

            for feature, label in zip(features, self.labels):
                self.source_data[label] = feature
        else:
            self.sink_data = self.parent.sink_data

        for sink_id, prefix in self._DEFAULT_SINK_PREFIXES:
            if sink_id in sink_data:
                # Honour the caller's override. Previously the override only
                # suppressed the default and was then dropped, leaving the
                # sink without any destination.
                self.sink_data[sink_id] = sink_data[sink_id]
            else:
                self.sink_data[sink_id] = \
                    self._SINK_URL_TEMPLATE.format(self.name, prefix)

    def execute(self):
        """Execute the network through the fastr.network.execute command.

        Uses ``self.source_data``, ``self.fastr_plugin`` and
        ``self.fastr_tmpdir``, which this class only assigns in stand-alone
        mode, so ``set`` must have been called first.
        """
        # Draw the network; a missing Graphviz installation is not fatal.
        try:
            self.network.draw(file_path=self.network.id + '.svg',
                              draw_dimensions=True)
        except graphviz.backend.ExecutableNotFound:
            print('[WORC WARNING] Graphviz executable not found: not drawing network diagram. MAke sure the Graphviz executables are on your systems PATH.')

        # Execute the network
        self.network.execute(self.source_data, self.sink_data,
                             execution_plugin=self.fastr_plugin,
                             tmpdir=self.fastr_tmpdir)