sparktk.dicom.ops.drop_rows_by_keywords module
# vim: set encoding=utf-8
# Copyright (c) 2016 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
def drop_rows_by_keywords(self, keywords_values_dict):
    """
    Drop the rows whose metadata xml string matches every {"keyword":"value"} pair in the given dictionary
    (the pairs are combined with an 'and' operation).

    Ex: keywords_values_dict -> {"SOPInstanceUID":"1.3.6.1.4.1.14519.5.2.1.7308.2101.234736319276602547946349519685", "Manufacturer":"SIEMENS", "StudyDate":"20030315"}

    Parameters
    ----------

    :param keywords_values_dict: (dict(str, str)) dictionary of keywords and values from xml string in metadata

    Raises TypeError if keywords_values_dict is not a dict, or if any keyword or value in it is not a string.

    Examples
    --------

        >>> dicom_path = "../datasets/dicom_uncompressed"

        >>> dicom = tc.dicom.import_dcm(dicom_path)

        >>> dicom.metadata.count()
        3

        >>> dicom.metadata.inspect(truncate=30)
        [#]  id  metadata
        =======================================
        [0]   0  <?xml version="1.0" encodin...
        [1]   1  <?xml version="1.0" encodin...
        [2]   2  <?xml version="1.0" encodin...

        #Part of xml string looks as below
        <?xml version="1.0" encoding="UTF-8"?>
        <NativeDicomModel xml:space="preserve">
            <DicomAttribute keyword="FileMetaInformationVersion" tag="00020001" vr="OB"><InlineBinary>AAE=</InlineBinary></DicomAttribute>
            <DicomAttribute keyword="MediaStorageSOPClassUID" tag="00020002" vr="UI"><Value number="1">1.2.840.10008.5.1.4.1.1.4</Value></DicomAttribute>
            <DicomAttribute keyword="MediaStorageSOPInstanceUID" tag="00020003" vr="UI"><Value number="1">1.3.6.1.4.1.14519.5.2.1.7308.2101.234736319276602547946349519685</Value></DicomAttribute>
            ...

        >>> keywords_values_dict = {"SOPInstanceUID":"1.3.6.1.4.1.14519.5.2.1.7308.2101.234736319276602547946349519685", "Manufacturer":"SIEMENS", "StudyDate":"20030315"}

        >>> dicom.drop_rows_by_keywords(keywords_values_dict)

        >>> dicom.metadata.count()
        2

        #After drop_rows
        >>> dicom.metadata.inspect(truncate=30)
        [#]  id  metadata
        =======================================
        [0]   1  <?xml version="1.0" encodin...
        [1]   2  <?xml version="1.0" encodin...

        >>> dicom.pixeldata.inspect(truncate=30)
        [#]  id  imagematrix
        ===========================================================
        [0]   1  [[    0.     0.     0. ...,     0.     0.     0.]
        [    0.    70.    85. ...,   215.   288.   337.]
        [    0.    63.    72. ...,   228.   269.   317.]
        ...,
        [    0.    42.    40. ...,   966.   919.   871.]
        [    0.    42.    33. ...,   988.   887.   860.]
        [    0.    46.    38. ...,   983.   876.   885.]]
        [1]   2  [[    0.     0.     0. ...,     0.     0.     0.]
        [    0.   111.   117. ...,   159.   148.   135.]
        [    0.   116.   111. ...,   152.   138.   139.]
        ...,
        [    0.    49.    18. ...,  1057.   965.   853.]
        [    0.    42.    20. ...,  1046.   973.   891.]
        [    0.    48.    26. ...,  1041.   969.   930.]]

    """
    if not isinstance(keywords_values_dict, dict):
        # "%s" is required here: a bare trailing "%" is an incomplete format and
        # makes the formatting itself fail before the TypeError can be raised.
        raise TypeError("keywords_values_dict should be a type of dict, but found type as %s" % type(keywords_values_dict))

    # basestring only exists on Python 2 (it covers both str and unicode);
    # fall back to str when it is not defined.
    try:
        string_types = basestring
    except NameError:
        string_types = str

    for key, value in keywords_values_dict.items():
        if not isinstance(key, string_types) or not isinstance(value, string_types):
            raise TypeError("both keyword and value should be of type str")

    # Always delegate to the scala dicom, as python joins are expensive compared to serializations.
    def f(scala_dicom):
        scala_dicom.dropRowsByKeywords(self._tc.jutils.convert.to_scala_map(keywords_values_dict))

    self._call_scala(f)
Functions
def drop_rows_by_keywords(
self, keywords_values_dict)
Drop the rows whose metadata xml string matches every {"keyword":"value"} pair in the given dictionary (the pairs are combined with an 'and' operation).
Ex: keywords_values_dict -> {"SOPInstanceUID":"1.3.6.1.4.1.14519.5.2.1.7308.2101.234736319276602547946349519685", "Manufacturer":"SIEMENS", "StudyDate":"20030315"}
Parameters:
keywords_values_dict (dict(str, str)): dictionary of keywords and values from xml string in metadata
Examples:
>>> dicom_path = "../datasets/dicom_uncompressed"
>>> dicom = tc.dicom.import_dcm(dicom_path)
>>> dicom.metadata.count()
3
>>> dicom.metadata.inspect(truncate=30)
[#] id metadata
=======================================
[0] 0 <?xml version="1.0" encodin...
[1] 1 <?xml version="1.0" encodin...
[2] 2 <?xml version="1.0" encodin...
#Part of xml string looks as below
<?xml version="1.0" encoding="UTF-8"?>
<NativeDicomModel xml:space="preserve">
<DicomAttribute keyword="FileMetaInformationVersion" tag="00020001" vr="OB"><InlineBinary>AAE=</InlineBinary></DicomAttribute>
<DicomAttribute keyword="MediaStorageSOPClassUID" tag="00020002" vr="UI"><Value number="1">1.2.840.10008.5.1.4.1.1.4</Value></DicomAttribute>
<DicomAttribute keyword="MediaStorageSOPInstanceUID" tag="00020003" vr="UI"><Value number="1">1.3.6.1.4.1.14519.5.2.1.7308.2101.234736319276602547946349519685</Value></DicomAttribute>
...
>>> keywords_values_dict = {"SOPInstanceUID":"1.3.6.1.4.1.14519.5.2.1.7308.2101.234736319276602547946349519685", "Manufacturer":"SIEMENS", "StudyDate":"20030315"}
>>> dicom.drop_rows_by_keywords(keywords_values_dict)
>>> dicom.metadata.count()
2
#After drop_rows
>>> dicom.metadata.inspect(truncate=30)
[#] id metadata
=======================================
[0] 1 <?xml version="1.0" encodin...
[1] 2 <?xml version="1.0" encodin...
>>> dicom.pixeldata.inspect(truncate=30)
[#] id imagematrix
===========================================================
[0] 1 [[ 0. 0. 0. ..., 0. 0. 0.]
[ 0. 70. 85. ..., 215. 288. 337.]
[ 0. 63. 72. ..., 228. 269. 317.]
...,
[ 0. 42. 40. ..., 966. 919. 871.]
[ 0. 42. 33. ..., 988. 887. 860.]
[ 0. 46. 38. ..., 983. 876. 885.]]
[1] 2 [[ 0. 0. 0. ..., 0. 0. 0.]
[ 0. 111. 117. ..., 159. 148. 135.]
[ 0. 116. 111. ..., 152. 138. 139.]
...,
[ 0. 49. 18. ..., 1057. 965. 853.]
[ 0. 42. 20. ..., 1046. 973. 891.]
[ 0. 48. 26. ..., 1041. 969. 930.]]
def drop_rows_by_keywords(self, keywords_values_dict):
    """
    Drop the rows whose metadata xml string matches every {"keyword":"value"} pair in the given dictionary
    (the pairs are combined with an 'and' operation).

    Ex: keywords_values_dict -> {"SOPInstanceUID":"1.3.6.1.4.1.14519.5.2.1.7308.2101.234736319276602547946349519685", "Manufacturer":"SIEMENS", "StudyDate":"20030315"}

    Parameters
    ----------

    :param keywords_values_dict: (dict(str, str)) dictionary of keywords and values from xml string in metadata

    Raises TypeError if keywords_values_dict is not a dict, or if any keyword or value in it is not a string.

    Examples
    --------

        >>> dicom_path = "../datasets/dicom_uncompressed"

        >>> dicom = tc.dicom.import_dcm(dicom_path)

        >>> dicom.metadata.count()
        3

        >>> dicom.metadata.inspect(truncate=30)
        [#]  id  metadata
        =======================================
        [0]   0  <?xml version="1.0" encodin...
        [1]   1  <?xml version="1.0" encodin...
        [2]   2  <?xml version="1.0" encodin...

        #Part of xml string looks as below
        <?xml version="1.0" encoding="UTF-8"?>
        <NativeDicomModel xml:space="preserve">
            <DicomAttribute keyword="FileMetaInformationVersion" tag="00020001" vr="OB"><InlineBinary>AAE=</InlineBinary></DicomAttribute>
            <DicomAttribute keyword="MediaStorageSOPClassUID" tag="00020002" vr="UI"><Value number="1">1.2.840.10008.5.1.4.1.1.4</Value></DicomAttribute>
            <DicomAttribute keyword="MediaStorageSOPInstanceUID" tag="00020003" vr="UI"><Value number="1">1.3.6.1.4.1.14519.5.2.1.7308.2101.234736319276602547946349519685</Value></DicomAttribute>
            ...

        >>> keywords_values_dict = {"SOPInstanceUID":"1.3.6.1.4.1.14519.5.2.1.7308.2101.234736319276602547946349519685", "Manufacturer":"SIEMENS", "StudyDate":"20030315"}

        >>> dicom.drop_rows_by_keywords(keywords_values_dict)

        >>> dicom.metadata.count()
        2

        #After drop_rows
        >>> dicom.metadata.inspect(truncate=30)
        [#]  id  metadata
        =======================================
        [0]   1  <?xml version="1.0" encodin...
        [1]   2  <?xml version="1.0" encodin...

        >>> dicom.pixeldata.inspect(truncate=30)
        [#]  id  imagematrix
        ===========================================================
        [0]   1  [[    0.     0.     0. ...,     0.     0.     0.]
        [    0.    70.    85. ...,   215.   288.   337.]
        [    0.    63.    72. ...,   228.   269.   317.]
        ...,
        [    0.    42.    40. ...,   966.   919.   871.]
        [    0.    42.    33. ...,   988.   887.   860.]
        [    0.    46.    38. ...,   983.   876.   885.]]
        [1]   2  [[    0.     0.     0. ...,     0.     0.     0.]
        [    0.   111.   117. ...,   159.   148.   135.]
        [    0.   116.   111. ...,   152.   138.   139.]
        ...,
        [    0.    49.    18. ...,  1057.   965.   853.]
        [    0.    42.    20. ...,  1046.   973.   891.]
        [    0.    48.    26. ...,  1041.   969.   930.]]

    """
    if not isinstance(keywords_values_dict, dict):
        # "%s" is required here: a bare trailing "%" is an incomplete format and
        # makes the formatting itself fail before the TypeError can be raised.
        raise TypeError("keywords_values_dict should be a type of dict, but found type as %s" % type(keywords_values_dict))

    # basestring only exists on Python 2 (it covers both str and unicode);
    # fall back to str when it is not defined.
    try:
        string_types = basestring
    except NameError:
        string_types = str

    for key, value in keywords_values_dict.items():
        if not isinstance(key, string_types) or not isinstance(value, string_types):
            raise TypeError("both keyword and value should be of type str")

    # Always delegate to the scala dicom, as python joins are expensive compared to serializations.
    def f(scala_dicom):
        scala_dicom.dropRowsByKeywords(self._tc.jutils.convert.to_scala_map(keywords_values_dict))

    self._call_scala(f)