sparktk.dicom.dicom module
# vim: set encoding=utf-8
#  Copyright (c) 2016 Intel Corporation 
#
#  Licensed under the Apache License, Version 2.0 (the "License");
#  you may not use this file except in compliance with the License.
#  You may obtain a copy of the License at
#
#       http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.
#
import logging
logger = logging.getLogger('sparktk')
from sparktk import TkContext
# import constructors for the API's sake (not actually dependencies of the Dicom class)
from sparktk.dicom.constructors.import_dcm import import_dcm
# Public names of this module: the Dicom class, the import_dcm constructor
# (imported above for the API's sake) and the load function.
__all__ = ["Dicom",
           "import_dcm",
           "load"]
class Dicom(object):
    """
    sparktk Dicom
    Represents a collection of DICOM data objects. Reference: [https://en.wikipedia.org/wiki/DICOM](https://en.wikipedia.org/wiki/DICOM)
    The metadata property is a sparktk frame which defines the metadata of the collection of DICOM objects.
    Its schema has a column named "id" which holds a unique integer ID for the record and another column which
    holds a string of XML comprised of the metadata.  Users can run XQuery or invoke canned column extraction/filter
    operations on this frame.
    The pixeldata property is a sparktk frame which defines the pixeldata of the collection of DICOM objects.
    Its schema has a column named "id" which holds a unique integer ID for the record and another column which
    holds a matrix(internally it is a numpy.ndarray) comprised of the pixeldata.  Users can run numpy supported transformations on it.
    dcm4che-3.x dependencies are used to support various operations on dicom images. It is available as java library
    Reference: [https://github.com/dcm4che/dcm4che](https://github.com/dcm4che/dcm4che)
    Note: Currently sparktk Dicom supports only uncompressed dicom images
    Load a set of uncompressed sample .dcm files from path (integration-tests/datasets/dicom_uncompressed)
    and create a dicom object. The below examples helps you to understand how to access dicom object properties.
    Examples
    --------
        #Path can be local/hdfs to dcm file(s)
        >>> dicom_path = "../datasets/dicom_uncompressed"
        #use import_dcm available inside dicom module to create a dicom object from given dicom_path
        >>> dicom = tc.dicom.import_dcm(dicom_path)
        #Type of dicom object created
        >>> type(dicom)
        
        >>> dicom.metadata.count()
        3
        >>> dicom.pixeldata.count()
        3
        >>> dicom.metadata.inspect(truncate=30)
        [#]  id  metadata
        =======================================
        [0]   0  
            
                AAE= 1.2.840.10008.5.1.4.1.1.4 1.3.6.1.4.1.14519.5.2.1.7308.2101.234736319276602547946349519685 
        #Dimesions of the image matrix stored
        >>> image_ndarray.shape
        (320, 320)
        #Use python matplot lib package to verify image visually
        >>> import pylab
        >>> pylab.imshow(image_ndarray, cmap=pylab.cm.bone)
        >>> pylab.show()
        #Save method persists the dicom object to disk
        >>> dicom.save("sandbox/dicom_data")
        #loads the saved dicom object
        >>> load_dicom = tc.load("sandbox/dicom_data")
        #Re-check whether we loaded back the dicom object or not
        >>> type(load_dicom)
        
        #Again access pixeldata and perform same operations as above
        >>> load_pixeldata = load_dicom.pixeldata.take(1)
        #Order may differ when you load back dicom object
        >>> load_pixeldata
        [[0L, array([[   0.,    0.,    0., ...,    0.,    0.,    0.],
        [   0.,  125.,  103., ...,  120.,  213.,  319.],
        [   0.,  117.,   94., ...,  135.,  223.,  325.],
        ...,
        [   0.,   62.,   21., ...,  896.,  886.,  854.],
        [   0.,   63.,   23., ...,  941.,  872.,  897.],
        [   0.,   60.,   30., ...,  951.,  822.,  906.]])]]
        >>> load_image_ndarray= load_pixeldata[0][1]
        >>> type(load_image_ndarray)
        
        >>> load_image_ndarray.shape
        (320, 320)
        #Inspect metadata property to see dicom metadata xml content
        >>> load_dicom.metadata.inspect(truncate=30)
        [#]  id  metadata
        =======================================
        [0]   0  >> import xml.etree.ElementTree as ET
        #Performing add_columns operation.
        #Add xml tag as column in dicom metadata frame
        #Here we add SOPInstanceUID as column to metadaframe
        #sample function to apply on row - add_columns
        >>> def extractor(tag_name):
        ...    def _extractor(row):
        ...        root = ET.fromstring(row["metadata"])
        ...        for attribute in root.findall('DicomAttribute'):
        ...            keyword = attribute.get('keyword')
        ...            value = None
        ...            if attribute.find('Value') is not None:
        ...                value = attribute.find('Value').text
        ...            if keyword == tag_name:
        ...                return value
        ...    return _extractor
        >>> tag_name = "SOPInstanceUID"
        >>> dicom.metadata.add_columns(extractor(tag_name), (tag_name, str))
        >>> dicom.metadata.count()
        3
        >>> dicom.metadata.inspect(truncate=30)
        [#]  id  metadata                        SOPInstanceUID
        =======================================================================
        [0]   0       Functions
def import_dcm(
dicom_dir_path, tc=<class 'sparktk.arguments.implicit'>)
Creates a dicom object with metadataFrame and pixeldataFrame from a dcm file(s)
| dicom_dir_path | (str): | Local/HDFS path of the dcm file(s) | 
| Returns | (Dicom): | returns a dicom object with metadata and pixeldata frames | 
#Path can be local/hdfs to dcm file(s)
>>> dicom_path = "../datasets/dicom_uncompressed"
#use import_dcm available inside dicom module to create a dicom object from given dicom_path
>>> dicom = tc.dicom.import_dcm(dicom_path)
#Type of dicom object created
>>> type(dicom)
<class 'sparktk.dicom.dicom.Dicom'>
#Inspect metadata property to see dicom metadata xml content
>>> dicom.metadata.inspect(truncate=30)
[#]  id  metadata
=======================================
[0]   0  <?xml version="1.0" encodin...
[1]   1  <?xml version="1.0" encodin...
[2]   2  <?xml version="1.0" encodin...
#pixeldata property is sparktk frame
>>> pixeldata = dicom.pixeldata.take(1)
>>> pixeldata
[[0L, array([[ 0.,  0.,  0., ...,  0.,  0.,  0.],
[ 0.,  7.,  5., ...,  5.,  7.,  8.],
[ 0.,  7.,  6., ...,  5.,  6.,  7.],
...,
[ 0.,  6.,  7., ...,  5.,  5.,  6.],
[ 0.,  2.,  5., ...,  5.,  5.,  4.],
[ 1.,  1.,  3., ...,  1.,  1.,  0.]])]]
def import_dcm(dicom_dir_path, tc=TkContext.implicit):
    """
    Build a Dicom object (metadata frame plus pixeldata frame) from one or more .dcm files

    Parameters
    ----------

    :param dicom_dir_path: (str) Local/HDFS path of the dcm file(s)
    :param tc: context checked with TkContext.validate; defaults to TkContext.implicit
    :return: (Dicom) a dicom object with metadata and pixeldata frames

    Raises ValueError when dicom_dir_path is not a string.

    Examples
    --------

        #Path can be local/hdfs to dcm file(s)
        >>> dicom_path = "../datasets/dicom_uncompressed"

        #use import_dcm available inside dicom module to create a dicom object from given dicom_path
        >>> dicom = tc.dicom.import_dcm(dicom_path)

        #Type of dicom object created
        >>> type(dicom)
        <class 'sparktk.dicom.dicom.Dicom'>

        #Inspect metadata property to see dicom metadata xml content
        >>> dicom.metadata.inspect(truncate=30)
        [#]  id  metadata
        =======================================
        [0]   0  <?xml version="1.0" encodin...
        [1]   1  <?xml version="1.0" encodin...
        [2]   2  <?xml version="1.0" encodin...

        #pixeldata property is sparktk frame
        >>> pixeldata = dicom.pixeldata.take(1)

        >>> pixeldata
        [[0L, array([[ 0.,  0.,  0., ...,  0.,  0.,  0.],
        [ 0.,  7.,  5., ...,  5.,  7.,  8.],
        [ 0.,  7.,  6., ...,  5.,  6.,  7.],
        ...,
        [ 0.,  6.,  7., ...,  5.,  5.,  6.],
        [ 0.,  2.,  5., ...,  5.,  5.,  4.],
        [ 1.,  1.,  3., ...,  1.,  1.,  0.]])]]

    """
    # Reject non-string paths before touching the context or the JVM.
    # NOTE(review): basestring is a Python 2 builtin; this module targets Python 2.
    path_is_string = isinstance(dicom_dir_path, basestring)
    if not path_is_string:
        message = "dicom_dir_path parameter must be a string, but is {0}.".format(type(dicom_dir_path))
        raise ValueError(message)

    TkContext.validate(tc)

    # Resolve the scala-side Import object through the JVM view, then hand it
    # the scala SparkContext together with the path.
    scala_importer = tc.sc._jvm.org.trustedanalytics.sparktk.dicom.internal.constructors.Import
    scala_dicom = scala_importer.importDcm(tc.jutils.get_scala_sc(), dicom_dir_path)

    # Dicom is imported inside the function, exactly as the original did.
    from sparktk.dicom.dicom import Dicom
    return Dicom._from_scala(tc, scala_dicom)
 def load(
path, tc=<class 'sparktk.arguments.implicit'>)
load Dicom from given path
def load(path, tc=TkContext.implicit):
    """
    Load a Dicom from the given path

    :param path: location handed to tc.load
    :param tc: context checked with TkContext.validate; defaults to TkContext.implicit
    :return: whatever tc.load(path, Dicom) returns
    """
    TkContext.validate(tc)
    restored = tc.load(path, Dicom)
    return restored
Classes
class Dicom
sparktk Dicom
Represents a collection of DICOM data objects. Reference: https://en.wikipedia.org/wiki/DICOM
The metadata property is a sparktk frame which defines the metadata of the collection of DICOM objects. Its schema has a column named "id" which holds a unique integer ID for the record and another column which holds a string of XML comprised of the metadata. Users can run XQuery or invoke canned column extraction/filter operations on this frame.
The pixeldata property is a sparktk frame which defines the pixeldata of the collection of DICOM objects. Its schema has a column named "id" which holds a unique integer ID for the record and another column which holds a matrix(internally it is a numpy.ndarray) comprised of the pixeldata. Users can run numpy supported transformations on it.
dcm4che-3.x dependencies are used to support various operations on dicom images. It is available as a Java library. Reference: https://github.com/dcm4che/dcm4che
Note: Currently sparktk Dicom supports only uncompressed dicom images
Load a set of uncompressed sample .dcm files from path (integration-tests/datasets/dicom_uncompressed) and create a dicom object. The examples below help you understand how to access dicom object properties.
#Path can be local/hdfs to dcm file(s)
>>> dicom_path = "../datasets/dicom_uncompressed"
#use import_dcm available inside dicom module to create a dicom object from given dicom_path
>>> dicom = tc.dicom.import_dcm(dicom_path)
#Type of dicom object created
>>> type(dicom)
<class 'sparktk.dicom.dicom.Dicom'>
>>> dicom.metadata.count()
3
>>> dicom.pixeldata.count()
3
>>> dicom.metadata.inspect(truncate=30)
[#]  id  metadata
=======================================
[0]   0  <?xml version="1.0" encodin...
[1]   1  <?xml version="1.0" encodin...
[2]   2  <?xml version="1.0" encodin...
#Part of xml string looks as below
<?xml version="1.0" encoding="UTF-8"?>
    <NativeDicomModel xml:space="preserve">
        <DicomAttribute keyword="FileMetaInformationVersion" tag="00020001" vr="OB"><InlineBinary>AAE=</InlineBinary></DicomAttribute>
        <DicomAttribute keyword="MediaStorageSOPClassUID" tag="00020002" vr="UI"><Value number="1">1.2.840.10008.5.1.4.1.1.4</Value></DicomAttribute>
        <DicomAttribute keyword="MediaStorageSOPInstanceUID" tag="00020003" vr="UI"><Value number="1">1.3.6.1.4.1.14519.5.2.1.7308.2101.234736319276602547946349519685</Value></DicomAttribute>
        ...
#pixeldata property is sparktk frame
>>> pixeldata = dicom.pixeldata.take(1)
#Display
>>> pixeldata
[[0L, array([[   0.,    0.,    0., ...,    0.,    0.,    0.],
[   0.,  125.,  103., ...,  120.,  213.,  319.],
[   0.,  117.,   94., ...,  135.,  223.,  325.],
...,
[   0.,   62.,   21., ...,  896.,  886.,  854.],
[   0.,   63.,   23., ...,  941.,  872.,  897.],
[   0.,   60.,   30., ...,  951.,  822.,  906.]])]]
#Access ndarray
>>> image_ndarray= pixeldata[0][1]
>>> type(image_ndarray)
<type 'numpy.ndarray'>
#Dimensions of the image matrix stored
>>> image_ndarray.shape
(320, 320)
#Use the python matplotlib package to verify the image visually
>>> import pylab
>>> pylab.imshow(image_ndarray, cmap=pylab.cm.bone)
>>> pylab.show()
#Save method persists the dicom object to disk
>>> dicom.save("sandbox/dicom_data")
#loads the saved dicom object
>>> load_dicom = tc.load("sandbox/dicom_data")
#Re-check whether we loaded back the dicom object or not
>>> type(load_dicom)
<class 'sparktk.dicom.dicom.Dicom'>
#Again access pixeldata and perform same operations as above
>>> load_pixeldata = load_dicom.pixeldata.take(1)
#Order may differ when you load back dicom object
>>> load_pixeldata
[[0L, array([[   0.,    0.,    0., ...,    0.,    0.,    0.],
[   0.,  125.,  103., ...,  120.,  213.,  319.],
[   0.,  117.,   94., ...,  135.,  223.,  325.],
...,
[   0.,   62.,   21., ...,  896.,  886.,  854.],
[   0.,   63.,   23., ...,  941.,  872.,  897.],
[   0.,   60.,   30., ...,  951.,  822.,  906.]])]]
>>> load_image_ndarray= load_pixeldata[0][1]
>>> type(load_image_ndarray)
<type 'numpy.ndarray'>
>>> load_image_ndarray.shape
(320, 320)
#Inspect metadata property to see dicom metadata xml content
>>> load_dicom.metadata.inspect(truncate=30)
[#]  id  metadata
=======================================
[0]   0  <?xml version="1.0" encodin...
[1]   1  <?xml version="1.0" encodin...
[2]   2  <?xml version="1.0" encodin...
#Using built-in xml libraries to run xquery on metadata
>>> import xml.etree.ElementTree as ET
#Performing add_columns operation.
#Add xml tag as column in dicom metadata frame
#Here we add SOPInstanceUID as a column to the metadata frame
#sample function to apply on row - add_columns
>>> def extractor(tag_name):
...    def _extractor(row):
...        root = ET.fromstring(row["metadata"])
...        for attribute in root.findall('DicomAttribute'):
...            keyword = attribute.get('keyword')
...            value = None
...            if attribute.find('Value') is not None:
...                value = attribute.find('Value').text
...            if keyword == tag_name:
...                return value
...    return _extractor
>>> tag_name = "SOPInstanceUID"
>>> dicom.metadata.add_columns(extractor(tag_name), (tag_name, str))
>>> dicom.metadata.count()
3
>>> dicom.metadata.inspect(truncate=30)
[#]  id  metadata                        SOPInstanceUID
=======================================================================
[0]   0  <?xml version="1.0" encodin...  1.3.6.1.4.1.14519.5.2.1.730...
[1]   1  <?xml version="1.0" encodin...  1.3.6.1.4.1.14519.5.2.1.730...
[2]   2  <?xml version="1.0" encodin...  1.3.6.1.4.1.14519.5.2.1.730...
class Dicom(object):
    """
    sparktk Dicom
    Represents a collection of DICOM data objects. Reference: [https://en.wikipedia.org/wiki/DICOM](https://en.wikipedia.org/wiki/DICOM)
    The metadata property is a sparktk frame which defines the metadata of the collection of DICOM objects.
    Its schema has a column named "id" which holds a unique integer ID for the record and another column which
    holds a string of XML comprised of the metadata.  Users can run XQuery or invoke canned column extraction/filter
    operations on this frame.
    The pixeldata property is a sparktk frame which defines the pixeldata of the collection of DICOM objects.
    Its schema has a column named "id" which holds a unique integer ID for the record and another column which
    holds a matrix(internally it is a numpy.ndarray) comprised of the pixeldata.  Users can run numpy supported transformations on it.
    dcm4che-3.x dependencies are used to support various operations on dicom images. It is available as java library
    Reference: [https://github.com/dcm4che/dcm4che](https://github.com/dcm4che/dcm4che)
    Note: Currently sparktk Dicom supports only uncompressed dicom images
    Load a set of uncompressed sample .dcm files from path (integration-tests/datasets/dicom_uncompressed)
    and create a dicom object. The below examples helps you to understand how to access dicom object properties.
    Examples
    --------
        #Path can be local/hdfs to dcm file(s)
        >>> dicom_path = "../datasets/dicom_uncompressed"
        #use import_dcm available inside dicom module to create a dicom object from given dicom_path
        >>> dicom = tc.dicom.import_dcm(dicom_path)
        #Type of dicom object created
        >>> type(dicom)
        
        >>> dicom.metadata.count()
        3
        >>> dicom.pixeldata.count()
        3
        >>> dicom.metadata.inspect(truncate=30)
        [#]  id  metadata
        =======================================
        [0]   0  
            
                AAE= 1.2.840.10008.5.1.4.1.1.4 1.3.6.1.4.1.14519.5.2.1.7308.2101.234736319276602547946349519685 
        #Dimesions of the image matrix stored
        >>> image_ndarray.shape
        (320, 320)
        #Use python matplot lib package to verify image visually
        >>> import pylab
        >>> pylab.imshow(image_ndarray, cmap=pylab.cm.bone)
        >>> pylab.show()
        #Save method persists the dicom object to disk
        >>> dicom.save("sandbox/dicom_data")
        #loads the saved dicom object
        >>> load_dicom = tc.load("sandbox/dicom_data")
        #Re-check whether we loaded back the dicom object or not
        >>> type(load_dicom)
        
        #Again access pixeldata and perform same operations as above
        >>> load_pixeldata = load_dicom.pixeldata.take(1)
        #Order may differ when you load back dicom object
        >>> load_pixeldata
        [[0L, array([[   0.,    0.,    0., ...,    0.,    0.,    0.],
        [   0.,  125.,  103., ...,  120.,  213.,  319.],
        [   0.,  117.,   94., ...,  135.,  223.,  325.],
        ...,
        [   0.,   62.,   21., ...,  896.,  886.,  854.],
        [   0.,   63.,   23., ...,  941.,  872.,  897.],
        [   0.,   60.,   30., ...,  951.,  822.,  906.]])]]
        >>> load_image_ndarray= load_pixeldata[0][1]
        >>> type(load_image_ndarray)
        
        >>> load_image_ndarray.shape
        (320, 320)
        #Inspect metadata property to see dicom metadata xml content
        >>> load_dicom.metadata.inspect(truncate=30)
        [#]  id  metadata
        =======================================
        [0]   0  >> import xml.etree.ElementTree as ET
        #Performing add_columns operation.
        #Add xml tag as column in dicom metadata frame
        #Here we add SOPInstanceUID as column to metadaframe
        #sample function to apply on row - add_columns
        >>> def extractor(tag_name):
        ...    def _extractor(row):
        ...        root = ET.fromstring(row["metadata"])
        ...        for attribute in root.findall('DicomAttribute'):
        ...            keyword = attribute.get('keyword')
        ...            value = None
        ...            if attribute.find('Value') is not None:
        ...                value = attribute.find('Value').text
        ...            if keyword == tag_name:
        ...                return value
        ...    return _extractor
        >>> tag_name = "SOPInstanceUID"
        >>> dicom.metadata.add_columns(extractor(tag_name), (tag_name, str))
        >>> dicom.metadata.count()
        3
        >>> dicom.metadata.inspect(truncate=30)
        [#]  id  metadata                        SOPInstanceUID
        =======================================================================
        [0]   0       Ancestors (in MRO)
- Dicom
- __builtin__.object
Instance variables
var metadata
var pixeldata
Methods
def __init__(
self, tc, scala_dicom)
def __init__(self, tc, scala_dicom):
    """
    Wrap a scala dicom object and expose its two frames as sparktk Frames

    :param tc: context stored on the instance and passed to each Frame
    :param scala_dicom: scala-side object whose metadata() and pixeldata() results are wrapped
    """
    self._tc = tc
    # Frame is imported inside the method, as in the original.
    from sparktk.frame.frame import Frame

    # Same order as before: metadata frame first, then pixeldata frame.
    scala_metadata = scala_dicom.metadata()
    self._metadata = Frame(self._tc, scala_metadata)
    scala_pixeldata = scala_dicom.pixeldata()
    self._pixeldata = Frame(self._tc, scala_pixeldata)
def drop_rows(
self, predicate)
Drop the rows of dicom metadata and pixeldata frames using given predicate
| predicate: | predicate to apply on filter | 
>>> dicom_path = "../datasets/dicom_uncompressed"
>>> dicom = tc.dicom.import_dcm(dicom_path)
>>> dicom.metadata.count()
3
>>> dicom.metadata.inspect(truncate=30)
[#]  id  metadata
=======================================
[0]   0  <?xml version="1.0" encodin...
[1]   1  <?xml version="1.0" encodin...
[2]   2  <?xml version="1.0" encodin...
#Part of xml string looks as below
<?xml version="1.0" encoding="UTF-8"?>
    <NativeDicomModel xml:space="preserve">
        <DicomAttribute keyword="FileMetaInformationVersion" tag="00020001" vr="OB"><InlineBinary>AAE=</InlineBinary></DicomAttribute>
        <DicomAttribute keyword="MediaStorageSOPClassUID" tag="00020002" vr="UI"><Value number="1">1.2.840.10008.5.1.4.1.1.4</Value></DicomAttribute>
        <DicomAttribute keyword="MediaStorageSOPInstanceUID" tag="00020003" vr="UI"><Value number="1">1.3.6.1.4.1.14519.5.2.1.7308.2101.234736319276602547946349519685</Value></DicomAttribute>
        ...
>>> import xml.etree.ElementTree as ET
#sample custom filter function
>>> def drop_meta(tag_name, tag_value):
...    def _drop_meta(row):
...        root = ET.fromstring(row["metadata"])
...        for attribute in root.findall('DicomAttribute'):
...            keyword = attribute.get('keyword')
...            if attribute.get('keyword') is not None:
...                if attribute.find('Value') is not None:
...                    value = attribute.find('Value').text
...                    if keyword == tag_name and value == tag_value:
...                        return True
...    return _drop_meta
>>> tag_name = "SOPInstanceUID"
>>> tag_value = "1.3.6.1.4.1.14519.5.2.1.7308.2101.234736319276602547946349519685"
>>> dicom.drop_rows(drop_meta(tag_name, tag_value))
>>> dicom.metadata.count()
2
#After filter
>>> dicom.metadata.inspect(truncate=30)
[#]  id  metadata
=======================================
[0]   1  <?xml version="1.0" encodin...
[1]   2  <?xml version="1.0" encodin...
>>> dicom.pixeldata.inspect(truncate=30)
[#]  id  imagematrix
===========================================================
[0]   1  [[   0.    0.    0. ...,    0.    0.    0.]
[   0.   70.   85. ...,  215.  288.  337.]
[   0.   63.   72. ...,  228.  269.  317.]
...,
[   0.   42.   40. ...,  966.  919.  871.]
[   0.   42.   33. ...,  988.  887.  860.]
[   0.   46.   38. ...,  983.  876.  885.]]
[1]   2  [[    0.     0.     0. ...,     0.     0.     0.]
[    0.   111.   117. ...,   159.   148.   135.]
[    0.   116.   111. ...,   152.   138.   139.]
...,
[    0.    49.    18. ...,  1057.   965.   853.]
[    0.    42.    20. ...,  1046.   973.   891.]
[    0.    48.    26. ...,  1041.   969.   930.]]
def drop_rows(self, predicate):
    """
    Drop the rows of dicom metadata and pixeldata frames using  given predicate
    Parameters
    ----------
    :param predicate: predicate to apply on filter
    Examples
    --------
        >>> dicom_path = "../datasets/dicom_uncompressed"
        >>> dicom = tc.dicom.import_dcm(dicom_path)
        >>> dicom.metadata.count()
        3
        >>> dicom.metadata.inspect(truncate=30)
        [#]  id  metadata
        =======================================
        [0]   0  
            
                AAE= 1.2.840.10008.5.1.4.1.1.4 1.3.6.1.4.1.14519.5.2.1.7308.2101.234736319276602547946349519685  def drop_rows_by_keywords(
self, keywords_values_dict)
Drop the rows based on dictionary of {"keyword":"value"}(applying 'and' operation on dictionary) from column holding xml string.
Ex: keywords_values_dict -> {"SOPInstanceUID":"1.3.6.1.4.1.14519.5.2.1.7308.2101.234736319276602547946349519685", "Manufacturer":"SIEMENS", "StudyDate":"20030315"}
| keywords_values_dict | (dict(str, str)): | dictionary of keywords and values from xml string in metadata | 
>>> dicom_path = "../datasets/dicom_uncompressed"
>>> dicom = tc.dicom.import_dcm(dicom_path)
>>> dicom.metadata.count()
3
>>> dicom.metadata.inspect(truncate=30)
[#]  id  metadata
=======================================
[0]   0  <?xml version="1.0" encodin...
[1]   1  <?xml version="1.0" encodin...
[2]   2  <?xml version="1.0" encodin...
#Part of xml string looks as below
<?xml version="1.0" encoding="UTF-8"?>
    <NativeDicomModel xml:space="preserve">
        <DicomAttribute keyword="FileMetaInformationVersion" tag="00020001" vr="OB"><InlineBinary>AAE=</InlineBinary></DicomAttribute>
        <DicomAttribute keyword="MediaStorageSOPClassUID" tag="00020002" vr="UI"><Value number="1">1.2.840.10008.5.1.4.1.1.4</Value></DicomAttribute>
        <DicomAttribute keyword="MediaStorageSOPInstanceUID" tag="00020003" vr="UI"><Value number="1">1.3.6.1.4.1.14519.5.2.1.7308.2101.234736319276602547946349519685</Value></DicomAttribute>
        ...
>>> keywords_values_dict = {"SOPInstanceUID":"1.3.6.1.4.1.14519.5.2.1.7308.2101.234736319276602547946349519685", "Manufacturer":"SIEMENS", "StudyDate":"20030315"}
>>> dicom.drop_rows_by_keywords(keywords_values_dict)
>>> dicom.metadata.count()
2
#After drop_rows
>>> dicom.metadata.inspect(truncate=30)
[#]  id  metadata
=======================================
[0]   1  <?xml version="1.0" encodin...
[1]   2  <?xml version="1.0" encodin...
>>> dicom.pixeldata.inspect(truncate=30)
[#]  id  imagematrix
===========================================================
[0]   1  [[   0.    0.    0. ...,    0.    0.    0.]
[   0.   70.   85. ...,  215.  288.  337.]
[   0.   63.   72. ...,  228.  269.  317.]
...,
[   0.   42.   40. ...,  966.  919.  871.]
[   0.   42.   33. ...,  988.  887.  860.]
[   0.   46.   38. ...,  983.  876.  885.]]
[1]   2  [[    0.     0.     0. ...,     0.     0.     0.]
[    0.   111.   117. ...,   159.   148.   135.]
[    0.   116.   111. ...,   152.   138.   139.]
...,
[    0.    49.    18. ...,  1057.   965.   853.]
[    0.    42.    20. ...,  1046.   973.   891.]
[    0.    48.    26. ...,  1041.   969.   930.]]
def drop_rows_by_keywords(self, keywords_values_dict):
    """
    Drop the rows based on dictionary of {"keyword":"value"}(applying 'and' operation on dictionary) from column holding xml string.
    Ex: keywords_values_dict -> {"SOPInstanceUID":"1.3.6.1.4.1.14519.5.2.1.7308.2101.234736319276602547946349519685", "Manufacturer":"SIEMENS", "StudyDate":"20030315"}
    Parameters
    ----------
    :param keywords_values_dict: (dict(str, str)) dictionary of keywords and values from xml string in metadata
    Examples
    --------
        >>> dicom_path = "../datasets/dicom_uncompressed"
        >>> dicom = tc.dicom.import_dcm(dicom_path)
        >>> dicom.metadata.count()
        3
        >>> dicom.metadata.inspect(truncate=30)
        [#]  id  metadata
        =======================================
        [0]   0  
            
                AAE= 1.2.840.10008.5.1.4.1.1.4 1.3.6.1.4.1.14519.5.2.1.7308.2101.234736319276602547946349519685 ")
    #Always scala dicom is invoked, as python joins are expensive compared to serailizations.
    def f(scala_dicom):
        scala_dicom.dropRowsByKeywords(self._tc.jutils.convert.to_scala_map(keywords_values_dict))
    self._call_scala(f)
  def drop_rows_by_tags(
self, tags_values_dict)
Drop the rows based on dictionary of {"tag":"value"}(applying 'and' operation on dictionary) from column holding xml string
Ex: tags_values_dict -> {"00080018":"1.3.6.1.4.1.14519.5.2.1.7308.2101.234736319276602547946349519685", "00080070":"SIEMENS", "00080020":"20030315"}
| tags_values_dict | (dict(str, str)): | dictionary of tags and values from xml string in metadata | 
>>> dicom_path = "../datasets/dicom_uncompressed"
>>> dicom = tc.dicom.import_dcm(dicom_path)
>>> dicom.metadata.count()
3
>>> dicom.metadata.inspect(truncate=30)
[#]  id  metadata
=======================================
[0]   0  <?xml version="1.0" encodin...
[1]   1  <?xml version="1.0" encodin...
[2]   2  <?xml version="1.0" encodin...
#Part of xml string looks as below
<?xml version="1.0" encoding="UTF-8"?>
    <NativeDicomModel xml:space="preserve">
        <DicomAttribute keyword="FileMetaInformationVersion" tag="00020001" vr="OB"><InlineBinary>AAE=</InlineBinary></DicomAttribute>
        <DicomAttribute keyword="MediaStorageSOPClassUID" tag="00020002" vr="UI"><Value number="1">1.2.840.10008.5.1.4.1.1.4</Value></DicomAttribute>
        <DicomAttribute keyword="MediaStorageSOPInstanceUID" tag="00020003" vr="UI"><Value number="1">1.3.6.1.4.1.14519.5.2.1.7308.2101.234736319276602547946349519685</Value></DicomAttribute>
        ...
>>> tags_values_dict = {"00080018":"1.3.6.1.4.1.14519.5.2.1.7308.2101.234736319276602547946349519685", "00080070":"SIEMENS", "00080020":"20030315"}
>>> dicom.drop_rows_by_tags(tags_values_dict)
>>> dicom.metadata.count()
2
#After drop_rows
>>> dicom.metadata.inspect(truncate=30)
[#]  id  metadata
=======================================
[0]   1  <?xml version="1.0" encodin...
[1]   2  <?xml version="1.0" encodin...
>>> dicom.pixeldata.inspect(truncate=30)
[#]  id  imagematrix
===========================================================
[0]   1  [[   0.    0.    0. ...,    0.    0.    0.]
[   0.   70.   85. ...,  215.  288.  337.]
[   0.   63.   72. ...,  228.  269.  317.]
...,
[   0.   42.   40. ...,  966.  919.  871.]
[   0.   42.   33. ...,  988.  887.  860.]
[   0.   46.   38. ...,  983.  876.  885.]]
[1]   2  [[    0.     0.     0. ...,     0.     0.     0.]
[    0.   111.   117. ...,   159.   148.   135.]
[    0.   116.   111. ...,   152.   138.   139.]
...,
[    0.    49.    18. ...,  1057.   965.   853.]
[    0.    42.    20. ...,  1046.   973.   891.]
[    0.    48.    26. ...,  1041.   969.   930.]]
def export_to_dcm(
self, path)
export_to_dcm creates .dcm image from dicom object with (metadata, imagedata) and saves to given path
| path | (str): | local/hdfs path | 
>>> dicom_path = "../datasets/dicom_uncompressed"
>>> dicom = tc.dicom.import_dcm(dicom_path)
>>> dicom.metadata.count()
3
>>> dicom.pixeldata.count()
3
>>> dicom.metadata.inspect(truncate=30)
[#]  id  metadata
=======================================
[0]   0  <?xml version="1.0" encodin...
[1]   1  <?xml version="1.0" encodin...
[2]   2  <?xml version="1.0" encodin...
#Part of xml string looks as below
<?xml version="1.0" encoding="UTF-8"?>
    <NativeDicomModel xml:space="preserve">
        <DicomAttribute keyword="FileMetaInformationVersion" tag="00020001" vr="OB"><InlineBinary>AAE=</InlineBinary></DicomAttribute>
        <DicomAttribute keyword="MediaStorageSOPClassUID" tag="00020002" vr="UI"><Value number="1">1.2.840.10008.5.1.4.1.1.4</Value></DicomAttribute>
        <DicomAttribute keyword="MediaStorageSOPInstanceUID" tag="00020003" vr="UI"><Value number="1">1.3.6.1.4.1.14519.5.2.1.7308.2101.234736319276602547946349519685</Value></DicomAttribute>
        ...
#pixeldata property is sparktk frame
>>> pixeldata = dicom.pixeldata.take(1)
#display
>>> pixeldata
[[0L, array([[   0.,    0.,    0., ...,    0.,    0.,    0.],
[   0.,  125.,  103., ...,  120.,  213.,  319.],
[   0.,  117.,   94., ...,  135.,  223.,  325.],
...,
[   0.,   62.,   21., ...,  896.,  886.,  854.],
[   0.,   63.,   23., ...,  941.,  872.,  897.],
[   0.,   60.,   30., ...,  951.,  822.,  906.]])]]
>>> dicom.export_to_dcm("dicom_export")
def export_to_dcm(self, path):
    """
    export_to_dcm creates .dcm image from dicom object with (metadata, imagedata) and saves to given path
    Parameters
    ----------
    :param path: (str) local/hdfs path
    Examples
    --------
        >>> dicom_path = "../datasets/dicom_uncompressed"
        >>> dicom = tc.dicom.import_dcm(dicom_path)
        >>> dicom.metadata.count()
        3
        >>> dicom.pixeldata.count()
        3
        >>> dicom.metadata.inspect(truncate=30)
        [#]  id  metadata
        =======================================
        [0]   0  
            
                AAE= 1.2.840.10008.5.1.4.1.1.4 1.3.6.1.4.1.14519.5.2.1.7308.2101.234736319276602547946349519685  def extract_keywords(
self, keywords)
Extracts the value for each keyword from the column holding the xml string and adds a column for each keyword to hold that value. For a missing keyword, the value is None
Ex: keywords -> ["PatientID"]
| keywords | (str or list(str)): | List of keywords from metadata xml string | 
>>> dicom_path = "../datasets/dicom_uncompressed"
>>> dicom = tc.dicom.import_dcm(dicom_path)
>>> dicom.metadata.count()
3
>>> dicom.metadata.inspect(truncate=30)
[#]  id  metadata
=======================================
[0]   0  <?xml version="1.0" encodin...
[1]   1  <?xml version="1.0" encodin...
[2]   2  <?xml version="1.0" encodin...
#Part of xml string looks as below
<?xml version="1.0" encoding="UTF-8"?>
    <NativeDicomModel xml:space="preserve">
        <DicomAttribute keyword="FileMetaInformationVersion" tag="00020001" vr="OB"><InlineBinary>AAE=</InlineBinary></DicomAttribute>
        <DicomAttribute keyword="MediaStorageSOPClassUID" tag="00020002" vr="UI"><Value number="1">1.2.840.10008.5.1.4.1.1.4</Value></DicomAttribute>
        <DicomAttribute keyword="MediaStorageSOPInstanceUID" tag="00020003" vr="UI"><Value number="1">1.3.6.1.4.1.14519.5.2.1.7308.2101.234736319276602547946349519685</Value></DicomAttribute>
        ...
#Extract values for given keywords and add as new columns in metadata frame
>>> dicom.extract_keywords(["SOPInstanceUID", "Manufacturer", "StudyDate"])
>>> dicom.metadata.count()
3
>>> dicom.metadata.column_names
[u'id', u'metadata', u'SOPInstanceUID', u'Manufacturer', u'StudyDate']
>>> dicom.metadata.inspect(truncate=20)
[#]  id  metadata              SOPInstanceUID        Manufacturer  StudyDate
============================================================================
[0]   0  <?xml version="1....  1.3.6.1.4.1.14519...  SIEMENS       20030315
[1]   1  <?xml version="1....  1.3.6.1.4.1.14519...  SIEMENS       20030315
[2]   2  <?xml version="1....  1.3.6.1.4.1.14519...  SIEMENS       20030315
def extract_keywords(self, keywords):
    """
    Extracts value for each keyword from column holding xml string and adds column for each keyword to assign value
    For missing keyword, the value is None
    Ex: keywords -> ["PatientID"]
    Parameters
    ----------
    :param keywords: (str or list(str)) List of keywords from metadata xml string
    Examples
    --------
        >>> dicom_path = "../datasets/dicom_uncompressed"
        >>> dicom = tc.dicom.import_dcm(dicom_path)
        >>> dicom.metadata.count()
        3
        >>> dicom.metadata.inspect(truncate=30)
        [#]  id  metadata
        =======================================
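        [0]   0  <?xml version="1.0" encodin...
        [1]   1  <?xml version="1.0" encodin...
        [2]   2  <?xml version="1.0" encodin...
        #Part of xml string looks as below
        <?xml version="1.0" encoding="UTF-8"?>
            <NativeDicomModel xml:space="preserve">
                <DicomAttribute keyword="FileMetaInformationVersion" tag="00020001" vr="OB"><InlineBinary>AAE=</InlineBinary></DicomAttribute>
                <DicomAttribute keyword="MediaStorageSOPClassUID" tag="00020002" vr="UI"><Value number="1">1.2.840.10008.5.1.4.1.1.4</Value></DicomAttribute>
                <DicomAttribute keyword="MediaStorageSOPInstanceUID" tag="00020003" vr="UI"><Value number="1">1.3.6.1.4.1.14519.5.2.1.7308.2101.234736319276602547946349519685</Value></DicomAttribute>
def extract_tags(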
self, tags)
Extracts value for each tag from column holding xml string and adds column for each tag to assign value. For missing tag, the value is None
Ex: tags -> ["00020001", "00020002"]
| tags | (str or list(str)): | List of tags from xml string of metadata column | 
>>> dicom_path = "../datasets/dicom_uncompressed"
>>> dicom = tc.dicom.import_dcm(dicom_path)
>>> dicom.metadata.count()
3
>>> dicom.metadata.inspect(truncate=30)
[#]  id  metadata
=======================================
[0]   0  <?xml version="1.0" encodin...
[1]   1  <?xml version="1.0" encodin...
[2]   2  <?xml version="1.0" encodin...
#Part of xml string looks as below
<?xml version="1.0" encoding="UTF-8"?>
    <NativeDicomModel xml:space="preserve">
        <DicomAttribute keyword="FileMetaInformationVersion" tag="00020001" vr="OB"><InlineBinary>AAE=</InlineBinary></DicomAttribute>
        <DicomAttribute keyword="MediaStorageSOPClassUID" tag="00020002" vr="UI"><Value number="1">1.2.840.10008.5.1.4.1.1.4</Value></DicomAttribute>
        <DicomAttribute keyword="MediaStorageSOPInstanceUID" tag="00020003" vr="UI"><Value number="1">1.3.6.1.4.1.14519.5.2.1.7308.2101.234736319276602547946349519685</Value></DicomAttribute>
        ...
#Extract value for each tag from column holding xml string
>>> dicom.extract_tags(["00080018", "00080070", "00080020"])
>>> dicom.metadata.count()
3
>>> dicom.metadata.column_names
[u'id', u'metadata', u'00080018', u'00080070', u'00080020']
>>> dicom.metadata.inspect(truncate=20)
[#]  id  metadata              00080018              00080070  00080020
============================================================================
[0]   0  <?xml version="1....  1.3.6.1.4.1.14519...  SIEMENS       20030315
[1]   1  <?xml version="1....  1.3.6.1.4.1.14519...  SIEMENS       20030315
[2]   2  <?xml version="1....  1.3.6.1.4.1.14519...  SIEMENS       20030315
def filter(
self, predicate)
Filter the rows of dicom metadata and pixeldata based on given predicate
| predicate: | predicate to apply on filter | 
>>> dicom_path = "../datasets/dicom_uncompressed"
>>> dicom = tc.dicom.import_dcm(dicom_path)
>>> dicom.metadata.count()
3
>>> dicom.metadata.inspect(truncate=30)
[#]  id  metadata
=======================================
[0]   0  <?xml version="1.0" encodin...
[1]   1  <?xml version="1.0" encodin...
[2]   2  <?xml version="1.0" encodin...
#Part of xml string looks as below
<?xml version="1.0" encoding="UTF-8"?>
    <NativeDicomModel xml:space="preserve">
        <DicomAttribute keyword="FileMetaInformationVersion" tag="00020001" vr="OB"><InlineBinary>AAE=</InlineBinary></DicomAttribute>
        <DicomAttribute keyword="MediaStorageSOPClassUID" tag="00020002" vr="UI"><Value number="1">1.2.840.10008.5.1.4.1.1.4</Value></DicomAttribute>
        <DicomAttribute keyword="MediaStorageSOPInstanceUID" tag="00020003" vr="UI"><Value number="1">1.3.6.1.4.1.14519.5.2.1.7308.2101.234736319276602547946349519685</Value></DicomAttribute>
        ...
>>> import xml.etree.ElementTree as ET
#sample custom filter function
>>> def filter_meta(tag_name, tag_value):
...    def _filter_meta(row):
...        root = ET.fromstring(row["metadata"])
...        for attribute in root.findall('DicomAttribute'):
...            keyword = attribute.get('keyword')
...            if attribute.get('keyword') is not None:
...                if attribute.find('Value') is not None:
...                    value = attribute.find('Value').text
...                    if keyword == tag_name and value == tag_value:
...                        return True
...    return _filter_meta
>>> tag_name = "SOPInstanceUID"
>>> tag_value = "1.3.6.1.4.1.14519.5.2.1.7308.2101.234736319276602547946349519685"
>>> dicom.filter(filter_meta(tag_name, tag_value))
>>> dicom.metadata.count()
1
#After filter
>>> dicom.metadata.inspect(truncate=30)
[#]  id  metadata
=======================================
[0]   0  <?xml version="1.0" encodin...
>>> dicom.pixeldata.inspect(truncate=30)
[#]  id  imagematrix
=====================================================
[0]   0  [[   0.    0.    0. ...,    0.    0.    0.]
[   0.  125.  103. ...,  120.  213.  319.]
[   0.  117.   94. ...,  135.  223.  325.]
...,
[   0.   62.   21. ...,  896.  886.  854.]
[   0.   63.   23. ...,  941.  872.  897.]
[   0.   60.   30. ...,  951.  822.  906.]]
def filter(self, predicate):
    """
    Filter the rows of dicom metadata and pixeldata based on given predicate
    Parameters
    ----------
    :param predicate: predicate to apply on filter
    Examples
    --------
        >>> dicom_path = "../datasets/dicom_uncompressed"
        >>> dicom = tc.dicom.import_dcm(dicom_path)
        >>> dicom.metadata.count()
        3
        >>> dicom.metadata.inspect(truncate=30)
        [#]  id  metadata
        =======================================
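        [0]   0  <?xml version="1.0" encodin...
        [1]   1  <?xml version="1.0" encodin...
        [2]   2  <?xml version="1.0" encodin...
        #Part of xml string looks as below
        <?xml version="1.0" encoding="UTF-8"?>
            <NativeDicomModel xml:space="preserve">
                <DicomAttribute keyword="FileMetaInformationVersion" tag="00020001" vr="OB"><InlineBinary>AAE=</InlineBinary></DicomAttribute>
                <DicomAttribute keyword="MediaStorageSOPClassUID" tag="00020002" vr="UI"><Value number="1">1.2.840.10008.5.1.4.1.1.4</Value></DicomAttribute>
                <DicomAttribute keyword="MediaStorageSOPInstanceUID" tag="00020003" vr="UI"><Value number="1">1.3.6.1.4.1.14519.5.2.1.7308.2101.234736319276602547946349519685</Value></DicomAttribute>
def filter_by_keywords(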
self, keywords_values_dict)
Filter the rows based on dictionary of {"keyword":"value"}(applying 'and' operation on dictionary) from column holding xml string
Ex: keywords_values_dict -> {"SOPInstanceUID":"1.3.6.1.4.1.14519.5.2.1.7308.2101.234736319276602547946349519685", "Manufacturer":"SIEMENS", "StudyDate":"20030315"}
| keywords_values_dict | (dict(str, str)): | dictionary of keywords and values from xml string in metadata | 
>>> dicom_path = "../datasets/dicom_uncompressed"
>>> dicom = tc.dicom.import_dcm(dicom_path)
>>> dicom.metadata.count()
3
>>> dicom.metadata.inspect(truncate=30)
[#]  id  metadata
=======================================
[0]   0  <?xml version="1.0" encodin...
[1]   1  <?xml version="1.0" encodin...
[2]   2  <?xml version="1.0" encodin...
#Part of xml string looks as below
<?xml version="1.0" encoding="UTF-8"?>
    <NativeDicomModel xml:space="preserve">
        <DicomAttribute keyword="FileMetaInformationVersion" tag="00020001" vr="OB"><InlineBinary>AAE=</InlineBinary></DicomAttribute>
        <DicomAttribute keyword="MediaStorageSOPClassUID" tag="00020002" vr="UI"><Value number="1">1.2.840.10008.5.1.4.1.1.4</Value></DicomAttribute>
        <DicomAttribute keyword="MediaStorageSOPInstanceUID" tag="00020003" vr="UI"><Value number="1">1.3.6.1.4.1.14519.5.2.1.7308.2101.234736319276602547946349519685</Value></DicomAttribute>
        ...
>>> keywords_values_dict = {"SOPInstanceUID":"1.3.6.1.4.1.14519.5.2.1.7308.2101.234736319276602547946349519685", "Manufacturer":"SIEMENS", "StudyDate":"20030315"}
>>> dicom.filter_by_keywords(keywords_values_dict)
>>> dicom.metadata.count()
1
#After filter
>>> dicom.metadata.inspect(truncate=30)
[#]  id  metadata
=======================================
[0]   0  <?xml version="1.0" encodin...
>>> dicom.pixeldata.inspect(truncate=30)
[#]  id  imagematrix
=====================================================
[0]   0  [[   0.    0.    0. ...,    0.    0.    0.]
[   0.  125.  103. ...,  120.  213.  319.]
[   0.  117.   94. ...,  135.  223.  325.]
...,
[   0.   62.   21. ...,  896.  886.  854.]
[   0.   63.   23. ...,  941.  872.  897.]
[   0.   60.   30. ...,  951.  822.  906.]]
def filter_by_keywords(self, keywords_values_dict):
    """
    Filter the rows based on dictionary of {"keyword":"value"}(applying 'and' operation on dictionary) from column holding xml string
    Ex: keywords_values_dict -> {"SOPInstanceUID":"1.3.6.1.4.1.14519.5.2.1.7308.2101.234736319276602547946349519685", "Manufacturer":"SIEMENS", "StudyDate":"20030315"}
    Parameters
    ----------
    :param keywords_values_dict: (dict(str, str)) dictionary of keywords and values from xml string in metadata
    Examples
    --------
        >>> dicom_path = "../datasets/dicom_uncompressed"
        >>> dicom = tc.dicom.import_dcm(dicom_path)
        >>> dicom.metadata.count()
        3
        >>> dicom.metadata.inspect(truncate=30)
        [#]  id  metadata
        =======================================
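        [0]   0  <?xml version="1.0" encodin...
        [1]   1  <?xml version="1.0" encodin...
        [2]   2  <?xml version="1.0" encodin...
        #Part of xml string looks as below
        <?xml version="1.0" encoding="UTF-8"?>
            <NativeDicomModel xml:space="preserve">
                <DicomAttribute keyword="FileMetaInformationVersion" tag="00020001" vr="OB"><InlineBinary>AAE=</InlineBinary></DicomAttribute>
                <DicomAttribute keyword="MediaStorageSOPClassUID" tag="00020002" vr="UI"><Value number="1">1.2.840.10008.5.1.4.1.1.4</Value></DicomAttribute>
                <DicomAttribute keyword="MediaStorageSOPInstanceUID" tag="00020003" vr="UI"><Value number="1">1.3.6.1.4.1.14519.5.2.1.7308.2101.234736319276602547946349519685</Value></DicomAttribute>
    ")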
    #The scala dicom is always invoked, as python joins are expensive compared to serializations.
    def f(scala_dicom):
        scala_dicom.filterByKeywords(self._tc.jutils.convert.to_scala_map(keywords_values_dict))
    self._call_scala(f)
  def filter_by_tags(
self, tags_values_dict)
Filter the rows based on dictionary of {"tag":"value"}(applying 'and' operation on dictionary) from column holding xml string
Ex: tags_values_dict -> {"00080018":"1.3.6.1.4.1.14519.5.2.1.7308.2101.234736319276602547946349519685", "00080070":"SIEMENS", "00080020":"20030315"}
| tags_values_dict | (dict(str, str)): | dictionary of tags and values from xml string in metadata. | 
>>> dicom_path = "../datasets/dicom_uncompressed"
>>> dicom = tc.dicom.import_dcm(dicom_path)
>>> dicom.metadata.count()
3
>>> dicom.metadata.inspect(truncate=30)
[#]  id  metadata
=======================================
[0]   0  <?xml version="1.0" encodin...
[1]   1  <?xml version="1.0" encodin...
[2]   2  <?xml version="1.0" encodin...
#Part of xml string looks as below
<?xml version="1.0" encoding="UTF-8"?>
    <NativeDicomModel xml:space="preserve">
        <DicomAttribute keyword="FileMetaInformationVersion" tag="00020001" vr="OB"><InlineBinary>AAE=</InlineBinary></DicomAttribute>
        <DicomAttribute keyword="MediaStorageSOPClassUID" tag="00020002" vr="UI"><Value number="1">1.2.840.10008.5.1.4.1.1.4</Value></DicomAttribute>
        <DicomAttribute keyword="MediaStorageSOPInstanceUID" tag="00020003" vr="UI"><Value number="1">1.3.6.1.4.1.14519.5.2.1.7308.2101.234736319276602547946349519685</Value></DicomAttribute>
        ...
>>> tags_values_dict = {"00080018":"1.3.6.1.4.1.14519.5.2.1.7308.2101.234736319276602547946349519685", "00080070":"SIEMENS", "00080020":"20030315"}
>>> dicom.filter_by_tags(tags_values_dict)
>>> dicom.metadata.count()
1
#After filter
>>> dicom.metadata.inspect(truncate=30)
[#]  id  metadata
=======================================
[0]   0  <?xml version="1.0" encodin...
>>> dicom.pixeldata.inspect(truncate=30)
[#]  id  imagematrix
=====================================================
[0]   0  [[   0.    0.    0. ...,    0.    0.    0.]
[   0.  125.  103. ...,  120.  213.  319.]
[   0.  117.   94. ...,  135.  223.  325.]
...,
[   0.   62.   21. ...,  896.  886.  854.]
[   0.   63.   23. ...,  941.  872.  897.]
[   0.   60.   30. ...,  951.  822.  906.]]
def save(
self, path)
Persists the dicom object to the given file path
def save(self, path):
    """
    Persists the dicom object to the given file path

    Parameters
    ----------

    :param path: file path handed unchanged to the underlying save call
    """
    # Fetch the object returned by _get_new_scala() first, then delegate
    # the actual persistence to its save method.
    new_scala = self._get_new_scala()
    new_scala.save(path)