Source code for pydicom.dataset

# Copyright 2008-2017 pydicom authors. See LICENSE file for details.
"""Define the Dataset and FileDataset classes.

The Dataset class represents the DICOM Dataset while the FileDataset class
adds extra functionality to Dataset when data is read from or written to file.

Overview of DICOM object model
------------------------------
Dataset (dict subclass)
  Contains DataElement instances, each of which has a tag, VR, VM and value.
    The DataElement value can be:
        * A single value, such as a number, string, etc. (i.e. VM = 1)
        * A list of numbers, strings, etc. (i.e. VM > 1)
        * A Sequence (list subclass), where each item is a Dataset which
            contains its own DataElements, and so on in a recursive manner.
"""

import inspect  # for __dir__
import io
import os
import os.path
import sys
from bisect import bisect_left
from itertools import takewhile

from pydicom import compat
from pydicom.charset import default_encoding, convert_encodings
from pydicom.datadict import dictionary_VR
from pydicom.datadict import (tag_for_keyword, keyword_for_tag,
                              repeater_has_keyword)
from pydicom.tag import Tag, BaseTag, tag_in_exception
from pydicom.dataelem import DataElement, DataElement_from_raw, RawDataElement
from pydicom.uid import (UncompressedPixelTransferSyntaxes,
                         ExplicitVRLittleEndian)
import pydicom  # for dcmwrite
import pydicom.charset
from pydicom.config import logger
import pydicom.config

have_numpy = True
try:
    import numpy
except ImportError:
    have_numpy = False

sys_is_little_endian = (sys.byteorder == 'little')


class PropertyError(Exception):
    """For AttributeErrors caught in a property, so do not go to __getattr__"""
    # http://docs.python.org/release/3.1.3/tutorial/errors.html#tut-userexceptions
    pass
class Dataset(dict):
    """A collection (dictionary) of DICOM DataElements.

    Examples
    --------
    Add DataElements to the Dataset (for elements in the DICOM dictionary):

    >>> ds = Dataset()
    >>> ds.PatientName = "CITIZEN^Joan"
    >>> ds.add_new(0x00100020, 'LO', '12345')
    >>> ds[0x0010, 0x0030] = DataElement(0x00100030, 'DA', '20010101')

    Add Sequence DataElement to the Dataset:

    >>> ds.BeamSequence = [Dataset(), Dataset(), Dataset()]
    >>> ds.BeamSequence[0].Manufacturer = "Linac, co."
    >>> ds.BeamSequence[1].Manufacturer = "Linac and Sons, co."
    >>> ds.BeamSequence[2].Manufacturer = "Linac and Daughters, co."

    Add private DataElements to the Dataset:

    >>> ds.add(DataElement(0x0043102b, 'SS', [4, 4, 0, 0]))
    >>> ds.add_new(0x0043102b, 'SS', [4, 4, 0, 0])
    >>> ds[0x0043, 0x102b] = DataElement(0x0043102b, 'SS', [4, 4, 0, 0])

    Updating and retrieving DataElement values:

    >>> ds.PatientName = "CITIZEN^Joan"
    >>> ds.PatientName
    'CITIZEN^Joan'
    >>> ds.PatientName = "CITIZEN^John"
    >>> ds.PatientName
    'CITIZEN^John'

    Retrieving a DataElement's value from a Sequence:

    >>> ds.BeamSequence[0].Manufacturer
    'Linac, co.'
    >>> ds.BeamSequence[1].Manufacturer
    'Linac and Sons, co.'

    Retrieving DataElements:

    >>> elem = ds[0x00100010]
    >>> elem = ds.data_element('PatientName')
    >>> elem
    (0010, 0010) Patient's Name PN: 'CITIZEN^Joan'

    Deleting a DataElement from the Dataset:

    >>> del ds.PatientID
    >>> del ds.BeamSequence[1].Manufacturer
    >>> del ds.BeamSequence[2]

    Deleting a private DataElement from the Dataset:

    >>> del ds[0x0043, 0x102b]

    Determining if a DataElement is present in the Dataset:

    >>> 'PatientName' in ds
    True
    >>> 'PatientID' in ds
    False
    >>> (0x0010, 0x0030) in ds
    True
    >>> 'Manufacturer' in ds.BeamSequence[0]
    True

    Iterating through the top level of a Dataset only (excluding Sequences):

    >>> for elem in ds:
    ...     print(elem)

    Iterating through the entire Dataset (including Sequences):

    >>> for elem in ds.iterall():
    ...     print(elem)

    Recursively iterate through a Dataset (including Sequences):

    >>> def recurse(ds):
    ...     for elem in ds:
    ...         if elem.VR == 'SQ':
    ...             [recurse(item) for item in elem]
    ...         else:
    ...             pass  # Do something useful with each DataElement

    Attributes
    ----------
    default_element_format : str
        The default formatting for string display.
    default_sequence_element_format : str
        The default formatting for string display of sequences.
    indent_chars : str
        For string display, the characters used to indent nested Sequences.
        Default is "   ".
    is_little_endian : bool
        Shall be set before writing with `write_like_original=False`.
        The written dataset (excluding the pixel data) will be written using
        the given endianness.
    is_implicit_VR : bool
        Shall be set before writing with `write_like_original=False`.
        The written dataset will be written using the transfer syntax with the
        given VR handling, e.g. LittleEndianImplicit if True, and
        LittleEndianExplicit or BigEndianExplicit (depending on
        `is_little_endian`) if False.
""" indent_chars = " " # Python 2: Classes defining __eq__ should flag themselves as unhashable __hash__ = None def __init__(self, *args, **kwargs): """Create a new Dataset instance.""" self._parent_encoding = kwargs.get('parent_encoding', default_encoding) dict.__init__(self, *args) self.is_decompressed = False # the following read_XXX attributes are used internally to store # the properties of the dataset after read from a file # set depending on the endianess of the read dataset self.read_little_endian = None # set depending on the VR handling of the read dataset self.read_implicit_vr = None # set to the encoding the dataset had originally self.read_encoding = None self.is_little_endian = None self.is_implicit_VR = None def __enter__(self): """Method invoked on entry to a with statement.""" return self def __exit__(self, exc_type, exc_val, exc_tb): """Method invoked on exit from a with statement.""" # Returning False will re-raise any exceptions that occur return False
    def add(self, data_element):
        """Add a DataElement to the Dataset.

        Equivalent to ds[data_element.tag] = data_element

        Parameters
        ----------
        data_element : pydicom.dataelem.DataElement
            The DataElement to add to the Dataset.
        """
        self[data_element.tag] = data_element
    def add_new(self, tag, VR, value):
        """Add a DataElement to the Dataset.

        Parameters
        ----------
        tag
            The DICOM (group, element) tag in any form accepted by
            pydicom.tag.Tag such as [0x0010, 0x0010], (0x10, 0x10),
            0x00100010, etc.
        VR : str
            The 2 character DICOM value representation (see DICOM standard
            part 5, Section 6.2).
        value
            The value of the data element. One of the following:
            * a single string or number
            * a list or tuple with all strings or all numbers
            * a multi-value string with backslash separator
            * for a sequence DataElement, an empty list or list of Dataset
        """
        data_element = DataElement(tag, VR, value)
        # use data_element.tag since DataElement verified it
        self[data_element.tag] = data_element
    def data_element(self, name):
        """Return the DataElement corresponding to the element keyword `name`.

        Parameters
        ----------
        name : str
            A DICOM element keyword.

        Returns
        -------
        pydicom.dataelem.DataElement or None
            For the given DICOM element keyword `name`, return the
            corresponding Dataset DataElement if present, None otherwise.
        """
        tag = tag_for_keyword(name)
        # Test against None as (0000,0000) is a possible tag
        if tag is not None:
            return self[tag]
        return None
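
A minimal usage sketch for data_element() (illustrative values only; note that, as written above, a valid keyword whose element is absent raises KeyError from self[tag] rather than returning None):

>>> ds = Dataset()
>>> ds.PatientID = '12345'
>>> elem = ds.data_element('PatientID')
>>> elem.VR, elem.value
('LO', '12345')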
    def __contains__(self, name):
        """Extend dict.__contains__() to handle DICOM keywords.

        This is called for code like:
        >>> 'SliceLocation' in ds
        True

        Parameters
        ----------
        name : str or int or 2-tuple
            The Element keyword or tag to search for.

        Returns
        -------
        bool
            True if the DataElement is in the Dataset, False otherwise.
        """
        if isinstance(name, (str, compat.text_type)):
            tag = tag_for_keyword(name)
        else:
            try:
                tag = Tag(name)
            except Exception:
                return False
        # Test against None as (0000,0000) is a possible tag
        if tag is not None:
            return dict.__contains__(self, tag)
        else:
            # will no doubt raise an exception
            return dict.__contains__(self, name)
    def decode(self):
        """Apply character set decoding to all DataElements in the Dataset.

        See DICOM PS3.5-2008 6.1.1.
        """
        # Find specific character set. 'ISO_IR 6' is default
        # May be multi-valued, but let pydicom.charset handle all logic on that
        dicom_character_set = self._character_set

        # Shortcut to the decode function in pydicom.charset
        decode_data_element = pydicom.charset.decode

        # Callback for walk(), to decode the chr strings if necessary
        # This simply calls the pydicom.charset.decode function
        def decode_callback(ds, data_element):
            """Callback to decode `data_element`."""
            if data_element.VR == 'SQ':
                for dset in data_element.value:
                    dset._parent_encoding = dicom_character_set
                    dset.decode()
            else:
                decode_data_element(data_element, dicom_character_set)

        self.walk(decode_callback, recursive=False)
    def __delattr__(self, name):
        """Intercept requests to delete an attribute by `name`.

        If `name` is a DICOM keyword:
            Delete the corresponding DataElement from the Dataset.
            >>> del ds.PatientName
        Else:
            Delete the class attribute as any other class would do.
            >>> del ds._is_some_attribute

        Parameters
        ----------
        name : str
            The keyword for the DICOM element or the class attribute to
            delete.
        """
        # First check if a valid DICOM keyword and if we have that data
        # element
        tag = tag_for_keyword(name)
        if tag is not None and tag in self:
            # direct to dict as we know we have key
            dict.__delitem__(self, tag)
        # If not a DICOM name in this dataset, check for regular instance name
        #   can't do delete directly, that will call __delattr__ again
        elif name in self.__dict__:
            del self.__dict__[name]
        # Not found, raise an error in same style as python does
        else:
            raise AttributeError(name)

    def __delitem__(self, key):
        """Intercept requests to delete an attribute by key.

        Examples
        --------
        Indexing using DataElement tag

        >>> ds = Dataset()
        >>> ds.CommandGroupLength = 100
        >>> ds.PatientName = 'CITIZEN^Jan'
        >>> del ds[0x00000000]
        >>> ds
        (0010, 0010) Patient's Name PN: 'CITIZEN^Jan'

        Slicing using DataElement tag

        >>> ds = Dataset()
        >>> ds.CommandGroupLength = 100
        >>> ds.SOPInstanceUID = '1.2.3'
        >>> ds.PatientName = 'CITIZEN^Jan'
        >>> del ds[:0x00100000]
        >>> ds
        (0010, 0010) Patient's Name PN: 'CITIZEN^Jan'

        Parameters
        ----------
        key
            The key for the attribute to be deleted. If a slice is used then
            the tags matching the slice conditions will be deleted.
        """
        # If passed a slice, delete the corresponding DataElements
        if isinstance(key, slice):
            for tag in self._slice_dataset(key.start, key.stop, key.step):
                del self[tag]
        else:
            # Assume is a standard tag (for speed in common case)
            try:
                dict.__delitem__(self, key)
            # If not a standard tag, then convert to Tag and try again
            except KeyError:
                tag = Tag(key)
                dict.__delitem__(self, tag)

    def __dir__(self):
        """Give a list of attributes available in the Dataset.

        List of attributes is used, for example, in auto-completion in editors
        or command-line environments.
        """
        # Force zip object into a list in case of python3. Also backwards
        # compatible
        meths = set(list(zip(
            *inspect.getmembers(self.__class__, inspect.isroutine)))[0])
        props = set(list(zip(
            *inspect.getmembers(self.__class__, inspect.isdatadescriptor)))[0])
        dicom_names = set(self.dir())
        alldir = sorted(props | meths | dicom_names)
        return alldir
    def dir(self, *filters):
        """Return an alphabetical list of DataElement keywords in the Dataset.

        Intended mainly for use in interactive Python sessions. Only lists the
        DataElement keywords in the current level of the Dataset (i.e. the
        contents of any Sequence elements are ignored).

        Parameters
        ----------
        filters : str
            Zero or more string arguments to the function. Used for
            case-insensitive match to any part of the DICOM keyword.

        Returns
        -------
        list of str
            The matching DataElement keywords in the dataset. If no filters
            are used then all DataElement keywords are returned.
        """
        allnames = [keyword_for_tag(tag) for tag in self.keys()]
        # remove blanks - tags without valid names (e.g. private tags)
        allnames = [x for x in allnames if x]
        # Store found names in a dict, so duplicate names appear only once
        matches = {}
        for filter_ in filters:
            filter_ = filter_.lower()
            match = [x for x in allnames if x.lower().find(filter_) != -1]
            matches.update(dict([(x, 1) for x in match]))
        if filters:
            names = sorted(matches.keys())
            return names
        else:
            return sorted(allnames)
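
An illustrative dir() session (assumed element values; keyword matching is case-insensitive and the result is sorted alphabetically):

>>> ds = Dataset()
>>> ds.PatientName = 'CITIZEN^Joan'
>>> ds.PatientID = '12345'
>>> ds.SOPInstanceUID = '1.2.3'
>>> ds.dir('pat')
['PatientID', 'PatientName']
>>> ds.dir()
['PatientID', 'PatientName', 'SOPInstanceUID']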
    def __eq__(self, other):
        """Compare `self` and `other` for equality.

        Returns
        -------
        bool
            The result if `self` and `other` are the same class
        NotImplemented
            If `other` is not the same class as `self` then returning
            NotImplemented delegates the result to
            superclass.__eq__(subclass)
        """
        # When comparing against self this will be faster
        if other is self:
            return True

        if isinstance(other, self.__class__):
            # Compare Elements using values()
            # Convert values() to a list for compatibility between
            #   python 2 and 3
            # Sort values() by element tag
            self_elem = sorted(list(self.values()), key=lambda x: x.tag)
            other_elem = sorted(list(other.values()), key=lambda x: x.tag)
            return self_elem == other_elem

        return NotImplemented
    def get(self, key, default=None):
        """Extend dict.get() to handle DICOM DataElement keywords.

        Parameters
        ----------
        key : str or pydicom.tag.Tag
            The element keyword or Tag or the class attribute name to get.
        default : obj or None
            If the DataElement or class attribute is not present, return
            `default` (default None).

        Returns
        -------
        value
            If `key` is the keyword for a DataElement in the Dataset then
            return the DataElement's value.
        pydicom.dataelem.DataElement
            If `key` is a tag for a DataElement in the Dataset then return the
            DataElement instance.
        value
            If `key` is a class attribute then return its value.
        """
        if isinstance(key, (str, compat.text_type)):
            try:
                return getattr(self, key)
            except AttributeError:
                return default
        else:
            # is not a string, try to make it into a tag and then hand it
            # off to the underlying dict
            if not isinstance(key, BaseTag):
                try:
                    key = Tag(key)
                except Exception:
                    raise TypeError("Dataset.get key must be a string or tag")

            try:
                return_val = self.__getitem__(key)
            except KeyError:
                return_val = default

        return return_val
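
A sketch of the keyword-versus-tag behaviour described above (illustrative values; a keyword returns the value, a tag returns the DataElement):

>>> ds = Dataset()
>>> ds.PatientName = 'CITIZEN^Joan'
>>> ds.get('PatientName')
'CITIZEN^Joan'
>>> ds.get(0x00100010).value
'CITIZEN^Joan'
>>> ds.get('PatientWeight', 'not present')
'not present'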
    def __getattr__(self, name):
        """Intercept requests for Dataset attribute names.

        If `name` matches a DICOM keyword, return the value for the
        DataElement with the corresponding tag.

        Parameters
        ----------
        name
            A DataElement keyword or tag or a class attribute name.

        Returns
        -------
        value
            If `name` matches a DICOM keyword, returns the corresponding
            DataElement's value. Otherwise returns the class attribute's
            value (if present).
        """
        tag = tag_for_keyword(name)
        if tag is None:  # `name` isn't a DICOM element keyword
            # Try the base class attribute getter (fix for issue 332)
            return super(Dataset, self).__getattribute__(name)
        tag = Tag(tag)
        if tag not in self:  # DICOM DataElement not in the Dataset
            # Try the base class attribute getter (fix for issue 332)
            return super(Dataset, self).__getattribute__(name)
        else:
            return self[tag].value

    @property
    def _character_set(self):
        """The Dataset's SpecificCharacterSet value (if present)."""
        char_set = self.get(BaseTag(0x00080005), None)

        if not char_set:
            char_set = self._parent_encoding
        else:
            char_set = convert_encodings(char_set)

        return char_set

    def __getitem__(self, key):
        """Operator for Dataset[key] request.

        Any deferred data elements will be read in and an attempt will be made
        to correct any elements with ambiguous VRs.

        Examples
        --------
        Indexing using DataElement tag

        >>> ds = Dataset()
        >>> ds.SOPInstanceUID = '1.2.3'
        >>> ds.PatientName = 'CITIZEN^Jan'
        >>> ds.PatientID = '12345'
        >>> ds[0x00100010]
        (0010, 0010) Patient's Name PN: 'CITIZEN^Jan'

        Slicing using DataElement tag

        All group 0x0010 elements in the dataset

        >>> ds[0x00100000:0x00110000]
        (0010, 0010) Patient's Name PN: 'CITIZEN^Jan'
        (0010, 0020) Patient ID LO: '12345'

        All group 0x0002 elements in the dataset

        >>> ds[(0x0002, 0x0000):(0x0003, 0x0000)]

        Parameters
        ----------
        key
            The DICOM (group, element) tag in any form accepted by
            pydicom.tag.Tag such as [0x0010, 0x0010], (0x10, 0x10),
            0x00100010, etc. May also be a slice made up of DICOM tags.

        Returns
        -------
        pydicom.dataelem.DataElement or pydicom.dataset.Dataset
            If a single DICOM element tag is used then returns the
            corresponding DataElement. If a slice is used then returns a
            Dataset object containing the corresponding DataElements.
        """
        # If passed a slice, return a Dataset containing the corresponding
        #   DataElements
        if isinstance(key, slice):
            return self._dataset_slice(key)

        if isinstance(key, BaseTag):
            tag = key
        else:
            tag = Tag(key)
        data_elem = dict.__getitem__(self, tag)

        if isinstance(data_elem, DataElement):
            return data_elem
        elif isinstance(data_elem, tuple):
            # If a deferred read, then go get the value now
            if data_elem.value is None:
                from pydicom.filereader import read_deferred_data_element
                data_elem = read_deferred_data_element(
                    self.fileobj_type, self.filename, self.timestamp,
                    data_elem)

            if tag != BaseTag(0x00080005):
                character_set = self.read_encoding or self._character_set
            else:
                character_set = default_encoding
            # Not converted from raw form read from file yet; do so now
            self[tag] = DataElement_from_raw(data_elem, character_set)

            # If the Element has an ambiguous VR, try to correct it
            if 'or' in self[tag].VR:
                from pydicom.filewriter import correct_ambiguous_vr_element
                self[tag] = correct_ambiguous_vr_element(
                    self[tag], self, data_elem[6])

        return dict.__getitem__(self, tag)
    def get_item(self, key):
        """Return the raw data element if possible.

        It will be raw if the user has never accessed the value, or set their
        own value. Note if the data element is a deferred-read element, then
        it is read and converted before being returned.

        Parameters
        ----------
        key
            The DICOM (group, element) tag in any form accepted by
            pydicom.tag.Tag such as [0x0010, 0x0010], (0x10, 0x10),
            0x00100010, etc. May also be a slice made up of DICOM tags.

        Returns
        -------
        pydicom.dataelem.DataElement
        """
        if isinstance(key, slice):
            return self._dataset_slice(key)

        if isinstance(key, BaseTag):
            tag = key
        else:
            tag = Tag(key)
        data_elem = dict.__getitem__(self, tag)
        # If a deferred read, return using __getitem__ to read and convert it
        if isinstance(data_elem, tuple) and data_elem.value is None:
            return self[key]
        return data_elem
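
An illustrative note on get_item() versus []: for elements created in memory the two return the same object, since they only differ for unconverted RawDataElements read from file:

>>> ds = Dataset()
>>> ds.PatientID = '12345'
>>> ds.get_item(0x00100020) is ds[0x00100020]
True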
    def _dataset_slice(self, slice):
        """Return a slice that has the same properties as the original
        dataset. That includes properties related to endianness and VR
        handling, and the specific character set. No element conversion is
        done, e.g. elements of type RawDataElement are kept.
        """
        tags = self._slice_dataset(slice.start, slice.stop, slice.step)
        dataset = Dataset({tag: self.get_item(tag) for tag in tags})
        dataset.read_implicit_vr = self.read_implicit_vr
        dataset.read_little_endian = self.read_little_endian
        dataset.is_little_endian = self.is_little_endian
        dataset.is_implicit_VR = self.is_implicit_VR
        dataset.read_encoding = self.read_encoding
        return dataset

    @property
    def is_original_encoding(self):
        """Return True if the properties to be used for writing are set and
        have the same value as the ones in the dataset after reading it.
        This includes properties related to endianness, VR handling and the
        specific character set.
        """
        return (self.is_implicit_VR is not None and
                self.is_little_endian is not None and
                self.read_implicit_vr == self.is_implicit_VR and
                self.read_little_endian == self.is_little_endian and
                self.read_encoding == self._character_set)
    def group_dataset(self, group):
        """Return a Dataset containing only DataElements of a certain group.

        Parameters
        ----------
        group : int
            The group part of a DICOM (group, element) tag.

        Returns
        -------
        pydicom.dataset.Dataset
            A dataset instance containing elements of the group specified.
        """
        return self[(group, 0x0000):(group + 1, 0x0000)]
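
A short group_dataset() sketch (illustrative values; Modality is in group 0x0008, the patient elements in group 0x0010):

>>> ds = Dataset()
>>> ds.Modality = 'CT'
>>> ds.PatientName = 'CITIZEN^Joan'
>>> ds.PatientID = '12345'
>>> ds.group_dataset(0x0010).dir()
['PatientID', 'PatientName']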
    def __iter__(self):
        """Iterate through the top-level of the Dataset, yielding
        DataElements.

        >>> for elem in ds:
        ...     print(elem)

        The DataElements are returned in increasing tag value order. Sequence
        items are returned as a single DataElement, so it is up to the calling
        code to recurse into the Sequence items if desired.

        Yields
        ------
        pydicom.dataelem.DataElement
            The Dataset's DataElements, sorted by increasing tag order.
        """
        # Note this is different than the underlying dict class,
        #   which returns the key of the key:value mapping.
        # Here the value is returned (but data_element.tag has the key)
        taglist = sorted(self.keys())
        for tag in taglist:
            yield self[tag]
    def elements(self):
        """Iterate through the top-level of the Dataset, yielding DataElements
        or RawDataElements (no conversion done).

        >>> for elem in ds.elements():
        ...     print(elem)

        The elements are returned in the same way as in __getitem__.

        Yields
        ------
        pydicom.dataelem.DataElement or pydicom.dataelem.RawDataElement
            The Dataset's DataElements, sorted by increasing tag order.
        """
        taglist = sorted(self.keys())
        for tag in taglist:
            yield self.get_item(tag)
    def _is_uncompressed_transfer_syntax(self):
        """Return True if the TransferSyntaxUID is not a compressed syntax."""
        # FIXME uses file_meta here, should really only be thus for FileDataset
        return self.file_meta.TransferSyntaxUID in (
            UncompressedPixelTransferSyntaxes)

    def __ne__(self, other):
        """Compare `self` and `other` for inequality."""
        return not self == other

    def _reshape_pixel_array(self, pixel_array):
        # Note the following reshape operations return a new *view* onto
        # pixel_array, but don't copy the data
        if 'NumberOfFrames' in self and self.NumberOfFrames > 1:
            if self.SamplesPerPixel > 1:
                # TODO: Handle Planar Configuration attribute
                assert self.PlanarConfiguration == 0
                pixel_array = pixel_array.reshape(self.NumberOfFrames,
                                                  self.Rows, self.Columns,
                                                  self.SamplesPerPixel)
            else:
                pixel_array = pixel_array.reshape(self.NumberOfFrames,
                                                  self.Rows, self.Columns)
        else:
            if self.SamplesPerPixel > 1:
                if self.BitsAllocated == 8:
                    if self.PlanarConfiguration == 0:
                        pixel_array = pixel_array.reshape(
                            self.Rows, self.Columns, self.SamplesPerPixel)
                    else:
                        pixel_array = pixel_array.reshape(
                            self.SamplesPerPixel, self.Rows, self.Columns)
                        pixel_array = pixel_array.transpose(1, 2, 0)
                else:
                    raise NotImplementedError("This code only handles "
                                              "SamplesPerPixel > 1 if Bits "
                                              "Allocated = 8")
            else:
                pixel_array = pixel_array.reshape(self.Rows, self.Columns)
        return pixel_array

    def _convert_YBR_to_RGB(self, array_of_YBR_pixels):
        if have_numpy:
            ybr_to_rgb = numpy.ndarray((3, 3), dtype=numpy.float)
            ybr_to_rgb[0, :] = [1.0, +0.000000, +1.402000]
            ybr_to_rgb[1, :] = [1.0, -0.344136, -0.714136]
            ybr_to_rgb[2, :] = [1.0, +1.772000, +0.000000]
            orig_type = array_of_YBR_pixels.dtype
            array_of_YBR_pixels = array_of_YBR_pixels.astype(numpy.float)
            array_of_YBR_pixels -= [0, 128, 128]
            array_of_YBR_pixels = numpy.dot(
                array_of_YBR_pixels, ybr_to_rgb.T.copy()).astype(orig_type)
            return array_of_YBR_pixels
        else:
            raise NotImplementedError("Numpy is required "
                                      "to convert the color space")

    # Used by pixel_array property
    def _get_pixel_array(self):
        self.convert_pixel_data()
        return self._pixel_array
    def convert_pixel_data(self):
        """Convert the Pixel Data to a numpy array internally.

        Returns
        -------
        None
            Converted pixel data is stored internally in the dataset. If a
            compressed image format, the image is decompressed, and any
            related data elements are changed accordingly.
        """
        # Check if already have converted to a NumPy array
        # Also check if self.PixelData has changed. If so, get new NumPy array
        already_have = True
        if not hasattr(self, "_pixel_array"):
            already_have = False
        elif self._pixel_id != id(self.PixelData):
            already_have = False
        if not already_have:
            last_exception = None
            successfully_read_pixel_data = False
            for x in [h for h in pydicom.config.image_handlers
                      if h and h.supports_transfer_syntax(self)]:
                try:
                    pixel_array = x.get_pixeldata(self)
                    self._pixel_array = self._reshape_pixel_array(pixel_array)
                    if x.needs_to_convert_to_RGB(self):
                        self._pixel_array = self._convert_YBR_to_RGB(
                            self._pixel_array)
                    successfully_read_pixel_data = True
                    break
                except Exception as e:
                    logger.debug("Trouble with", exc_info=e)
                    last_exception = e
                    continue
            if not successfully_read_pixel_data:
                handlers_tried = " ".join(
                    [str(x) for x in pydicom.config.image_handlers])
                logger.info("%s did not support this transfer syntax",
                            handlers_tried)
                self._pixel_array = None
                self._pixel_id = None
                if last_exception:
                    raise last_exception
                else:
                    msg = ("No available image handler could "
                           "decode this transfer syntax {}".format(
                               self.file_meta.TransferSyntaxUID.name))
                    raise NotImplementedError(msg)
            # is this guaranteed to work if memory is re-used??
            self._pixel_id = id(self.PixelData)
    def decompress(self):
        """Decompresses pixel data and modifies the Dataset in-place.

        If not a compressed transfer syntax, then pixel data is converted
        to a numpy array internally, but not returned.

        If compressed pixel data, then is decompressed using an image handler,
        and internal state is updated appropriately:
            - TransferSyntax is updated to non-compressed form
            - is_undefined_length for pixel data is set False

        Returns
        -------
        None

        Raises
        ------
        NotImplementedError
            If the pixel data was originally compressed but file is not
            ExplicitVR LittleEndian as required by the DICOM standard.
        """
        self.convert_pixel_data()
        self.is_decompressed = True
        # May have been undefined length pixel data, but won't be now
        if 'PixelData' in self:
            self[0x7fe00010].is_undefined_length = False

        # Make sure correct Transfer Syntax is set
        # According to the dicom standard PS3.5 section A.4,
        # all compressed files must have been explicit VR, little endian
        # First check if was a compressed file
        if (hasattr(self, 'file_meta') and
                self.file_meta.TransferSyntaxUID.is_compressed):
            # Check that current file as read does match expected
            if not self.is_little_endian or self.is_implicit_VR:
                msg = ("Current dataset does not match expected ExplicitVR "
                       "LittleEndian transfer syntax from a compressed "
                       "transfer syntax")
                raise NotImplementedError(msg)

            # All is as expected, update the Transfer Syntax
            self.file_meta.TransferSyntaxUID = ExplicitVRLittleEndian
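
A usage sketch for decompress(), under assumptions: 'ct_compressed.dcm' is a hypothetical compressed file, and a suitable pixel data handler (e.g. GDCM or Pillow) is installed and enabled in pydicom.config.image_handlers:

>>> from pydicom import dcmread
>>> from pydicom.uid import ExplicitVRLittleEndian
>>> ds = dcmread('ct_compressed.dcm')    # hypothetical compressed file
>>> ds.decompress()                      # requires a capable image handler
>>> arr = ds.pixel_array                 # decompressed frames as a numpy array
>>> ds.file_meta.TransferSyntaxUID == ExplicitVRLittleEndian
True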
    @property
    def pixel_array(self):
        """Return the Pixel Data as a NumPy array.

        Returns
        -------
        numpy.ndarray
            The Pixel Data (7FE0,0010) as a NumPy ndarray.
        """
        return self._get_pixel_array()

    # Format strings spec'd according to python string formatting options
    #   See http://docs.python.org/library/stdtypes.html#string-formatting-operations  # noqa
    default_element_format = "%(tag)s %(name)-35.35s %(VR)s: %(repval)s"

    default_sequence_element_format = "%(tag)s %(name)-35.35s %(VR)s: %(repval)s"  # noqa
    def formatted_lines(
            self,
            element_format=default_element_format,
            sequence_element_format=default_sequence_element_format,
            indent_format=None):
        """Iterate through the Dataset yielding formatted str for each
        element.

        Parameters
        ----------
        element_format : str
            The string format to use for non-sequence elements. Formatting
            uses the attributes of DataElement. Default is
            "%(tag)s %(name)-35.35s %(VR)s: %(repval)s".
        sequence_element_format : str
            The string format to use for sequence elements. Formatting uses
            the attributes of DataElement. Default is
            "%(tag)s %(name)-35.35s %(VR)s: %(repval)s"
        indent_format : str or None
            Placeholder for future functionality.

        Yields
        ------
        str
            A string representation of a DataElement.
        """
        for data_element in self.iterall():
            # Get all the attributes possible for this data element (e.g.
            #   gets descriptive text name too)
            # This is the dictionary of names that can be used in the format
            #   string
            elem_dict = dict([(x, getattr(data_element, x)()
                               if callable(getattr(data_element, x))
                               else getattr(data_element, x))
                              for x in dir(data_element)
                              if not x.startswith("_")])
            if data_element.VR == "SQ":
                yield sequence_element_format % elem_dict
            else:
                yield element_format % elem_dict
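
An illustrative formatted_lines() call with a custom element format (hypothetical one-element dataset; 'name' and 'repval' are DataElement attributes):

>>> ds = Dataset()
>>> ds.PatientName = 'CITIZEN^Joan'
>>> for line in ds.formatted_lines(element_format='%(name)s = %(repval)s'):
...     print(line)
Patient's Name = 'CITIZEN^Joan'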
    def _pretty_str(self, indent=0, top_level_only=False):
        """Return a string of the DataElements in the Dataset, with indented
        levels.

        This private method is called by the __str__() method for handling
        print statements or str(dataset), and the __repr__() method.
        It is also used by top(), therefore the top_level_only flag.
        This function recurses, with increasing indentation levels.

        Parameters
        ----------
        indent : int
            The indent level offset (default 0)
        top_level_only : bool
            When True, only create a string for the top level elements, i.e.
            exclude elements within any Sequences (default False).

        Returns
        -------
        str
            A string representation of the Dataset.
        """
        strings = []
        indent_str = self.indent_chars * indent
        nextindent_str = self.indent_chars * (indent + 1)
        for data_element in self:
            with tag_in_exception(data_element.tag):
                if data_element.VR == "SQ":  # a sequence
                    strings.append(indent_str + str(data_element.tag) +
                                   " %s %i item(s) ---- " %
                                   (data_element.description(),
                                    len(data_element.value)))
                    if not top_level_only:
                        for dataset in data_element.value:
                            strings.append(dataset._pretty_str(indent + 1))
                            strings.append(nextindent_str + "---------")
                else:
                    strings.append(indent_str + repr(data_element))
        return "\n".join(strings)
    def remove_private_tags(self):
        """Remove all private DataElements in the Dataset."""
        def RemoveCallback(dataset, data_element):
            """Internal method to use as callback to walk() method."""
            if data_element.tag.is_private:
                # can't del self[tag] - won't be right dataset on recursion
                del dataset[data_element.tag]

        self.walk(RemoveCallback)
    def save_as(self, filename, write_like_original=True):
        """Write the Dataset to `filename`.

        Saving a Dataset requires that the Dataset.is_implicit_VR and
        Dataset.is_little_endian attributes exist and are set appropriately.
        If Dataset.file_meta.TransferSyntaxUID is present then it should be
        set to a consistent value to ensure conformance.

        Conformance with DICOM File Format
        ----------------------------------
        If `write_like_original` is False, the Dataset will be stored in the
        DICOM File Format in accordance with DICOM Standard Part 10 Section 7.
        To do so requires that the `Dataset.file_meta` attribute exists and
        contains a Dataset with the required (Type 1) File Meta Information
        Group elements (see pydicom.filewriter.dcmwrite and
        pydicom.filewriter.write_file_meta_info for more information).

        If `write_like_original` is True then the Dataset will be written as
        is (after minimal validation checking) and may or may not contain all
        or parts of the File Meta Information (and hence may or may not be
        conformant with the DICOM File Format).

        Parameters
        ----------
        filename : str or file-like
            Name of file or the file-like to write the new DICOM file to.
        write_like_original : bool
            If True (default), preserves the following information from
            the Dataset (and may result in a non-conformant file):
            - preamble -- if the original file has no preamble then none will
              be written.
            - file_meta -- if the original file was missing any required File
              Meta Information Group elements then they will not be added or
              written.
              If (0002,0000) 'File Meta Information Group Length' is present
              then it may have its value updated.
            - seq.is_undefined_length -- if original had delimiters, write
              them now too, instead of the more sensible length characters
            - is_undefined_length_sequence_item -- for datasets that belong to
              a sequence, write the undefined length delimiters if that is
              what the original had.
            If False, produces a file conformant with the DICOM File Format,
            with explicit lengths for all elements.

        See Also
        --------
        pydicom.filewriter.write_dataset
            Write a DICOM Dataset to a file.
        pydicom.filewriter.write_file_meta_info
            Write the DICOM File Meta Information Group elements to a file.
        pydicom.filewriter.dcmwrite
            Write a DICOM file from a FileDataset instance.
        """
        # Ensure is_little_endian and is_implicit_VR are set
        if self.is_little_endian is None or self.is_implicit_VR is None:
            raise AttributeError(
                "'{0}.is_little_endian' and '{0}.is_implicit_VR' must be "
                "set appropriately before saving.".format(
                    self.__class__.__name__))

        pydicom.dcmwrite(filename, self, write_like_original)
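
A minimal save_as() sketch (hypothetical output path; with the default write_like_original=True no file meta is required, so the result is not DICOM File Format conformant -- for a conformant file, supply a file_meta Dataset and pass write_like_original=False):

>>> ds = Dataset()
>>> ds.PatientName = 'CITIZEN^Joan'
>>> ds.is_little_endian = True
>>> ds.is_implicit_VR = True
>>> ds.save_as('minimal.dcm')   # hypothetical path, bare dataset written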
    def __setattr__(self, name, value):
        """Intercept any attempts to set a value for an instance attribute.

        If name is a DICOM keyword, set the corresponding tag and DataElement.
        Else, set an instance (python) attribute as any other class would do.

        Parameters
        ----------
        name : str
            The element keyword for the DataElement you wish to add/change. If
            `name` is not a DICOM element keyword then this will be the name
            of the attribute to be added/changed.
        value
            The value for the attribute to be added/changed.
        """
        tag = tag_for_keyword(name)
        if tag is not None:  # successfully mapped name to a tag
            if tag not in self:
                # don't have this tag yet->create the data_element instance
                VR = dictionary_VR(tag)
                data_element = DataElement(tag, VR, value)
            else:
                # already have this data_element, just changing its value
                data_element = self[tag]
                data_element.value = value
            # Now have data_element - store it in this dict
            self[tag] = data_element
        elif repeater_has_keyword(name):
            # Check if `name` is repeaters element
            raise ValueError('{} is a DICOM repeating group '
                             'element and must be added using '
                             'the add() or add_new() methods.'
                             .format(name))
        else:
            # name not in dicom dictionary - setting a non-dicom instance
            # attribute
            # XXX note if user mis-spells a dicom data_element - no error!!!
            super(Dataset, self).__setattr__(name, value)

    def __setitem__(self, key, value):
        """Operator for Dataset[key] = value.

        Check consistency, and deal with private tags.

        Parameters
        ----------
        key : int
            The tag for the element to be added to the Dataset.
        value : pydicom.dataelem.DataElement or pydicom.dataelem.RawDataElement
            The element to add to the Dataset.

        Raises
        ------
        NotImplementedError
            If `key` is a slice.
        ValueError
            If the `key` value doesn't match DataElement.tag.
        """
        if isinstance(key, slice):
            raise NotImplementedError('Slicing is not supported for setting '
                                      'Dataset elements.')

        # OK if is subclass, e.g. DeferredDataElement
        if not isinstance(value, (DataElement, RawDataElement)):
            raise TypeError("Dataset contents must be DataElement instances.")
        if isinstance(value.tag, BaseTag):
            tag = value.tag
        else:
            tag = Tag(value.tag)
        if key != tag:
            raise ValueError("DataElement.tag must match the dictionary key")

        data_element = value
        if tag.is_private:
            # See PS 3.5-2008 section 7.8.1 (p. 44) for how blocks are
            # reserved
            logger.debug("Setting private tag %r" % tag)
            private_block = tag.elem >> 8
            private_creator_tag = Tag(tag.group, private_block)
            if private_creator_tag in self and tag != private_creator_tag:
                if data_element.is_raw:
                    data_element = DataElement_from_raw(
                        data_element, self._character_set)
                data_element.private_creator = self[private_creator_tag].value
        dict.__setitem__(self, tag, data_element)

    def _slice_dataset(self, start, stop, step):
        """Return the element tags in the Dataset that match the slice.

        Parameters
        ----------
        start : int or 2-tuple of int or None
            The slice's starting element tag value, in any format accepted by
            pydicom.tag.Tag.
        stop : int or 2-tuple of int or None
            The slice's stopping element tag value, in any format accepted by
            pydicom.tag.Tag.
        step : int or None
            The slice's step size.

        Returns
        -------
        list of pydicom.tag.Tag
            The tags in the Dataset that meet the conditions of the slice.
""" # Check the starting/stopping Tags are valid when used if start is not None: start = Tag(start) if stop is not None: stop = Tag(stop) all_tags = sorted(self.keys()) # If the Dataset is empty, return an empty list if not all_tags: return [] # Special case the common situations: # - start and/or stop are None # - step is 1 if start is None: if stop is None: # For step=1 avoid copying the list return all_tags if step == 1 else all_tags[::step] else: # Have a stop value, get values until that point step1_list = list(takewhile(lambda x: x < stop, all_tags)) return step1_list if step == 1 else step1_list[::step] # Have a non-None start value. Find its index i_start = bisect_left(all_tags, start) if stop is None: return all_tags[i_start::step] else: i_stop = bisect_left(all_tags, stop) return all_tags[i_start:i_stop:step] def __str__(self): """Handle str(dataset).""" return self._pretty_str()
    def top(self):
        """Return a str of the Dataset's top level DataElements only."""
        return self._pretty_str(top_level_only=True)
    def trait_names(self):
        """Return a list of valid names for auto-completion code.

        Used in IPython, so that data element names can be found and offered
        for autocompletion on the IPython command line.
        """
        return dir(self)  # only valid python >=2.6, else use self.__dir__()
    def update(self, dictionary):
        """Extend dict.update() to handle DICOM keywords."""
        for key, value in list(dictionary.items()):
            if isinstance(key, (str, compat.text_type)):
                setattr(self, key, value)
            else:
                self[Tag(key)] = value
    def iterall(self):
        """Iterate through the Dataset, yielding all DataElements.

        Unlike Dataset.__iter__, this *does* recurse into sequences,
        and so returns all data elements as if the file were "flattened".

        Yields
        ------
        pydicom.dataelem.DataElement
        """
        for data_element in self:
            yield data_element
            if data_element.VR == "SQ":
                sequence = data_element.value
                for dataset in sequence:
                    for elem in dataset.iterall():
                        yield elem
    def walk(self, callback, recursive=True):
        """Iterate through the DataElements and run `callback` on each.

        Visit all DataElements, possibly recursing into sequences and their
        datasets. The callback function is called for each DataElement
        (including SQ element). Can be used to perform an operation on certain
        types of DataElements. E.g., remove_private_tags() finds all private
        tags and deletes them. DataElements will come back in DICOM order (by
        increasing tag number within their dataset).

        Parameters
        ----------
        callback
            A callable that takes two arguments:
                * a Dataset
                * a DataElement belonging to that Dataset
        recursive : bool
            Flag to indicate whether to recurse into Sequences.
        """
        taglist = sorted(self.keys())
        for tag in taglist:
            with tag_in_exception(tag):
                data_element = self[tag]
                callback(self, data_element)  # self = this Dataset
                # 'tag in self' below needed in case callback deleted
                #   data_element
                if recursive and tag in self and data_element.VR == "SQ":
                    sequence = data_element.value
                    for dataset in sequence:
                        dataset.walk(callback)
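
An illustrative walk() callback (assumed values; description() is used since it appears on DataElement in this version of the code):

>>> ds = Dataset()
>>> ds.BeamSequence = [Dataset()]
>>> ds.BeamSequence[0].Manufacturer = 'Linac, co.'
>>> def print_name(dataset, elem):
...     print(elem.description())
...
>>> ds.walk(print_name)
Beam Sequence
Manufacturer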
    __repr__ = __str__
class FileDataset(Dataset):
    """An extension of Dataset to make reading and writing to file-like
    easier.

    Attributes
    ----------
    preamble : str or bytes or None
        The optional DICOM preamble prepended to the dataset, if available.
    file_meta : pydicom.dataset.Dataset or None
        The Dataset's file meta information as a Dataset, if available
        (None if not present). Consists of group 0002 elements.
    filename : str or None
        The filename that the dataset was read from (if read from file) or
        None if the filename is not available (if read from a BytesIO or
        similar).
    fileobj_type
        The object type of the file-like the Dataset was read from.
    is_implicit_VR : bool
        True if the dataset encoding is implicit VR, False otherwise.
    is_little_endian : bool
        True if the dataset encoding is little endian byte ordering, False
        otherwise.
    timestamp : float or None
        The modification time of the file the dataset was read from, None if
        the modification time is not available.
    """

    def __init__(self, filename_or_obj, dataset, preamble=None,
                 file_meta=None, is_implicit_VR=True, is_little_endian=True):
        """Initialize a Dataset read from a DICOM file.

        Parameters
        ----------
        filename_or_obj : str or None
            Full path and filename to the file. Use None if is a BytesIO.
        dataset : Dataset or dict
            Some form of dictionary, usually a Dataset from read_dataset().
        preamble : bytes or str, optional
            The 128-byte DICOM preamble.
        file_meta : Dataset, optional
            The file meta info dataset, as returned by _read_file_meta, or an
            empty dataset if no file meta information is in the file.
        is_implicit_VR : bool, optional
            True (default) if implicit VR transfer syntax used; False if
            explicit VR.
        is_little_endian : bool, optional
            True (default) if little-endian transfer syntax used; False if
            big-endian.
        """
        Dataset.__init__(self, dataset)
        self.preamble = preamble
        self.file_meta = file_meta
        self.is_implicit_VR = is_implicit_VR
        self.is_little_endian = is_little_endian
        if isinstance(filename_or_obj, compat.string_types):
            self.filename = filename_or_obj
            self.fileobj_type = open
        elif isinstance(filename_or_obj, io.BufferedReader):
            self.filename = filename_or_obj.name
            # This is the appropriate constructor for io.BufferedReader
            self.fileobj_type = open
        else:
            # use __class__ python <2.7?;
            # http://docs.python.org/reference/datamodel.html
            self.fileobj_type = filename_or_obj.__class__
            if getattr(filename_or_obj, "name", False):
                self.filename = filename_or_obj.name
            elif getattr(filename_or_obj, "filename", False):
                # gzip python <2.7?
                self.filename = filename_or_obj.filename
            else:
                # e.g. came from BytesIO or something file-like
                self.filename = None
        self.timestamp = None
        if self.filename and os.path.exists(self.filename):
            statinfo = os.stat(self.filename)
            self.timestamp = statinfo.st_mtime

    def __eq__(self, other):
        """Compare `self` and `other` for equality.

        Returns
        -------
        bool
            The result if `self` and `other` are the same class
        NotImplemented
            If `other` is not the same class as `self` then returning
            NotImplemented delegates the result to
            superclass.__eq__(subclass)
        """
        # When comparing against self this will be faster
        if other is self:
            return True

        if isinstance(other, self.__class__):
            # Compare Elements using values() and class members using __dict__
            # Convert values() to a list for compatibility between
            #   python 2 and 3
            # Sort values() by element tag
            self_elem = sorted(list(self.values()), key=lambda x: x.tag)
            other_elem = sorted(list(other.values()), key=lambda x: x.tag)
            return self_elem == other_elem and self.__dict__ == other.__dict__

        return NotImplemented
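
FileDataset instances are normally produced by pydicom.dcmread() rather than constructed directly; a sketch with a hypothetical path ('image.dcm'), where the attribute values depend entirely on the file that is read:

>>> from pydicom import dcmread
>>> ds = dcmread('image.dcm')                 # hypothetical path
>>> uid = ds.file_meta.TransferSyntaxUID      # group 0002 info is in file_meta
>>> endian = ds.is_little_endian              # set from the transfer syntax
>>> ds.save_as('copy.dcm', write_like_original=False)  # conformant rewrite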