# Source code for pydicom.valuerep

# valuerep.py
"""Special classes for DICOM value representations (VR)"""
# Copyright (c) 2008-2012 Darcy Mason
# This file is part of pydicom, released under a modified MIT license.
#    See the file LICENSE included with this distribution, also
#    available at https://github.com/pydicom/pydicom
from copy import deepcopy
from decimal import Decimal
import re

from datetime import (date, datetime, time, timedelta)

# don't import datetime_conversion directly
from pydicom import config
from pydicom import compat
from pydicom.multival import MultiValue
from pydicom.util.fixes import timezone

# Defined here rather than imported from pydicom.charset, because importing
# from charset would create a circular import.
default_encoding = "iso8859"

# VRs whose explicit-VR data element header uses the longer format when
# reading/writing data elements.
# Taken from PS3.5 Section 7.1.2
extra_length_VRs = ('OB', 'OD', 'OF', 'OL', 'OW', 'SQ', 'UC', 'UN', 'UR', 'UT')

# VRs that can be affected by the character repertoire given in
# (0008,0005) Specific Character Set.
# See PS-3.5 (2011), section 6.1.2 Graphic Characters.
# PN is affected as well, but it is handled separately and is therefore
# not listed here.
text_VRs = ('SH', 'LO', 'ST', 'LT', 'UC', 'UR', 'UT')

# Regular expression (as bytes) that splits a person name into up to three
# '='-separated groups named single_byte, ideographic and phonetic. The
# single_byte group is further split on '^' into family_name, given_name,
# middle_name, name_prefix and name_suffix.
match_string = b''.join([
    b'(?P<single_byte>', br'(?P<family_name>[^=\^]*)',
    br'\^?(?P<given_name>[^=\^]*)', br'\^?(?P<middle_name>[^=\^]*)',
    br'\^?(?P<name_prefix>[^=\^]*)', br'\^?(?P<name_suffix>[^=\^]*)', b')',
    b'=?(?P<ideographic>[^=]*)', b'=?(?P<phonetic>[^=]*)$'
])

# The same pattern compiled twice: once for text input and once for bytes
# input (PersonName3.parse picks the one matching the value's type).
match_string_uni = re.compile(match_string.decode('iso8859'))
match_string_bytes = re.compile(match_string)


class DA(date):
    """Store value for DICOM VR DA (Date) as datetime.date.

    Note that the datetime.date base class is immutable.

    When the instance is built from a string, that string is kept in
    ``original_string`` and is what ``str()`` returns.
    """
    __slots__ = ['original_string']

    def __getstate__(self):
        """Return a dict of the slots that are currently set."""
        return {
            name: getattr(self, name)
            for name in self.__slots__ if hasattr(self, name)
        }

    def __setstate__(self, state):
        """Restore slot values from the dict made by ``__getstate__``."""
        for name, value in state.items():
            setattr(self, name, value)

    def __reduce__(self):
        """Extend the base class reduce tuple with the slot state."""
        return super(DA, self).__reduce__() + (self.__getstate__(), )

    def __reduce_ex__(self, protocol):
        """Same as ``__reduce__``; the pickle protocol is ignored."""
        return super(DA, self).__reduce__() + (self.__getstate__(), )

    def __new__(cls, val):
        """Create an instance of DA object.

        Raise an exception if the string cannot be parsed or the argument
        is otherwise incompatible.

        :param val: a string conformant to the DA definition in the DICOM
            Standard PS 3.5-2011 (``YYYYMMDD``), the older ACR-NEMA form
            ``YYYY.MM.DD``, an empty string, a ``datetime.date``, or any
            other value accepted by the ``datetime.date`` constructor.
        :return: the new DA instance, or ``None`` for an empty string.
        :raises ValueError: if a string cannot be converted.
        """
        if isinstance(val, (str, compat.string_types)):
            if val == '':
                return None  # empty date
            if len(val) == 8:
                fields = (val[0:4], val[4:6], val[6:8])
            elif len(val) == 10 and val[4] == '.' and val[7] == '.':
                # ACR-NEMA Standard 300, predecessor to DICOM
                # for compatibility with a few old pydicom example files
                fields = (val[0:4], val[5:7], val[8:10])
            else:
                # Not a recognised layout: hand it to the base class and
                # report its TypeError as a ValueError.
                try:
                    return super(DA, cls).__new__(cls, val)
                except TypeError:
                    raise ValueError("Cannot convert to datetime: '%s'" %
                                     (val))
            year, month, day = [int(field) for field in fields]
            return super(DA, cls).__new__(cls, year, month, day)
        if isinstance(val, date):
            return super(DA, cls).__new__(cls, val.year, val.month, val.day)
        return super(DA, cls).__new__(cls, val)

    def __init__(self, val):
        """Remember the source string, or copy it from another DA."""
        if isinstance(val, (str, compat.string_types)):
            self.original_string = val
        elif isinstance(val, DA) and hasattr(val, 'original_string'):
            self.original_string = val.original_string

    def __str__(self):
        """Return the original string if known, else the base class text."""
        if hasattr(self, 'original_string'):
            return self.original_string
        return super(DA, self).__str__()


class DT(datetime):
    """Store value for DICOM VR DT (DateTime) as datetime.datetime.

    Note that the datetime.datetime base class is immutable.

    When the instance is built from a string, that string is kept in
    ``original_string`` and is what ``str()`` returns.
    """
    __slots__ = ['original_string']
    # groups: 2 = date/time digits, 4 = fraction digits, 5 = "+HHMM"/"-HHMM"
    _regex_dt = re.compile(r"((\d{4,14})(\.(\d{1,6}))?)([+-]\d{4})?")

    def __getstate__(self):
        """Return a dict of the slots that are currently set."""
        return {
            name: getattr(self, name)
            for name in self.__slots__ if hasattr(self, name)
        }

    def __setstate__(self, state):
        """Restore slot values from the dict made by ``__getstate__``."""
        for name, value in state.items():
            setattr(self, name, value)

    def __reduce__(self):
        """Extend the base class reduce tuple with the slot state."""
        return super(DT, self).__reduce__() + (self.__getstate__(), )

    def __reduce_ex__(self, protocol):
        """Same as ``__reduce__``; the pickle protocol is ignored."""
        return super(DT, self).__reduce__() + (self.__getstate__(), )

    @staticmethod
    def _utc_offset(offset, name):
        """Return a fixed-offset timezone of `offset` seconds called `name`."""
        return timezone(timedelta(seconds=offset), name)

    @staticmethod
    def _two_digits(digits, start, default):
        """Return the two-digit field at `start`, or `default` if absent."""
        stop = start + 2
        if len(digits) < stop:
            return default
        return int(digits[start:stop])

    def __new__(cls, val):
        """Create an instance of DT object.

        Raise an exception if the string cannot be parsed or the argument
        is otherwise incompatible.

        :param val: a string conformant to the DT definition in the DICOM
            Standard PS 3.5-2011, a ``datetime.datetime``, or any other
            value accepted by the ``datetime.datetime`` constructor.
            Missing trailing components default to month 1, day 1 and
            zero for the time fields.
        :raises ValueError: if a string cannot be converted.
        """
        if isinstance(val, (str, compat.string_types)):
            match = DT._regex_dt.match(val)
            if not (match and len(val) <= 26):
                # Not a recognised layout: hand it to the base class and
                # report its TypeError as a ValueError.
                try:
                    return super(DT, cls).__new__(cls, val)
                except TypeError:
                    raise ValueError("Cannot convert to datetime: '%s'" %
                                     (val))

            digits = match.group(2)
            year = int(digits[0:4])
            month = cls._two_digits(digits, 4, 1)
            day = cls._two_digits(digits, 6, 1)
            hour = cls._two_digits(digits, 8, 0)
            minute = cls._two_digits(digits, 10, 0)
            if len(digits) < 14:
                # the fraction is only used when seconds are present
                second = 0
                microsecond = 0
            else:
                second = int(digits[12:14])
                fraction = match.group(4)
                if fraction:
                    # right-pad so e.g. ".5" becomes 500000 microseconds
                    microsecond = int(fraction.rstrip().ljust(6, '0'))
                else:
                    microsecond = 0

            tz_text = match.group(5)
            if tz_text:
                total_minutes = int(tz_text[1:3]) * 60 + int(tz_text[3:5])
                offset = total_minutes * 60
                if tz_text[0] == '-':
                    offset = -offset
                tzinfo = cls._utc_offset(offset, tz_text)
            else:
                tzinfo = None
            return super(DT, cls).__new__(cls, year, month, day, hour,
                                          minute, second, microsecond,
                                          tzinfo)
        if isinstance(val, datetime):
            return super(DT, cls).__new__(cls, val.year, val.month, val.day,
                                          val.hour, val.minute, val.second,
                                          val.microsecond, val.tzinfo)
        return super(DT, cls).__new__(cls, val)

    def __init__(self, val):
        """Remember the source string, or copy it from another DT."""
        if isinstance(val, (str, compat.string_types)):
            self.original_string = val
        elif isinstance(val, DT) and hasattr(val, 'original_string'):
            self.original_string = val.original_string

    def __str__(self):
        """Return the original string if known, else the base class text."""
        if hasattr(self, 'original_string'):
            return self.original_string
        return super(DT, self).__str__()


class TM(time):
    """Store value for DICOM VR of TM (Time) as datetime.time.

    Note that the datetime.time base class is immutable.

    When the instance is built from a string, that string is kept in
    ``original_string`` and is what ``str()`` returns.
    """
    __slots__ = ['original_string']
    # groups: 1 = HH[MM[SS]] digits, 3 = fraction digits
    _regex_tm = re.compile(r"(\d{2,6})(\.(\d{1,6}))?")

    def __getstate__(self):
        """Return a dict of the slots that are currently set."""
        return {
            name: getattr(self, name)
            for name in self.__slots__ if hasattr(self, name)
        }

    def __setstate__(self, state):
        """Restore slot values from the dict made by ``__getstate__``."""
        for name, value in state.items():
            setattr(self, name, value)

    def __reduce__(self):
        """Extend the base class reduce tuple with the slot state."""
        return super(TM, self).__reduce__() + (self.__getstate__(), )

    def __reduce_ex__(self, protocol):
        """Same as ``__reduce__``; the pickle protocol is ignored."""
        return super(TM, self).__reduce__() + (self.__getstate__(), )

    def __new__(cls, val):
        """Create an instance of TM object from a string.

        Raise an exception if the string cannot be parsed or the argument
        is otherwise incompatible.

        :param val: a string conformant to the TM definition in the DICOM
            Standard PS 3.5-2011, an empty string, a ``datetime.time``, or
            any other value accepted by the ``datetime.time`` constructor.
            Missing minutes/seconds default to zero.
        :return: the new TM instance, or ``None`` for an empty string.
        :raises ValueError: if a string cannot be converted.
        """
        if isinstance(val, (str, compat.string_types)):
            match = TM._regex_tm.match(val)
            if match and len(val) <= 16:
                digits = match.group(1)
                hour = int(digits[0:2])
                minute = 0 if len(digits) < 4 else int(digits[2:4])
                if len(digits) < 6:
                    # the fraction is only used when seconds are present
                    second = 0
                    microsecond = 0
                else:
                    second = int(digits[4:6])
                    fraction = match.group(3)
                    if fraction:
                        # right-pad so e.g. ".5" becomes 500000 microseconds
                        microsecond = int(fraction.rstrip().ljust(6, '0'))
                    else:
                        microsecond = 0
                return super(TM, cls).__new__(cls, hour, minute, second,
                                              microsecond)
            if val == '':
                return None  # empty time
            # Not a recognised layout: hand it to the base class and
            # report its TypeError as a ValueError.
            try:
                return super(TM, cls).__new__(cls, val)
            except TypeError:
                raise ValueError("Cannot convert to datetime: '%s'" % (val))
        if isinstance(val, time):
            return super(TM, cls).__new__(cls, val.hour, val.minute,
                                          val.second, val.microsecond)
        return super(TM, cls).__new__(cls, val)

    def __init__(self, val):
        """Remember the source string, or copy it from another TM."""
        if isinstance(val, (str, compat.string_types)):
            self.original_string = val
        elif isinstance(val, TM) and hasattr(val, 'original_string'):
            self.original_string = val.original_string

    def __str__(self):
        """Return the original string if known, else the base class text."""
        if hasattr(self, 'original_string'):
            return self.original_string
        return super(TM, self).__str__()


class DSfloat(float):
    """Store values for DICOM VR of DS (Decimal String) as a float.

    The string the value was created from, if any, is kept in
    ``original_string`` so that it can be written back out unchanged.

    This class does not special-case the empty string; use the ``DS``
    factory function when blank values have to be accepted.
    """
    __slots__ = ['original_string']

    def __getstate__(self):
        """Return a dict of the slots that are currently set."""
        return {
            name: getattr(self, name)
            for name in self.__slots__ if hasattr(self, name)
        }

    def __setstate__(self, state):
        """Restore slot values from the dict made by ``__getstate__``."""
        for name, value in state.items():
            setattr(self, name, value)

    def __init__(self, val):
        """Store the original string if one given, for exact write-out of same
        value later.
        """
        # ... also if user changes a data element value, then will get
        # a different object, because float is immutable.
        if isinstance(val, (str, compat.text_type)):
            self.original_string = val
        elif (isinstance(val, (DSfloat, DSdecimal)) and
              hasattr(val, 'original_string')):
            self.original_string = val.original_string

    def __str__(self):
        """Return the original string if known, else the float's text."""
        if hasattr(self, 'original_string'):
            return self.original_string
        # Format a plain float rather than calling float.__str__ on self:
        # from Python 3.8 on float has no __str__ of its own, so that call
        # lands in object.__str__, which calls __repr__, which calls str()
        # again and recurses without end.
        return str(float(self))

    def __repr__(self):
        """Return the string form wrapped in double quotes."""
        return '"' + str(self) + '"'


class DSdecimal(Decimal):
    """Store values for DICOM VR of DS (Decimal String).
    Note: if constructed by an empty string, returns the empty string,
    not an instance of this class.
    """
    __slots__ = ['original_string']

    def __getstate__(self):
        """Return a dict of the slots that are currently set."""
        return {
            name: getattr(self, name)
            for name in self.__slots__ if hasattr(self, name)
        }

    def __setstate__(self, state):
        """Restore slot values from the dict made by ``__getstate__``."""
        for name, value in state.items():
            setattr(self, name, value)

    def __new__(cls, val):
        """Create an instance of DS object, or return a blank string if one is
        passed in, e.g. from a type 2 DICOM blank value.

        :param val: val must be a string or a number type which can be
                   converted to a decimal
        :raises TypeError: if `val` is a float and
            ``config.allow_DS_float`` is not set.
        :raises OverflowError: if the value needs more than 16 characters
            and length enforcement is active.
        """
        # Start from the global setting; it is relaxed below for strings
        # that are already short enough, so that a valid input string is
        # still accepted even if decimal.Decimal would render it longer.
        enforce_length = config.enforce_valid_values
        # DICOM allows spaces around the string,
        # but python doesn't, so clean it
        if isinstance(val, (str, compat.text_type)):
            val = val.strip()
            # The input string itself fits in 16 characters, i.e. it is
            # valid, so do not enforce the length for this instance.
            if len(val) <= 16:
                enforce_length = False
        if val == '':
            return val
        if isinstance(val, float) and not config.allow_DS_float:
            msg = ("DS cannot be instantiated with a float value, "
                   "unless config.allow_DS_float is set to True. "
                   "It is recommended to convert to a string instead, "
                   "with the desired number of digits, or use "
                   "Decimal.quantize and pass a Decimal instance.")
            raise TypeError(msg)
        # Convert everything that is not already an instance of this class.
        # This includes plain Decimal values: returning those unchanged
        # would hand back an object that is not a DSdecimal and for which
        # __init__ is never run.
        if not isinstance(val, cls):
            val = super(DSdecimal, cls).__new__(cls, val)
        if len(str(val)) > 16 and enforce_length:
            msg = ("DS value representation must be <= 16 "
                   "characters by DICOM standard. Initialize with "
                   "a smaller string, or set config.enforce_valid_values "
                   "to False to override, or use Decimal.quantize() and "
                   "initialize with a Decimal instance.")
            raise OverflowError(msg)
        return val

    def __init__(self, val):
        """Store the original string if one given, for exact write-out of same
        value later. E.g. if set '1.23e2', Decimal would write '123', but DS
        will use the original
        """
        # ... also if user changes a data element value, then will get
        # a different Decimal, as Decimal is immutable.
        if isinstance(val, (str, compat.text_type)):
            self.original_string = val
        elif (isinstance(val, (DSfloat, DSdecimal)) and
              hasattr(val, 'original_string')):
            self.original_string = val.original_string

    def __str__(self):
        """Return the original string when it is known and at most 16
        characters long, else the Decimal's own text.
        """
        original = getattr(self, 'original_string', None)
        if original is not None and len(original) <= 16:
            return original
        return super(DSdecimal, self).__str__()

    def __repr__(self):
        """Return the string form wrapped in double quotes."""
        return '"' + str(self) + '"'


# CHOOSE TYPE OF DS
# The DS factory function instantiates whichever class is selected here,
# based on the config.use_DS_decimal setting read at import time.
DSclass = DSdecimal if config.use_DS_decimal else DSfloat


def DS(val):
    """Factory function for creating DS class instances.

    Checks for blank string; if so, return that.
    Else calls DSfloat or DSdecimal to create the class
    instance. This avoids overriding __new__ in DSfloat
    (which carries a time penalty for large arrays of DS).
    Similarly the string clean and check can be avoided
    and DSfloat called directly if a string has already
    been processed.

    :param val: the value to convert; strings are stripped of surrounding
        whitespace first.
    :return: ``''`` if `val` is ``None`` or blank, else an instance of the
        class bound to ``DSclass``.
    """
    if isinstance(val, (str, compat.text_type)):
        val = val.strip()
    if val is None or val == '':
        return ''
    return DSclass(val)


class IS(int):
    """Derived class of int. Stores original integer
    string for exact rewriting
    of the string originally read or stored.
    """
    if compat.in_py2:
        __slots__ = ['original_string']

        # Unlikely that str(int) will not be the
        # same as the original, but could happen
        # with leading zeros.

        def __getstate__(self):
            """Return a dict of the slots that are currently set."""
            return {
                name: getattr(self, name)
                for name in self.__slots__ if hasattr(self, name)
            }

        def __setstate__(self, state):
            """Restore slot values from the dict made by __getstate__."""
            for name, value in state.items():
                setattr(self, name, value)

    def __new__(cls, val):
        """Create instance if new integer string

        :param val: the value to convert.
        :return: ``''`` if `val` is ``None`` or a blank string, else the
            integer value.
        :raises TypeError: if a float or Decimal cannot be converted
            without loss.
        :raises OverflowError: if the value is outside -2**31..2**31-1 and
            ``config.enforce_valid_values`` is set.
        """
        if val is None:
            return ''
        if isinstance(val, (str, compat.text_type)) and val.strip() == '':
            return ''
        # Overflow error in Python 2 for integers too large
        # while calling super(IS). Fall back on the regular int
        # casting that will automatically convert the val to long
        # if needed.
        try:
            newval = super(IS, cls).__new__(cls, val)
        except OverflowError:
            newval = int(val)
        # check if a float or Decimal passed in, then could have lost info,
        # and will raise error. E.g. IS(Decimal('1')) is ok, but not IS(1.23)
        if isinstance(val, (float, Decimal)) and newval != val:
            raise TypeError("Could not convert value to integer without loss")
        # Checks in case underlying int is >32 bits, DICOM does not allow this
        out_of_range = newval < -2**31 or newval >= 2**31
        if out_of_range and config.enforce_valid_values:
            dcm_limit = "-2**31 to (2**31 - 1) for IS"
            message = "Value exceeds DICOM limits of %s" % (dcm_limit)
            raise OverflowError(message)
        return newval

    def __init__(self, val):
        """Remember the source string, or copy it from another IS."""
        # If a string passed, then store it
        if isinstance(val, (str, compat.text_type)):
            self.original_string = val
        elif isinstance(val, IS) and hasattr(val, 'original_string'):
            self.original_string = val.original_string

    def __str__(self):
        """Return the plain decimal digits of the integer value."""
        # Defined explicitly because from Python 3.8 on int has no __str__
        # of its own; str() would otherwise go through __repr__ below and
        # return the quoted form.
        return str(int(self))

    def __repr__(self):
        """Return the original string in single quotes if known, else the
        integer digits in double quotes.
        """
        if hasattr(self, 'original_string'):
            return "'" + self.original_string + "'"
        # Format a plain int: calling int.__str__(self) recurses into this
        # method on Python 3.8+, where it resolves to object.__str__.
        return "\"" + str(int(self)) + "\""


def MultiString(val, valtype=str):
    """Split a bytestring by delimiters if there are any

    :param val: DICOM string to split up on backslashes.
    :param valtype: default str, but can be e.g. UID to overwrite to a
        specific type; applied to each resulting value.
    :return: `val` itself if it is empty, a single converted value if
        there is no backslash, else a ``MultiValue`` of the parts.
    """
    # Remove trailing blank used to pad to even length
    # 2005.05.25: also check for trailing 0, error made
    # in PET files we are converting
    if val and val.endswith((' ', '\x00')):
        val = val[:-1]

    parts = val.split("\\")
    if len(parts) > 1:
        return MultiValue(valtype, parts)

    single = parts[0]
    # an empty value is passed through without conversion
    return valtype(single) if single else single


class PersonName3(object):
    """Person name (PN) value holding either text or bytes.

    The value is split into the ``single_byte``, ``ideographic`` and
    ``phonetic`` component groups, and the first group into
    ``family_name``, ``given_name``, ``middle_name``, ``name_prefix`` and
    ``name_suffix``; all of these are set as instance attributes.
    """

    def __init__(self, val, encodings=default_encoding):
        """Store `val` and parse it into its components.

        :param val: the name as text or bytes, or another PersonName3
            whose original string and encodings are reused.
        :param encodings: a single encoding or a list of encodings.
        """
        if isinstance(val, PersonName3):
            encodings = val.encodings
            val = val.original_string

        self.original_string = val

        self.encodings = self._verify_encodings(encodings)
        self.parse(val)

    def parse(self, val):
        """Split `val` into component groups and name parts."""
        # pick the pattern that matches the type of the value
        if isinstance(val, bytes):
            pattern = match_string_bytes
        else:
            pattern = match_string_uni

        matchobj = pattern.match(val)

        # every named group becomes an attribute of the same name
        self.__dict__.update(matchobj.groupdict())

        self.components = [
            matchobj.group(name)
            for name in ('single_byte', 'ideographic', 'phonetic')
        ]

    def __eq__(self, other):
        """Compare the original string with `other`."""
        return self.original_string == other

    def __ne__(self, other):
        return not self == other

    def __str__(self):
        return self.original_string.__str__()

    def __repr__(self):
        return self.original_string.__repr__()

    # For python 3, any override of __cmp__ or __eq__
    # immutable requires explicit redirect of hash
    # function to the parent class See
    # See http://docs.python.org/
    #  dev/3.0/reference/datamodel.html#object.__hash__
    __hash__ = object.__hash__

    def decode(self, encodings=None):
        """Return a new PersonName3 holding the decoded (text) name.

        :param encodings: encodings to use; ``None`` means the ones stored
            on this instance.
        """
        encodings = self._verify_encodings(encodings)

        from pydicom.charset import clean_escseq
        if isinstance(self.components[0], bytes):
            comps = [
                clean_escseq(comp.decode(enc), encodings)
                for comp, enc in zip(self.components, encodings)
            ]
        else:
            # Work on a copy: trimming self.components in place would
            # leave it empty for a blank name and make the next call fail
            # on self.components[0].
            comps = list(self.components)

        # Remove empty elements from the end
        while comps and not comps[-1]:
            comps.pop()

        return PersonName3('='.join(comps), encodings)

    def encode(self, encodings=None):
        """Return the name as bytes with '=' between component groups.

        :param encodings: encodings to use; ``None`` means the ones stored
            on this instance.
        """
        encodings = self._verify_encodings(encodings)

        if isinstance(self.components[0], bytes):
            # copy so that trimming below does not alter self.components
            comps = list(self.components)
        else:
            comps = [
                comp.encode(enc)
                for comp, enc in zip(self.components, encodings)
            ]

        # Remove empty elements from the end
        while comps and not comps[-1]:
            comps.pop()

        return b'='.join(comps)

    def family_comma_given(self):
        """Return name as 'Family-name, Given-name'"""
        return self.formatted('%(family_name)s, %(given_name)s')

    def formatted(self, format_str):
        """Return `format_str` filled in from this name's attributes.

        A bytes name is decoded with the default encoding first.
        """
        if isinstance(self.original_string, bytes):
            return format_str % self.decode(default_encoding).__dict__
        return format_str % self.__dict__

    def _verify_encodings(self, encodings):
        """Return `encodings` as a list, one entry per component group.

        ``None`` yields the encodings stored on the instance, a single
        value is repeated three times and a two-item list gets its second
        entry repeated. A list argument is copied, never modified.
        """
        if encodings is None:
            return self.encodings

        if not isinstance(encodings, list):
            return [encodings] * 3

        encodings = list(encodings)
        if len(encodings) == 2:
            encodings.append(encodings[1])

        return encodings


class PersonNameBase(object):
    """Base class for Person Name classes

    Meant to be combined with a string type, since ``parse`` relies on
    ``self.split``.
    """

    def __init__(self, val):
        """Initialize the PN properties"""
        # Note normally use __new__ on subclassing an immutable,
        # but here we just want to do some pre-processing
        # for properties PS 3.5-2008 section 6.2 (p.28)
        # and 6.2.1 describes PN. Briefly:
        # single-byte-characters=ideographic
        # characters=phonetic-characters
        # (each with?):
        #   family-name-complex
        #  ^Given-name-complex
        #  ^Middle-name^name-prefix^name-suffix
        self.parse()

    def formatted(self, format_str):
        """Return a formatted string according to the format pattern

        Use "...%(property)...%(property)..." where property
        is one of family_name, given_name,
                  middle_name, name_prefix,
                  name_suffix
        """
        return format_str % self.__dict__

    def parse(self):
        """Break down the components and name parts"""
        self.components = self.split("=")
        # groups that are not present default to the empty string
        groups = self.components[:3]
        groups += [''] * (3 - len(groups))
        self.single_byte, self.ideographic, self.phonetic = groups

        if self.single_byte:
            # pad with separators in case trailing items are left out,
            # then keep only the five defined name parts
            parts = (self.single_byte + "^^^^").split("^")[:5]
        else:
            parts = [''] * 5
        (self.family_name, self.given_name, self.middle_name,
         self.name_prefix, self.name_suffix) = parts


class PersonName(PersonNameBase, bytes):
    """Human-friendly class to hold VR of Person Name (PN)

    Name is parsed into the following properties:
    single-byte, ideographic, and phonetic components
    (PS3.5-2008 6.2.1)
    family_name,
    given_name,
    middle_name,
    name_prefix,
    name_suffix

    """

    def __new__(cls, val):
        """Return instance of the new class"""
        # A value that has already been converted is returned as is
        if isinstance(val, PersonName):
            return val
        return super(PersonName, cls).__new__(cls, val)

    def encode(self, *args):
        """Dummy method to mimic py2 str behavior in py3 bytes subclass"""
        # This greatly simplifies the write process so all objects have the
        # "encode" method
        return self

    def family_comma_given(self):
        """Return name as 'Family-name, Given-name'"""
        return self.formatted("%(family_name)s, %(given_name)s")

    # XXX need to process the ideographic or phonetic components?


class PersonNameUnicode(PersonNameBase, compat.text_type):
    """Unicode version of Person Name"""

    def __new__(cls, val, encodings):
        """Return unicode string after conversion of each part

        :param val: the PN value to store, as bytes.
        :param encodings: a python encoding or a list of python encodings,
            generally found from the pydicom.charset.python_encodings
            mapping of values in DICOM data element (0008,0005). A list
            passed in is not modified.
        """
        # in here to avoid circular import
        from pydicom.charset import clean_escseq

        # Make the possible three character encodings explicit. A list is
        # copied first, because entries are appended/removed below and the
        # caller may reuse the same list for several names.
        if isinstance(encodings, list):
            encodings = list(encodings)
        else:
            encodings = [encodings] * 3
        if len(encodings) == 2:
            encodings.append(encodings[1])
        components = val.split(b"=")
        # Remove the first encoding if only one component is present
        if len(components) == 1:
            del encodings[0]

        decoded = [
            clean_escseq(comp.decode(enc), encodings)
            for comp, enc in zip(components, encodings)
        ]
        new_val = u"=".join(decoded)

        return compat.text_type.__new__(cls, new_val)

    def __init__(self, val, encodings):
        """Store the normalized encodings and parse the name parts."""
        self.encodings = self._verify_encodings(encodings)
        PersonNameBase.__init__(self, val)

    def __copy__(self):
        """Correctly copy object.
        Needed because of the overwritten __new__.
        """
        # no need to use the original encoding here - we just encode and
        # decode in utf-8 and set the original encoding later
        name = compat.text_type(self).encode('utf8')
        new_person = PersonNameUnicode(name, 'utf8')
        new_person.__dict__.update(self.__dict__)
        return new_person

    def __deepcopy__(self, memo):
        """Make correctly a deep copy of the object.
        Needed because of the overwritten __new__.
        """
        name = compat.text_type(self).encode('utf8')
        new_person = PersonNameUnicode(name, 'utf8')
        memo[id(self)] = new_person
        for key, value in self.__dict__.items():
            setattr(new_person, key, deepcopy(value, memo))
        return new_person

    def _verify_encodings(self, encodings):
        """Checks the encoding to ensure proper format

        ``None`` yields the encodings stored on the instance, a single
        value is repeated three times and a two-item list gets its second
        entry repeated. A list argument is copied, never modified.
        """
        if encodings is None:
            return self.encodings

        if not isinstance(encodings, list):
            return [encodings] * 3

        encodings = list(encodings)
        if len(encodings) == 2:
            encodings.append(encodings[1])

        return encodings

    def encode(self, encodings):
        """Encode the unicode using the specified encoding

        :return: the encoded component groups joined with ``b'='``.
        """
        encodings = self._verify_encodings(encodings)

        components = self.split('=')

        comps = [comp.encode(enc) for comp, enc in zip(components, encodings)]

        # Remove empty elements from the end
        while comps and not comps[-1]:
            comps.pop()

        # the parts are encoded byte strings, so the separator must be one
        return b'='.join(comps)

    def family_comma_given(self):
        """Return name as 'Family-name, Given-name'"""
        # %s, not %u: in printf-style formatting %u is an obsolete alias
        # of %d and raises TypeError for text values.
        return self.formatted(u"%(family_name)s, %(given_name)s")