# valuerep.py
"""Special classes for DICOM value representations (VR)"""
# Copyright (c) 2008-2012 Darcy Mason
# This file is part of pydicom, released under a modified MIT license.
# See the file LICENSE included with this distribution, also
# available at https://github.com/pydicom/pydicom
from copy import deepcopy
from decimal import Decimal
import re
from datetime import (date, datetime, time, timedelta)
# don't import datetime_conversion directly
from pydicom import config
from pydicom import compat
from pydicom.multival import MultiValue
from pydicom.util.fixes import timezone
# The default encoding is spelled out here instead of being imported from
# the charset module, because importing it would be circular.
default_encoding = "iso8859"

# For reading/writing data elements, these VRs use the longer explicit VR
# format.  Taken from PS3.5 Section 7.1.2.
extra_length_VRs = (
    'OB', 'OD', 'OF', 'OL', 'OW', 'SQ', 'UC', 'UN', 'UR', 'UT',
)

# VRs that can be affected by the character repertoire given in
# (0008,0005) Specific Character Set.
# See PS-3.5 (2011), section 6.1.2 Graphic Characters.
# PN is affected as well, but it is handled separately.
text_VRs = ('SH', 'LO', 'ST', 'LT', 'UC', 'UR', 'UT')

# Pattern source (bytes) for a PN value.  The first component group
# ("single_byte") holds up to five '^'-separated name parts; it may be
# followed by '='-separated "ideographic" and "phonetic" groups.
match_string = (
    b'(?P<single_byte>'
    br'(?P<family_name>[^=\^]*)'
    br'\^?(?P<given_name>[^=\^]*)'
    br'\^?(?P<middle_name>[^=\^]*)'
    br'\^?(?P<name_prefix>[^=\^]*)'
    br'\^?(?P<name_suffix>[^=\^]*)'
    b')'
    b'=?(?P<ideographic>[^=]*)'
    b'=?(?P<phonetic>[^=]*)$'
)

# The same pattern compiled twice: once for bytes input, once for text.
match_string_bytes = re.compile(match_string)
match_string_uni = re.compile(match_string.decode('iso8859'))
class DA(date):
    """Store value for DICOM VR DA (Date) as datetime.date.

    Note that the datetime.date base class is immutable.  When the value
    is created from a string (or from a DA that carries one), that string
    is kept in ``original_string`` and is what ``str()`` returns.
    """
    __slots__ = ['original_string']

    def __getstate__(self):
        """Return a dict of the slots that are currently set."""
        state = {}
        for slot in self.__slots__:
            if hasattr(self, slot):
                state[slot] = getattr(self, slot)
        return state

    def __setstate__(self, state):
        """Restore the slots saved by ``__getstate__``."""
        for slot, value in state.items():
            setattr(self, slot, value)

    def __reduce__(self):
        """Extend the base class reduce tuple with the slot state."""
        base = super(DA, self).__reduce__()
        return base + (self.__getstate__(), )

    def __reduce_ex__(self, protocol):
        """Same as ``__reduce__``; the protocol number is not used."""
        base = super(DA, self).__reduce__()
        return base + (self.__getstate__(), )

    def __new__(cls, val):
        """Create an instance of DA object.

        Raise an exception if the string cannot be parsed or the argument
        is otherwise incompatible.

        :param val: val must be a string conformant to the DA definition
                    in the DICOM Standard PS 3.5-2011
        :return: a DA instance, or None when ``val`` is the empty string
        """
        new_date = super(DA, cls).__new__

        if not isinstance(val, (str, compat.string_types)):
            if isinstance(val, date):
                return new_date(cls, val.year, val.month, val.day)
            return new_date(cls, val)

        size = len(val)
        if size == 8:
            # YYYYMMDD
            return new_date(cls, int(val[0:4]), int(val[4:6]), int(val[6:8]))
        if size == 10 and val[4] == '.' and val[7] == '.':
            # YYYY.MM.DD: ACR-NEMA Standard 300, predecessor to DICOM,
            # for compatibility with a few old pydicom example files
            return new_date(cls, int(val[0:4]), int(val[5:7]),
                            int(val[8:10]))
        if val == '':
            return None  # empty date

        try:
            return new_date(cls, val)
        except TypeError:
            raise ValueError("Cannot convert to datetime: '%s'" %
                             (val))

    def __init__(self, val):
        """Remember the string this value was built from, if any."""
        if isinstance(val, (str, compat.string_types)):
            self.original_string = val
        elif isinstance(val, DA) and hasattr(val, 'original_string'):
            self.original_string = val.original_string

    def __str__(self):
        """Return the original string when stored, else the date text."""
        try:
            return self.original_string
        except AttributeError:
            return super(DA, self).__str__()
class DT(datetime):
    """Store value for DICOM VR DT (DateTime) as datetime.datetime.

    Note that the datetime.datetime base class is immutable.  When the
    value is created from a string (or from a DT that carries one), that
    string is kept in ``original_string`` and is what ``str()`` returns.
    """
    __slots__ = ['original_string']
    # group 2: 4-14 digits, group 4: fraction digits, group 5: +/-HHMM
    _regex_dt = re.compile(r"((\d{4,14})(\.(\d{1,6}))?)([+-]\d{4})?")

    def __getstate__(self):
        """Return a dict of the slots that are currently set."""
        return dict((slot, getattr(self, slot)) for slot in self.__slots__
                    if hasattr(self, slot))

    def __setstate__(self, state):
        """Restore the slots saved by ``__getstate__``."""
        for slot, value in state.items():
            setattr(self, slot, value)

    def __reduce__(self):
        """Extend the base class reduce tuple with the slot state."""
        return super(DT, self).__reduce__() + (self.__getstate__(), )

    def __reduce_ex__(self, protocol):
        """Same as ``__reduce__``; the protocol number is not used."""
        return super(DT, self).__reduce__() + (self.__getstate__(), )

    @staticmethod
    def _utc_offset(offset, name):
        """Return a fixed-offset timezone of `offset` seconds named `name`."""
        return timezone(timedelta(seconds=offset), name)

    def __new__(cls, val):
        """Create an instance of DT object.

        Raise an exception if the string cannot be parsed or the argument
        is otherwise incompatible.

        :param val: val must be a string conformant to the DT definition
                    in the DICOM Standard PS 3.5-2011
        :return: a DT instance, or None when ``val`` is the empty string
        :raises ValueError: if a non-empty string cannot be converted
        """
        if isinstance(val, (str, compat.string_types)):
            match = DT._regex_dt.match(val)
            if match and len(val) <= 26:
                digits = match.group(2)
                n_digits = len(digits)

                # Components that are left out default to the first
                # month/day and to zero for the time fields.
                year = int(digits[0:4])
                month = int(digits[4:6]) if n_digits >= 6 else 1
                day = int(digits[6:8]) if n_digits >= 8 else 1
                hour = int(digits[8:10]) if n_digits >= 10 else 0
                minute = int(digits[10:12]) if n_digits >= 12 else 0

                # The fraction is only used when seconds are present.
                second = 0
                microsecond = 0
                if n_digits >= 14:
                    second = int(digits[12:14])
                    fraction = match.group(4)
                    if fraction:
                        microsecond = int(fraction.rstrip().ljust(6, '0'))

                tzinfo = None
                tz_match = match.group(5)
                if tz_match:
                    offset1 = int(tz_match[1:3]) * 60
                    offset2 = int(tz_match[3:5])
                    offset = (offset1 + offset2) * 60
                    if tz_match[0] == '-':
                        offset = -offset
                    tzinfo = cls._utc_offset(offset, tz_match)

                val = super(DT,
                            cls).__new__(cls, year, month, day, hour, minute,
                                         second, microsecond, tzinfo)
            elif val == '':
                # Same treatment of an empty value as DA and TM.
                val = None  # empty datetime
            else:
                try:
                    val = super(DT, cls).__new__(cls, val)
                except TypeError:
                    raise ValueError("Cannot convert to datetime: '%s'" %
                                     (val))
        elif isinstance(val, datetime):
            val = super(DT, cls).__new__(cls, val.year, val.month, val.day,
                                         val.hour, val.minute, val.second,
                                         val.microsecond, val.tzinfo)
        else:
            val = super(DT, cls).__new__(cls, val)
        return val

    def __init__(self, val):
        """Remember the string this value was built from, if any."""
        if isinstance(val, (str, compat.string_types)):
            self.original_string = val
        elif isinstance(val, DT) and hasattr(val, 'original_string'):
            self.original_string = val.original_string

    def __str__(self):
        """Return the original string when stored, else the datetime text."""
        if hasattr(self, 'original_string'):
            return self.original_string
        else:
            return super(DT, self).__str__()
class TM(time):
    """Store value for DICOM VR of TM (Time) as datetime.time.

    Note that the datetime.time base class is immutable.  When the value
    is created from a string (or from a TM that carries one), that string
    is kept in ``original_string`` and is what ``str()`` returns.
    """
    __slots__ = ['original_string']
    # group 1: 2-6 digits (HH[MM[SS]]), group 3: fraction digits
    _regex_tm = re.compile(r"(\d{2,6})(\.(\d{1,6}))?")

    def __getstate__(self):
        """Return a dict of the slots that are currently set."""
        return dict((slot, getattr(self, slot)) for slot in self.__slots__
                    if hasattr(self, slot))

    def __setstate__(self, state):
        """Restore the slots saved by ``__getstate__``."""
        for slot, value in state.items():
            setattr(self, slot, value)

    def __reduce__(self):
        """Extend the base class reduce tuple with the slot state."""
        return super(TM, self).__reduce__() + (self.__getstate__(), )

    def __reduce_ex__(self, protocol):
        """Same as ``__reduce__``; the protocol number is not used."""
        return super(TM, self).__reduce__() + (self.__getstate__(), )

    def __new__(cls, val):
        """Create an instance of TM object from a string.

        Raise an exception if the string cannot be parsed or the argument
        is otherwise incompatible.

        :param val: val must be a string conformant to the TM definition
                    in the DICOM Standard PS 3.5-2011
        :return: a TM instance, or None when ``val`` is the empty string
        :raises ValueError: if a non-empty string cannot be converted
        """
        if isinstance(val, (str, compat.string_types)):
            match = TM._regex_tm.match(val)
            if match and len(val) <= 16:
                digits = match.group(1)
                n_digits = len(digits)

                hour = int(digits[0:2])
                minute = int(digits[2:4]) if n_digits >= 4 else 0

                # The fraction is only used when seconds are present.
                second = 0
                microsecond = 0
                if n_digits >= 6:
                    second = int(digits[4:6])
                    fraction = match.group(3)
                    if fraction:
                        microsecond = int(fraction.rstrip().ljust(6, '0'))

                val = super(TM, cls).__new__(cls, hour, minute, second,
                                             microsecond)
            elif val == '':
                val = None  # empty time
            else:
                try:
                    val = super(TM, cls).__new__(cls, val)
                except TypeError:
                    # Message now closes the quote around the value, as
                    # the DA and DT messages do.
                    raise ValueError("Cannot convert to datetime: '%s'" %
                                     (val))
        elif isinstance(val, time):
            val = super(TM, cls).__new__(cls, val.hour, val.minute, val.second,
                                         val.microsecond)
        else:
            val = super(TM, cls).__new__(cls, val)
        return val

    def __init__(self, val):
        """Remember the string this value was built from, if any."""
        if isinstance(val, (str, compat.string_types)):
            self.original_string = val
        elif isinstance(val, TM) and hasattr(val, 'original_string'):
            self.original_string = val.original_string

    def __str__(self):
        """Return the original string when stored, else the time text."""
        if hasattr(self, 'original_string'):
            return self.original_string
        else:
            return super(TM, self).__str__()
class DSfloat(float):
    """Store values for DICOM VR of DS (Decimal String) as a float.

    If constructed from an empty string, return the empty string,
    not an instance of this class.
    """
    __slots__ = ['original_string']

    def __getstate__(self):
        """Return a dict of the slots that are currently set."""
        state = {}
        for slot in self.__slots__:
            if hasattr(self, slot):
                state[slot] = getattr(self, slot)
        return state

    def __setstate__(self, state):
        """Restore the slots saved by ``__getstate__``."""
        for slot, value in state.items():
            setattr(self, slot, value)

    def __init__(self, val):
        """Keep the original string, if one was given, so that exactly the
        same text can be written out later.
        """
        # Because float is immutable, a user who changes a data element
        # value gets a different object, which will not carry this string.
        carries_string = hasattr(val, 'original_string')
        if isinstance(val, (str, compat.text_type)):
            self.original_string = val
        elif carries_string and isinstance(val, (DSfloat, DSdecimal)):
            self.original_string = val.original_string

    def __str__(self):
        """Return the original string when stored, else the float text."""
        try:
            return self.original_string
        except AttributeError:
            return super(DSfloat, self).__str__()

    def __repr__(self):
        """Return ``str(self)`` wrapped in double quotes."""
        return '"' + str(self) + '"'
class DSdecimal(Decimal):
    """Store values for DICOM VR of DS (Decimal String).

    Note: if constructed by an empty string, returns the empty string,
    not an instance of this class.
    """
    __slots__ = ['original_string']

    def __getstate__(self):
        """Return a dict of the slots that are currently set."""
        return dict((slot, getattr(self, slot)) for slot in self.__slots__
                    if hasattr(self, slot))

    def __setstate__(self, state):
        """Restore the slots saved by ``__getstate__``."""
        for slot, value in state.items():
            setattr(self, slot, value)

    def __new__(cls, val):
        """Create an instance of DS object, or return a blank string if one is
        passed in, e.g. from a type 2 DICOM blank value.

        :param val: val must be a string or a number type which can be
                    converted to a decimal
        :raises TypeError: if ``val`` is a float and
                    ``config.allow_DS_float`` is not set
        :raises OverflowError: if the value needs more than 16 characters
                    and the length check is being enforced
        """
        # Store this value here so that if the input string is actually a valid
        # string but decimal.Decimal transforms it to an invalid string it will
        # still be initialized properly
        enforce_length = config.enforce_valid_values

        # DICOM allows spaces around the string,
        # but python doesn't, so clean it
        if isinstance(val, (str, compat.text_type)):
            val = val.strip()
            # If the input string is itself valid (at most 16 characters),
            # relax the length constraint for this particular instance
            if len(val) <= 16:
                enforce_length = False

        if val == '':
            return val

        if isinstance(val, float) and not config.allow_DS_float:
            msg = ("DS cannot be instantiated with a float value, "
                   "unless config.allow_DS_float is set to True. "
                   "It is recommended to convert to a string instead, "
                   "with the desired number of digits, or use "
                   "Decimal.quantize and pass a Decimal instance.")
            raise TypeError(msg)

        # Only a value that already is an instance of this class is passed
        # through unchanged; a plain Decimal is converted so that the
        # caller always receives a DS object.
        if not isinstance(val, cls):
            val = super(DSdecimal, cls).__new__(cls, val)

        if len(str(val)) > 16 and enforce_length:
            msg = ("DS value representation must be <= 16 "
                   "characters by DICOM standard. Initialize with "
                   "a smaller string, or set config.enforce_valid_values "
                   "to False to override, or use Decimal.quantize() and "
                   "initialize with a Decimal instance.")
            raise OverflowError(msg)
        return val

    def __init__(self, val):
        """Store the original string if one given, for exact write-out of same
        value later. E.g. if set '1.23e2', Decimal would write '123', but DS
        will use the original
        """
        # ... also if user changes a data element value, then will get
        # a different Decimal, as Decimal is immutable.
        if isinstance(val, (str, compat.text_type)):
            self.original_string = val
        elif (isinstance(val, (DSfloat, DSdecimal)) and
              hasattr(val, 'original_string')):
            self.original_string = val.original_string

    def __str__(self):
        """Return the original string when one is stored and it is at most
        16 characters long, else the Decimal text.
        """
        if (hasattr(self, 'original_string') and
                len(self.original_string) <= 16):
            return self.original_string
        else:
            return super(DSdecimal, self).__str__()

    def __repr__(self):
        """Return ``str(self)`` wrapped in double quotes."""
        return "\"" + str(self) + "\""
# Choose the class used for DS values: the Decimal-based one when
# config.use_DS_decimal is set, otherwise the float-based one.
DSclass = DSdecimal if config.use_DS_decimal else DSfloat
def DS(val):
    """Factory function for creating DS class instances.

    A blank string (after stripping surrounding whitespace) or None gives
    back the empty string.  Anything else is handed to DSclass, i.e.
    DSfloat or DSdecimal.

    Doing the blank check here avoids overriding __new__ in DSfloat
    (which carries a time penalty for large arrays of DS).  Similarly,
    a string that has already been cleaned and checked can skip this
    function and be passed to DSfloat directly.
    """
    if isinstance(val, (str, compat.text_type)):
        val = val.strip()

    is_blank = (val == '' or val is None)
    if is_blank:
        return ''

    return DSclass(val)
class IS(int):
    """Derived class of int. Stores original integer
    string for exact rewriting
    of the string originally read or stored.
    """
    # Slots are only declared when running under Python 2; otherwise the
    # attribute lives in the instance dict.
    if compat.in_py2:
        __slots__ = ['original_string']

    # Unlikely that str(int) will not be the
    # same as the original, but could happen
    # with leading zeros.

    def __getstate__(self):
        """Return a dict holding ``original_string`` when it is set."""
        # __slots__ is only defined under Python 2 (see above); fall back
        # to the attribute name so that this also works without slots.
        slots = getattr(self, '__slots__', ('original_string',))
        return dict((slot, getattr(self, slot)) for slot in slots
                    if hasattr(self, slot))

    def __setstate__(self, state):
        """Restore the attributes saved by ``__getstate__``."""
        for slot, value in state.items():
            setattr(self, slot, value)

    def __new__(cls, val):
        """Create instance if new integer string

        :param val: the value to convert; None or a blank string gives
                    back the empty string instead of an instance
        :raises TypeError: if a float or Decimal cannot be converted
                    without loss
        :raises OverflowError: if the value is outside the DICOM limits
                    and ``config.enforce_valid_values`` is set
        """
        if val is None:
            return ''
        if isinstance(val, (str, compat.text_type)) and val.strip() == '':
            return ''
        # Overflow error in Python 2 for integers too large
        # while calling super(IS). Fall back on the regular int
        # casting that will automatically convert the val to long
        # if needed.
        try:
            newval = super(IS, cls).__new__(cls, val)
        except OverflowError:
            newval = int(val)

        # check if a float or Decimal passed in, then could have lost info,
        # and will raise error. E.g. IS(Decimal('1')) is ok, but not IS(1.23)
        if isinstance(val, (float, Decimal)) and newval != val:
            raise TypeError("Could not convert value to integer without loss")

        # Checks in case underlying int is >32 bits, DICOM does not allow this
        check_newval = (newval < -2**31 or newval >= 2**31)
        if check_newval and config.enforce_valid_values:
            dcm_limit = "-2**31 to (2**31 - 1) for IS"
            message = "Value exceeds DICOM limits of %s" % (dcm_limit)
            raise OverflowError(message)
        return newval

    def __init__(self, val):
        """Remember the string this value was built from, if any."""
        # If a string passed, then store it
        if isinstance(val, (str, compat.text_type)):
            self.original_string = val
        elif isinstance(val, IS) and hasattr(val, 'original_string'):
            self.original_string = val.original_string

    def __repr__(self):
        """Return the original string, or else the integer text, wrapped in
        double quotes.
        """
        if hasattr(self, 'original_string'):
            return "\"" + self.original_string + "\""
        else:
            return "\"" + int.__str__(self) + "\""
def MultiString(val, valtype=str):
    """Split a bytestring by delimiters if there are any

    val -- DICOM bytestring to split up (text or bytes)
    valtype -- default str, but can be e.g.
    UID to overwrite to a specific type

    Returns `val` unchanged when it is empty or None, `valtype(value)`
    for a single value (or the empty value itself), and a MultiValue of
    `valtype` when the backslash delimiter is present.
    """
    if not val:
        return val

    # Use delimiters of the same type as the value, so that bytes input
    # does not fail on text delimiters.
    if isinstance(val, bytes):
        padding, delimiter = (b' ', b'\x00'), b"\\"
    else:
        padding, delimiter = (' ', '\x00'), "\\"

    # Remove trailing blank used to pad to even length
    # 2005.05.25: also check for trailing 0, error made
    # in PET files we are converting
    if val.endswith(padding):
        val = val[:-1]

    splitup = val.split(delimiter)
    if len(splitup) == 1:
        val = splitup[0]
        return valtype(val) if val else val
    else:
        return MultiValue(valtype, splitup)
class PersonName3(object):
    """Person Name (PN) value that keeps the original text or bytes.

    The value is split with the module-level PN patterns; the named
    groups (``family_name``, ``given_name``, ``middle_name``,
    ``name_prefix``, ``name_suffix``, ``single_byte``, ``ideographic``,
    ``phonetic``) become instance attributes and the three component
    groups are kept in ``components``.
    """

    def __init__(self, val, encodings=default_encoding):
        """Store `val` and parse it.

        val -- text, bytes or another PersonName3; for a PersonName3 its
               original string and encodings are taken over
        encodings -- a single encoding or a list of encodings
        """
        if isinstance(val, PersonName3):
            encodings = val.encodings
            val = val.original_string

        self.original_string = val

        self.encodings = self._verify_encodings(encodings)
        self.parse(val)

    def parse(self, val):
        """Break `val` down into the name parts and component groups."""
        if isinstance(val, bytes):
            matchstr = match_string_bytes
        else:
            matchstr = match_string_uni

        matchobj = re.match(matchstr, val)

        self.__dict__.update(matchobj.groupdict())

        groups = matchobj.groups()
        # first group is the single-byte component, the last two are the
        # ideographic and phonetic components
        self.components = [groups[i] for i in (0, -2, -1)]

    def __eq__(self, other):
        return self.original_string == other

    def __ne__(self, other):
        return not self == other

    def __str__(self):
        return self.original_string.__str__()

    def __repr__(self):
        return self.original_string.__repr__()

    # For python 3, any override of __cmp__ or __eq__
    # immutable requires explicit redirect of hash
    # function to the parent class See
    # See http://docs.python.org/
    # dev/3.0/reference/datamodel.html#object.__hash__
    __hash__ = object.__hash__

    def decode(self, encodings=None):
        """Return a new PersonName3 with the components decoded.

        encodings -- encoding(s) to use; None means the stored ones
        """
        encodings = self._verify_encodings(encodings)

        from pydicom.charset import clean_escseq
        if not isinstance(self.components[0], bytes):
            # Work on a copy: the trimming below must not shorten the
            # list stored on the instance.
            comps = list(self.components)
        else:
            comps = [
                clean_escseq(comp.decode(enc), encodings)
                for comp, enc in zip(self.components, encodings)
            ]

        # Remove empty elements from the end
        while len(comps) and not comps[-1]:
            comps.pop()

        return PersonName3('='.join(comps), encodings)

    def encode(self, encodings=None):
        """Return the components encoded and joined with b'='.

        encodings -- encoding(s) to use; None means the stored ones
        """
        encodings = self._verify_encodings(encodings)

        if isinstance(self.components[0], bytes):
            # Work on a copy: the trimming below must not shorten the
            # list stored on the instance.
            comps = list(self.components)
        else:
            comps = [
                C.encode(enc) for C, enc in zip(self.components, encodings)
            ]

        # Remove empty elements from the end
        while len(comps) and not comps[-1]:
            comps.pop()

        return b'='.join(comps)

    def family_comma_given(self):
        """Return name as 'Family-name, Given-name'"""
        return self.formatted('%(family_name)s, %(given_name)s')

    def formatted(self, format_str):
        """Return `format_str` filled in from the instance attributes.

        Use "...%(property)...%(property)..." where property is one of
        the parsed attributes.  A value that was built from bytes is
        decoded with the default encoding first.
        """
        if isinstance(self.original_string, bytes):
            return format_str % self.decode(default_encoding).__dict__
        else:
            return format_str % self.__dict__

    def _verify_encodings(self, encodings):
        """Return `encodings` as a list, without modifying the argument.

        None gives the stored encodings, a single encoding is repeated
        three times and a two-item list gets its second item repeated.
        """
        if encodings is None:
            return self.encodings

        if not isinstance(encodings, list):
            return [encodings] * 3

        if len(encodings) == 2:
            # Build a new list instead of appending to the caller's one.
            return encodings + [encodings[1]]

        return encodings
class PersonNameBase(object):
    """Base class for Person Name classes

    Meant to be combined with a string type: ``parse`` relies on the
    ``split`` method of the instance.
    """

    def __init__(self, val):
        """Initialize the PN properties"""
        # Note normally use __new__ on subclassing an immutable,
        # but here we just want to do some pre-processing
        # for properties PS 3.5-2008 section 6.2 (p.28)
        # and 6.2.1 describes PN. Briefly:
        #  single-byte-characters=ideographic
        #   characters=phonetic-characters
        # (each with?):
        #   family-name-complex
        #  ^Given-name-complex
        #  ^Middle-name^name-prefix^name-suffix
        self.parse()

    def formatted(self, format_str):
        """Return a formatted string according to the format pattern

        Use "...%(property)...%(property)..." where property
        is one of family_name, given_name,
        middle_name, name_prefix,
        name_suffix
        """
        return format_str % self.__dict__

    def parse(self):
        """Break down the components and name parts"""
        self.components = self.split("=")
        n_components = len(self.components)

        self.single_byte = self.components[0]
        self.ideographic = self.components[1] if n_components > 1 else ''
        self.phonetic = self.components[2] if n_components > 2 else ''

        if self.single_byte:
            # in case missing trailing items are left out
            parts = (self.single_byte + "^^^^").split("^")[:5]
        else:
            parts = ['', '', '', '', '']

        (self.family_name, self.given_name, self.middle_name,
         self.name_prefix, self.name_suffix) = parts
class PersonName(PersonNameBase, bytes):
    """Human-friendly class to hold VR of Person Name (PN)

    Name is parsed into the following properties:
    single-byte, ideographic, and phonetic components
    (PS3.5-2008 6.2.1)
    family_name,
    given_name,
    middle_name,
    name_prefix,
    name_suffix
    """

    def __new__(cls, val):
        """Return instance of the new class"""
        # A value that has already been converted is handed back as it is
        already_converted = isinstance(val, PersonName)
        if already_converted:
            return val
        return super(PersonName, cls).__new__(cls, val)

    def encode(self, *args):
        """Dummy method to mimic py2 str behavior in py3 bytes subclass"""
        # Having "encode" on every object greatly simplifies the write
        # process; any arguments are ignored.
        return self

    def family_comma_given(self):
        """Return name as 'Family-name, Given-name'"""
        pattern = "%(family_name)s, %(given_name)s"
        return self.formatted(pattern)

    # XXX need to process the ideographic or phonetic components?
class PersonNameUnicode(PersonNameBase, compat.text_type):
    """Unicode version of Person Name"""

    def __new__(cls, val, encodings):
        """Return unicode string after conversion of each part

        val -- the PN value to store
        encodings -- a list of python encodings, generally found
                 from pydicom.charset.python_encodings mapping
                 of values in DICOM data element (0008,0005).
        """
        # in here to avoid circular import
        from pydicom.charset import clean_escseq

        # Make the possible three character encodings explicit:
        if not isinstance(encodings, list):
            encodings = [encodings] * 3
        if len(encodings) == 2:
            encodings.append(encodings[1])
        components = val.split(b"=")
        # Remove the first encoding if only one component is present
        # NOTE(review): when the caller passed a list, it is edited in
        # place here and __init__ then receives that same, already edited
        # list - confirm nothing relies on this before switching to a copy.
        if (len(components) == 1):
            del encodings[0]

        comps = [
            clean_escseq(C.decode(enc), encodings)
            for C, enc in zip(components, encodings)
        ]
        new_val = u"=".join(comps)

        return compat.text_type.__new__(cls, new_val)

    def __init__(self, val, encodings):
        """Store the encodings and parse the name parts."""
        self.encodings = self._verify_encodings(encodings)
        PersonNameBase.__init__(self, val)

    def __copy__(self):
        """Correctly copy object.

        Needed because of the overwritten __new__.
        """
        # no need to use the original encoding here - we just encode and
        # decode in utf-8 and set the original encoding later
        name = compat.text_type(self).encode('utf8')
        new_person = PersonNameUnicode(name, 'utf8')
        new_person.__dict__.update(self.__dict__)
        return new_person

    def __deepcopy__(self, memo):
        """Make correctly a deep copy of the object.

        Needed because of the overwritten __new__.
        """
        name = compat.text_type(self).encode('utf8')
        new_person = PersonNameUnicode(name, 'utf8')
        memo[id(self)] = new_person
        # no need for deepcopy of the content here
        for k, v in self.__dict__.items():
            setattr(new_person, k, deepcopy(v, memo))
        return new_person

    def _verify_encodings(self, encodings):
        """Checks the encoding to ensure proper format"""
        if encodings is None:
            return self.encodings

        if not isinstance(encodings, list):
            encodings = [encodings] * 3

        if len(encodings) == 2:
            encodings.append(encodings[1])

        return encodings

    def encode(self, encodings):
        """Encode the unicode using the specified encoding

        Returns the encoded components joined with b'=', with empty
        components removed from the end.
        """
        encodings = self._verify_encodings(encodings)

        components = self.split('=')

        comps = [C.encode(enc) for C, enc in zip(components, encodings)]

        # Remove empty elements from the end
        while len(comps) and not comps[-1]:
            comps.pop()

        # The components are encoded bytes, so the separator has to be
        # bytes as well.
        return b'='.join(comps)

    def family_comma_given(self):
        """Return name as 'Family-name, Given-name'"""
        # The name parts are text, so they are inserted with %s.
        return u"%s, %s" % (self.family_name, self.given_name)