# Source code for pydicom.values

# Copyright 2008-2017 pydicom authors. See LICENSE file for details.
"""Functions for converting values of DICOM
   data elements to proper python types
"""

from io import BytesIO
from struct import (unpack, calcsize)

# don't import datetime_conversion directly
from pydicom import config
from pydicom import compat
from pydicom.compat import in_py2
from pydicom.charset import (default_encoding, text_VRs)
from pydicom.config import logger
from pydicom.filereader import read_sequence
from pydicom.multival import MultiValue
from pydicom.tag import (Tag, TupleTag)
import pydicom.uid
import pydicom.valuerep  # don't import DS directly as can be changed by config
from pydicom.valuerep import (MultiString, DA, DT, TM)

if not in_py2:
    from pydicom.valuerep import PersonName3 as PersonName
else:
    from pydicom.valuerep import PersonName  # NOQA


def convert_tag(byte_string, is_little_endian, offset=0):
    """Unpack the 4 bytes of `byte_string` starting at `offset` as a tag.

    The bytes are read as two unsigned 16-bit integers, using little
    endian byte order when `is_little_endian` is true and big endian
    otherwise, and the resulting pair is passed to ``TupleTag``.
    """
    byte_order = "<" if is_little_endian else ">"
    group_and_element = unpack(byte_order + "HH",
                               byte_string[offset:offset + 4])
    return TupleTag(group_and_element)


def convert_AE_string(byte_string, is_little_endian, struct_format=None,
                      encoding=default_encoding):
    """Read a byte string for a VR of 'AE'.

    Leading and trailing spaces are not significant for 'AE' elements,
    so the value is stripped on both sides. Outside of Python 2 the
    bytes are first decoded with `encoding`.
    """
    text = byte_string if in_py2 else byte_string.decode(encoding)
    return text.strip()
def convert_ATvalue(byte_string, is_little_endian, struct_format=None):
    """Read and return AT (tag) data_element value(s).

    Parameters
    ----------
    byte_string : bytes
        The encoded value; each tag occupies 4 bytes.
    is_little_endian : bool
        True if `byte_string` is encoded as little endian.
    struct_format : str, optional
        Not used; accepted so that every converter can be called with
        the same positional arguments.

    Returns
    -------
    The result of ``convert_tag`` when exactly 4 bytes are supplied,
    otherwise a ``MultiValue`` of ``Tag`` built from every complete
    4-byte group in `byte_string`.
    """
    length = len(byte_string)
    if length == 4:
        return convert_tag(byte_string, is_little_endian)

    # length != 4: no value, several values, or a malformed length
    leftover = length % 4
    if leftover:
        logger.warning("Expected length to be multiple of 4 for VR 'AT', "
                       "got length %d", length)
        # Skip the incomplete trailing group: unpacking fewer than 4 bytes
        # would raise struct.error immediately after the warning.
        length -= leftover
    return MultiValue(Tag, [
        convert_tag(byte_string, is_little_endian, offset=x)
        for x in range(0, length, 4)
    ])
def _DA_from_byte_string(byte_string):
    """Build a ``DA`` from `byte_string` with trailing whitespace removed."""
    trimmed = byte_string.rstrip()
    return DA(trimmed)
def convert_DA_string(byte_string, is_little_endian, struct_format=None):
    """Read and return a DA value.

    When ``config.datetime_conversion`` is off the value is handed to
    ``convert_string`` unchanged. Otherwise the value is split on
    backslashes and each part is converted with ``_DA_from_byte_string``;
    a single part is returned directly, several parts as a ``MultiValue``.
    """
    if not config.datetime_conversion:
        return convert_string(byte_string, is_little_endian, struct_format)

    if not in_py2:
        byte_string = byte_string.decode(default_encoding)
    parts = byte_string.split("\\")
    if len(parts) > 1:
        return MultiValue(_DA_from_byte_string, parts)
    return _DA_from_byte_string(parts[0])
def convert_DS_string(byte_string, is_little_endian, struct_format=None):
    """Read and return a DS value or list of values."""
    text = byte_string if in_py2 else byte_string.decode(default_encoding)
    # Use the DS class instance directly rather than the DS factory.
    # The value is stripped first so that the last string does not
    # carry blank padding.
    stripped = text.strip()
    return MultiString(stripped, valtype=pydicom.valuerep.DSclass)
def _DT_from_byte_string(byte_string):
    """Build a ``DT`` from `byte_string` with trailing whitespace removed.

    A warning is logged when the stripped value is shorter than 4 or
    longer than 26 characters; the value is passed to ``DT`` regardless.
    """
    byte_string = byte_string.rstrip()
    length = len(byte_string)
    if length < 4 or length > 26:
        # logger.warning, not the deprecated logger.warn alias
        logger.warning("Expected length between 4 and 26, got length %d",
                       length)
    return DT(byte_string)
def convert_DT_string(byte_string, is_little_endian, struct_format=None):
    """Read and return a DT value.

    When ``config.datetime_conversion`` is off the value is handed to
    ``convert_string`` unchanged. Otherwise the value is split on
    backslashes and each part is converted with ``_DT_from_byte_string``;
    a single part is returned directly, several parts as a ``MultiValue``.
    """
    if not config.datetime_conversion:
        return convert_string(byte_string, is_little_endian, struct_format)

    if not in_py2:
        byte_string = byte_string.decode(default_encoding)
    parts = byte_string.split("\\")
    if len(parts) > 1:
        return MultiValue(_DT_from_byte_string, parts)
    return _DT_from_byte_string(parts[0])
def convert_IS_string(byte_string, is_little_endian, struct_format=None):
    """Read and return an IS value or list of values."""
    text = byte_string if in_py2 else byte_string.decode(default_encoding)
    return MultiString(text, valtype=pydicom.valuerep.IS)
def convert_numbers(byte_string, is_little_endian, struct_format):
    """Decode `byte_string` into number(s) described by `struct_format`.

    Parameters
    ----------
    byte_string : bytes
        The raw byte data to decode.
    is_little_endian : bool
        The encoding of `byte_string`.
    struct_format : str
        A single ``struct`` module format character giving the type of
        the values encoded in `byte_string`.

    Returns
    -------
    str
        The empty string, if `byte_string` holds no encoded data.
    value
        The decoded value, if `byte_string` encodes exactly one.
    list
        The decoded values, if `byte_string` encodes more than one.
    """
    byte_order = ('>', '<')[is_little_endian]
    # "=" means use the 'standard' size, needed on 64-bit systems.
    item_size = calcsize("=" + struct_format)
    n_bytes = len(byte_string)
    if n_bytes % item_size:
        logger.warning("Expected length to be even multiple of number size")

    layout = "%c%u%c" % (byte_order, n_bytes // item_size, struct_format)
    decoded = unpack(layout, byte_string)

    # An empty value is reported as the empty string, not an empty list.
    if not decoded:
        return ''
    if len(decoded) == 1:
        return decoded[0]
    # Hand back a list rather than a tuple so callers can modify it.
    return list(decoded)
def convert_OBvalue(byte_string, is_little_endian, struct_format=None):
    """Return the raw bytes read for an OB value.

    `byte_string` is handed back untouched; `is_little_endian` and
    `struct_format` are accepted but not used.
    """
    raw_bytes = byte_string
    return raw_bytes
def convert_OWvalue(byte_string, is_little_endian, struct_format=None):
    """Return the raw bytes read for an OW value.

    Note: pydicom does NOT do byte swapping, except in the
    dataset.pixel_array function.
    """
    # OW is treated exactly like OB for now; it may get its own
    # routine later.
    raw_bytes = convert_OBvalue(byte_string, is_little_endian)
    return raw_bytes
def convert_PN(byte_string, is_little_endian, struct_format=None,
               encoding=None):
    """Read and return string(s) as PersonName instance(s).

    One trailing space or null byte is dropped, then the value is split
    on backslashes. A single component is returned as one converted
    name, several components as a ``MultiValue``.
    """
    def to_person_name(raw):
        # Python 2 keeps the PersonName as constructed; otherwise the
        # name is decoded, with `encoding` when one was supplied.
        if in_py2:
            return PersonName(raw)
        name = PersonName(raw, encoding) if encoding else PersonName(raw)
        return name.decode()

    # XXX - MultiString functionality is replicated here because the
    # bytes cannot easily be decoded at this point; decoding is
    # performed in PersonNameUnicode.
    if byte_string.endswith((b' ', b'\x00')):
        byte_string = byte_string[:-1]

    components = byte_string.split(b"\\")
    if len(components) > 1:
        return MultiValue(to_person_name, components)
    return to_person_name(components[0])
def convert_string(byte_string, is_little_endian, struct_format=None,
                   encoding=default_encoding):
    """Read and return a string or strings.

    Outside of Python 2 the bytes are decoded with `encoding` before
    being passed to ``MultiString``.
    """
    text = byte_string if in_py2 else byte_string.decode(encoding)
    return MultiString(text)
def convert_single_string(byte_string, is_little_endian, struct_format=None,
                          encoding=default_encoding):
    """Read and return a single string.

    The backslash character does not split the value. Outside of
    Python 2 the bytes are decoded with `encoding`; a single trailing
    space, if present, is removed.
    """
    text = byte_string if in_py2 else byte_string.decode(encoding)
    if text and text.endswith(' '):
        return text[:-1]
    return text
def convert_SQ(byte_string, is_implicit_VR, is_little_endian,
               encoding=default_encoding, offset=0):
    """Convert a sequence that has been read as bytes but not yet parsed.

    The bytes are wrapped in a ``BytesIO`` and handed to
    ``read_sequence`` together with their length, `encoding` and
    `offset`.
    """
    return read_sequence(BytesIO(byte_string), is_implicit_VR,
                         is_little_endian, len(byte_string), encoding,
                         offset)
def _TM_from_byte_string(byte_string):
    """Build a ``TM`` from `byte_string` with trailing whitespace removed.

    A warning is logged when the stripped value is non-empty and shorter
    than 2 or longer than 16 characters; the value is passed to ``TM``
    regardless.
    """
    byte_string = byte_string.rstrip()
    length = len(byte_string)
    if (length < 2 or length > 16) and length != 0:
        # logger.warning, not the deprecated logger.warn alias
        logger.warning("Expected length between 2 and 16, got length %d",
                       length)
    return TM(byte_string)
def convert_TM_string(byte_string, is_little_endian, struct_format=None):
    """Read and return a TM value.

    When ``config.datetime_conversion`` is off the value is handed to
    ``convert_string`` unchanged. Otherwise the value is split on
    backslashes and each part is converted with ``_TM_from_byte_string``;
    a single part is returned directly, several parts as a ``MultiValue``.
    """
    if not config.datetime_conversion:
        return convert_string(byte_string, is_little_endian, struct_format)

    if not in_py2:
        byte_string = byte_string.decode(default_encoding)
    parts = byte_string.split("\\")
    if len(parts) > 1:
        return MultiValue(_TM_from_byte_string, parts)
    return _TM_from_byte_string(parts[0])
def convert_UI(byte_string, is_little_endian, struct_format=None):
    """Read and return a UI value or values.

    One trailing null character, used as padding to reach an even
    length, is removed before the value is passed to ``MultiString``
    with ``pydicom.uid.UID`` as the value type.
    """
    text = byte_string if in_py2 else byte_string.decode(default_encoding)
    if text and text.endswith('\0'):
        text = text[:-1]
    return MultiString(text, pydicom.uid.UID)
def convert_UN(byte_string, is_little_endian, struct_format=None):
    """Return the byte string for a VR of 'UN' (unknown).

    `byte_string` is handed back untouched; `is_little_endian` and
    `struct_format` are accepted but not used.
    """
    raw_bytes = byte_string
    return raw_bytes
def convert_UR_string(byte_string, is_little_endian, struct_format=None,
                      encoding=default_encoding):
    """Read a byte string for a VR of 'UR'.

    Elements with a VR of 'UR' shall not be multi-valued and trailing
    spaces shall be ignored, so the value is only right-stripped and
    never split.
    """
    text = byte_string if in_py2 else byte_string.decode(encoding)
    return text.rstrip()
def convert_value(VR, raw_data_element, encoding=default_encoding):
    """Return the converted value (from raw bytes) for the given VR.

    Raises NotImplementedError when `VR` has no entry in ``converters``.
    If the converter raises ValueError and
    ``config.enforce_valid_values`` is set, the error is re-raised;
    otherwise every other VR in ``convert_retry_VR_order`` is tried in
    turn and, should none succeed, the raw value is returned.
    """
    if VR not in converters:
        raise NotImplementedError(
            "Unknown Value Representation '{0}'".format(VR))

    # An entry is either a plain converter, or a (converter, format)
    # pair for the numeric converters that need a struct format string.
    entry = converters[VR]
    if isinstance(entry, tuple):
        converter, num_format = entry
    else:
        converter, num_format = entry, None

    # Ensure that encoding is in the proper 3-element format
    if isinstance(encoding, compat.string_types):
        encoding = [encoding] * 3

    raw_bytes = raw_data_element.value
    little_endian = raw_data_element.is_little_endian
    implicit_vr = raw_data_element.is_implicit_VR

    try:
        if VR == 'PN':
            # PN receives the full encoding list
            value = converter(raw_bytes, little_endian, encoding=encoding)
        elif VR in text_VRs:
            # Text VRs use the 2nd specified encoding
            value = converter(raw_bytes, little_endian,
                              encoding=encoding[1])
        elif VR == "SQ":
            # A raw sequence needs extra info to be parsed
            value = convert_SQ(raw_bytes, implicit_vr, little_endian,
                               encoding, raw_data_element.value_tell)
        else:
            value = converter(raw_bytes, little_endian, num_format)
    except ValueError:
        if config.enforce_valid_values:
            # The user really wants an exception here
            raise
        logger.debug('unable to translate tag %s with VR %s'
                     % (raw_data_element.tag, VR))
        for candidate in convert_retry_VR_order:
            if candidate == VR:
                continue
            try:
                value = convert_value(candidate, raw_data_element, encoding)
                logger.debug('converted value for tag %s with VR %s'
                             % (raw_data_element.tag, candidate))
                break
            except Exception:
                # best effort: move on to the next candidate VR
                pass
        else:
            logger.debug('Could not convert value for tag %s with any VR '
                         'in the convert_retry_VR_order list'
                         % raw_data_element.tag)
            value = raw_data_element.value
    return value
convert_retry_VR_order = [ 'SH', 'UL', 'SL', 'US', 'SS', 'FL', 'FD', 'OF', 'OB', 'UI', 'DA', 'TM', 'PN', 'IS', 'DS', 'LT', 'SQ', 'UN', 'AT', 'OW', 'DT', 'UT', ] # converters map a VR to the function # to read the value(s). for convert_numbers, # the converter maps to a tuple # (function, struct_format) # (struct_format in python struct module style) converters = { 'UL': (convert_numbers, 'L'), 'SL': (convert_numbers, 'l'), 'US': (convert_numbers, 'H'), 'SS': (convert_numbers, 'h'), 'FL': (convert_numbers, 'f'), 'FD': (convert_numbers, 'd'), 'OF': (convert_numbers, 'f'), 'OB': convert_OBvalue, 'OD': convert_OBvalue, 'OL': convert_OBvalue, 'UI': convert_UI, 'SH': convert_string, 'DA': convert_DA_string, 'TM': convert_TM_string, 'CS': convert_string, 'PN': convert_PN, 'LO': convert_string, 'IS': convert_IS_string, 'DS': convert_DS_string, 'AE': convert_AE_string, 'AS': convert_string, 'LT': convert_single_string, 'SQ': convert_SQ, 'UC': convert_string, 'UN': convert_UN, 'UR': convert_UR_string, 'AT': convert_ATvalue, 'ST': convert_string, 'OW': convert_OWvalue, 'OW/OB': convert_OBvalue, # note OW/OB depends on other items, 'OB/OW': convert_OBvalue, # which we don't know at read time 'OW or OB': convert_OBvalue, 'OB or OW': convert_OBvalue, 'US or SS': convert_OWvalue, 'US or OW': convert_OWvalue, 'US or SS or OW': convert_OWvalue, 'US\\US or SS\\US': convert_OWvalue, 'DT': convert_DT_string, 'UT': convert_single_string, } if __name__ == "__main__": pass