Source code for pydicom.values

# Copyright 2008-2017 pydicom authors. See LICENSE file for details.
"""Functions for converting values of DICOM
   data elements to proper python types
"""

from io import BytesIO
from struct import (unpack, calcsize)

# don't import datetime_conversion directly
from pydicom import config
from pydicom import compat
from pydicom.compat import in_py2
from pydicom.charset import (default_encoding, text_VRs)
from pydicom.config import logger
from pydicom.filereader import read_sequence
from pydicom.multival import MultiValue
from pydicom.tag import (Tag, TupleTag)
import pydicom.uid
import pydicom.valuerep  # don't import DS directly as can be changed by config
from pydicom.valuerep import (MultiString, DA, DT, TM)

if not in_py2:
    from pydicom.valuerep import PersonName3 as PersonName
else:
    from pydicom.valuerep import PersonName  # NOQA


def convert_tag(byte_string, is_little_endian, offset=0):
    if is_little_endian:
        struct_format = "<HH"
    else:
        struct_format = ">HH"
    return TupleTag(unpack(struct_format, byte_string[offset:offset + 4]))


[docs]def convert_AE_string(byte_string,
                      is_little_endian,
                      struct_format=None,
                      encoding=default_encoding):
    """Read a byte string for a VR of 'AE'.

    Elements with VR of 'AE' have non-significant leading and trailing spaces.
    """
    if not in_py2:
        byte_string = byte_string.decode(encoding)
    byte_string = byte_string.strip()
    return byte_string


[docs]def convert_ATvalue(byte_string, is_little_endian, struct_format=None):
    """Read and return AT (tag) data_element value(s)"""
    length = len(byte_string)
    if length == 4:
        return convert_tag(byte_string, is_little_endian)

    # length > 4
    if length % 4 != 0:
        logger.warn("Expected length to be multiple of 4 for VR 'AT', "
                    "got length %d", length)
    return MultiValue(Tag, [
        convert_tag(byte_string, is_little_endian, offset=x)
        for x in range(0, length, 4)
    ])


def _DA_from_byte_string(byte_string):
    return DA(byte_string.rstrip())


[docs]def convert_DA_string(byte_string, is_little_endian, struct_format=None):
    """Read and return a DA value"""

    if config.datetime_conversion:
        if not in_py2:
            byte_string = byte_string.decode(default_encoding)
        splitup = byte_string.split("\\")
        if len(splitup) == 1:
            return _DA_from_byte_string(splitup[0])
        else:
            return MultiValue(_DA_from_byte_string, splitup)
    else:
        return convert_string(byte_string, is_little_endian, struct_format)


[docs]def convert_DS_string(byte_string, is_little_endian, struct_format=None):
    """Read and return a DS value or list of values"""

    if not in_py2:
        byte_string = byte_string.decode(default_encoding)
    # Below, go directly to DS class instance
    # rather than factory DS, but need to
    # ensure last string doesn't have
    # blank padding (use strip())
    return MultiString(byte_string.strip(), valtype=pydicom.valuerep.DSclass)


def _DT_from_byte_string(byte_string):
    byte_string = byte_string.rstrip()
    length = len(byte_string)
    if length < 4 or length > 26:
        logger.warn("Expected length between 4 and 26, got length %d", length)
    return DT(byte_string)


[docs]def convert_DT_string(byte_string, is_little_endian, struct_format=None):
    """Read and return a DT value"""

    if config.datetime_conversion:
        if not in_py2:
            byte_string = byte_string.decode(default_encoding)
        splitup = byte_string.split("\\")
        if len(splitup) == 1:
            return _DT_from_byte_string(splitup[0])
        else:
            return MultiValue(_DT_from_byte_string, splitup)
    else:
        return convert_string(byte_string, is_little_endian, struct_format)


[docs]def convert_IS_string(byte_string, is_little_endian, struct_format=None):
    """Read and return an IS value or list of values"""

    if not in_py2:
        byte_string = byte_string.decode(default_encoding)
    return MultiString(byte_string, valtype=pydicom.valuerep.IS)


[docs]def convert_numbers(byte_string, is_little_endian, struct_format):
    """Convert `byte_string` to a value,
       depending on `struct_format`.

    Given an encoded DICOM Element value,
    use `struct_format` and the endianness
    of the data to decode it.

    Parameters
    ----------
    byte_string : bytes
        The raw byte data to decode.
    is_little_endian : bool
        The encoding of `byte_string`.
    struct_format : str
        The type of data encoded in `byte_string`.

    Returns
    -------
    str
        If there is no encoded data in `byte_string`
        then an empty string will
        be returned.
    value
        If `byte_string` encodes a single value
         then it will be returned.
    list
        If `byte_string` encodes multiple values
        then a list of the decoded
        values will be returned.
    """
    endianChar = '><' [is_little_endian]

    # "=" means use 'standard' size, needed on 64-bit systems.
    bytes_per_value = calcsize("=" + struct_format)
    length = len(byte_string)

    if length % bytes_per_value != 0:
        logger.warning("Expected length to be even multiple of number size")

    format_string = "%c%u%c" % (endianChar, length // bytes_per_value,
                                struct_format)

    value = unpack(format_string, byte_string)

    # if the number is empty, then return the empty
    # string rather than empty list
    if len(value) == 0:
        return ''
    elif len(value) == 1:
        return value[0]
    else:
        # convert from tuple to a list so can modify if need to
        return list(value)


[docs]def convert_OBvalue(byte_string, is_little_endian, struct_format=None):
    """Return the raw bytes from reading an OB value"""
    return byte_string


[docs]def convert_OWvalue(byte_string, is_little_endian, struct_format=None):
    """Return the raw bytes from reading an OW value rep

    Note: pydicom does NOT do byte swapping, except in
    dataset.pixel_array function
    """
    # for now, Maybe later will have own routine
    return convert_OBvalue(byte_string, is_little_endian)


[docs]def convert_PN(byte_string,
               is_little_endian,
               struct_format=None,
               encoding=None):
    """Read and return string(s) as PersonName instance(s)"""

    def get_valtype(x):
        if not in_py2:
            if encoding:
                return PersonName(x, encoding).decode()
            return PersonName(x).decode()
        return PersonName(x)

    # XXX - We have to replicate MultiString functionality
    # here because we can't decode easily here since that
    # is performed in PersonNameUnicode
    ends_with1 = byte_string.endswith(b' ')
    ends_with2 = byte_string.endswith(b'\x00')
    if byte_string and (ends_with1 or ends_with2):
        byte_string = byte_string[:-1]

    splitup = byte_string.split(b"\\")

    if len(splitup) == 1:
        return get_valtype(splitup[0])
    else:
        return MultiValue(get_valtype, splitup)


[docs]def convert_string(byte_string,
                   is_little_endian,
                   struct_format=None,
                   encoding=default_encoding):
    """Read and return a string or strings"""
    if not in_py2:
        byte_string = byte_string.decode(encoding)
    return MultiString(byte_string)


[docs]def convert_single_string(byte_string,
                          is_little_endian,
                          struct_format=None,
                          encoding=default_encoding):
    """Read and return a single string
       (backslash character does not split)"""
    if not in_py2:
        byte_string = byte_string.decode(encoding)
    if byte_string and byte_string.endswith(' '):
        byte_string = byte_string[:-1]
    return byte_string


[docs]def convert_SQ(byte_string,
               is_implicit_VR,
               is_little_endian,
               encoding=default_encoding,
               offset=0):
    """Convert a sequence that has been read
       as bytes but not yet parsed."""
    fp = BytesIO(byte_string)
    seq = read_sequence(fp, is_implicit_VR, is_little_endian,
                        len(byte_string), encoding, offset)
    return seq


def _TM_from_byte_string(byte_string):
    byte_string = byte_string.rstrip()
    length = len(byte_string)
    if (length < 2 or length > 16) and length != 0:
        logger.warn("Expected length between 2 and 16, got length %d", length)
    return TM(byte_string)


[docs]def convert_TM_string(byte_string, is_little_endian, struct_format=None):
    """Read and return a TM value"""
    if config.datetime_conversion:
        if not in_py2:
            byte_string = byte_string.decode(default_encoding)
        splitup = byte_string.split("\\")
        if len(splitup) == 1:
            return _TM_from_byte_string(splitup[0])
        else:
            return MultiValue(_TM_from_byte_string, splitup)
    else:
        return convert_string(byte_string, is_little_endian, struct_format)


[docs]def convert_UI(byte_string, is_little_endian, struct_format=None):
    """Read and return a UI values or values"""
    # Strip off 0-byte padding for even length (if there)
    if not in_py2:
        byte_string = byte_string.decode(default_encoding)
    if byte_string and byte_string.endswith('\0'):
        byte_string = byte_string[:-1]
    return MultiString(byte_string, pydicom.uid.UID)


[docs]def convert_UN(byte_string, is_little_endian, struct_format=None):
    """Return a byte string for a VR of 'UN' (unknown)"""
    return byte_string


[docs]def convert_UR_string(byte_string,
                      is_little_endian,
                      struct_format=None,
                      encoding=default_encoding):
    """Read a byte string for a VR of 'UR'

    Elements with VR of 'UR' shall not be multi-valued
    and trailing spaces shall be ignored.
    """
    if not in_py2:
        byte_string = byte_string.decode(encoding)
    byte_string = byte_string.rstrip()
    return byte_string


[docs]def convert_value(VR, raw_data_element, encoding=default_encoding):
    """Return the converted value (from raw bytes) for the given VR"""
    if VR not in converters:
        message = "Unknown Value Representation '{0}'".format(VR)
        raise NotImplementedError(message)

    # Look up the function to convert that VR
    # Dispatch two cases: a plain converter,
    # or a number one which needs a format string
    if isinstance(converters[VR], tuple):
        converter, num_format = converters[VR]
    else:
        converter = converters[VR]
        num_format = None

    # Ensure that encoding is in the proper 3-element format
    if isinstance(encoding, compat.string_types):
        encoding = [encoding, ] * 3

    byte_string = raw_data_element.value
    is_little_endian = raw_data_element.is_little_endian
    is_implicit_VR = raw_data_element.is_implicit_VR

    # Not only two cases. Also need extra info if is a raw sequence
    # Pass the encoding to the converter if it is a specific VR
    try:
        if VR == 'PN':
            value = converter(byte_string,
                              is_little_endian,
                              encoding=encoding)
        elif VR in text_VRs:
            # Text VRs use the 2nd specified encoding
            value = converter(byte_string,
                              is_little_endian,
                              encoding=encoding[1])
        elif VR != "SQ":
            value = converter(byte_string,
                              is_little_endian,
                              num_format)
        else:
            value = convert_SQ(byte_string,
                               is_implicit_VR,
                               is_little_endian,
                               encoding,
                               raw_data_element.value_tell)
    except ValueError:
        if config.enforce_valid_values:
            # The user really wants an exception here
            raise
        logger.debug('unable to translate tag %s with VR %s'
                     % (raw_data_element.tag, VR))

        for vr in convert_retry_VR_order:
            if vr == VR:
                continue
            try:
                value = convert_value(vr, raw_data_element, encoding)
                logger.debug('converted value for tag %s with VR %s'
                             % (raw_data_element.tag, vr))
                break
            except Exception:
                pass
        else:
            logger.debug('Could not convert value for tag %s with any VR '
                         'in the convert_retry_VR_order list'
                         % raw_data_element.tag)
            value = raw_data_element.value
    return value


convert_retry_VR_order = [
    'SH', 'UL', 'SL', 'US', 'SS', 'FL', 'FD', 'OF', 'OB', 'UI', 'DA', 'TM',
    'PN', 'IS', 'DS', 'LT', 'SQ', 'UN', 'AT', 'OW', 'DT', 'UT', ]
# converters map a VR to the function
# to read the value(s). for convert_numbers,
# the converter maps to a tuple
# (function, struct_format)
# (struct_format in python struct module style)
converters = {
    'UL': (convert_numbers, 'L'),
    'SL': (convert_numbers, 'l'),
    'US': (convert_numbers, 'H'),
    'SS': (convert_numbers, 'h'),
    'FL': (convert_numbers, 'f'),
    'FD': (convert_numbers, 'd'),
    'OF': (convert_numbers, 'f'),
    'OB': convert_OBvalue,
    'OD': convert_OBvalue,
    'OL': convert_OBvalue,
    'UI': convert_UI,
    'SH': convert_string,
    'DA': convert_DA_string,
    'TM': convert_TM_string,
    'CS': convert_string,
    'PN': convert_PN,
    'LO': convert_string,
    'IS': convert_IS_string,
    'DS': convert_DS_string,
    'AE': convert_AE_string,
    'AS': convert_string,
    'LT': convert_single_string,
    'SQ': convert_SQ,
    'UC': convert_string,
    'UN': convert_UN,
    'UR': convert_UR_string,
    'AT': convert_ATvalue,
    'ST': convert_string,
    'OW': convert_OWvalue,
    'OW/OB': convert_OBvalue,  # note OW/OB depends on other items,
    'OB/OW': convert_OBvalue,  # which we don't know at read time
    'OW or OB': convert_OBvalue,
    'OB or OW': convert_OBvalue,
    'US or SS': convert_OWvalue,
    'US or OW': convert_OWvalue,
    'US or SS or OW': convert_OWvalue,
    'US\\US or SS\\US': convert_OWvalue,
    'DT': convert_DT_string,
    'UT': convert_single_string,
}
if __name__ == "__main__":
    pass