diff --git a/sdds/__init__.py b/sdds/__init__.py
index f90e710..a3bf667 100644
--- a/sdds/__init__.py
+++ b/sdds/__init__.py
@@ -16,4 +16,4 @@
 write = write_sdds
 
-__all__ = [read, SddsFile, write, __version__]
\ No newline at end of file
+__all__ = ["read", "SddsFile", "write", "__version__"]
diff --git a/sdds/classes.py b/sdds/classes.py
old mode 100644
new mode 100755
index acf0244..51927c4
--- a/sdds/classes.py
+++ b/sdds/classes.py
@@ -20,12 +20,15 @@
 ENDIAN = {'little': '<', 'big': '>'}
 NUMTYPES = {"float": "f", "double": "d", "short": "i2",
-            "long": "i4", "llong": "i8", "char": "i1", "boolean": "i1",
-            "string": "s"}
+            "long": "i4", "llong": "i8", "char": "c", "boolean": "i1",
+            "string": "s", "character": "c"}
+NUMTYPES_ascii = {"float": "%f", "double": "%e", "short": "%i",
+                  "long": "%i", "llong": "%i", "char": "%s", "boolean": "%i",
+                  "string": "%s", "character": "%s"}
 NUMTYPES_SIZES = {"float": 4, "double": 8, "short": 2,
-                  "long": 4, "llong": 8, "char": 1, "boolean": 1}
+                  "long": 4, "llong": 8, "char": 1, "character": 1, "boolean": 1, "string": 1}
 NUMTYPES_CAST = {"float": float, "double": float, "short": int,
-                 "long": int, "llong": int, "char": str, "boolean": int}
+                 "long": int, "llong": int, "char": str, "character": str, "boolean": int, "string": str}
 
 
 def get_dtype_str(type_: str, endianness: str = 'big', length: int = None):
@@ -111,14 +114,11 @@ def __init__(self, name: str, type_: str, **kwargs) -> None:
         self.type = type_
         type_hints = get_type_hints(self)
         for argname in kwargs:
-            assert hasattr(self, argname),\
-                f"Unknown name {argname} for data type "\
-                f"{self.__class__.__name__}"
+            assert hasattr(self, argname), f"Unknown name {argname} for data type {self.__class__.__name__}"
             # The type of the parameter can be resolved from the type hint
             type_hint = type_hints[argname]
             if hasattr(type_hint, "__args__"):  # For the Optional[...] types
-                type_hint = next(t for t in type_hint.__args__
-                                 if not isinstance(t, type(None)))
+                type_hint = next(t for t in type_hint.__args__ if not isinstance(t, type(None)))
             setattr(self, argname, type_hint(kwargs[argname]))
 
     def __repr__(self):
@@ -200,17 +200,23 @@ class SddsFile:
         val = sdds_file.values["name"]
         # The definitions and values can also be accessed like:
         def_, val = sdds_file["name"]
+        # Number of pages and storage mode of the file:
+        npages = sdds_file.npages
+        mode = sdds_file.mode  # "ascii" or "binary"
     """
     version: str  # This should always be "SDDS1"
     description: Optional[Description]
     definitions: Dict[str, Definition]
     values: Dict[str, Any]
+    npages: int
+    mode: str
 
     def __init__(self, version: str, description: Optional[Description],
                  definitions_list: List[Definition],
-                 values_list: List[Any]) -> None:
+                 values_list: List[Any], npages: int, mode: str = "binary") -> None:
         self.version = version
         self.description = description
+        self.npages = npages
+        self.mode = mode
         self.definitions = {definition.name: definition for definition in definitions_list}
         self.values = {definition.name: value for definition, value in zip(definitions_list, values_list)}
diff --git a/sdds/reader.py b/sdds/reader.py
old mode 100644
new mode 100755
index f81809a..853dd43
--- a/sdds/reader.py
+++ b/sdds/reader.py
@@ -6,14 +6,17 @@
 It provides a high-level function to read SDDS files in different formats, and a series of helpers.
 """
 import pathlib
+import shlex
 import struct
 import sys
 from typing import IO, Any, List, Optional, Generator, Dict, Union, Tuple, Callable, Type
 
 import numpy as np
 
-from sdds.classes import (SddsFile, Column, Parameter, Definition, Array, Data, Description,
-                          ENCODING, NUMTYPES_CAST, NUMTYPES_SIZES, get_dtype_str)
+from sdds.classes import (
+    SddsFile, Column, Parameter, Definition, Array, Data, Description,
+    ENCODING, NUMTYPES_CAST, NUMTYPES_SIZES, get_dtype_str,
+)
 
 
 def read_sdds(file_path: Union[pathlib.Path, str], endianness: str = None) -> SddsFile:
@@ -39,8 +42,8 @@ def read_sdds(file_path: Union[pathlib.Path, str], endianness: str = None) -> Sd
             endianness = _get_endianness(inbytes)
         version, definition_list, description, data = _read_header(inbytes)
         data_list = _read_data(data, definition_list, inbytes, endianness)
-
-        return SddsFile(version, description, definition_list, data_list)
+        npages = len(data_list)
+        return SddsFile(version, description, definition_list, data_list, npages, data.mode)
 
 
 ##############################################################################
@@ -144,10 +147,7 @@ def _read_bin_array(inbytes: IO[bytes], definition: Array, endianness: str) -> A
     dims, total_len = _read_bin_array_len(inbytes, definition.dimensions, endianness)
 
     if definition.type == "string":
-        len_type: str = "long"\
-            if not hasattr(definition, "modifier")\
-            else {"u1": "char", "i2": "short"}\
-            .get(definition.modifier, "long")
+        len_type: str = {"u1": "char", "i2": "short"}.get(getattr(definition, "modifier", None), "long")
         str_array = []
         for _ in range(total_len):
             str_len = int(_read_bin_numeric(inbytes, len_type, 1, endianness))
@@ -183,6 +183,7 @@ def _read_string(inbytes: IO[bytes], str_len: int, endianness: str) -> str:
 ##############################################################################
 
 def _read_data_ascii(definitions: List[Definition], inbytes: IO[bytes]) -> List[Any]:
+    # TODO: Some simplifications (jdilly)
     def _ascii_generator(ascii_text):
        for line in ascii_text:
            yield line
@@ -194,21 +195,51 @@ def _ascii_generator(ascii_text):
 
     # Get the generator for the text
     ascii_gen = _ascii_generator(ascii_text)
-
+
+    # Read page after page until the generator is exhausted and collect the values
+    # per definition: a flat list for a single page, one list per definition otherwise.
+    pages = []
+    npages = 0
+    try:
+        while True:  # loop over pages
+            page = _read_ascii_page(ascii_gen, definitions)
+
+            if npages == 0:
+                pages = page
+            elif npages == 1:
+                pages = [[l1, l2] for l1, l2 in zip(pages, page)]
+            else:
+                for l1, l2 in zip(pages, page):
+                    l1.append(l2)
+            npages = npages + 1
+    except StopIteration:
+        pass  # no more pages to read
+    return pages
+
+
+def _read_ascii_page(ascii_gen: Generator[str, None, None],
+                     definitions: List[Definition]) -> List[Any]:
     # Dict of function to call for each type of tag: array, parameter
     functs_dict = {Parameter: _read_ascii_parameter,
-                   Array: _read_ascii_array
+                   Array: _read_ascii_array,
+                   Column: _read_ascii_column,
+                   Data: _read_ascii_data
                    }
 
     # Iterate through every parameters and arrays in the file
     data = []
+    flag_col = True  # since all columns are read at once, _read_ascii_column is only called once per page
     for definition in definitions:
         def_tag = definition.__class__
 
         # Call the function handling the tag we're on
         # Change the current line according to the tag and dimensions
-        value = functs_dict[def_tag](ascii_gen, definition)
-        data.append(value)
+        if def_tag is not Column:
+            value = functs_dict[def_tag](ascii_gen, definition)
+            data.append(value)
+        elif flag_col:  # distinguish the column block from parameters and arrays
+            values = functs_dict[def_tag](ascii_gen, definitions)
+            for value in values:
+                data.append(list(value))
+            flag_col = False
     return data
 
@@ -222,15 +253,16 @@ def _read_ascii_parameter(ascii_gen: Generator[str, None, None],
             return definition.fixed_value
         if definition.type in NUMTYPES_CAST:
             return NUMTYPES_CAST[definition.type](definition.fixed_value)
-
+
+    para = next(ascii_gen)
     # No fixed value -> read a line
     # Strings can be returned without cast
     if definition.type == "string":
-        return next(ascii_gen)
+        return para
 
     # For other types, a cast is needed
     if definition.type in NUMTYPES_CAST:
-        return NUMTYPES_CAST[definition.type](next(ascii_gen))
+        return NUMTYPES_CAST[definition.type](para)
 
     raise TypeError(f"Type {definition.type} for Parameter unsupported")
 
@@ -239,12 +271,14 @@ def _read_ascii_array(ascii_gen: Generator[str, None, None],
                       definition: Array) -> np.ndarray:
 
     # Get the number of elements per dimension
-    dimensions = next(ascii_gen).split()
-    dimensions = np.array(dimensions, dtype="int")
+    # The shape is given on the line preceding the array data
+    shape = next(ascii_gen).split()
+    shape = np.array(shape, dtype="int")
 
     # Get all the data given by the dimensions
     data = []
-    while len(data) != np.prod(dimensions):
+    while len(data) != np.prod(shape):
         # The values on each line are split by a space
         data += next(ascii_gen).strip().split(' ')
 
@@ -254,8 +288,30 @@
     # Convert to np.array so that it can be reshaped to reflect the dimensions
     data = np.array(data)
-    data = data.reshape(dimensions)
+    data = data.reshape(shape)
+
+    return data
+
+
+def _read_ascii_column(ascii_gen: Generator[str, None, None],
+                       definitions: List[Definition]) -> List[Any]:
+    # Columns are read all at once: a row count on its own line, followed by
+    # one line per row with one entry per column definition.
+    col_definitions = [def_ for def_ in definitions if isinstance(def_, Column)]
+    nrow = int(next(ascii_gen))
+
+    data = []
+    for _ in range(nrow):
+        line = next(ascii_gen)
+        data.append([NUMTYPES_CAST[definition.type](elem)
+                     for definition, elem in zip(col_definitions, shlex.split(line))])
+    # Transpose: one sequence per column instead of one per row
+    data = list(zip(*data))
+
+    return data
+
+
+def _read_ascii_data(ascii_gen, definition):
+    # Reading values attached to the &data tag is not implemented yet
+    data = []
     return data
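Usage sketch for the reader changes above (illustrative only, not part of the patch; the nesting of multi-page values follows from how _read_data_ascii stacks pages per definition):

# Sketch only: reading the multi-page ascii file added in tests/inputs/test_pages.sdds.
from sdds.reader import read_sdds

sdds = read_sdds("tests/inputs/test_pages.sdds")
print(sdds.version, sdds.mode)       # "SDDS1", "ascii" (mode is taken from the &data tag)

definition, values = sdds["ShortP"]  # definition/value access is unchanged
print(sdds.values["ShortC"])         # column values, stacked per definition when the file has several pages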
diff --git a/sdds/writer.py b/sdds/writer.py
old mode 100644
new mode 100755
index 7f3a5aa..0159370
--- a/sdds/writer.py
+++ b/sdds/writer.py
@@ -5,15 +5,28 @@
 This module contains the writing functionality of ``sdds``.
 It provides a high-level function to write SDDS files in different formats, and a series of helpers.
 """
+import itertools
 import pathlib
 import struct
 from typing import IO, List, Union, Iterable, Tuple, Any
+
 import numpy as np
-from sdds.classes import (SddsFile, Column, Parameter, Definition, Array, Data, Description,
-                          ENCODING, get_dtype_str)
+
+from sdds.classes import (
+    SddsFile,
+    Column,
+    Parameter,
+    Definition,
+    Array,
+    Data,
+    Description,
+    ENCODING,
+    get_dtype_str,
+    NUMTYPES_ascii,
+)
 
 
-def write_sdds(sdds_file: SddsFile, output_path: Union[pathlib.Path, str]) -> None:
+def write_sdds(sdds_file: SddsFile, output_path: Union[pathlib.Path, str], mode: str = None) -> None:
     """
     Writes SddsFile object into ``output_path``. The byteorder will be big-endian, independent of
     the byteorder of the current machine.
@@ -22,16 +35,25 @@ def write_sdds(sdds_file: SddsFile, output_path: Union[pathlib.Path, str]) -> No
         sdds_file: `SddsFile` object to write
         output_path (Union[pathlib.Path, str]): `Path` object to the output SDDS file. Can be
             a `string`, in which case it will be cast to a `Path` object.
+        mode: Mode to write the SDDS file in, either "binary" or "ascii". If given, it
+            overrides the mode stored in ``sdds_file``.
     """
     output_path = pathlib.Path(output_path)
+    sdds_file.mode = mode or sdds_file.mode  # the explicit argument takes precedence over sdds_file.mode
+
     with output_path.open("wb") as outbytes:
         names = _write_header(sdds_file, outbytes)
-        _write_data(names, sdds_file, outbytes)
+        if sdds_file.mode == "binary":
+            _write_data(names, sdds_file, outbytes)
+        elif sdds_file.mode == "ascii":
+            _write_ascii_data(names, sdds_file, outbytes)
 
 
 def _write_header(sdds_file: SddsFile, outbytes: IO[bytes]) -> List[str]:
-    outbytes.writelines(("SDDS1\n".encode(ENCODING),
-                         "!# big-endian\n".encode(ENCODING)))
+    outbytes.write("SDDS1\n".encode(ENCODING))
+
+    if sdds_file.mode == "binary":
+        outbytes.write("!# big-endian\n".encode(ENCODING))
+
     names = []
     if sdds_file.description is not None:
         outbytes.write(_sdds_def_as_str(sdds_file.description).encode(ENCODING))
@@ -39,38 +61,123 @@
         names.append(def_name)
         definition = sdds_file.definitions[def_name]
         outbytes.write(_sdds_def_as_str(definition).encode(ENCODING))
-    outbytes.write("&data mode=binary, &end\n".encode(ENCODING))
+
+    outbytes.write(f"&data mode={sdds_file.mode}, &end\n".encode(ENCODING))
     return names
 
 
 def _sdds_def_as_str(definition: Union[Description, Definition, Data]) -> str:
     start = definition.TAG + " "
-    things = ", ".join([f"{key}={definition.__dict__[key]}"
-                        for key in definition.__dict__ if "__" not in key])
+    things = ", ".join(
+        [
+            f"{key}={definition.__dict__[key]}"
+            for key in definition.__dict__
+            if "__" not in key
+        ]
+    )
     end = " &end\n"
     return start + things + end
 
 
-def _write_data(names: List[str], sdds_file: SddsFile, outbytes: IO[bytes])-> None:
-    # row_count:
-    outbytes.write(np.array(0, dtype=get_dtype_str("long")).tobytes())
-    _write_parameters((sdds_file[name] for name in names
-                       if isinstance(sdds_file.definitions[name], Parameter)),
-                      outbytes)
-    _write_arrays((sdds_file[name] for name in names
-                   if isinstance(sdds_file.definitions[name], Array)),
-                  outbytes)
-    _write_columns((sdds_file[name] for name in names
-                    if isinstance(sdds_file.definitions[name], Column)),
-                   outbytes)
+def _get_row_count(sdds_file: SddsFile, names: List[str], npage: int = None) -> int:
+    # Number of rows in the column data (of the given page, if npage is not None)
+    col_vals = [
+        sdds_file.values[name] if npage is None else sdds_file.values[name][npage]
+        for name in names
+        if isinstance(sdds_file.definitions[name], Column)
+    ]
+    if not col_vals:
+        return 0
+    return len(col_vals[0])
+
+
+def _write_data(names: List[str], sdds_file: SddsFile, outbytes: IO[bytes]) -> None:
+    if sdds_file.npages > 1:
+        for npage in range(sdds_file.npages):  # write one block per page
+            # row count of this page
+            nrow = _get_row_count(sdds_file, names, npage)
+            outbytes.write(np.array(nrow, dtype=get_dtype_str("long")).tobytes())
+
+            _write_parameters(
+                (
+                    (sdds_file[name][0], sdds_file[name][1][npage])
+                    for name in names
+                    if isinstance(sdds_file.definitions[name], Parameter)
+                ),
+                outbytes,
+            )
+            _write_arrays(
+                (
+                    (sdds_file[name][0], sdds_file[name][1][npage])
+                    for name in names
+                    if isinstance(sdds_file.definitions[name], Array)
+                ),
+                outbytes,
+            )
+            _write_columns(
+                [
+                    [sdds_file[name][0], sdds_file.values[name][npage]]
+                    for name in names
+                    if isinstance(sdds_file.definitions[name], Column)
+                ],
+                outbytes,
+            )
+    else:
+        # row count
+        nrow = _get_row_count(sdds_file, names)
+        outbytes.write(np.array(nrow, dtype=get_dtype_str("long")).tobytes())
+        _write_parameters(
+            (
+                sdds_file[name]
+                for name in names
+                if isinstance(sdds_file.definitions[name], Parameter)
+            ),
+            outbytes,
+        )
+        _write_arrays(
+            (
+                sdds_file[name]
+                for name in names
+                if isinstance(sdds_file.definitions[name], Array)
+            ),
+            outbytes,
+        )
+        _write_columns(
+            [
+                sdds_file[name]
+                for name in names
+                if isinstance(sdds_file.definitions[name], Column)
+            ],
+            outbytes,
+        )
 
 
 def _write_parameters(param_gen: Iterable[Tuple[Parameter, Any]], outbytes: IO[bytes]):
     for param_def, value in param_gen:
         if param_def.type == "string":
             _write_string(value, outbytes)
+        elif param_def.type in ("char", "character"):
+            # written as a single raw character: sddsprintout does not display it,
+            # but it does not raise an error either
+            outbytes.write(
+                struct.pack(
+                    get_dtype_str("char", length=len(value)), value.encode(ENCODING)
+                )
+            )
         else:
-            outbytes.write(np.array(value, dtype=get_dtype_str(param_def.type)).tobytes())
+            outbytes.write(
+                np.array(value, dtype=get_dtype_str(param_def.type)).tobytes()
+            )
 
 
 def _write_arrays(array_gen: Iterable[Tuple[Array, Any]], outbytes: IO[bytes]):
@@ -91,14 +198,171 @@ def get_dimensions_from_array(value):
             for string in value:
                 _write_string(string, outbytes)
         else:
-            outbytes.write(np.array(value, dtype=get_dtype_str(array_def.type)).tobytes())
+            outbytes.write(
+                np.array(value, dtype=get_dtype_str(array_def.type)).tobytes()
+            )
 
 
-def _write_columns(col_gen: Iterable[Tuple[Column, Any]], outbytes: IO[bytes]):
-    # TODO: Implement the columns thing.
-    pass
+def _write_columns(col_data: List[Tuple[Column, Any]], outbytes: IO[bytes]):
+    if not col_data:
+        return
+
+    col_defs = [item[0] for item in col_data]
+    col_vals = [item[1] for item in col_data]
+    rows = list(zip(*col_vals))
+
+    # Binary column data is written row by row
+    for row in rows:
+        for col_def, value in zip(col_defs, row):
+            if col_def.type == "string":
+                _write_string(value, outbytes)
+            elif col_def.type in ("char", "character"):
+                outbytes.write(
+                    struct.pack(
+                        get_dtype_str("char", length=len(value)),
+                        value.encode(ENCODING),
+                    )
+                )
+            else:
+                outbytes.write(
+                    np.array(value, dtype=get_dtype_str(col_def.type)).tobytes()
+                )
 
 
 def _write_string(string: str, outbytes: IO[bytes]):
     outbytes.write(np.array(len(string), dtype=get_dtype_str("long")).tobytes())
-    outbytes.write(struct.pack(get_dtype_str("string", length=len(string)), string.encode(ENCODING)))
+    outbytes.write(
+        struct.pack(
+            get_dtype_str("string", length=len(string)), string.encode(ENCODING)
+        )
+    )
 
 
+def _write_ascii_data(names: List[str], sdds_file: SddsFile, outbytes: IO[bytes]) -> None:
+    """Write the data part of the SDDS file in ascii mode."""
+    # TODO: could probably be merged with _write_data
+    if sdds_file.npages > 1:
+        for npage in range(sdds_file.npages):  # write one block per page
+            pagecomment = "! page number %i\n" % (npage + 1)
+            outbytes.write(pagecomment.encode(ENCODING))
+            _write_ascii_parameters(
+                (
+                    (sdds_file[name][0], sdds_file[name][1][npage])
+                    for name in names
+                    if isinstance(sdds_file.definitions[name], Parameter)
+                ),
+                outbytes,
+            )
+            _write_ascii_arrays(
+                (
+                    (sdds_file[name][0], sdds_file[name][1][npage])
+                    for name in names
+                    if isinstance(sdds_file.definitions[name], Array)
+                ),
+                outbytes,
+            )
+            _write_ascii_columns(
+                [
+                    [sdds_file[name][0], sdds_file.values[name][npage]]
+                    for name in names
+                    if isinstance(sdds_file.definitions[name], Column)
+                ],
+                outbytes,
+            )
+    else:
+        _write_ascii_parameters(
+            (
+                sdds_file[name]
+                for name in names
+                if isinstance(sdds_file.definitions[name], Parameter)
+            ),
+            outbytes,
+        )
+        _write_ascii_arrays(
+            (
+                sdds_file[name]
+                for name in names
+                if isinstance(sdds_file.definitions[name], Array)
+            ),
+            outbytes,
+        )
+        _write_ascii_columns(
+            [
+                sdds_file[name]
+                for name in names
+                if isinstance(sdds_file.definitions[name], Column)
+            ],
+            outbytes,
+        )
+
+
+def _write_ascii_parameters(
+    param_gen: Iterable[Tuple[Parameter, Any]], outbytes: IO[bytes]
+):
+    # One parameter value per line
+    for param_def, value in param_gen:
+        tstr = NUMTYPES_ascii[param_def.type] % (value)
+        tstr = tstr + "\n"
+        outbytes.write(tstr.encode(ENCODING))
+
+
+def _write_ascii_arrays(array_gen: Iterable[Tuple[Array, Any]], outbytes: IO[bytes]):
+    def get_dimensions_from_array(value):
+        # Return the number of items per dimension
+        # For an array a[n][m], returns [n, m]
+        if isinstance(value, np.ndarray) or isinstance(value, list):
+            return [len(value)] + get_dimensions_from_array(value[0])
+        return []
+
+    for array_def, value in array_gen:
+        # The number of items per dimension needs to be written before the data
+        elements_per_dim = get_dimensions_from_array(value)
+        shape = np.array(elements_per_dim)
+        tstr = ""
+        for cell in shape:
+            tstr = tstr + " " + NUMTYPES_ascii["short"] % (cell)
+        tstr = tstr + "\n"
+        outbytes.write(tstr.encode(ENCODING))
+        # write the array data, one element at a time
+        for idx in itertools.product(*[range(s) for s in shape]):
+            val = value[idx]
+            outbytes.write(
+                str(" " + NUMTYPES_ascii[array_def.type] % (val)).encode(ENCODING)
+            )
+        outbytes.write("\n".encode(ENCODING))
+
+
+def _write_ascii_columns(col_data: List[Tuple[Column, Any]], outbytes: IO[bytes]):
+    if not col_data:
+        return
+
+    col_defs = [item[0] for item in col_data]
+    col_vals = [item[1] for item in col_data]
+
+    nrow = len(col_vals[0])
+    rows = list(zip(*col_vals))
+
+    # Row count on its own line, then one line per row with one entry per column
+    outbytes.write((" %i\n" % nrow).encode(ENCODING))
+    for row in rows:
+        for col_def, value in zip(col_defs, row):
+            outbytes.write(
+                str(" " + NUMTYPES_ascii[col_def.type] % (value)).encode(ENCODING)
+            )
+        outbytes.write("\n".encode(ENCODING))
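Usage sketch for the writer changes above (illustrative only, not part of the patch; it assumes Parameter and Column keep the plain (name, type) constructor from Definition, and the file names are placeholders):

# Sketch only: writing a small single-page file in the new ascii mode.
from sdds.classes import SddsFile, Parameter, Column
from sdds.writer import write_sdds

definitions = [Parameter("FloatP", "float"), Column("ShortC", "short")]
values = [3.3, [1, 2, 3]]
sdds = SddsFile("SDDS1", None, definitions, values, npages=1, mode="ascii")

write_sdds(sdds, "example_ascii.sdds")                  # uses sdds.mode ("ascii")
write_sdds(sdds, "example_binary.sdds", mode="binary")  # explicit mode overrides it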
diff --git a/tests/inputs/test_pages.sdds b/tests/inputs/test_pages.sdds
new file mode 100755
index 0000000..e16b2b2
--- /dev/null
+++ b/tests/inputs/test_pages.sdds
@@ -0,0 +1,47 @@
+SDDS1
+&description text=text, contents=contents, &end
+&parameter name=ShortP, type=short, &end
+&parameter name=LongP, type=long, &end
+&parameter name=FloatP, type=float, &end
+&parameter name=DoubleP, type=double, &end
+&parameter name=StringP, type=string, &end
+&parameter name=CharacterP, type=character, &end
+&array name=testF, type=float, dimensions = 2, &end
+&column name=ShortC, type=short, &end
+&column name=LongC, type=long, &end
+&column name=FloatC, type=float, &end
+&column name=DoubleC, type=double, &end
+&column name=StringC, type=string, &end
+&column name=CharacterC, type=character, &end
+&data mode=ascii, &end
+! page number 1
+1
+2
+ 3.29999995e+00
+ 4.400000000000000e+00
+five
+a
+2 2
+1
+2
+3
+4
+ 3
+1 1 1.00000000e+00 1.000000000000000e+00 "row 1" x
+2 2 2.00000000e+00 2.000000000000000e+00 "row 2" y
+3 3 3.00000000e+00 3.000000000000000e+00 "row 3" z
+! page number 2
+7
+7
+ 8.80000019e+00
+ 9.800000000000001e+00
+ten
+b
+2 2
+5 6
+7 8
+ 4
+-1 -1 -1.00000000e+00 -1.000000000000000e+00 "row 1" i
+-2 -2 -2.00000000e+00 -2.000000000000000e+00 "row 2" j
+-3 -3 -3.59999990e+00 -3.600000000000000e+00 "row 3" k
+-4 -4 -4.40000010e+00 -4.400000000000000e+00 "row 4" l
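Suggested round-trip test for the new multi-page ascii support (sketch only; test name, fixture usage and assertions are suggestions, not part of the patch):

# Sketch only: round-trip through the ascii writer and reader, using pytest's tmp_path fixture.
from sdds.reader import read_sdds
from sdds.writer import write_sdds


def test_ascii_pages_roundtrip(tmp_path):
    original = read_sdds("tests/inputs/test_pages.sdds")
    out_file = tmp_path / "roundtrip.sdds"
    write_sdds(original, out_file, mode="ascii")

    reread = read_sdds(out_file)
    assert reread.mode == "ascii"
    assert set(reread.values) == set(original.values)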