WTscripts/read_raw_binary.py at main · wofti/WTscripts · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
#!/usr/bin/env python

# read_raw_binary.py
# Copyright (C) 2017 Wolfgang Tichy
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
from __future__ import print_function

#import numpy as np
import struct
import argparse

# Command line interface, built with Python's argparse module.
# The numeric options use type=int so that argparse itself rejects
# non-integer values with a usage message (instead of a ValueError
# traceback from a later int() call).
parser = argparse.ArgumentParser(description=
    '''Print header and some content of a binary data file.
       BUT first use: xxd file''',
    epilog='''Example:
    read_raw_binary.py -c 3 --format f --roff -10 SLyB/xyz.xy.bin''')
# layout of the binary data: values per row, value type, byte order
parser.add_argument('-c', metavar='COLUMNS', dest='cols', type=int,
        default=1, help="number of columns in data file")
parser.add_argument('--format', metavar='FORMAT', dest='format',
        default='d', help="'d' for double', 'f' for float")
parser.add_argument('--byteorder', metavar='BYTEORD', dest='byteorder',
        default='=', help="'=' is native, '<' is little, '>' is big endian")

# which part of the binary data gets printed
parser.add_argument('-r', metavar='ROWS', dest='rows', type=int,
        default=10, help="number of binary rows we print")
parser.add_argument('--roff', metavar='ROWOFFSET', dest='roff', type=int,
        default=0, help="first binary row printed (negative if from data end)")
parser.add_argument('--byteoff', metavar='BYTEOFFSET', dest='byteoff',
        type=int, default=0, help="extra byte offset for printing")

# the file to inspect
parser.add_argument('file', help='filename')

# parse the command line; the resulting values are unpacked and passed to
# load_data() at the bottom of this file
args = parser.parse_args()


#############################################################################
# function to read big or little endian doubles or floats from binary files
# read doubles or floats from file and return in vdata
def read_raw_binary(file, ndata, byteorder, format):
  """Read up to ndata numbers from an open binary file.

  file      -- file object opened in binary mode, positioned at the data
  ndata     -- number of items to read
  byteorder -- struct byte-order prefix: '=', '>' or '<' for native,
               big-endian and little-endian
  format    -- struct type code: 'd' or 'f' for double or float

  Returns a tuple with the unpacked values.  If the file ends before
  ndata items could be read, only the complete items that were read are
  returned.  If not even one complete item is available (or ndata <= 0),
  an empty list is returned.
  """
  # size of one item exactly as it will be unpacked below; the byte-order
  # prefix is included because it selects standard vs. native item sizes
  size = struct.calcsize(byteorder + format)
  # a negative count would make file.read() return the whole rest of the file
  if ndata <= 0 or size == 0: return []
  # read data into a byte string
  bstr = file.read(size*ndata)
  # number of complete items actually read (less than ndata at end of file)
  nread = len(bstr) // size
  if nread == 0: return []
  # unpack the complete items into a tuple, ordered as in byteorder;
  # trailing bytes of an incomplete item are dropped
  fmt = byteorder + ('%d' % nread) + format
  return struct.unpack(fmt, bstr[:nread*size])


# figure out if a line is actually text or binary
def is_text(line):
  """Return 1 if line can be decoded as UTF-8 text, otherwise 0.

  line -- byte string, e.g. one line read from a file opened in 'rb' mode

  This is only a heuristic: binary data that happens to be valid UTF-8
  is reported as text.
  """
  try:
    line.decode('utf_8')
  except (UnicodeDecodeError, AttributeError):
    # UnicodeDecodeError: the bytes are not valid UTF-8, i.e. binary data.
    # AttributeError: the object has no decode() method, so it is not a
    # byte string; report it as "not text" as well.
    return 0
  return 1

# load data from e.g. a bam vtk file
def load_data(filename, cols, byteorder, format, rows, roff, byteoff):
  """Print the text header of a file and some rows of its binary data.

  filename  -- path of the file to inspect
  cols      -- number of values per binary row
  byteorder -- struct byte-order prefix ('=', '<' or '>')
  format    -- struct type code of one value ('d' or 'f')
  rows      -- maximum number of binary rows to print
  roff      -- first row to print; counted from the start of the binary
               data if >= 0, counted back from the end of the file if < 0
  byteoff   -- extra byte offset added to the seek positions

  Everything is written to stdout; nothing is returned.
  """
  # size of one item as it is unpacked (byte-order prefix selects standard
  # vs. native sizes in struct)
  size = struct.calcsize(byteorder + format)
  with open(filename, 'rb') as f:
    # print all text header lines
    print('########### text header at begining of file ###########')
    while True:
      pos = f.tell()
      line = f.readline()
      if not line: break
      if is_text(line) != 1:
        f.seek(pos) # go back in file f to start of line
        break
      # is_text() checks for UTF-8, so decode as UTF-8 here as well;
      # decoding as ASCII raises on header lines with non-ASCII characters
      print('#', line.decode('utf_8'), end='')
    # once we get here, we have read the header and now the data start;
    # the file position is pos in both exit paths of the loop above
    print('########### binary data starts at pos =', pos,
          '###########')
    ndata = cols
    rowbytes = ndata*size
    if roff >= 0:
      base = pos          # count rows from the start of the binary data
    else:
      f.seek(0, 2)        # the 2 means seek from end of file
      base = f.tell()     # count rows back from the end of the file
      if rows > -roff:
        rows = -roff # make sure we do not print past end of file
    # Seeking to a negative absolute position raises an error, so a window
    # that would start before the beginning of the file is clamped to 0.
    f.seek(max(base + rowbytes*roff + byteoff, 0))
    pos2 = f.tell()
    if pos2 != pos:
      print('# ...')
      print('########### printing', rows, 'rows from pos =', pos2,
            '###########')
    out_of_data = False
    for _ in range(rows):
      vdata = read_raw_binary(f, ndata, byteorder, format)
      if len(vdata) == 0:
        # nothing more to read: stop instead of printing empty rows
        out_of_data = True
        break
      for v in vdata: print('%.16g' % v, end=' ')
      print()
    # '# ...' marks data that follow the printed rows, so it is omitted
    # when the loop above ran out of data
    if not out_of_data and (roff >= 0 or rows < -roff):
      print('# ...')
    # reported end position is the end of the file shifted by byteoff
    f.seek(byteoff, 2) #the 2 means seek from end of file
    pos = f.tell()
    print('########### binary data ends at pos =', pos,
          '###########')
    print('########### ASSUMING, each binary data item has size =', size,
          '###########')

#############################################################################

# unpack the parsed command line options; the numeric ones are converted
# to int, the others are used as given
file, byteorder, format = args.file, args.byteorder, args.format
cols, rows, roff, byteoff = [
    int(opt) for opt in (args.cols, args.rows, args.roff, args.byteoff)]


# print the text header and the requested rows of the data file
load_data(file, cols, byteorder, format, rows, roff, byteoff)

#load_data('bamo.00685_320/ID_level_1_proc_88.dat',
#          int(args.cols), args.byteorder, format, rows, roff, byteoff)
#load_data('BAMSLy_m1.35o.00685_320/ID_level_1_proc_88.dat',
#          int(args.cols), args.byteorder, format, rows, roff, byteoff)