diff --git a/convert.py b/convert.py old mode 100644 new mode 100755 diff --git a/convert_gui.py b/convert_gui.py old mode 100644 new mode 100755 diff --git a/spc/spc.py b/spc/spc.py index 0baadca..ad185d3 100644 --- a/spc/spc.py +++ b/spc/spc.py @@ -4,396 +4,44 @@ author: Rohan Isaac """ +# pylint: disable=invalid-name from __future__ import division, absolute_import, unicode_literals, print_function import struct import numpy as np -from .sub import subFile, subFileOld -from .global_fun import read_subheader, flag_bits +from .sub import subFile, subFileOld, subFileShimadzu +from .util import read_subheader, flag_bits - -class File: - """ - Starts loading the data from a .SPC spectral file using data from the - header. Stores all the attributes of a spectral file: - - Data - ---- - content: Full raw data - sub[i]: sub file object for each subfileFor each subfile - sub[i].y: y data for each subfile - x: x-data, global, or for the first subheader - - Examples - -------- - >>> import spc - >>> ftir_1 = spc.File('/path/to/ftir.spc') - """ - - # Format strings for various parts of the file - # calculate size of strings using `struct.calcsize(string)` - head_str = "> 20 - self.month = (d >> 16) % (2**4) - self.day = (d >> 11) % (2**5) - self.hour = (d >> 6) % (2**5) - self.minute = d % (2**6) - - # null terminated string, replace null characters with spaces - # split and join to remove multiple spaces - try: - self.cmnt = ' '.join((self.fcmnt.replace('\x00', ' ')).split()) - except: - self.cmnt = self.fcmnt - - # figure out type of file - if self.fnsub > 1: - self.dat_multi = True - - if self.txyxys: - # x values are given - self.dat_fmt = '-xy' - elif self.txvals: - # only one subfile, which contains the x data - self.dat_fmt = 'x-y' - else: - # no x values are given, but they can be generated - self.dat_fmt = 'gx-y' - - print('{}({})'.format(self.dat_fmt, self.fnsub)) - - sub_pos = self.head_siz - - if not self.txyxys: - # txyxys don't have global x data - if self.txvals: - # if global x data is given - x_dat_pos = self.head_siz - x_dat_end = self.head_siz + (4 * self.fnpts) - self.x = np.array( - [struct.unpack_from( - 'f', content[x_dat_pos:x_dat_end], 4 * i)[0] - for i in range(0, self.fnpts)]) - sub_pos = x_dat_end - else: - # otherwise generate them - self.x = np.linspace(self.ffirst, self.flast, num=self.fnpts) - - # make a list of subfiles - self.sub = [] - - # if subfile directory is given - if self.dat_fmt == '-xy' and self.fnpts > 0: - self.directory = True - # loop over entries in directory - for i in range(0, self.fnsub): - ssfposn, ssfsize, ssftime = struct.unpack( - ' fxtype - # oytype -> fytype - self.oftflgs, \ - self.oversn, \ - self.oexp, \ - self.onpts, \ - self.ofirst, \ - self.olast, \ - self.fxtype, \ - self.fytype, \ - self.oyear, \ - self.omonth, \ - self.oday, \ - self.ohour, \ - self.ominute, \ - self.ores, \ - self.opeakpt, \ - self.onscans, \ - self.ospare, \ - self.ocmnt, \ - self.ocatxt, \ - self.osubh1 = struct.unpack(self.old_head_str.encode('utf8'), - content[:self.old_head_siz]) - - # Flag bits (assuming same) - self.tsprec, \ - self.tcgram, \ - self.tmulti, \ - self.trandm, \ - self.tordrd, \ - self.talabs, \ - self.txyxys, \ - self.txvals = flag_bits(self.oftflgs)[::-1] - - # fix data types - self.oexp = int(self.oexp) - self.onpts = int(self.onpts) # can't have floating num of pts - self.ofirst = float(self.ofirst) - self.olast = float(self.olast) - - # Date information - # !! to fix !! - # Year collected (0=no date/time) - MSB 4 bits are Z type - - # extracted as characters, using ord - self.omonth = ord(self.omonth) - self.oday = ord(self.oday) - self.ohour = ord(self.ohour) - self.ominute = ord(self.ominute) - - # number of scans (? subfiles sometimes ?) - self.onscans = int(self.onscans) - - # null terminated strings - self.ores = self.ores.split(b'\x00')[0] - self.ocmnt = self.ocmnt.split(b'\x00')[0] - - # can it have separate x values ? - self.x = np.linspace(self.ofirst, self.olast, num=self.onpts) - - # make a list of subfiles - self.sub = [] - - # already have subheader from main header, retrace steps - sub_pos = self.old_head_siz - self.subhead_siz - - # for each subfile - # in the old format we don't know how many subfiles to expect, - # just looping till we run out - i = 0 - while True: - try: - # read in subheader - subhead_lst = read_subheader(content[sub_pos:sub_pos + self.subhead_siz]) - - if subhead_lst[6] > 0: - # default to subfile points, unless it is zero - pts = subhead_lst[6] - else: - pts = self.onpts - - # figure out size of subheader - dat_siz = (4 * pts) - sub_end = sub_pos + self.subhead_siz + dat_siz - - # read into object, add to list - # send it pts since we have already figured that out - self.sub.append(subFileOld( - content[sub_pos:sub_end], pts, self.oexp, self.txyxys)) - # update next subfile postion, and index - sub_pos = sub_end - - i += 1 - except: - # zero indexed, set the total number of subfile - self.fnsub = i + 1 - break - - # assuming it can't have separate x values - self.dat_fmt = 'gx-y' - print('{}({})'.format(self.dat_fmt, self.fnsub)) - - self.fxtype = ord(self.fxtype) - self.fytype = ord(self.fytype) - # need to find from year apparently - self.fztype = 0 - self.set_labels() - - # -------------------------------------------- - # SHIMADZU - # -------------------------------------------- - elif self.fversn == b'\xcf': - print("Highly experimental format, may not work ") - raw_data = content[10240:] # data starts here (maybe every time) - # spacing between y and x data is atleast 0 bytes - s_32 = chr(int('0', 2)) * 32 - s_8 = chr(int('0', 2)) * 8 # zero double - dat_len = raw_data.find(s_32) - for i in range(dat_len, len(raw_data), 8): - # find first non zero double - if raw_data[i:i + 8] != s_8: - break - dat_siz = int(dat_len / 8) - self.y = struct.unpack(('<' + dat_siz * 'd').encode('utf8'), raw_data[:dat_len]) - self.x = struct.unpack(('<' + dat_siz * 'd').encode('utf8'), raw_data[i:i + dat_len]) + def unpack_flag_bits(self): + # Flag bits (assuming same) + self.tsprec, \ + self.tcgram, \ + self.tmulti, \ + self.trandm, \ + self.tordrd, \ + self.talabs, \ + self.txyxys, \ + self.txvals = flag_bits(self.tflgs)[::-1] + if self.txyxys: + # x values are given + self.dat_fmt = '-xy' + elif self.txvals: + # only one subfile, which contains the x data + self.dat_fmt = 'x-y' else: - print("File type %s not supported yet. Please add issue. " - % hex(ord(self.fversn))) - self.content = content - - # ------------------------------------------------------------------------ - # Process other data - # ------------------------------------------------------------------------ + # no x values are given, but they can be generated + self.dat_fmt = 'gx-y' def set_labels(self): """ @@ -434,13 +82,13 @@ def set_labels(self): "Millimeters (mm)", "Hours"] - if self.fxtype < 30: - self.xlabel = fxtype_op[self.fxtype] + if self.xtype < 30: + self.xlabel = fxtype_op[self.xtype] else: self.xlabel = "Unknown" - if self.fztype < 30: - self.zlabel = fxtype_op[self.fztype] + if self.ztype < 30: + self.zlabel = fxtype_op[self.ztype] else: self.zlabel = "Unknown" @@ -481,10 +129,10 @@ def set_labels(self): "Arbitrary or Single Beam with Valley Peaks", "Emission"] - if self.fytype < 27: - self.ylabel = fytype_op[self.fytype] - elif self.fytype > 127 and self.fytype < 132: - self.ylabel = fytype_op2[self.fytype - 128] + if self.ytype < 27: + self.ylabel = fytype_op[self.ytype] + elif 127 < self.ytype < 132: + self.ylabel = fytype_op2[self.ytype - 128] else: self.ylabel = "Unknown" @@ -495,7 +143,7 @@ def set_labels(self): # split it based on 00 string # format x, y, z if self.talabs: - ll = self.fcatxt.split(b'\x00') + ll = self.catxt.split(b'\x00') if len(ll) > 2: # make sure there are enough items to extract from xl, yl, zl = ll[:3] @@ -526,7 +174,393 @@ def set_exp_type(self): "Atomic Spectrum", "Chromatography Diode Array Spectra"] - self.exp_type = fexper_op[self.fexper] + self.exp_type = fexper_op[self.exper] + + +class OldFormat(FileFormat): + # Format string for the header + # Calculate size of strings using `struct.calcsize(string)` + head_str = " 0: + # default to subfile points, unless it is zero + pts = subhead_lst[6] + else: + pts = self.npts + + # figure out size of subheader + dat_siz = (4 * pts) + sub_end = sub_pos + self.subhead_siz + dat_siz + + # read into object, add to list + # send it pts since we have already figured that out + self.sub.append(subFileOld( + content[sub_pos:sub_end], pts, self.exp, self.txyxys)) + # update next subfile postion, and index + sub_pos = sub_end + + i += 1 + except: + # zero indexed, set the total number of subfile + self.nsub = i + 1 + break + + print('{}({})'.format(self.dat_fmt, self.nsub)) + + self.set_labels() + + def unpack_header(self, content): + self.tflgs, \ + self.versn, \ + self.exp, \ + self.npts, \ + self.first, \ + self.last, \ + self.xtype, \ + self.ytype, \ + self.year, \ + self.month, \ + self.day, \ + self.hour, \ + self.minute, \ + self.res, \ + self.peakpt, \ + self.nscans, \ + self.spare, \ + self.cmnt, \ + self.catxt, \ + self.subh1 = struct.unpack(self.head_str.encode('utf8'), + content[:self.head_siz]) + + # fix data types + self.exp = int(self.exp) + self.npts = int(self.npts) # can't have floating num of pts + self.first = float(self.first) + self.last = float(self.last) + + # Date information + # !! to fix !! + # Year collected (0=no date/time) - MSB 4 bits are Z type + + # extracted as characters, using ord + self.month = ord(self.month) + self.day = ord(self.day) + self.hour = ord(self.hour) + self.minute = ord(self.minute) + + # number of scans (? subfiles sometimes ?) + self.nscans = int(self.nscans) + + # null terminated strings + self.res = self.res.split(b'\x00')[0] + self.cmnt = self.cmnt.split(b'\x00')[0] + + self.xtype = ord(self.xtype) + self.ytype = ord(self.ytype) + # need to find from year apparently + self.ztype = 0 + +class NewFormat(FileFormat): + # Format string for the header + # Calculate size of strings using `struct.calcsize(string)` + head_str = " 0: + self.directory = True + # loop over entries in directory + for i in range(0, self.nsub): + ssfposn, ssfsize, ssftime = struct.unpack( + '> 20 + self.month = (d >> 16) % (2**4) + self.day = (d >> 11) % (2**5) + self.hour = (d >> 6) % (2**5) + self.minute = d % (2**6) + + # null terminated string, replace null characters with spaces + # split and join to remove multiple spaces + try: + self.cmnt = ' '.join((self.cmnt.replace('\x00', ' ')).split()) + except: + pass + + # figure out type of file + self.dat_multi = self.nsub > 1 + +class NewFormatMSB(NewFormat): + def __init__(self, content): + print("New MSB 1st, yet to be implemented") + pass # To be implemented + +class ShimadzuFormat(FileFormat): + def __init__(self, content): + print("Highly experimental format, may not work ") + raw_data = content[10240:] # data starts here (maybe every time) + # spacing between y and x data is atleast 0 bytes + s_32 = chr(int('0', 2)) * 32 + s_8 = chr(int('0', 2)) * 8 # zero double + dat_len = raw_data.find(s_32) + for i in range(dat_len, len(raw_data), 8): + # find first non zero double + if raw_data[i:i + 8] != s_8: + break + dat_siz = int(dat_len / 8) + + self.dat_fmt = 'x-y' + self.nsub = 1 + + self.y = struct.unpack(('<' + dat_siz * 'd').encode('utf8'), raw_data[:dat_len]) + self.x = struct.unpack(('<' + dat_siz * 'd').encode('utf8'), raw_data[i:i + dat_len]) + + self.ylabel = '' + self.xlabel = '' + + # creating a `sub` member to maintain consistency + self.sub = [] + self.sub.append(subFileShimadzu(self.y)) + +class File: + """ + Starts loading the data from a .SPC spectral file using data from the + header. Stores all the attributes of a spectral file: + + Data + ---- + content: Full raw data + sub[i]: sub file object for each subfileFor each subfile + sub[i].y: y data for each subfile + x: x-data, global, or for the first subheader + + Examples + -------- + >>> import spc + >>> ftir_1 = spc.File('/path/to/ftir.spc') + """ + + # ------------------------------------------------------------------------ + # CONSTRUCTOR + # ------------------------------------------------------------------------ + + def __init__(self, filename): + # load entire into memory temporarly + with open(filename, "rb") as fin: + content = fin.read() + # print "Read raw data" + + self.length = len(content) + # extract first two bytes to determine file type version + self.tflg, self.versn = struct.unpack('