From 4b3f753dfa3483c33c1e7c437991955a16186e41 Mon Sep 17 00:00:00 2001
From: MHC03
Date: Tue, 25 Oct 2016 15:51:48 +0200
Subject: [PATCH] Read with Numpy

The added functions are a quicker solution to read binary files with the
numpy.fromfile method. It does not have an overhead like struct.unpack,
so you can read bigger files like 2GB of RAM without using 10GB.
---
 apt_importers.py | 38 +++++++++++++++++++++++++++++++++++++-
 1 file changed, 37 insertions(+), 1 deletion(-)

Review notes (this free-text area is ignored when the patch is applied):
- The extension checks originally used `is not` on strings, which compares
  object identity and therefore rejected every path, including valid ones.
  They now use str.endswith.
- Line breaks and indentation were reconstructed from a whitespace-collapsed
  copy of this patch. Whitespace of the context lines may differ from the
  target file (use `git apply --ignore-whitespace` if needed), and the blob
  id on the `index` line predates the fix above.

diff --git a/apt_importers.py b/apt_importers.py
index d9a358a..653980d 100644
--- a/apt_importers.py
+++ b/apt_importers.py
@@ -1,5 +1,7 @@
 import pandas as pd
 import struct
+import numpy as np
+
 
 def read_pos(f):
     """ Loads an APT .pos file as a pandas dataframe.
@@ -11,7 +13,7 @@ def read_pos(f):
         Da: mass/charge ratio of ion"""
     # read in the data
     n = len(file(f).read())/4
-    d = struct.unpack('>'+'f'*n,file(f).read(4*n))
+    d = struct.unpack('>'+'f'*n, file(f).read(4*n))
                     # '>' denotes 'big-endian' byte order
     # unpack data
     pos = pd.DataFrame({'x': d[0::4],
@@ -21,6 +23,23 @@ def read_pos(f):
     return pos
 
 
+def read_pos_numpy(file_path):
+    """
+    Load an APT .pos file with numpy.fromfile; look at read_pos for further details.
+    :param file_path: file path, has to end with ".pos"
+    :return: numpy array, where you can, for example, access all 'x' by pos_arr['x'] and so on
+    :raises ValueError: if file_path does not end with ".pos"
+    """
+    if not file_path.endswith(".pos"):
+        raise ValueError("File Path does not end with .pos.")
+    with open(file_path, 'rb') as f:
+        # >f4 is big endian 4 byte float
+        dt_type = np.dtype({'names': ['x', 'y', 'z', 'Da'],
+                            'formats': ['>f4', '>f4', '>f4', '>f4']})
+        pos_arr = np.fromfile(f, dt_type, -1)
+    return pos_arr
+
+
 def read_epos(f):
     """Loads an APT .epos file as a pandas dataframe.
 
@@ -74,6 +93,23 @@ def read_epos(f):
     return pos
 
 
+def read_epos_numpy(file_path):
+    """
+    Load an APT .epos file with numpy.fromfile; look at read_epos for further details.
+    :param file_path: file path, has to end with ".epos"
+    :return: numpy array, where you can, for example, access all 'x' by pos_arr['x'] and so on
+    :raises ValueError: if file_path does not end with ".epos"
+    """
+    if not file_path.endswith(".epos"):
+        raise ValueError("File Path does not end with .epos.")
+    with open(file_path, 'rb') as f:
+        # >f4 is big endian 4 byte float, >i4 is big endian 4 byte integer
+        dt_type = np.dtype({'names': ['x', 'y', 'z', 'Da', 'ns', 'Dc_kV', 'pulse_kV', 'det_x', 'det_y', 'pslep', 'ipp'],
+                            'formats': ['>f4', '>f4', '>f4', '>f4', '>f4', '>f4', '>f4', '>f4', '>f4', '>i4', '>i4']})
+        pos_arr = np.fromfile(f, dt_type, -1)
+    return pos_arr
+
+
 def read_rrng(f):
     """Loads a .rrng file produced by IVAS. Returns two dataframes of 'ions' and 'ranges'."""
 