diff --git a/cuda_bindings/cuda/bindings/cufile.pyx b/cuda_bindings/cuda/bindings/cufile.pyx
index a4e5c23994..c2f3718ce8 100644
--- a/cuda_bindings/cuda/bindings/cufile.pyx
+++ b/cuda_bindings/cuda/bindings/cufile.pyx
@@ -131,13 +131,21 @@ cdef class _py_anon_pod1:
         return obj
 
 
-_py_anon_pod3_dtype = _numpy.dtype([
-    ("dev_ptr_base", _numpy.intp, ),
-    ("file_offset", _numpy.int64, ),
-    ("dev_ptr_offset", _numpy.int64, ),
-    ("size_", _numpy.uint64, ),
-    ], align=True)
-
+cdef _get__py_anon_pod3_dtype_offsets():  # build the dtype from the C layout of `_anon_pod3`
+    cdef _anon_pod3 pod = _anon_pod3()
+    return _numpy.dtype({
+        'names': ['dev_ptr_base', 'file_offset', 'dev_ptr_offset', 'size_'],
+        'formats': [_numpy.intp, _numpy.int64, _numpy.int64, _numpy.uint64],
+        'offsets': [  # byte distance of each field from the start of `pod`
+            (<char*>&(pod.devPtr_base)) - (<char*>&pod),
+            (<char*>&(pod.file_offset)) - (<char*>&pod),
+            (<char*>&(pod.devPtr_offset)) - (<char*>&pod),
+            (<char*>&(pod.size)) - (<char*>&pod),
+        ],
+        'itemsize': sizeof(_anon_pod3),  # same type the offsets were measured on
+    })
+
+_py_anon_pod3_dtype = _get__py_anon_pod3_dtype_offsets()
 
 cdef class _py_anon_pod3:
     """Empty-initialize an instance of `_anon_pod3`.
@@ -256,12 +264,20 @@ cdef class _py_anon_pod3:
         return obj
 
 
-io_events_dtype = _numpy.dtype([
-    ("cookie", _numpy.intp, ),
-    ("status", _numpy.int32, ),
-    ("ret", _numpy.uint64, ),
-    ], align=True)
+cdef _get_io_events_dtype_offsets():  # build the dtype from the C layout of `CUfileIOEvents_t`
+    cdef CUfileIOEvents_t pod = CUfileIOEvents_t()
+    return _numpy.dtype({
+        'names': ['cookie', 'status', 'ret'],
+        'formats': [_numpy.intp, _numpy.int32, _numpy.uint64],
+        'offsets': [  # byte distance of each field from the start of `pod`
+            (<char*>&(pod.cookie)) - (<char*>&pod),
+            (<char*>&(pod.status)) - (<char*>&pod),
+            (<char*>&(pod.ret)) - (<char*>&pod),
+        ],
+        'itemsize': sizeof(CUfileIOEvents_t),  # dtype itemsize equals the C struct size
+    })
 
+io_events_dtype = _get_io_events_dtype_offsets()
 
 cdef class IOEvents:
     """Empty-initialize an array of `CUfileIOEvents_t`.
@@ -406,11 +422,19 @@ cdef class IOEvents: return obj -op_counter_dtype = _numpy.dtype([ - ("ok", _numpy.uint64, ), - ("err", _numpy.uint64, ), - ], align=True) +cdef _get_op_counter_dtype_offsets(): + cdef CUfileOpCounter_t pod = CUfileOpCounter_t() + return _numpy.dtype({ + 'names': ['ok', 'err'], + 'formats': [_numpy.uint64, _numpy.uint64], + 'offsets': [ + (&(pod.ok)) - (&pod), + (&(pod.err)) - (&pod), + ], + 'itemsize': sizeof(CUfileOpCounter_t), + }) +op_counter_dtype = _get_op_counter_dtype_offsets() cdef class OpCounter: """Empty-initialize an instance of `CUfileOpCounter_t`. @@ -511,39 +535,47 @@ cdef class OpCounter: return obj -per_gpu_stats_dtype = _numpy.dtype([ - ("uuid", _numpy.int8, (16,)), - ("read_bytes", _numpy.uint64, ), - ("read_bw_bytes_per_sec", _numpy.uint64, ), - ("read_utilization", _numpy.uint64, ), - ("read_duration_us", _numpy.uint64, ), - ("n_total_reads", _numpy.uint64, ), - ("n_p2p_reads", _numpy.uint64, ), - ("n_nvfs_reads", _numpy.uint64, ), - ("n_posix_reads", _numpy.uint64, ), - ("n_unaligned_reads", _numpy.uint64, ), - ("n_dr_reads", _numpy.uint64, ), - ("n_sparse_regions", _numpy.uint64, ), - ("n_inline_regions", _numpy.uint64, ), - ("n_reads_err", _numpy.uint64, ), - ("writes_bytes", _numpy.uint64, ), - ("write_bw_bytes_per_sec", _numpy.uint64, ), - ("write_utilization", _numpy.uint64, ), - ("write_duration_us", _numpy.uint64, ), - ("n_total_writes", _numpy.uint64, ), - ("n_p2p_writes", _numpy.uint64, ), - ("n_nvfs_writes", _numpy.uint64, ), - ("n_posix_writes", _numpy.uint64, ), - ("n_unaligned_writes", _numpy.uint64, ), - ("n_dr_writes", _numpy.uint64, ), - ("n_writes_err", _numpy.uint64, ), - ("n_mmap", _numpy.uint64, ), - ("n_mmap_ok", _numpy.uint64, ), - ("n_mmap_err", _numpy.uint64, ), - ("n_mmap_free", _numpy.uint64, ), - ("reg_bytes", _numpy.uint64, ), - ], align=True) - +cdef _get_per_gpu_stats_dtype_offsets(): + cdef CUfilePerGpuStats_t pod = CUfilePerGpuStats_t() + return _numpy.dtype({ + 'names': ['uuid', 
'read_bytes', 'read_bw_bytes_per_sec', 'read_utilization', 'read_duration_us', 'n_total_reads', 'n_p2p_reads', 'n_nvfs_reads', 'n_posix_reads', 'n_unaligned_reads', 'n_dr_reads', 'n_sparse_regions', 'n_inline_regions', 'n_reads_err', 'writes_bytes', 'write_bw_bytes_per_sec', 'write_utilization', 'write_duration_us', 'n_total_writes', 'n_p2p_writes', 'n_nvfs_writes', 'n_posix_writes', 'n_unaligned_writes', 'n_dr_writes', 'n_writes_err', 'n_mmap', 'n_mmap_ok', 'n_mmap_err', 'n_mmap_free', 'reg_bytes'], + 'formats': [_numpy.int8, _numpy.uint64, _numpy.uint64, _numpy.uint64, _numpy.uint64, _numpy.uint64, _numpy.uint64, _numpy.uint64, _numpy.uint64, _numpy.uint64, _numpy.uint64, _numpy.uint64, _numpy.uint64, _numpy.uint64, _numpy.uint64, _numpy.uint64, _numpy.uint64, _numpy.uint64, _numpy.uint64, _numpy.uint64, _numpy.uint64, _numpy.uint64, _numpy.uint64, _numpy.uint64, _numpy.uint64, _numpy.uint64, _numpy.uint64, _numpy.uint64, _numpy.uint64, _numpy.uint64], + 'offsets': [ + (&(pod.uuid)) - (&pod), + (&(pod.read_bytes)) - (&pod), + (&(pod.read_bw_bytes_per_sec)) - (&pod), + (&(pod.read_utilization)) - (&pod), + (&(pod.read_duration_us)) - (&pod), + (&(pod.n_total_reads)) - (&pod), + (&(pod.n_p2p_reads)) - (&pod), + (&(pod.n_nvfs_reads)) - (&pod), + (&(pod.n_posix_reads)) - (&pod), + (&(pod.n_unaligned_reads)) - (&pod), + (&(pod.n_dr_reads)) - (&pod), + (&(pod.n_sparse_regions)) - (&pod), + (&(pod.n_inline_regions)) - (&pod), + (&(pod.n_reads_err)) - (&pod), + (&(pod.writes_bytes)) - (&pod), + (&(pod.write_bw_bytes_per_sec)) - (&pod), + (&(pod.write_utilization)) - (&pod), + (&(pod.write_duration_us)) - (&pod), + (&(pod.n_total_writes)) - (&pod), + (&(pod.n_p2p_writes)) - (&pod), + (&(pod.n_nvfs_writes)) - (&pod), + (&(pod.n_posix_writes)) - (&pod), + (&(pod.n_unaligned_writes)) - (&pod), + (&(pod.n_dr_writes)) - (&pod), + (&(pod.n_writes_err)) - (&pod), + (&(pod.n_mmap)) - (&pod), + (&(pod.n_mmap_ok)) - (&pod), + (&(pod.n_mmap_err)) - (&pod), + (&(pod.n_mmap_free)) - 
(&pod), + (&(pod.reg_bytes)) - (&pod), + ], + 'itemsize': sizeof(CUfilePerGpuStats_t), + }) + +per_gpu_stats_dtype = _get_per_gpu_stats_dtype_offsets() cdef class PerGpuStats: """Empty-initialize an instance of `CUfilePerGpuStats_t`. @@ -896,12 +928,20 @@ cdef class PerGpuStats: return obj -descr_dtype = _numpy.dtype([ - ("type", _numpy.int32, ), - ("handle", _py_anon_pod1_dtype, ), - ("fs_ops", _numpy.intp, ), - ], align=True) +cdef _get_descr_dtype_offsets(): + cdef CUfileDescr_t pod = CUfileDescr_t() + return _numpy.dtype({ + 'names': ['type', 'handle', 'fs_ops'], + 'formats': [_numpy.int32, _py_anon_pod1_dtype, _numpy.intp], + 'offsets': [ + (&(pod.type)) - (&pod), + (&(pod.handle)) - (&pod), + (&(pod.fs_ops)) - (&pod), + ], + 'itemsize': sizeof(CUfileDescr_t), + }) +descr_dtype = _get_descr_dtype_offsets() cdef class Descr: """Empty-initialize an array of `CUfileDescr_t`. @@ -1144,52 +1184,60 @@ cdef class _py_anon_pod2: return obj -stats_level1_dtype = _numpy.dtype([ - ("read_ops", op_counter_dtype, ), - ("write_ops", op_counter_dtype, ), - ("hdl_register_ops", op_counter_dtype, ), - ("hdl_deregister_ops", op_counter_dtype, ), - ("buf_register_ops", op_counter_dtype, ), - ("buf_deregister_ops", op_counter_dtype, ), - ("read_bytes", _numpy.uint64, ), - ("write_bytes", _numpy.uint64, ), - ("read_bw_bytes_per_sec", _numpy.uint64, ), - ("write_bw_bytes_per_sec", _numpy.uint64, ), - ("read_lat_avg_us", _numpy.uint64, ), - ("write_lat_avg_us", _numpy.uint64, ), - ("read_ops_per_sec", _numpy.uint64, ), - ("write_ops_per_sec", _numpy.uint64, ), - ("read_lat_sum_us", _numpy.uint64, ), - ("write_lat_sum_us", _numpy.uint64, ), - ("batch_submit_ops", op_counter_dtype, ), - ("batch_complete_ops", op_counter_dtype, ), - ("batch_setup_ops", op_counter_dtype, ), - ("batch_cancel_ops", op_counter_dtype, ), - ("batch_destroy_ops", op_counter_dtype, ), - ("batch_enqueued_ops", op_counter_dtype, ), - ("batch_posix_enqueued_ops", op_counter_dtype, ), - ("batch_processed_ops", 
op_counter_dtype, ), - ("batch_posix_processed_ops", op_counter_dtype, ), - ("batch_nvfs_submit_ops", op_counter_dtype, ), - ("batch_p2p_submit_ops", op_counter_dtype, ), - ("batch_aio_submit_ops", op_counter_dtype, ), - ("batch_iouring_submit_ops", op_counter_dtype, ), - ("batch_mixed_io_submit_ops", op_counter_dtype, ), - ("batch_total_submit_ops", op_counter_dtype, ), - ("batch_read_bytes", _numpy.uint64, ), - ("batch_write_bytes", _numpy.uint64, ), - ("batch_read_bw_bytes", _numpy.uint64, ), - ("batch_write_bw_bytes", _numpy.uint64, ), - ("batch_submit_lat_avg_us", _numpy.uint64, ), - ("batch_completion_lat_avg_us", _numpy.uint64, ), - ("batch_submit_ops_per_sec", _numpy.uint64, ), - ("batch_complete_ops_per_sec", _numpy.uint64, ), - ("batch_submit_lat_sum_us", _numpy.uint64, ), - ("batch_completion_lat_sum_us", _numpy.uint64, ), - ("last_batch_read_bytes", _numpy.uint64, ), - ("last_batch_write_bytes", _numpy.uint64, ), - ], align=True) - +cdef _get_stats_level1_dtype_offsets(): + cdef CUfileStatsLevel1_t pod = CUfileStatsLevel1_t() + return _numpy.dtype({ + 'names': ['read_ops', 'write_ops', 'hdl_register_ops', 'hdl_deregister_ops', 'buf_register_ops', 'buf_deregister_ops', 'read_bytes', 'write_bytes', 'read_bw_bytes_per_sec', 'write_bw_bytes_per_sec', 'read_lat_avg_us', 'write_lat_avg_us', 'read_ops_per_sec', 'write_ops_per_sec', 'read_lat_sum_us', 'write_lat_sum_us', 'batch_submit_ops', 'batch_complete_ops', 'batch_setup_ops', 'batch_cancel_ops', 'batch_destroy_ops', 'batch_enqueued_ops', 'batch_posix_enqueued_ops', 'batch_processed_ops', 'batch_posix_processed_ops', 'batch_nvfs_submit_ops', 'batch_p2p_submit_ops', 'batch_aio_submit_ops', 'batch_iouring_submit_ops', 'batch_mixed_io_submit_ops', 'batch_total_submit_ops', 'batch_read_bytes', 'batch_write_bytes', 'batch_read_bw_bytes', 'batch_write_bw_bytes', 'batch_submit_lat_avg_us', 'batch_completion_lat_avg_us', 'batch_submit_ops_per_sec', 'batch_complete_ops_per_sec', 'batch_submit_lat_sum_us', 
'batch_completion_lat_sum_us', 'last_batch_read_bytes', 'last_batch_write_bytes'], + 'formats': [op_counter_dtype, op_counter_dtype, op_counter_dtype, op_counter_dtype, op_counter_dtype, op_counter_dtype, _numpy.uint64, _numpy.uint64, _numpy.uint64, _numpy.uint64, _numpy.uint64, _numpy.uint64, _numpy.uint64, _numpy.uint64, _numpy.uint64, _numpy.uint64, op_counter_dtype, op_counter_dtype, op_counter_dtype, op_counter_dtype, op_counter_dtype, op_counter_dtype, op_counter_dtype, op_counter_dtype, op_counter_dtype, op_counter_dtype, op_counter_dtype, op_counter_dtype, op_counter_dtype, op_counter_dtype, op_counter_dtype, _numpy.uint64, _numpy.uint64, _numpy.uint64, _numpy.uint64, _numpy.uint64, _numpy.uint64, _numpy.uint64, _numpy.uint64, _numpy.uint64, _numpy.uint64, _numpy.uint64, _numpy.uint64], + 'offsets': [ + (&(pod.read_ops)) - (&pod), + (&(pod.write_ops)) - (&pod), + (&(pod.hdl_register_ops)) - (&pod), + (&(pod.hdl_deregister_ops)) - (&pod), + (&(pod.buf_register_ops)) - (&pod), + (&(pod.buf_deregister_ops)) - (&pod), + (&(pod.read_bytes)) - (&pod), + (&(pod.write_bytes)) - (&pod), + (&(pod.read_bw_bytes_per_sec)) - (&pod), + (&(pod.write_bw_bytes_per_sec)) - (&pod), + (&(pod.read_lat_avg_us)) - (&pod), + (&(pod.write_lat_avg_us)) - (&pod), + (&(pod.read_ops_per_sec)) - (&pod), + (&(pod.write_ops_per_sec)) - (&pod), + (&(pod.read_lat_sum_us)) - (&pod), + (&(pod.write_lat_sum_us)) - (&pod), + (&(pod.batch_submit_ops)) - (&pod), + (&(pod.batch_complete_ops)) - (&pod), + (&(pod.batch_setup_ops)) - (&pod), + (&(pod.batch_cancel_ops)) - (&pod), + (&(pod.batch_destroy_ops)) - (&pod), + (&(pod.batch_enqueued_ops)) - (&pod), + (&(pod.batch_posix_enqueued_ops)) - (&pod), + (&(pod.batch_processed_ops)) - (&pod), + (&(pod.batch_posix_processed_ops)) - (&pod), + (&(pod.batch_nvfs_submit_ops)) - (&pod), + (&(pod.batch_p2p_submit_ops)) - (&pod), + (&(pod.batch_aio_submit_ops)) - (&pod), + (&(pod.batch_iouring_submit_ops)) - (&pod), + (&(pod.batch_mixed_io_submit_ops)) - 
(&pod), + (&(pod.batch_total_submit_ops)) - (&pod), + (&(pod.batch_read_bytes)) - (&pod), + (&(pod.batch_write_bytes)) - (&pod), + (&(pod.batch_read_bw_bytes)) - (&pod), + (&(pod.batch_write_bw_bytes)) - (&pod), + (&(pod.batch_submit_lat_avg_us)) - (&pod), + (&(pod.batch_completion_lat_avg_us)) - (&pod), + (&(pod.batch_submit_ops_per_sec)) - (&pod), + (&(pod.batch_complete_ops_per_sec)) - (&pod), + (&(pod.batch_submit_lat_sum_us)) - (&pod), + (&(pod.batch_completion_lat_sum_us)) - (&pod), + (&(pod.last_batch_read_bytes)) - (&pod), + (&(pod.last_batch_write_bytes)) - (&pod), + ], + 'itemsize': sizeof(CUfileStatsLevel1_t), + }) + +stats_level1_dtype = _get_stats_level1_dtype_offsets() cdef class StatsLevel1: """Empty-initialize an instance of `CUfileStatsLevel1_t`. @@ -1659,14 +1707,22 @@ cdef class StatsLevel1: return obj -io_params_dtype = _numpy.dtype([ - ("mode", _numpy.int32, ), - ("u", _py_anon_pod2_dtype, ), - ("fh", _numpy.intp, ), - ("opcode", _numpy.int32, ), - ("cookie", _numpy.intp, ), - ], align=True) +cdef _get_io_params_dtype_offsets(): + cdef CUfileIOParams_t pod = CUfileIOParams_t() + return _numpy.dtype({ + 'names': ['mode', 'u', 'fh', 'opcode', 'cookie'], + 'formats': [_numpy.int32, _py_anon_pod2_dtype, _numpy.intp, _numpy.int32, _numpy.intp], + 'offsets': [ + (&(pod.mode)) - (&pod), + (&(pod.u)) - (&pod), + (&(pod.fh)) - (&pod), + (&(pod.opcode)) - (&pod), + (&(pod.cookie)) - (&pod), + ], + 'itemsize': sizeof(CUfileIOParams_t), + }) +io_params_dtype = _get_io_params_dtype_offsets() cdef class IOParams: """Empty-initialize an array of `CUfileIOParams_t`. 
@@ -1831,12 +1887,20 @@ cdef class IOParams:
         return obj
 
 
-stats_level2_dtype = _numpy.dtype([
-    ("basic", stats_level1_dtype, ),
-    ("read_size_kb_hist", _numpy.uint64, (32,)),
-    ("write_size_kb_hist", _numpy.uint64, (32,)),
-    ], align=True)
+cdef _get_stats_level2_dtype_offsets():  # build the dtype from the C layout of `CUfileStatsLevel2_t`
+    cdef CUfileStatsLevel2_t pod = CUfileStatsLevel2_t()
+    return _numpy.dtype({
+        'names': ['basic', 'read_size_kb_hist', 'write_size_kb_hist'],
+        'formats': [stats_level1_dtype, (_numpy.uint64, (32,)), (_numpy.uint64, (32,))],  # histograms keep the (32,) sub-array shape of the replaced definition
+        'offsets': [  # byte distance of each field from the start of `pod`
+            (<char*>&(pod.basic)) - (<char*>&pod),
+            (<char*>&(pod.read_size_kb_hist)) - (<char*>&pod),
+            (<char*>&(pod.write_size_kb_hist)) - (<char*>&pod),
+        ],
+        'itemsize': sizeof(CUfileStatsLevel2_t),  # dtype itemsize equals the C struct size
+    })
 
+stats_level2_dtype = _get_stats_level2_dtype_offsets()
 
 cdef class StatsLevel2:
     """Empty-initialize an instance of `CUfileStatsLevel2_t`.
@@ -1946,12 +2010,20 @@ cdef class StatsLevel2:
         return obj
 
 
-stats_level3_dtype = _numpy.dtype([
-    ("detailed", stats_level2_dtype, ),
-    ("num_gpus", _numpy.uint32, ),
-    ("per_gpu_stats", per_gpu_stats_dtype, (16,)),
-    ], align=True)
+cdef _get_stats_level3_dtype_offsets():  # build the dtype from the C layout of `CUfileStatsLevel3_t`
+    cdef CUfileStatsLevel3_t pod = CUfileStatsLevel3_t()
+    return _numpy.dtype({
+        'names': ['detailed', 'num_gpus', 'per_gpu_stats'],
+        'formats': [stats_level2_dtype, _numpy.uint32, (per_gpu_stats_dtype, (16,))],  # `per_gpu_stats` keeps the (16,) sub-array shape of the replaced definition
+        'offsets': [  # byte distance of each field from the start of `pod`
+            (<char*>&(pod.detailed)) - (<char*>&pod),
+            (<char*>&(pod.num_gpus)) - (<char*>&pod),
+            (<char*>&(pod.per_gpu_stats)) - (<char*>&pod),
+        ],
+        'itemsize': sizeof(CUfileStatsLevel3_t),  # dtype itemsize equals the C struct size
+    })
 
+stats_level3_dtype = _get_stats_level3_dtype_offsets()
 
 cdef class StatsLevel3:
     """Empty-initialize an instance of `CUfileStatsLevel3_t`.
@@ -2061,20 +2133,6 @@ cdef class StatsLevel3:
         return obj
 
 
-# Hack: Overwrite the generated descr_dtype, which NumPy deduced the offset wrong.
-descr_dtype = _numpy.dtype({
-    "names": ['type', 'handle', 'fs_ops'],
-    "formats": [_numpy.int32, _py_anon_pod1_dtype, _numpy.intp],
-    "offsets": [0, 8, 16],
-}, align=True)
-
-# Hack: Overwrite the generated io_params_dtype, which NumPy deduced the offset wrong.
-io_params_dtype = _numpy.dtype({
-    "names": ['mode', 'u', 'fh', 'opcode', 'cookie'],
-    "formats": [_numpy.int32, _py_anon_pod2_dtype, _numpy.intp, _numpy.int32, _numpy.intp],
-    "offsets": [0, 8, 40, 48, 56],
-}, align=True)
-
 
 ###############################################################################
 # Enum