-
Notifications
You must be signed in to change notification settings - Fork 82
Closed
Description
There is a sporadic segfault that can occur in tutorial 3.
Running the script below (very similar to tutorial 3), which creates the environment many times, will likely reproduce at least one segfault.
# Reproduction script: builds the GPUDrive torch environment repeatedly
# (50 times) from the same config and data loader.
import os
from pathlib import Path

import torch
import mediapy

# Set working directory to the base directory 'gpudrive'
working_dir = Path.cwd()
while working_dir.name != 'gpudrive':
    working_dir = working_dir.parent
    # Give up at the home directory, or at the filesystem root. The root's
    # parent is the root itself, so without this second check the loop would
    # never terminate when the script is started outside the home tree.
    if working_dir == Path.home() or working_dir == working_dir.parent:
        raise FileNotFoundError("Base directory 'gpudrive' not found")
os.chdir(working_dir)

# The gpudrive imports are kept after the chdir, in the same order as the
# original script.
from gpudrive.env.config import EnvConfig
from gpudrive.env.env_torch import GPUDriveTorchEnv
from gpudrive.visualize.utils import img_from_fig
from gpudrive.env.dataset import SceneDataLoader

MAX_NUM_OBJECTS = 64  # Maximum number of objects in the scene we control
NUM_WORLDS = 2  # Number of parallel environments
UNIQUE_SCENES = 2  # Number of unique scenes

# For simplicity purposes in the notebook we use cpu; note that the simulator
# is optimized for GPU, so use cuda if possible.
device = 'cpu'

env_config = EnvConfig(
    steer_actions=torch.round(torch.linspace(-1.0, 1.0, 3), decimals=3),
    accel_actions=torch.round(torch.linspace(-3, 3, 3), decimals=3),
)

# Make dataloader
data_loader = SceneDataLoader(
    root="data/processed/examples",  # Path to the dataset
    # Batch size: keep this equal to the number of worlds (envs) so that
    # every world receives a different scene.
    batch_size=NUM_WORLDS,
    dataset_size=UNIQUE_SCENES,  # Total number of different scenes we want to use
    sample_with_replacement=False,
    seed=42,
    shuffle=True,
)

for i in range(50):
    print(f"On iteration {i}")

    # Make environment
    env = GPUDriveTorchEnv(
        config=env_config,
        data_loader=data_loader,
        max_cont_agents=MAX_NUM_OBJECTS,  # Maximum number of agents to control per scenario
        device=device,
    )
    # NOTE(review): the pasted script lost its indentation; this print is
    # assumed to belong to the loop body (one line per created environment).
    print("Environment create complete ______")
GDB output:
Thread 698 "python" received signal SIGSEGV, Segmentation fault.
[Switching to Thread 0x7fede9d81640 (LWP 3475700)]
_ZZN7madrona12StateManager12iterateQueryIJNS_4phys10broadphase3BVHEEZNS_9TaskGraph12iterateQueryINS_7ContextEPFvRS7_RS4_EJS4_EEEvRT_RNS_5QueryIJDpT1_EEEOT0_EUlDpRT_E_EEvjRKNSE_IJDpSL_EEESK_ENKUliSP_E_clIJPS4_EEEDaiSP_ (num_rows=<optimized out>, ptrs=<optimized out>, this=<optimized out>) at /home/jinkai.qiu/work/gpudrive/external/madrona/src/common/../../include/madrona/state.inl:427
427 fn(ptrs[i] ...);
(gdb) bt
#0 _ZZN7madrona12StateManager12iterateQueryIJNS_4phys10broadphase3BVHEEZNS_9TaskGraph12iterateQueryINS_7ContextEPFvRS7_RS4_EJS4_EEEvRT_RNS_5QueryIJDpT1_EEEOT0_EUlDpRT_E_EEvjRKNSE_IJDpSL_EEESK_ENKUliSP_E_clIJPS4_EEEDaiSP_ (num_rows=<optimized out>, ptrs=<optimized out>, this=<optimized out>)
at /home/jinkai.qiu/work/gpudrive/external/madrona/src/common/../../include/madrona/state.inl:427
#1 _ZN7madrona12StateManager21iterateArchetypesImplIJNS_4phys10broadphase3BVHEEZNS0_12iterateQueryIJS4_EZNS_9TaskGraph12iterateQueryINS_7ContextEPFvRS8_RS4_EJS4_EEEvRT_RNS_5QueryIJDpT1_EEEOT0_EUlDpRT_E_EEvjRKNSF_IJDpSM_EEESL_EUliSQ_E_JLj0EEEEvjST_SL_NSt6__mad116integer_sequenceIjJXspT1_EEEE (
this=0x2ab45a00, query=..., world_id=<optimized out>, fn=...)
at /home/jinkai.qiu/work/gpudrive/external/madrona/src/common/../../include/madrona/state.inl:413
#2 _ZN7madrona12StateManager17iterateArchetypesIJNS_4phys10broadphase3BVHEEZNS0_12iterateQueryIJS4_EZNS_9TaskGraph12iterateQueryINS_7ContextEPFvRS8_RS4_EJS4_EEEvRT_RNS_5QueryIJDpT1_EEEOT0_EUlDpRT_E_EEvjRKNSF_IJDpSM_EEESL_EUliSQ_E_EEvjST_SL_ (this=0x2ab45a00, query=...,
world_id=<optimized out>, fn=...) at /home/jinkai.qiu/work/gpudrive/external/madrona/src/common/../../include/madrona/state.inl:388
#3 _ZN7madrona12StateManager12iterateQueryIJNS_4phys10broadphase3BVHEEZNS_9TaskGraph12iterateQueryINS_7ContextEPFvRS7_RS4_EJS4_EEEvRT_RNS_5QueryIJDpT1_EEEOT0_EUlDpRT_E_EEvjRKNSE_IJDpSL_EEESK_ (this=0x2ab45a00, query=..., world_id=<optimized out>, fn=...)
at /home/jinkai.qiu/work/gpudrive/external/madrona/src/common/../../include/madrona/state.inl:424
#4 madrona::TaskGraph::iterateQuery<madrona::Context, void (*)(madrona::Context&, madrona::phys::broadphase::BVH&), madrona::phys::broadphase::BVH> (this=<optimized out>, ctx=..., query=..., fn=<optimized out>)
at /home/jinkai.qiu/work/gpudrive/external/madrona/src/common/../../include/madrona/taskgraph.inl:17
--Type <RET> for more, q to quit, c to continue without paging--c
#5 madrona::ParallelForNode<madrona::Context, &madrona::phys::broadphase::updateBVHEntry, madrona::phys::broadphase::BVH>::run (this=<optimized out>, ctx_base=..., taskgraph=...) at /home/jinkai.qiu/work/gpudrive/external/madrona/src/common/../../include/madrona/taskgraph_builder.inl:76
#6 std::__mad1::__invoke[abi:nn180100]<void (madrona::ParallelForNode<madrona::Context, &madrona::phys::broadphase::updateBVHEntry, madrona::phys::broadphase::BVH>::*)(madrona::Context&, madrona::TaskGraph&), madrona::ParallelForNode<madrona::Context, &madrona::phys::broadphase::updateBVHEntry, madrona::phys::broadphase::BVH>*, madrona::Context&, madrona::TaskGraph&, void>(void (madrona::ParallelForNode<madrona::Context, &madrona::phys::broadphase::updateBVHEntry, madrona::phys::broadphase::BVH>::*&&)(madrona::Context&, madrona::TaskGraph&), madrona::ParallelForNode<madrona::Context, &madrona::phys::broadphase::updateBVHEntry, madrona::phys::broadphase::BVH>*&&, madrona::Context&, madrona::TaskGraph&) (__args=..., __args=..., __f=<optimized out>, __a0=<optimized out>) at /home/jinkai.qiu/work/gpudrive/external/madrona/external/madrona-toolchain/bundled-toolchain/libcxx-noexcept/include/c++/v1/__type_traits/invoke.h:312
#7 std::__mad1::invoke[abi:nn180100]<void (madrona::ParallelForNode<madrona::Context, &madrona::phys::broadphase::updateBVHEntry, madrona::phys::broadphase::BVH>::*)(madrona::Context&, madrona::TaskGraph&), madrona::ParallelForNode<madrona::Context, &madrona::phys::broadphase::updateBVHEntry, madrona::phys::broadphase::BVH>*, madrona::Context&, madrona::TaskGraph&>(void (madrona::ParallelForNode<madrona::Context, &madrona::phys::broadphase::updateBVHEntry, madrona::phys::broadphase::BVH>::*&&)(madrona::Context&, madrona::TaskGraph&), madrona::ParallelForNode<madrona::Context, &madrona::phys::broadphase::updateBVHEntry, madrona::phys::broadphase::BVH>*&&, madrona::Context&, madrona::TaskGraph&) (__args=<optimized out>, __args=<optimized out>, __f=<optimized out>, __args=<optimized out>) at /home/jinkai.qiu/work/gpudrive/external/madrona/external/madrona-toolchain/bundled-toolchain/libcxx-noexcept/include/c++/v1/__functional/invoke.h:28
#8 _ZZN7madrona16TaskGraphBuilder9addNodeFnITnDaXadL_ZNS_15ParallelForNodeINS_7ContextEXadL_ZNS_4phys10broadphase14updateBVHEntryERS3_RNS5_3BVHEEEJS7_EE3runES6_RNS_9TaskGraphEEES9_EENS_15TaskGraphNodeIDENS0_11TypedDataIDIT0_EENS_4SpanIKSC_EENS_8OptionalISC_EEENKUlPNS_8NodeBaseEPS3_PSA_E_clESM_SN_SO_ (node_data=<optimized out>, ctx=<optimized out>, task_graph=<optimized out>, this=<optimized out>) at /home/jinkai.qiu/work/gpudrive/external/madrona/src/common/../../include/madrona/taskgraph_builder.inl:36
#9 _ZZN7madrona16TaskGraphBuilder9addNodeFnITnDaXadL_ZNS_15ParallelForNodeINS_7ContextEXadL_ZNS_4phys10broadphase14updateBVHEntryERS3_RNS5_3BVHEEEJS7_EE3runES6_RNS_9TaskGraphEEES9_EENS_15TaskGraphNodeIDENS0_11TypedDataIDIT0_EENS_4SpanIKSC_EENS_8OptionalISC_EEENUlPNS_8NodeBaseEPS3_PSA_E_8__invokeESM_SN_SO_ (node_data=<optimized out>, ctx=<optimized out>, task_graph=<optimized out>) at /home/jinkai.qiu/work/gpudrive/external/madrona/src/common/../../include/madrona/taskgraph_builder.inl:33
#10 0x00007ffef7f94d37 in madrona::TaskGraph::run (this=0x120c4548, ctx=0x9136d40) at /home/jinkai.qiu/work/gpudrive/external/madrona/src/core/taskgraph.cpp:168
#11 0x00007ffef7efdded in madrona::ThreadPoolExecutor::Impl::workerThread (this=0x2ab458c0, worker_id=<optimized out>) at /home/jinkai.qiu/work/gpudrive/external/madrona/src/mw/cpu_exec.cpp:224
#12 0x00007ffef7efdeca in madrona::ThreadPoolExecutor::Impl::make(madrona::ThreadPoolExecutor::Config const&)::$_0::operator()(madrona::ThreadPoolExecutor::Impl*, long) const (this=0x43777350, impl=0x3f10bf00, i=-1) at /home/jinkai.qiu/work/gpudrive/external/madrona/src/mw/cpu_exec.cpp:120
#13 std::__mad1::__invoke[abi:nn180100]<madrona::ThreadPoolExecutor::Impl::make(madrona::ThreadPoolExecutor::Config const&)::$_0, madrona::ThreadPoolExecutor::Impl*, long>(madrona::ThreadPoolExecutor::Impl::make(madrona::ThreadPoolExecutor::Config const&)::$_0&&, madrona::ThreadPoolExecutor::Impl*&&, long&&) (__f=..., __args=@0x43777360: 1, __args=@0x43777360: 1) at /home/jinkai.qiu/work/gpudrive/external/madrona/external/madrona-toolchain/bundled-toolchain/libcxx-noexcept/include/c++/v1/__type_traits/invoke.h:344
#14 std::__mad1::__thread_execute[abi:nn180100]<std::__mad1::unique_ptr<std::__mad1::__thread_struct, std::__mad1::default_delete<std::__mad1::__thread_struct> >, madrona::ThreadPoolExecutor::Impl::make(madrona::ThreadPoolExecutor::Config const&)::$_0, madrona::ThreadPoolExecutor::Impl*, long, 2ul, 3ul>(std::__mad1::tuple<std::__mad1::unique_ptr<std::__mad1::__thread_struct, std::__mad1::default_delete<std::__mad1::__thread_struct> >, madrona::ThreadPoolExecutor::Impl::make(madrona::ThreadPoolExecutor::Config const&)::$_0, madrona::ThreadPoolExecutor::Impl*, long>&, std::__mad1::__tuple_indices<2ul, 3ul>) (__t=...) at /home/jinkai.qiu/work/gpudrive/external/madrona/external/madrona-toolchain/bundled-toolchain/libcxx-noexcept/include/c++/v1/__thread/thread.h:193
#15 std::__mad1::__thread_proxy[abi:nn180100]<std::__mad1::tuple<std::__mad1::unique_ptr<std::__mad1::__thread_struct, std::__mad1::default_delete<std::__mad1::__thread_struct> >, madrona::ThreadPoolExecutor::Impl::make(madrona::ThreadPoolExecutor::Config const&)::$_0, madrona::ThreadPoolExecutor::Impl*, long> >(void*) (__vp=0x43777350) at /home/jinkai.qiu/work/gpudrive/external/madrona/external/madrona-toolchain/bundled-toolchain/libcxx-noexcept/include/c++/v1/__thread/thread.h:202
#16 0x00007ffff7c94ac3 in start_thread (arg=<optimized out>) at ./nptl/pthread_create.c:442
#17 0x00007ffff7d26850 in clone3 () at ../sysdeps/unix/sysv/linux/x86_64/clone3.S:81
Metadata
Metadata
Assignees
Labels
No labels