diff --git a/array.c b/array.c index 9061919bf81da8..e34d3070a3a1d6 100644 --- a/array.c +++ b/array.c @@ -18,6 +18,10 @@ #include "probes.h" #include "id.h" #include "debug_counter.h" +#include "gc.h" +#include "transient_heap.h" + +// #define ARRAY_DEBUG #ifndef ARRAY_DEBUG # define NDEBUG @@ -53,6 +57,8 @@ VALUE rb_cArray; #define FL_SET_EMBED(a) do { \ assert(!ARY_SHARED_P(a)); \ FL_SET((a), RARRAY_EMBED_FLAG); \ + FL_UNSET_RAW((a), RARRAY_TRANSIENT_FLAG); \ + ary_verify(a); \ } while (0) #define FL_UNSET_EMBED(ary) FL_UNSET((ary), RARRAY_EMBED_FLAG|RARRAY_EMBED_LEN_MASK) #define FL_SET_SHARED(ary) do { \ @@ -130,11 +136,65 @@ VALUE rb_cArray; } while (0) #define FL_SET_SHARED_ROOT(ary) do { \ assert(!ARY_EMBED_P(ary)); \ + assert(!ARY_TRANSIENT_P(ary)); \ FL_SET((ary), RARRAY_SHARED_ROOT_FLAG); \ } while (0) #define ARY_SET(a, i, v) RARRAY_ASET((assert(!ARY_SHARED_P(a)), (a)), (i), (v)) + +#ifdef ARRAY_DEBUG +#define ary_verify(ary) ary_verify_(ary, __FILE__, __LINE__) + +static VALUE +ary_verify_(VALUE ary, const char *file, int line) +{ + assert(RB_TYPE_P(ary, T_ARRAY)); + + if (FL_TEST(ary, ELTS_SHARED)) { + VALUE root = RARRAY(ary)->as.heap.aux.shared; + const VALUE *ptr = RARRAY_CONST_PTR(ary); + const VALUE *root_ptr = RARRAY_CONST_PTR(root); + long len = RARRAY_LEN(ary), root_len = RARRAY_LEN(root); + assert(FL_TEST(root, RARRAY_SHARED_ROOT_FLAG)); + assert(root_ptr <= ptr && ptr + len <= root_ptr + root_len); + ary_verify(root); + } + else if (ARY_EMBED_P(ary)) { + assert(!ARY_TRANSIENT_P(ary)); + assert(!ARY_SHARED_P(ary)); + assert(RARRAY_LEN(ary) <= RARRAY_EMBED_LEN_MAX); + } + else { +#if 1 + const VALUE *ptr = RARRAY_CONST_PTR(ary); + long i, len = RARRAY_LEN(ary); + volatile VALUE v; + if (len > 1) len = 1; // check only HEAD + for (i=0; ias.heap.aux.capa; + + if (ARY_TRANSIENT_P(ary)) { + if (new_capa <= old_capa) { + /* do nothing */ + } + else { + VALUE *new_ptr = rb_transient_heap_alloc(ary, sizeof(VALUE) * new_capa); + + if (new_ptr == NULL) { + new_ptr = ALLOC_N(VALUE, new_capa); + FL_UNSET_RAW(ary, RARRAY_TRANSIENT_FLAG); + } + + MEMCPY(new_ptr, ARY_HEAP_PTR(ary), VALUE, old_capa); + ARY_SET_PTR(ary, new_ptr); + } + } + else { + SIZED_REALLOC_N(RARRAY(ary)->as.heap.ptr, VALUE, new_capa, old_capa); + } + ary_verify(ary); +} + +void +rb_ary_transient_heap_promote(VALUE ary, int promote) +{ + if (ARY_TRANSIENT_P(ary)) { + VALUE *new_ptr; + const VALUE *old_ptr = RARRAY_CONST_PTR(ary); + long capa = RARRAY(ary)->as.heap.aux.capa; + long len = RARRAY(ary)->as.heap.len; + if (ARY_SHARED_ROOT_P(ary)) { + capa = len; + } + + assert(ARY_OWNS_HEAP_P(ary)); + assert(ARY_TRANSIENT_P(ary)); + + if (promote) { + new_ptr = ALLOC_N(VALUE, capa); + FL_UNSET_RAW(ary, RARRAY_TRANSIENT_FLAG); + } + else { + new_ptr = ary_heap_alloc(ary, capa); + } + + MEMCPY(new_ptr, old_ptr, VALUE, capa); + /* do not use ARY_SET_PTR() because they assert !frozen */ + RARRAY(ary)->as.heap.ptr = new_ptr; + } + + ary_verify(ary); +} + static void ary_resize_capa(VALUE ary, long capacity) { assert(RARRAY_LEN(ary) <= capacity); assert(!OBJ_FROZEN(ary)); assert(!ARY_SHARED_P(ary)); + + // fprintf(stderr, "ary_resize_capa (%ld): %s\n", capacity, rb_obj_info(ary)); + if (capacity > RARRAY_EMBED_LEN_MAX) { if (ARY_EMBED_P(ary)) { long len = ARY_EMBED_LEN(ary); - VALUE *ptr = ALLOC_N(VALUE, (capacity)); + VALUE *ptr = ary_heap_alloc(ary, capacity); + MEMCPY(ptr, ARY_EMBED_PTR(ary), VALUE, len); FL_UNSET_EMBED(ary); ARY_SET_PTR(ary, ptr); ARY_SET_HEAP_LEN(ary, len); } else { - SIZED_REALLOC_N(RARRAY(ary)->as.heap.ptr, VALUE, capacity, RARRAY(ary)->as.heap.aux.capa); + // fprintf(stderr, "ary_resize_capa %s\n", rb_obj_info(ary)); + ary_heap_realloc(ary, capacity); } - ARY_SET_CAPA(ary, (capacity)); + ARY_SET_CAPA(ary, capacity); + // fprintf(stderr, "-> ary_resize_capa: %s\n", rb_obj_info(ary)); + + // fprintf(stderr, "ary_resize_capa %p len:%ld capa:%ld - %s\n", (void *)ary, RARRAY_LEN(ary), capacity, rb_obj_info(ary)); } else { if (!ARY_EMBED_P(ary)) { long len = RARRAY_LEN(ary); + long old_capa = RARRAY(ary)->as.heap.aux.capa; const VALUE *ptr = RARRAY_CONST_PTR(ary); - if (len > capacity) len = capacity; MEMCPY((VALUE *)RARRAY(ary)->as.ary, ptr, VALUE, len); + ary_heap_free_ptr(ary, ptr, old_capa); + FL_SET_EMBED(ary); ARY_SET_LEN(ary, len); - ruby_sized_xfree((VALUE *)ptr, RARRAY(ary)->as.heap.aux.capa); + + // fprintf(stderr, "ary_resize_capa: heap->embed %p len:%ld\n", (void *)ary, len); } } + + ary_verify(ary); } static inline void @@ -242,8 +416,9 @@ ary_shrink_capa(VALUE ary) long old_capa = RARRAY(ary)->as.heap.aux.capa; assert(!ARY_SHARED_P(ary)); assert(old_capa >= capacity); - if (old_capa > capacity) - SIZED_REALLOC_N(RARRAY(ary)->as.heap.ptr, VALUE, capacity, old_capa); + if (old_capa > capacity) ary_heap_realloc(ary, capacity); + + ary_verify(ary); } static void @@ -258,7 +433,10 @@ ary_double_capa(VALUE ary, long min) new_capa = (ARY_MAX_SIZE - min) / 2; } new_capa += min; + // fprintf(stderr, "ary_double_capa: %p %d\n", (void *)ary, FL_TEST(ary, RARRAY_TRANSIENT_FLAG) ? 1 : 0); ary_resize_capa(ary, new_capa); + + ary_verify(ary); } static void @@ -272,6 +450,7 @@ rb_ary_decrement_share(VALUE shared) } else if (num > 0) { ARY_SET_SHARED_NUM(shared, num); + // ary_verify(shared); } } } @@ -313,7 +492,9 @@ rb_ary_set_shared(VALUE ary, VALUE shared) static inline void rb_ary_modify_check(VALUE ary) { + rb_transient_heap_verify(); rb_check_frozen(ary); + ary_verify(ary); } void @@ -323,6 +504,9 @@ rb_ary_modify(VALUE ary) if (ARY_SHARED_P(ary)) { long shared_len, len = RARRAY_LEN(ary); VALUE shared = ARY_SHARED(ary); + + ary_verify(shared); + if (len <= RARRAY_EMBED_LEN_MAX) { const VALUE *ptr = ARY_HEAP_PTR(ary); FL_UNSET_SHARED(ary); @@ -343,7 +527,7 @@ rb_ary_modify(VALUE ary) rb_ary_decrement_share(shared); } else { - VALUE *ptr = ALLOC_N(VALUE, len); + VALUE *ptr = ary_heap_alloc(ary, len); MEMCPY(ptr, RARRAY_CONST_PTR(ary), VALUE, len); rb_ary_unshare(ary); ARY_SET_CAPA(ary, len); @@ -352,6 +536,7 @@ rb_ary_modify(VALUE ary) rb_gc_writebarrier_remember(ary); } + ary_verify(ary); } static VALUE @@ -370,7 +555,10 @@ ary_ensure_room_for_push(VALUE ary, long add_len) if (ARY_SHARED_OCCUPIED(shared)) { if (RARRAY_CONST_PTR(ary) - RARRAY_CONST_PTR(shared) + new_len <= RARRAY_LEN(shared)) { rb_ary_modify_check(ary); - return shared; + + ary_verify(ary); + ary_verify(shared); + return shared; } else { /* if array is shared, then it is likely it participate in push/shift pattern */ @@ -379,11 +567,13 @@ ary_ensure_room_for_push(VALUE ary, long add_len) if (new_len > capa - (capa >> 6)) { ary_double_capa(ary, new_len); } + ary_verify(ary); return ary; } } } - rb_ary_modify(ary); + ary_verify(ary); + rb_ary_modify(ary); } else { rb_ary_modify_check(ary); @@ -393,6 +583,7 @@ ary_ensure_room_for_push(VALUE ary, long add_len) ary_double_capa(ary, new_len); } + ary_verify(ary); return ary; } @@ -465,7 +656,7 @@ ary_new(VALUE klass, long capa) ary = ary_alloc(klass); if (capa > RARRAY_EMBED_LEN_MAX) { - ptr = ALLOC_N(VALUE, capa); + ptr = ary_heap_alloc(ary, capa); FL_UNSET_EMBED(ary); ARY_SET_PTR(ary, ptr); ARY_SET_CAPA(ary, capa); @@ -529,7 +720,9 @@ rb_ary_new_from_values(long n, const VALUE *elts) VALUE rb_ary_tmp_new(long capa) { - return ary_new(0, capa); + VALUE ary = ary_new(0, capa); + rb_ary_transient_heap_promote(ary, TRUE); + return ary; } VALUE @@ -546,7 +739,7 @@ rb_ary_free(VALUE ary) { if (ARY_OWNS_HEAP_P(ary)) { RB_DEBUG_COUNTER_INC(obj_ary_ptr); - ruby_sized_xfree((void *)ARY_HEAP_PTR(ary), ARY_HEAP_SIZE(ary)); + ary_heap_free(ary); } else { RB_DEBUG_COUNTER_INC(obj_ary_embed); @@ -569,13 +762,15 @@ ary_discard(VALUE ary) { rb_ary_free(ary); RBASIC(ary)->flags |= RARRAY_EMBED_FLAG; - RBASIC(ary)->flags &= ~RARRAY_EMBED_LEN_MASK; + RBASIC(ary)->flags &= ~(RARRAY_EMBED_LEN_MASK | RARRAY_TRANSIENT_FLAG); } static VALUE ary_make_shared(VALUE ary) { assert(!ARY_EMBED_P(ary)); + ary_verify(ary); + if (ARY_SHARED_P(ary)) { return ARY_SHARED(ary); } @@ -583,6 +778,7 @@ ary_make_shared(VALUE ary) return ary; } else if (OBJ_FROZEN(ary)) { + rb_ary_transient_heap_promote(ary, TRUE); ary_shrink_capa(ary); FL_SET_SHARED_ROOT(ary); ARY_SET_SHARED_NUM(ary, 1); @@ -590,18 +786,25 @@ ary_make_shared(VALUE ary) } else { long capa = ARY_CAPA(ary), len = RARRAY_LEN(ary); + const VALUE *ptr; NEWOBJ_OF(shared, struct RArray, 0, T_ARRAY | (RGENGC_WB_PROTECTED_ARRAY ? FL_WB_PROTECTED : 0)); - FL_UNSET_EMBED(shared); + rb_ary_transient_heap_promote(ary, TRUE); + ptr = ARY_HEAP_PTR(ary); + + FL_UNSET_EMBED(shared); ARY_SET_LEN((VALUE)shared, capa); - ARY_SET_PTR((VALUE)shared, RARRAY_CONST_PTR(ary)); - ary_mem_clear((VALUE)shared, len, capa - len); + ARY_SET_PTR((VALUE)shared, ptr); + ary_mem_clear((VALUE)shared, len, capa - len); FL_SET_SHARED_ROOT(shared); ARY_SET_SHARED_NUM((VALUE)shared, 1); FL_SET_SHARED(ary); ARY_SET_SHARED(ary, (VALUE)shared); OBJ_FREEZE(shared); - return (VALUE)shared; + + ary_verify((VALUE)shared); + ary_verify(ary); + return (VALUE)shared; } } @@ -736,7 +939,7 @@ rb_ary_initialize(int argc, VALUE *argv, VALUE ary) rb_ary_modify(ary); if (argc == 0) { if (ARY_OWNS_HEAP_P(ary) && RARRAY_CONST_PTR(ary) != 0) { - ruby_sized_xfree((void *)RARRAY_CONST_PTR(ary), ARY_HEAP_SIZE(ary)); + ary_heap_free(ary); } rb_ary_unshare_safe(ary); FL_SET_EMBED(ary); @@ -858,6 +1061,9 @@ ary_make_partial(VALUE ary, VALUE klass, long offset, long len) ARY_INCREASE_PTR(result, offset); ARY_SET_LEN(result, len); + + ary_verify(shared); + ary_verify(result); return result; } } @@ -916,12 +1122,13 @@ ary_take_first_or_last(int argc, const VALUE *argv, VALUE ary, enum ary_take_pos VALUE rb_ary_push(VALUE ary, VALUE item) { - long idx = RARRAY_LEN(ary); + long idx = RARRAY_LEN((ary_verify(ary), ary)); VALUE target_ary = ary_ensure_room_for_push(ary, 1); RARRAY_PTR_USE(ary, ptr, { RB_OBJ_WRITE(target_ary, &ptr[idx], item); }); ARY_SET_LEN(ary, idx + 1); + ary_verify(ary); return ary; } @@ -973,6 +1180,7 @@ rb_ary_pop(VALUE ary) } --n; ARY_SET_LEN(ary, n); + ary_verify(ary); return RARRAY_AREF(ary, n); } @@ -1006,6 +1214,7 @@ rb_ary_pop_m(int argc, VALUE *argv, VALUE ary) rb_ary_modify_check(ary); result = ary_take_first_or_last(argc, argv, ary, ARY_TAKE_LAST); ARY_INCREASE_LEN(ary, -RARRAY_LEN(result)); + ary_verify(ary); return result; } @@ -1024,6 +1233,7 @@ rb_ary_shift(VALUE ary) MEMMOVE(ptr, ptr+1, VALUE, len-1); }); /* WB: no new reference */ ARY_INCREASE_LEN(ary, -1); + ary_verify(ary); return top; } assert(!ARY_EMBED_P(ary)); /* ARY_EMBED_LEN_MAX < ARY_DEFAULT_SIZE */ @@ -1037,6 +1247,8 @@ rb_ary_shift(VALUE ary) ARY_INCREASE_PTR(ary, 1); /* shift ptr */ ARY_INCREASE_LEN(ary, -1); + ary_verify(ary); + return top; } @@ -1096,6 +1308,7 @@ rb_ary_shift_m(int argc, VALUE *argv, VALUE ary) } ARY_INCREASE_LEN(ary, -n); + ary_verify(ary); return result; } @@ -1129,7 +1342,9 @@ ary_ensure_room_for_unshift(VALUE ary, int argc) /* use shared array for big "queues" */ if (new_len > ARY_DEFAULT_SIZE * 4) { - /* make a room for unshifted items */ + ary_verify(ary); + + /* make a room for unshifted items */ capa = ARY_CAPA(ary); ary_make_shared(ary); @@ -1146,6 +1361,8 @@ ary_ensure_room_for_unshift(VALUE ary, int argc) } ARY_SET_PTR(ary, head - argc); assert(ARY_SHARED_OCCUPIED(ARY_SHARED(ary))); + + ary_verify(ary); return ARY_SHARED(ary); } else { @@ -1154,6 +1371,7 @@ ary_ensure_room_for_unshift(VALUE ary, int argc) MEMMOVE(ptr + argc, ptr, VALUE, len); }); + ary_verify(ary); return ary; } } @@ -1667,11 +1885,12 @@ rb_ary_resize(VALUE ary, long len) } else { if (olen > len + ARY_DEFAULT_SIZE) { - SIZED_REALLOC_N(RARRAY(ary)->as.heap.ptr, VALUE, len, RARRAY(ary)->as.heap.aux.capa); + ary_heap_realloc(ary, len); ARY_SET_CAPA(ary, len); } ARY_SET_HEAP_LEN(ary, len); } + ary_verify(ary); return ary; } @@ -1814,7 +2033,7 @@ VALUE rb_ary_each(VALUE ary) { long i; - + ary_verify(ary); RETURN_SIZED_ENUMERATOR(ary, 0, 0, ary_enum_length); for (i=0; intbl) { - st_table *tbl = RHASH(hash)->ntbl; + if (RHASH_TABLE_P(hash)) { + st_table *tbl = RHASH(hash)->as.ntbl; st_free_table(tbl); + RHASH(hash)->as.ntbl = NULL; } - rb_gc_force_recycle(hash); + //rb_gc_force_recycle(hash); } /* diff --git a/common.mk b/common.mk index 287241e2ed327a..0338b392f3f483 100644 --- a/common.mk +++ b/common.mk @@ -133,6 +133,7 @@ COMMONOBJS = array.$(OBJEXT) \ thread.$(OBJEXT) \ time.$(OBJEXT) \ transcode.$(OBJEXT) \ + transient_heap.$(OBJEXT) \ util.$(OBJEXT) \ variable.$(OBJEXT) \ version.$(OBJEXT) \ @@ -2903,6 +2904,7 @@ transcode.$(OBJEXT): {$(VPATH)}st.h transcode.$(OBJEXT): {$(VPATH)}subst.h transcode.$(OBJEXT): {$(VPATH)}transcode.c transcode.$(OBJEXT): {$(VPATH)}transcode_data.h +transient_heap.$(OBJEXT): {$(VPATH)}transient_heap.c util.$(OBJEXT): $(hdrdir)/ruby/ruby.h util.$(OBJEXT): $(top_srcdir)/include/ruby.h util.$(OBJEXT): {$(VPATH)}config.h diff --git a/compile.c b/compile.c index 5a458200ec78eb..1430099a40e392 100644 --- a/compile.c +++ b/compile.c @@ -9363,7 +9363,7 @@ ibf_dump_object_hash(struct ibf_dump *dump, VALUE obj) { long len = RHASH_SIZE(obj); (void)IBF_W(&len, long, 1); - if (len > 0) st_foreach(RHASH(obj)->ntbl, ibf_dump_object_hash_i, (st_data_t)dump); + if (len > 0) st_foreach(RHASH(obj)->as.ntbl, ibf_dump_object_hash_i, (st_data_t)dump); } static VALUE diff --git a/encoding.c b/encoding.c index 8d1894aa91ff00..65002dbc6b0b84 100644 --- a/encoding.c +++ b/encoding.c @@ -836,13 +836,18 @@ rb_enc_set_index(VALUE obj, int idx) must_encindex(idx); enc_set_index(obj, idx); } - +#include "gc.h" +#include "vm_debug.h" VALUE rb_enc_associate_index(VALUE obj, int idx) { rb_encoding *enc; int oldidx, oldtermlen, termlen; + if (!enc_capable(obj)) { + rb_bug("rb_enc_associate_index: not enc_capable"); + } + /* enc_check_capable(obj);*/ rb_check_frozen(obj); oldidx = rb_enc_get_index(obj); diff --git a/gc.c b/gc.c index d509e60fd3a415..1c5d05a0f4b668 100644 --- a/gc.c +++ b/gc.c @@ -35,6 +35,7 @@ #include #include "ruby_assert.h" #include "debug_counter.h" +#include "transient_heap.h" #include "mjit.h" #undef rb_data_object_wrap @@ -1188,6 +1189,7 @@ RVALUE_PAGE_OLD_UNCOLLECTIBLE_SET(rb_objspace_t *objspace, struct heap_page *pag { MARK_IN_BITMAP(&page->uncollectible_bits[0], obj); objspace->rgengc.old_objects++; + rb_transient_heap_promote(obj); #if RGENGC_PROFILE >= 2 objspace->profile.total_promoted_count++; @@ -2246,12 +2248,10 @@ obj_free(rb_objspace_t *objspace, VALUE obj) rb_str_free(obj); break; case T_ARRAY: - rb_ary_free(obj); + rb_ary_free(obj); break; case T_HASH: - if (RANY(obj)->as.hash.ntbl) { - st_free_table(RANY(obj)->as.hash.ntbl); - } + rb_hash_free(obj); break; case T_REGEXP: if (RANY(obj)->as.regexp.ptr) { @@ -3262,8 +3262,11 @@ obj_memsize_of(VALUE obj, int use_all_types) size += rb_ary_memsize(obj); break; case T_HASH: - if (RHASH(obj)->ntbl) { - size += st_memsize(RHASH(obj)->ntbl); + if (RHASH_ARRAY_P(obj)) { + size += sizeof(li_table); + } + else if (RHASH(obj)->as.ntbl) { + size += st_memsize(RHASH(obj)->as.ntbl); } break; case T_REGEXP: @@ -4156,6 +4159,20 @@ mark_hash(rb_objspace_t *objspace, st_table *tbl) st_foreach(tbl, mark_keyvalue, (st_data_t)objspace); } +static void +mark_hash_linear(rb_objspace_t *objspace, VALUE hash) +{ + if (RHASH_ARRAY_P(hash)) { + linear_foreach(hash, mark_keyvalue, (st_data_t)objspace); + if (objspace->mark_func_data == NULL && RHASH_TRANSIENT_P(hash)) { + rb_transient_heap_mark(hash, RHASH(hash)->as.ltbl); + } + } + else if (RHASH_TABLE_P(hash)) + st_foreach(RHASH(hash)->as.ntbl, mark_keyvalue, (st_data_t)objspace); + gc_mark(objspace, RHASH(hash)->ifnone); +} + void rb_mark_hash(st_table *tbl) { @@ -4602,21 +4619,28 @@ gc_mark_children(rb_objspace_t *objspace, VALUE obj) break; case T_ARRAY: - if (FL_TEST(obj, ELTS_SHARED)) { - gc_mark(objspace, any->as.array.as.heap.aux.shared); + if (FL_TEST(obj, ELTS_SHARED)) { + VALUE root = any->as.array.as.heap.aux.shared; + gc_mark(objspace, root); } else { long i, len = RARRAY_LEN(obj); const VALUE *ptr = RARRAY_CONST_PTR(obj); for (i=0; i < len; i++) { - gc_mark(objspace, *ptr++); + gc_mark(objspace, ptr[i]); } - } + + if (objspace->mark_func_data == NULL) { + if (!FL_TEST_RAW(obj, RARRAY_EMBED_FLAG) && + ARY_TRANSIENT_P(obj)) { + rb_transient_heap_mark(obj, ptr); + } + } + } break; case T_HASH: - mark_hash(objspace, any->as.hash.ntbl); - gc_mark(objspace, any->as.hash.ifnone); + mark_hash_linear(objspace, obj); break; case T_STRING: @@ -5602,6 +5626,8 @@ gc_marks_finish(rb_objspace_t *objspace) #endif } + rb_transient_heap_finish_marking(); + gc_event_hook(objspace, RUBY_INTERNAL_EVENT_GC_END_MARK, 0); return TRUE; @@ -6471,6 +6497,7 @@ gc_start(rb_objspace_t *objspace, int reason) objspace->profile.heap_used_at_gc_start = heap_allocated_pages; gc_prof_setup_new_record(objspace, reason); gc_reset_malloc_info(objspace); + rb_transient_heap_start_marking(do_full_mark); gc_event_hook(objspace, RUBY_INTERNAL_EVENT_GC_START, 0 /* TODO: pass minor/immediate flag? */); GC_ASSERT(during_gc); @@ -9454,13 +9481,21 @@ rb_raw_obj_info(char *buff, const int buff_size, VALUE obj) #if USE_RGENGC const int age = RVALUE_FLAGS_AGE(RBASIC(obj)->flags); - snprintf(buff, buff_size, "%p [%d%s%s%s%s] %s", - (void *)obj, age, - C(RVALUE_UNCOLLECTIBLE_BITMAP(obj), "L"), - C(RVALUE_MARK_BITMAP(obj), "M"), - C(RVALUE_MARKING_BITMAP(obj), "R"), - C(RVALUE_WB_UNPROTECTED_BITMAP(obj), "U"), - obj_type_name(obj)); + if (is_pointer_to_heap(&rb_objspace, (void *)obj)) { + snprintf(buff, buff_size, "%p [%d%s%s%s%s] %s", + (void *)obj, age, + C(RVALUE_UNCOLLECTIBLE_BITMAP(obj), "L"), + C(RVALUE_MARK_BITMAP(obj), "M"), + C(RVALUE_MARKING_BITMAP(obj), "R"), + C(RVALUE_WB_UNPROTECTED_BITMAP(obj), "U"), + obj_type_name(obj)); + } + else { + /* fake */ + snprintf(buff, buff_size, "%p [%dXXXX] %s", + (void *)obj, age, + obj_type_name(obj)); + } #else snprintf(buff, buff_size, "%p [%s] %s", (void *)obj, @@ -9490,10 +9525,25 @@ rb_raw_obj_info(char *buff, const int buff_size, VALUE obj) UNEXPECTED_NODE(rb_raw_obj_info); break; case T_ARRAY: - snprintf(buff, buff_size, "%s [%s%s] len: %d", buff, - C(ARY_EMBED_P(obj), "E"), - C(ARY_SHARED_P(obj), "S"), - (int)RARRAY_LEN(obj)); + if (FL_TEST(obj, ELTS_SHARED)) { + snprintf(buff, buff_size, "%s shared -> %s", buff, + rb_obj_info(RARRAY(obj)->as.heap.aux.shared)); + } + else if (FL_TEST(obj, RARRAY_EMBED_FLAG)) { + snprintf(buff, buff_size, "%s [%s%s] len: %d (embed)", buff, + C(ARY_EMBED_P(obj), "E"), + C(ARY_SHARED_P(obj), "S"), + (int)RARRAY_LEN(obj)); + } + else { + snprintf(buff, buff_size, "%s [%s%s%s] len: %d, capa:%d ptr:%p", buff, + C(ARY_EMBED_P(obj), "E"), + C(ARY_SHARED_P(obj), "S"), + C(ARY_TRANSIENT_P(obj), "T"), + (int)RARRAY_LEN(obj), + ARY_EMBED_P(obj) ? -1 : (int)RARRAY(obj)->as.heap.aux.capa, + RARRAY_CONST_PTR(obj)); + } break; case T_STRING: { snprintf(buff, buff_size, "%s %s", buff, RSTRING_PTR(obj)); @@ -9855,6 +9905,7 @@ Init_GC(void) /* internal methods */ rb_define_singleton_method(rb_mGC, "verify_internal_consistency", gc_verify_internal_consistency, 0); + rb_define_singleton_method(rb_mGC, "verify_transient_heap_internal_consistency", rb_transient_heap_verify, 0); #if MALLOC_ALLOCATED_SIZE rb_define_singleton_method(rb_mGC, "malloc_allocated_size", gc_malloc_allocated_size, 0); rb_define_singleton_method(rb_mGC, "malloc_allocations", gc_malloc_allocations, 0); diff --git a/hash.c b/hash.c index c9d60c7a56912c..72f25f06eafe90 100644 --- a/hash.c +++ b/hash.c @@ -20,7 +20,8 @@ #include "id.h" #include "symbol.h" #include "gc.h" - +#include "transient_heap.h" +#include "ruby_assert.h" #ifdef __APPLE__ # ifdef HAVE_CRT_EXTERNS_H # include @@ -299,6 +300,627 @@ static const struct st_hash_type identhash = { rb_ident_hash, }; +#define EQUAL(tab,x,y) ((x) == (y) || (*(tab)->type->compare)((x),(y)) == 0) +#define PTR_EQUAL(tab, ptr, hash_val, key_) \ + ((ptr)->hash == (hash_val) && EQUAL((tab), (key_), (ptr)->key)) + +#define RESERVED_HASH_VAL ((st_hash_t) 0) +#define RESERVED_HASH_SUBSTITUTION_VAL (~(st_hash_t) 0) + +#define RHASH_TABLE_EMPTY(hash) !(RHASH(hash)->as.ltbl) +#define RHASH_TABLE_NONEMPTY(hash) RHASH(hash)->as.ltbl + +#define SET_KEY(entry, _key) (entry)->key = (_key) +#define SET_HASH(entry, _hash) (entry)->hash = (_hash) +#define SET_RECORD(entry, _value) (entry)->record = (_value) + +#define RHASH_TYPE(hash) (RHASH_ARRAY_P(hash) ? RHASH(hash)->as.ltbl->type : RHASH(hash)->as.ntbl->type) + +typedef st_data_t st_hash_t; + +static inline st_hash_t +do_hash(st_data_t key, li_table *tab) +{ + return (st_hash_t)(tab->type->hash)(key); + // st_hash_t hash = (st_hash_t)(tab->type->hash)(key); + // return hash == ((st_hash_t) 0) ? (~(st_hash_t) 0) : hash; +} + +static inline void +set_entry(li_table_entry *entry, st_data_t key, st_data_t val, st_hash_t hash) +{ + SET_HASH(entry, hash); + SET_KEY(entry, key); + SET_RECORD(entry, val); +} + +static inline void +clear_entry(li_table_entry* entry) +{ + SET_KEY(entry, Qundef); + SET_RECORD(entry, Qundef); + SET_HASH(entry, 0); +} + +static inline int +empty_entry(li_table_entry *entry) +{ + return entry->hash == 0; +} +//#define RHASH_DEBUG + +#ifdef RHASH_DEBUG +#define hash_varify(hash) hash_varify_(hash, __FILE__, __LINE__) +#define HASH_ASSERT(expr) RUBY_ASSERT_MESG_WHEN(1, expr, #expr) + +static VALUE +hash_varify_(VALUE hash, const char *file, int line) +{ + HASH_ASSERT(RB_TYPE_P(hash, T_HASH)); + if (RHASH_ARRAY_P(hash)) { + li_table *tab = RHASH(hash)->as.ltbl; + li_table_entry *cur_entry, *entries; + st_data_t h, k, v; + uint8_t i, n = 0; + HASH_ASSERT(tab->type != NULL); + entries = tab->entries; + for (i = 0; i < LINEAR_TABLE_BOUND; i++) { + cur_entry = &entries[i]; + if (!empty_entry(cur_entry)) { + h = cur_entry->hash; + k = cur_entry->key; + v = cur_entry->record; + HASH_ASSERT(h != 0); + HASH_ASSERT(k != Qundef); + HASH_ASSERT(v != Qundef); + n++; + } + } + HASH_ASSERT(n == RHASH_ARRAY_LEN(hash)); + } + + if (RHASH_TRANSIENT_P(hash)) { + HASH_ASSERT(RHASH(hash)->as.ltbl != NULL); + HASH_ASSERT(rb_transient_heap_managed_ptr_p(RHASH(hash)->as.ltbl)); + } + return hash; +} +#else +#define hash_varify(h) ((void)0) +#define HASH_ASSERT(e) ((void)0) +#endif + +#define RHASH_SET_ARRAY_FLAG(h) FL_SET_RAW(h, RHASH_ARRAY_FLAG) +#define RHASH_UNSET_ARRAY_FLAG(h) FL_UNSET_RAW(h, RHASH_ARRAY_FLAG) +#define RHASH_SET_TRANSIENT_FLAG(h) FL_SET_RAW(h, RHASH_TRANSIENT_FLAG) +#define RHASH_UNSET_TRANSIENT_FLAG(h) FL_UNSET_RAW(h, RHASH_TRANSIENT_FLAG) + +#define RHASH_SET_ARRAY_LEN(h, n) do { \ + long tmp_n = n; \ + HASH_ASSERT(RHASH_ARRAY_P(h)); \ + RBASIC(h)->flags &= ~RHASH_ARRAY_LEN_MASK; \ + RBASIC(h)->flags |= (tmp_n) << RHASH_ARRAY_LEN_SHIFT; \ +} while (0) + +#define RHASH_ARRAY_BOUND(h) \ + (HASH_ASSERT(RHASH_ARRAY_P(h)), \ + (long)((RBASIC(h)->flags >> RHASH_ARRAY_BOUND_SHIFT) & \ + (RHASH_ARRAY_BOUND_MASK >> RHASH_ARRAY_BOUND_SHIFT))) + +#define RHASH_SET_ARRAY_BOUND(h, n) do { \ + long tmp_n = n; \ + HASH_ASSERT(RHASH_ARRAY_P(h)); \ + RBASIC(h)->flags &= ~RHASH_ARRAY_BOUND_MASK; \ + RBASIC(h)->flags |= (tmp_n) << RHASH_ARRAY_BOUND_SHIFT; \ +} while (0) + +#define HASH_ARRAY_INCREASE_LEN(h, n) do { \ + HASH_ASSERT(RHASH_ARRAY_P(h)); \ + RHASH_SET_ARRAY_LEN((h), RHASH_ARRAY_LEN(h)+(n)); \ +} while (0) + +#define RHASH_ARRAY_LEN_ADD_ONE(h) do { \ + HASH_ASSERT(RHASH_ARRAY_P(h)); \ + RHASH_SET_ARRAY_LEN((h), RHASH_ARRAY_LEN(h)+(1)); \ +} while (0) + +#define RHASH_ARRAY_LEN_MINUS_ONE(h) do { \ + HASH_ASSERT(RHASH_ARRAY_P(h)); \ + RHASH_SET_ARRAY_LEN((h), RHASH_ARRAY_LEN(h)-(1)); \ +} while (0) + +#define RHASH_CLEAR_BITS(h) do { \ + RBASIC(h)->flags &= ~RHASH_ARRAY_LEN_MASK; \ + RBASIC(h)->flags &= ~RHASH_ARRAY_BOUND_MASK; \ +} while (0) + + +static li_table* +linear_init_table(VALUE hash, const struct st_hash_type *type) +{ + li_table *tab; + uint8_t i; + tab = (li_table*)rb_transient_heap_alloc(hash, sizeof(li_table)); + if (tab != NULL) { + RHASH_SET_TRANSIENT_FLAG(hash); + } + else { + RHASH_UNSET_TRANSIENT_FLAG(hash); + tab = (li_table*)malloc(sizeof(li_table)); + } + tab->type = type; + RHASH_SET_ARRAY_FLAG(hash); + RHASH_SET_ARRAY_LEN(hash, 0); + RHASH_SET_ARRAY_BOUND(hash, 0); + for (i = 0; i < LINEAR_TABLE_BOUND; i++) + clear_entry(tab->entries + i); + RHASH(hash)->as.ltbl = tab; + return tab; +} + +static li_table* +linear_init_identtable(VALUE hash) +{ + return linear_init_table(hash, &identhash); +} + +static li_table* +linear_init_objtable(VALUE hash) +{ + return linear_init_table(hash, &objhash); +} + +static st_index_t +find_entry(VALUE hash, st_hash_t hash_value, st_data_t key) +{ + uint8_t i, bound = RHASH_ARRAY_BOUND(hash); + li_table *tab = RHASH(hash)->as.ltbl; + li_table_entry *entries = tab->entries; + + if (RHASH_ARRAY_LEN(hash) == 0) return LINEAR_TABLE_BOUND; + for (i = 0; i < bound; i++) { + if (PTR_EQUAL(tab, &entries[i], hash_value, key)) + return i; + } + return LINEAR_TABLE_BOUND; +} + +static inline void +linear_free_table(VALUE hash) +{ + if (RHASH_ARRAY_P(hash)) { + RHASH_UNSET_ARRAY_FLAG(hash); + RHASH_CLEAR_BITS(hash); + if (!RHASH_TRANSIENT_P(hash)) { + free(RHASH(hash)->as.ltbl); + } + RHASH(hash)->as.ltbl = NULL; + } +} + +static void +rb_hash_heap_free(VALUE hash) +{ + // fprintf(stderr, "rb_hash_heap_free: %p\n", (void*)hash); + if (RHASH_TRANSIENT_P(hash)) { + RHASH_UNSET_TRANSIENT_FLAG(hash); + } else { + linear_free_table(hash); + } +} + +void +rb_hash_free(VALUE hash) +{ + if (RHASH_ARRAY_P(hash)) { + rb_hash_heap_free(hash); + } + else if (RHASH_TABLE_P(hash)) { + st_free_table(RHASH(hash)->as.ntbl); + RHASH(hash)->as.ntbl = NULL; + } +} + +static void +try_convert_table(VALUE hash) +{ + st_table *new_tab; + li_table *tab = RHASH(hash)->as.ltbl; + li_table_entry *entries; + uint8_t size = RHASH_ARRAY_LEN(hash); + st_index_t i; + + if (!RHASH_ARRAY_P(hash) || size < LINEAR_TABLE_MAX_SIZE) + return; + + new_tab = st_init_table_with_size(tab->type, size * 2); + + entries = tab->entries; + for (i = 0; i < LINEAR_TABLE_BOUND; i++) { + HASH_ASSERT(entries[i].hash != 0); + st_add_direct(new_tab, entries[i].key, entries[i].record); + } + HASH_ASSERT(tab == RHASH(hash)->as.ltbl); + linear_free_table(hash); + /* converting table means to promote the hash, unset the transient flag anyway*/ + RHASH_UNSET_TRANSIENT_FLAG(hash); + RHASH_CLEAR_BITS(hash); + RHASH_UNSET_ARRAY_FLAG(hash); + RHASH(hash)->as.ntbl = new_tab; + return; +} + +static st_table * +force_convert_table(VALUE hash) +{ + st_table *new_tab; + li_table *tab; + li_table_entry *cur_entry, *entries; + uint8_t i; + + if (RHASH_TABLE_P(hash)) + return RHASH(hash)->as.ntbl; + + tab = RHASH(hash)->as.ltbl; + if (tab) { + new_tab = st_init_table_with_size(tab->type, RHASH_ARRAY_LEN(hash)); + entries = tab->entries; + for (i = 0; i < LINEAR_TABLE_BOUND; i++) { + cur_entry = &entries[i]; + if (empty_entry(cur_entry)) continue; + st_add_direct(new_tab, cur_entry->key, cur_entry->record); + } + HASH_ASSERT(tab == RHASH(hash)->as.ltbl); + linear_free_table(hash); + /* converting table means to promote the hash, unset the transient flag anyway*/ + RHASH_UNSET_TRANSIENT_FLAG(hash); + } + else if (!RHASH(hash)->as.ntbl) { + new_tab = st_init_table(&objhash); + } + RHASH_CLEAR_BITS(hash); + RHASH_UNSET_ARRAY_FLAG(hash); + RHASH(hash)->as.ntbl = new_tab; + return RHASH(hash)->as.ntbl; +} + +static int +compact_table(li_table *tab) +{ + li_table_entry *entries = tab->entries; + uint8_t empty = 0, non_empty = 1; + + for (; non_empty < LINEAR_TABLE_BOUND; empty++, non_empty++) { + while (!empty_entry(&entries[empty])) { + empty++; + if (empty == LINEAR_TABLE_BOUND - 1) goto done; + } + + if (non_empty <= empty) non_empty = empty + 1; + while (empty_entry(&entries[non_empty])) { + non_empty++; + if (non_empty == LINEAR_TABLE_BOUND) goto done; + } + + entries[empty] = entries[non_empty]; + clear_entry(&entries[non_empty]); + } +done: + HASH_ASSERT(empty < LINEAR_TABLE_BOUND); + return empty; +} + +static int +add_direct_with_hash(VALUE hash, st_data_t key, st_data_t val, st_hash_t hash_value) +{ + uint8_t bin = RHASH_ARRAY_BOUND(hash); + li_table *tab = RHASH(hash)->as.ltbl; + li_table_entry *entry; + + if (RHASH_ARRAY_LEN(hash) >= LINEAR_TABLE_MAX_SIZE) + return 1; + if (UNLIKELY(bin >= LINEAR_TABLE_BOUND)) + bin = compact_table(tab); + HASH_ASSERT(bin < LINEAR_TABLE_BOUND); + entry = &tab->entries[bin++]; + set_entry(entry, key, val, hash_value); + RHASH_SET_ARRAY_BOUND(hash, bin); + RHASH_ARRAY_LEN_ADD_ONE(hash); + return 0; +} + +int +linear_foreach(VALUE hash, int (*func)(ANYARGS), st_data_t arg) +{ + uint8_t i, bound = RHASH_ARRAY_BOUND(hash); + li_table_entry *cur_entry, *entries = RHASH(hash)->as.ltbl->entries; + enum st_retval retval; + + for (i = 0; i < bound; i++) { + cur_entry = &entries[i]; + if (empty_entry(cur_entry)) + continue; + retval = (*func)(cur_entry->key, cur_entry->record, arg, 0); + switch (retval) { + case ST_CONTINUE: + break; + case ST_CHECK: + case ST_STOP: + return 0; + case ST_DELETE: + clear_entry(cur_entry); + RHASH_ARRAY_LEN_MINUS_ONE(hash); + break; + } + } + return 0; +} + +static int +linear_foreach_check(VALUE hash, int (*func)(ANYARGS), st_data_t arg, + st_data_t never) +{ + uint8_t i, ret = 0, bound = RHASH_ARRAY_BOUND(hash); + li_table *tab = RHASH(hash)->as.ltbl; + li_table_entry *cur_entry, *entries; + enum st_retval retval; + st_data_t key; + st_hash_t hash_value; + + if (RHASH_ARRAY_LEN(hash) == 0) return 0; + entries = tab->entries; + for (i = 0; i < bound; i++) { + cur_entry = &entries[i]; + if (empty_entry(cur_entry)) + continue; + key = cur_entry->key; + hash_value = cur_entry->hash; + retval = (*func)(key, cur_entry->record, arg, 0); + switch (retval) { + case ST_CHECK: { + if (entries[i].key == never && entries[i].hash == 0) + break; + ret = find_entry(hash, hash_value, key); + if (ret == LINEAR_TABLE_BOUND) { + retval = (*func)(0, 0, arg, 1); + return 2; + } + } + case ST_CONTINUE: + break; + case ST_STOP: + return 0; + case ST_DELETE: { + clear_entry(cur_entry); + RHASH_ARRAY_LEN_MINUS_ONE(hash); + break; + } + } + } + return 0; +} + +static int +linear_update(VALUE hash, st_data_t key, + st_update_callback_func *func, st_data_t arg) +{ + li_table *tab = RHASH(hash)->as.ltbl; + li_table_entry *entry; + int retval, existing; + uint8_t bin; + st_data_t value = 0, old_key; + st_hash_t hash_value = do_hash(key, tab); + + bin = find_entry(hash, hash_value, key); + existing = bin != LINEAR_TABLE_BOUND; + entry = &tab->entries[bin]; + if (existing) { + key = entry->key; + value = entry->record; + } + old_key = key; + retval = (*func)(&key, &value, arg, existing); + + switch (retval) { + case ST_CONTINUE: + if (!existing) { + if (add_direct_with_hash(hash, key, value, hash_value)) + return -1; + break; + } + if (old_key != key) { + entry->key = key; + } + entry->record = value; + break; + case ST_DELETE: + if (existing) { + clear_entry(entry); + RHASH_ARRAY_LEN_MINUS_ONE(hash); + } + break; + } + return existing; +} + +static int +linear_insert(VALUE hash, st_data_t key, st_data_t value) +{ + st_index_t bin = RHASH_ARRAY_BOUND(hash); + li_table *tab = RHASH(hash)->as.ltbl; + st_hash_t hash_value = do_hash(key, tab); + + bin = find_entry(hash, hash_value, key); + if (bin == LINEAR_TABLE_BOUND) { + if (RHASH_ARRAY_LEN(hash) >= LINEAR_TABLE_MAX_SIZE) + return -1; + if (bin >= LINEAR_TABLE_BOUND) + bin = compact_table(tab); + HASH_ASSERT(bin < LINEAR_TABLE_BOUND); + set_entry(&tab->entries[bin++], key, value, hash_value); + RHASH_SET_ARRAY_BOUND(hash, bin); + RHASH_ARRAY_LEN_ADD_ONE(hash); + return 0; + } + tab->entries[bin].record = value; + return 1; +} + +int +linear_lookup(VALUE hash, st_data_t key, st_data_t *value) +{ + st_index_t bin; + st_hash_t hash_value; + li_table *tab = RHASH(hash)->as.ltbl; + + hash_value = do_hash(key, tab); + bin = find_entry(hash, hash_value, key); + if (bin == LINEAR_TABLE_BOUND) { + return 0; + } + HASH_ASSERT(bin < LINEAR_TABLE_BOUND); + if (value != 0) + *value = tab->entries[bin].record; + return 1; +} + +static int +linear_delete(VALUE hash, st_data_t *key, st_data_t *value) +{ + st_index_t bin; + li_table *tab = RHASH(hash)->as.ltbl; + st_hash_t hash_value = do_hash(*key, tab); + li_table_entry *entry; + + bin = find_entry(hash, hash_value, *key); + if (bin == LINEAR_TABLE_BOUND) { + if (value != 0) *value = 0; + return 0; + } + entry = &tab->entries[bin]; + if (value != 0) *value = entry->record; + clear_entry(entry); + RHASH_ARRAY_LEN_MINUS_ONE(hash); + return 1; +} + +static int +linear_shift(VALUE hash, st_data_t *key, st_data_t *value) +{ + uint8_t i, bound = RHASH_ARRAY_BOUND(hash); + li_table_entry *entry, *entries = RHASH(hash)->as.ltbl->entries; + + for (i = 0; i < bound; i++) { + entry = &entries[i]; + if (!empty_entry(entry)) { + if (value != 0) *value = entry->record; + *key = entry->key; + clear_entry(entry); + RHASH_ARRAY_LEN_MINUS_ONE(hash); + return 1; + } + } + if (value != 0) *value = 0; + return 0; +} + +static int +linear_keys(VALUE hash, st_data_t *keys, st_index_t size) +{ + uint8_t i, bound = RHASH_ARRAY_BOUND(hash); + st_data_t *keys_start, *keys_end; + li_table_entry *cur_entry, *entries = RHASH(hash)->as.ltbl->entries; + + keys_start = keys; + keys_end = keys + size; + for (i = 0; i < bound; i++) { + if (keys == keys_end) + break; + cur_entry = &entries[i]; + if (!empty_entry(cur_entry)) + *keys++ = cur_entry->key; + } + + return keys - keys_start; +} + +static int +linear_values(VALUE hash, st_data_t *values, st_index_t size) +{ + uint8_t i, bound = RHASH_ARRAY_BOUND(hash); + st_data_t *values_start, *values_end; + li_table_entry *cur_entry, *entries = RHASH(hash)->as.ltbl->entries; + + values_start = values; + values_end = values + size; + for (i = 0; i < bound; i++) { + if (values == values_end) + break; + cur_entry = &entries[i]; + if (!empty_entry(cur_entry)) + *values++ = cur_entry->record; + } + + return values - values_start; +} + +static li_table* +linear_copy(VALUE hash1, VALUE hash2) +{ + li_table *new_tab; + li_table *old_tab = RHASH(hash2)->as.ltbl; + new_tab = (li_table*) rb_transient_heap_alloc(hash1, sizeof(li_table)); + if (new_tab != NULL) { + RHASH_SET_TRANSIENT_FLAG(hash1); + } + else { + RHASH_UNSET_TRANSIENT_FLAG(hash1); + new_tab = (li_table*) malloc(sizeof(li_table)); + } + *new_tab = *old_tab; + HASH_ASSERT(new_tab->type == old_tab->type); + RHASH_SET_ARRAY_FLAG(hash1); + RHASH_SET_ARRAY_BOUND(hash1, RHASH_ARRAY_BOUND(hash2)); + RHASH_SET_ARRAY_LEN(hash1, RHASH_ARRAY_LEN(hash2)); + RHASH(hash1)->as.ltbl = new_tab; + return new_tab; +} + +static void +linear_clear(VALUE hash) +{ + li_table *tab = RHASH(hash)->as.ltbl; + RHASH_SET_ARRAY_LEN(hash, 0); + RHASH_SET_ARRAY_BOUND(hash, 0); + memset(tab->entries, 0, LINEAR_TABLE_MAX_SIZE * sizeof(li_table_entry)); +} + +void +rb_hash_transient_heap_promote(VALUE hash, int promote) +{ + if (RHASH_TRANSIENT_P(hash)) { + li_table *new_tab; + li_table *old_tab = RHASH(hash)->as.ltbl; + if (UNLIKELY(old_tab == NULL)) { + rb_gc_force_recycle(hash); + return; + } + HASH_ASSERT(old_tab != NULL); + if (promote) { + new_tab = malloc(sizeof(li_table)); + RHASH_UNSET_TRANSIENT_FLAG(hash); + } + else { + new_tab = rb_transient_heap_alloc(hash, sizeof(li_table)); + } + *new_tab = *old_tab; + HASH_ASSERT(new_tab->type == old_tab->type); + RHASH(hash)->as.ltbl = new_tab; + } + hash_varify(hash); +} + + typedef int st_foreach_func(st_data_t, st_data_t, st_data_t); struct foreach_safe_arg { @@ -330,7 +952,7 @@ st_foreach_safe(st_table *table, int (*func)(ANYARGS), st_data_t a) arg.func = (st_foreach_func *)func; arg.arg = a; if (st_foreach_check(table, foreach_safe_i, (st_data_t)&arg, 0)) { - rb_raise(rb_eRuntimeError, "hash modified during iteration"); + rb_raise(rb_eRuntimeError, "1hash modified during iteration"); } } @@ -342,6 +964,31 @@ struct hash_foreach_arg { VALUE arg; }; +static int +hash_linear_foreach_iter(st_data_t key, st_data_t value, st_data_t argp, int error) +{ + struct hash_foreach_arg *arg = (struct hash_foreach_arg *)argp; + int status; + // li_table *tbl; + + if (error) return ST_STOP; + /* linear table will move it's position due to escaping from transient heap */ + // tbl = RHASH(arg->hash)->ltbl; + status = (*arg->func)((VALUE)key, (VALUE)value, arg->arg); + // if (RHASH(arg->hash)->ltbl != tbl) { + // rb_raise(rb_eRuntimeError, "rehash occurred during iteration"); + // } + switch (status) { + case ST_DELETE: + return ST_DELETE; + case ST_CONTINUE: + break; + case ST_STOP: + return ST_STOP; + } + return ST_CHECK; +} + static int hash_foreach_iter(st_data_t key, st_data_t value, st_data_t argp, int error) { @@ -350,10 +997,10 @@ hash_foreach_iter(st_data_t key, st_data_t value, st_data_t argp, int error) st_table *tbl; if (error) return ST_STOP; - tbl = RHASH(arg->hash)->ntbl; + tbl = RHASH(arg->hash)->as.ntbl; status = (*arg->func)((VALUE)key, (VALUE)value, arg->arg); - if (RHASH(arg->hash)->ntbl != tbl) { - rb_raise(rb_eRuntimeError, "rehash occurred during iteration"); + if (RHASH(arg->hash)->as.ntbl != tbl) { + rb_raise(rb_eRuntimeError, "rehash occurred during iteration"); } switch (status) { case ST_DELETE: @@ -384,8 +1031,15 @@ static VALUE hash_foreach_call(VALUE arg) { VALUE hash = ((struct hash_foreach_arg *)arg)->hash; - if (st_foreach_check(RHASH(hash)->ntbl, hash_foreach_iter, (st_data_t)arg, (st_data_t)Qundef)) { - rb_raise(rb_eRuntimeError, "hash modified during iteration"); + int ret = 0; + if (RHASH_ARRAY_P(hash)) + ret = linear_foreach_check(hash, hash_linear_foreach_iter, + (st_data_t)arg, (st_data_t)Qundef); + else if (RHASH_TABLE_P(hash)) + ret = st_foreach_check(RHASH(hash)->as.ntbl, hash_foreach_iter, + (st_data_t)arg, (st_data_t)Qundef); + if (ret) { + rb_raise(rb_eRuntimeError, "ret: %d, hash modified during iteration", ret); } return Qnil; } @@ -395,13 +1049,14 @@ rb_hash_foreach(VALUE hash, int (*func)(ANYARGS), VALUE farg) { struct hash_foreach_arg arg; - if (!RHASH(hash)->ntbl) + if (RHASH_TABLE_EMPTY(hash)) return; RHASH_ITER_LEV(hash)++; arg.hash = hash; arg.func = (rb_foreach_func *)func; arg.arg = farg; rb_ensure(hash_foreach_call, (VALUE)&arg, hash_foreach_ensure, hash); + hash_varify(hash); } static VALUE @@ -439,7 +1094,7 @@ VALUE rb_hash_new_compare_by_id(void) { VALUE hash = rb_hash_new(); - RHASH(hash)->ntbl = rb_init_identtable(); + RHASH(hash)->as.ntbl = rb_init_identtable(); return hash; } @@ -447,8 +1102,12 @@ MJIT_FUNC_EXPORTED VALUE rb_hash_new_with_size(st_index_t size) { VALUE ret = rb_hash_new(); - if (size) - RHASH(ret)->ntbl = st_init_table_with_size(&objhash, size); + if (size) { + if (size <= LINEAR_TABLE_MAX_SIZE) + RHASH(ret)->as.ltbl = linear_init_objtable(ret); + else + RHASH(ret)->as.ntbl = st_init_table_with_size(&objhash, size); + } return ret; } @@ -457,8 +1116,12 @@ hash_dup(VALUE hash, VALUE klass, VALUE flags) { VALUE ret = hash_alloc_flags(klass, flags, RHASH_IFNONE(hash)); - if (!RHASH_EMPTY_P(hash)) - RHASH(ret)->ntbl = st_copy(RHASH(hash)->ntbl); + if (!RHASH_EMPTY_P(hash)) { + if (RHASH_ARRAY_P(hash)) + linear_copy(ret, hash); + else if (RHASH_TABLE_P(hash)) + RHASH(ret)->as.ntbl = st_copy(RHASH(hash)->as.ntbl); + } return ret; } @@ -479,33 +1142,43 @@ rb_hash_modify_check(VALUE hash) rb_check_frozen(hash); } +static li_table * +hash_ltbl(VALUE hash) +{ + if (RHASH_TABLE_EMPTY(hash)) { + linear_init_objtable(hash); + } + return RHASH(hash)->as.ltbl; +} + static struct st_table * hash_tbl(VALUE hash) { - if (!RHASH(hash)->ntbl) { - RHASH(hash)->ntbl = st_init_table(&objhash); + if (RHASH_TABLE_EMPTY(hash)) { + RHASH(hash)->as.ntbl = st_init_table(&objhash); } - return RHASH(hash)->ntbl; + return RHASH(hash)->as.ntbl; } struct st_table * rb_hash_tbl(VALUE hash) { OBJ_WB_UNPROTECT(hash); - return hash_tbl(hash); + return force_convert_table(hash); } MJIT_FUNC_EXPORTED struct st_table * rb_hash_tbl_raw(VALUE hash) { - return hash_tbl(hash); + return force_convert_table(hash); } static void rb_hash_modify(VALUE hash) { rb_hash_modify_check(hash); - hash_tbl(hash); + if (RHASH_TABLE_EMPTY(hash)) + hash_ltbl(hash); } NORETURN(static void no_new_key(void)); @@ -558,7 +1231,15 @@ tbl_update(VALUE hash, VALUE key, tbl_update_func func, st_data_t optional_arg) arg.new_value = 0; arg.old_value = Qundef; - result = st_update(RHASH(hash)->ntbl, (st_data_t)key, func, (st_data_t)&arg); + if (RHASH_ARRAY_P(hash)) { + result = linear_update(hash, (st_data_t)key, func, (st_data_t)&arg); + if (result == -1) { + try_convert_table(hash); + result = st_update(RHASH(hash)->as.ntbl, (st_data_t)key, func, (st_data_t)&arg); + } + } + else if (RHASH_TABLE_P(hash)) + result = st_update(RHASH(hash)->as.ntbl, (st_data_t)key, func, (st_data_t)&arg); /* write barrier */ if (arg.new_key) RB_OBJ_WRITTEN(hash, arg.old_key, arg.new_key); @@ -673,11 +1354,14 @@ rb_hash_s_create(int argc, VALUE *argv, VALUE klass) VALUE hash, tmp; if (argc == 1) { - tmp = rb_hash_s_try_convert(Qnil, argv[0]); + tmp = rb_hash_s_try_convert(Qnil, argv[0]); //TODO tmp array flag if (!NIL_P(tmp)) { hash = hash_alloc(klass); - if (RHASH(tmp)->ntbl) { - RHASH(hash)->ntbl = st_copy(RHASH(tmp)->ntbl); + if (RHASH_ARRAY_P(tmp)) { + linear_copy(hash, tmp); + } + else if (RHASH(tmp)->as.ntbl) { + RHASH(hash)->as.ntbl = st_copy(RHASH(tmp)->as.ntbl); } return hash; } @@ -725,7 +1409,7 @@ rb_hash_s_create(int argc, VALUE *argv, VALUE klass) hash = hash_alloc(klass); rb_hash_bulk_insert(argc, argv, hash); - + hash_varify(hash); return hash; } @@ -767,9 +1451,12 @@ struct rehash_arg { static int rb_hash_rehash_i(VALUE key, VALUE value, VALUE arg) { - st_table *tbl = (st_table *)arg; - - st_insert(tbl, (st_data_t)key, (st_data_t)value); + if (RHASH_ARRAY_P(arg)) { + linear_insert(arg, (st_data_t)key, (st_data_t)value); + } + else { + st_insert(RHASH(arg)->as.ntbl, (st_data_t)key, (st_data_t)value); + } return ST_CONTINUE; } @@ -803,17 +1490,25 @@ rb_hash_rehash(VALUE hash) rb_raise(rb_eRuntimeError, "rehash during iteration"); } rb_hash_modify_check(hash); - if (!RHASH(hash)->ntbl) - return hash; - tmp = hash_alloc(0); - tbl = st_init_table_with_size(RHASH(hash)->ntbl->type, RHASH(hash)->ntbl->num_entries); - RHASH(tmp)->ntbl = tbl; - - rb_hash_foreach(hash, rb_hash_rehash_i, (VALUE)tbl); - st_free_table(RHASH(hash)->ntbl); - RHASH(hash)->ntbl = tbl; - RHASH(tmp)->ntbl = 0; - + if (RHASH_ARRAY_P(hash)) { + tmp = hash_alloc(0); + linear_init_table(tmp, RHASH(hash)->as.ltbl->type); + rb_hash_foreach(hash, rb_hash_rehash_i, (VALUE)tmp); + linear_free_table(hash); + linear_copy(hash, tmp); + linear_free_table(tmp); + } + else if (RHASH_TABLE_P(hash)) { + st_table *old_tab = RHASH(hash)->as.ntbl; + tmp = hash_alloc(0); + tbl = st_init_table_with_size(old_tab->type, old_tab->num_entries); + RHASH(tmp)->as.ntbl = tbl; + rb_hash_foreach(hash, rb_hash_rehash_i, (VALUE)tmp); + st_free_table(old_tab); + RHASH(hash)->as.ntbl = tbl; + RHASH(tmp)->as.ntbl = NULL; + } + hash_varify(hash); return hash; } @@ -850,10 +1545,14 @@ rb_hash_aref(VALUE hash, VALUE key) { st_data_t val; - if (!RHASH(hash)->ntbl || !st_lookup(RHASH(hash)->ntbl, key, &val)) { - return rb_hash_default_value(hash, key); + if (RHASH_ARRAY_P(hash) && linear_lookup(hash, key, &val)) { + return (VALUE)val; } - return (VALUE)val; + else if (RHASH_TABLE_P(hash) && st_lookup(RHASH(hash)->as.ntbl, key, &val)) { + return (VALUE)val; + } + hash_varify(hash); + return rb_hash_default_value(hash, key); } VALUE @@ -861,10 +1560,14 @@ rb_hash_lookup2(VALUE hash, VALUE key, VALUE def) { st_data_t val; - if (!RHASH(hash)->ntbl || !st_lookup(RHASH(hash)->ntbl, key, &val)) { - return def; /* without Hash#default */ + if (RHASH_ARRAY_P(hash) && linear_lookup(hash, key, &val)) { + return (VALUE)val; } - return (VALUE)val; + else if (RHASH_TABLE_P(hash) && st_lookup(RHASH(hash)->as.ntbl, key, &val)) { + return (VALUE)val; + } + hash_varify(hash); + return def; /* without Hash#default */ } VALUE @@ -916,19 +1619,23 @@ rb_hash_fetch_m(int argc, VALUE *argv, VALUE hash) if (block_given && argc == 2) { rb_warn("block supersedes default value argument"); } - if (!RHASH(hash)->ntbl || !st_lookup(RHASH(hash)->ntbl, key, &val)) { - if (block_given) return rb_yield(key); - if (argc == 1) { - VALUE desc = rb_protect(rb_inspect, key, 0); - if (NIL_P(desc)) { - desc = rb_any_to_s(key); - } - desc = rb_str_ellipsize(desc, 65); - rb_key_err_raise(rb_sprintf("key not found: %"PRIsVALUE, desc), hash, key); + if (RHASH_ARRAY_P(hash) && linear_lookup(hash, key, &val)) { + return (VALUE)val; + } + else if (RHASH_TABLE_P(hash) && st_lookup(RHASH(hash)->as.ntbl, key, &val)) { + return (VALUE)val; + } + if (block_given) return rb_yield(key); + if (argc == 1) { + VALUE desc = rb_protect(rb_inspect, key, 0); + if (NIL_P(desc)) { + desc = rb_any_to_s(key); } - return argv[1]; + desc = rb_str_ellipsize(desc, 65); + rb_key_err_raise(rb_sprintf("key not found: %"PRIsVALUE, desc), hash, key); } - return (VALUE)val; + hash_varify(hash); + return argv[1]; } VALUE @@ -1117,10 +1824,10 @@ rb_hash_delete_entry(VALUE hash, VALUE key) { st_data_t ktmp = (st_data_t)key, val; - if (!RHASH(hash)->ntbl) { - return Qundef; + if (RHASH_ARRAY_P(hash) && linear_delete(hash, &ktmp, &val)) { + return (VALUE)val; } - else if (st_delete(RHASH(hash)->ntbl, &ktmp, &val)) { + else if (RHASH_TABLE_P(hash) && st_delete(RHASH(hash)->as.ntbl, &ktmp, &val)) { return (VALUE)val; } else { @@ -1219,10 +1926,25 @@ rb_hash_shift(VALUE hash) struct shift_var var; rb_hash_modify_check(hash); - if (RHASH(hash)->ntbl) { + if (RHASH_ARRAY_P(hash)) { + var.key = Qundef; + if (RHASH_ITER_LEV(hash) == 0) { + if (linear_shift(hash, &var.key, &var.val)) { + return rb_assoc_new(var.key, var.val); + } + } + else { + rb_hash_foreach(hash, shift_i_safe, (VALUE)&var); + if (var.key != Qundef) { + rb_hash_delete_entry(hash, var.key); + return rb_assoc_new(var.key, var.val); + } + } + } + if (RHASH_TABLE_P(hash)) { var.key = Qundef; if (RHASH_ITER_LEV(hash) == 0) { - if (st_shift(RHASH(hash)->ntbl, &var.key, &var.val)) { + if (st_shift(RHASH(hash)->as.ntbl, &var.key, &var.val)) { return rb_assoc_new(var.key, var.val); } } @@ -1272,7 +1994,7 @@ rb_hash_delete_if(VALUE hash) { RETURN_SIZED_ENUMERATOR(hash, 0, 0, hash_enum_size); rb_hash_modify_check(hash); - if (RHASH(hash)->ntbl) + if (RHASH_TABLE_NONEMPTY(hash)) rb_hash_foreach(hash, delete_if_i, hash); return hash; } @@ -1296,7 +2018,7 @@ rb_hash_reject_bang(VALUE hash) n = RHASH_SIZE(hash); if (!n) return Qnil; rb_hash_foreach(hash, delete_if_i, hash); - if (n == RHASH(hash)->ntbl->num_entries) return Qnil; + if (n == RHASH_SIZE(hash)) return Qnil; return hash; } @@ -1486,11 +2208,10 @@ rb_hash_select_bang(VALUE hash) RETURN_SIZED_ENUMERATOR(hash, 0, 0, hash_enum_size); rb_hash_modify_check(hash); - if (!RHASH(hash)->ntbl) - return Qnil; - n = RHASH(hash)->ntbl->num_entries; + n = RHASH_SIZE(hash); + if (!n) return Qnil; rb_hash_foreach(hash, keep_if_i, hash); - if (n == RHASH(hash)->ntbl->num_entries) return Qnil; + if (n == RHASH_SIZE(hash)) return Qnil; return hash; } @@ -1511,7 +2232,7 @@ rb_hash_keep_if(VALUE hash) { RETURN_SIZED_ENUMERATOR(hash, 0, 0, hash_enum_size); rb_hash_modify_check(hash); - if (RHASH(hash)->ntbl) + if (RHASH_TABLE_NONEMPTY(hash)) rb_hash_foreach(hash, keep_if_i, hash); return hash; } @@ -1537,13 +2258,17 @@ VALUE rb_hash_clear(VALUE hash) { rb_hash_modify_check(hash); - if (!RHASH(hash)->ntbl) - return hash; - if (RHASH(hash)->ntbl->num_entries > 0) { + if (RHASH_ARRAY_P(hash) && RHASH_ARRAY_LEN(hash) > 0) { + if (RHASH_ITER_LEV(hash) > 0) + rb_hash_foreach(hash, clear_i, 0); + else + linear_clear(hash); + } + else if (RHASH_TABLE_P(hash) && RHASH(hash)->as.ntbl->num_entries > 0) { if (RHASH_ITER_LEV(hash) > 0) rb_hash_foreach(hash, clear_i, 0); else - st_clear(RHASH(hash)->ntbl); + st_clear(RHASH(hash)->as.ntbl); } return hash; @@ -1640,14 +2365,14 @@ VALUE rb_hash_aset(VALUE hash, VALUE key, VALUE val) { int iter_lev = RHASH_ITER_LEV(hash); - st_table *tbl = RHASH(hash)->ntbl; rb_hash_modify(hash); - if (!tbl) { + if (RHASH_TABLE_EMPTY(hash)) { if (iter_lev > 0) no_new_key(); - tbl = hash_tbl(hash); + linear_init_objtable(hash); } - if (tbl->type == &identhash || rb_obj_class(key) != rb_cString) { + + if (RHASH_TYPE(hash) == &identhash || rb_obj_class(key) != rb_cString) { RHASH_UPDATE_ITER(hash, iter_lev, key, hash_aset, val); } else { @@ -1668,8 +2393,6 @@ replace_i(VALUE key, VALUE val, VALUE hash) static VALUE rb_hash_initialize_copy(VALUE hash, VALUE hash2) { - st_table *ntbl; - rb_hash_modify_check(hash); hash2 = to_hash(hash2); @@ -1677,15 +2400,23 @@ rb_hash_initialize_copy(VALUE hash, VALUE hash2) if (hash == hash2) return hash; - ntbl = RHASH(hash)->ntbl; - if (RHASH(hash2)->ntbl) { - if (ntbl) st_free_table(ntbl); - RHASH(hash)->ntbl = st_copy(RHASH(hash2)->ntbl); - if (RHASH(hash)->ntbl->num_entries) + if (RHASH_ARRAY_P(hash2)) { + if (RHASH_ARRAY_P(hash)) linear_free_table(hash); + linear_copy(hash, hash2); + if (RHASH_ARRAY_LEN(hash)) + rb_hash_rehash(hash); + } + else if (RHASH_TABLE_P(hash2)) { + if (RHASH_TABLE_P(hash)) st_free_table(RHASH(hash)->as.ntbl); + RHASH(hash)->as.ntbl = st_copy(RHASH(hash2)->as.ntbl); + if (RHASH(hash)->as.ntbl->num_entries) rb_hash_rehash(hash); } - else if (ntbl) { - st_clear(ntbl); + else if (RHASH_ARRAY_P(hash)) { + linear_clear(hash); + } + else if (RHASH_TABLE_P(hash)) { + st_clear(RHASH(hash)->as.ntbl); } COPY_DEFAULT(hash, hash2); @@ -1708,19 +2439,20 @@ rb_hash_initialize_copy(VALUE hash, VALUE hash2) static VALUE rb_hash_replace(VALUE hash, VALUE hash2) { - st_table *table2; - rb_hash_modify_check(hash); if (hash == hash2) return hash; hash2 = to_hash(hash2); COPY_DEFAULT(hash, hash2); - table2 = RHASH(hash2)->ntbl; - rb_hash_clear(hash); - if (table2) hash_tbl(hash)->type = table2->type; - rb_hash_foreach(hash2, replace_i, hash); + if (RHASH_ARRAY_P(hash2)) { + linear_copy(hash, hash2); + } + else if (RHASH_TABLE_P(hash2)) { + hash_tbl(hash)->type = RHASH(hash2)->as.ntbl->type; + rb_hash_foreach(hash2, replace_i, hash); + } return hash; } @@ -1941,7 +2673,7 @@ rb_hash_transform_keys_bang(VALUE hash) { RETURN_SIZED_ENUMERATOR(hash, 0, 0, hash_enum_size); rb_hash_modify_check(hash); - if (RHASH(hash)->ntbl) { + if (RHASH_TABLE_NONEMPTY(hash)) { long i; VALUE pairs = rb_hash_flatten(0, NULL, hash); rb_hash_clear(hash); @@ -2015,7 +2747,7 @@ rb_hash_transform_values_bang(VALUE hash) { RETURN_SIZED_ENUMERATOR(hash, 0, 0, hash_enum_size); rb_hash_modify_check(hash); - if (RHASH(hash)->ntbl) + if (RHASH_TABLE_NONEMPTY(hash)) rb_hash_foreach(hash, transform_values_i, hash); return hash; } @@ -2165,12 +2897,20 @@ rb_hash_keys(VALUE hash) if (size == 0) return keys; if (ST_DATA_COMPATIBLE_P(VALUE)) { - st_table *table = RHASH(hash)->ntbl; + if (RHASH_ARRAY_P(hash)) { + rb_gc_writebarrier_remember(keys); + RARRAY_PTR_USE(keys, ptr, { + size = linear_keys(hash, ptr, size); + }); + } + else if (RHASH_TABLE_P(hash)) { + st_table *table = RHASH(hash)->as.ntbl; - rb_gc_writebarrier_remember(keys); - RARRAY_PTR_USE(keys, ptr, { - size = st_keys(table, ptr, size); - }); + rb_gc_writebarrier_remember(keys); + RARRAY_PTR_USE(keys, ptr, { + size = st_keys(table, ptr, size); + }); + } rb_ary_set_len(keys, size); } else { @@ -2209,12 +2949,20 @@ rb_hash_values(VALUE hash) if (size == 0) return values; if (ST_DATA_COMPATIBLE_P(VALUE)) { - st_table *table = RHASH(hash)->ntbl; + if (RHASH_ARRAY_P(hash)) { + rb_gc_writebarrier_remember(values); + RARRAY_PTR_USE(values, ptr, { + size = linear_values(hash, ptr, size); + }); + } + else if (RHASH_TABLE_P(hash)) { + st_table *table = RHASH(hash)->as.ntbl; - rb_gc_writebarrier_remember(values); - RARRAY_PTR_USE(values, ptr, { - size = st_values(table, ptr, size); - }); + rb_gc_writebarrier_remember(values); + RARRAY_PTR_USE(values, ptr, { + size = st_values(table, ptr, size); + }); + } rb_ary_set_len(values, size); } else { @@ -2246,9 +2994,10 @@ rb_hash_values(VALUE hash) MJIT_FUNC_EXPORTED VALUE rb_hash_has_key(VALUE hash, VALUE key) { - if (!RHASH(hash)->ntbl) - return Qfalse; - if (st_lookup(RHASH(hash)->ntbl, key, 0)) { + if (RHASH_ARRAY_P(hash) && linear_lookup(hash, key, 0)) { + return Qtrue; + } + else if (RHASH_TABLE_P(hash) && st_lookup(RHASH(hash)->as.ntbl, key, 0)) { return Qtrue; } return Qfalse; @@ -2292,7 +3041,7 @@ rb_hash_has_value(VALUE hash, VALUE val) struct equal_data { VALUE result; - st_table *tbl; + VALUE hash; int eql; }; @@ -2302,10 +3051,15 @@ eql_i(VALUE key, VALUE val1, VALUE arg) struct equal_data *data = (struct equal_data *)arg; st_data_t val2; - if (!st_lookup(data->tbl, key, &val2)) { + if (RHASH_ARRAY_P(data->hash) && !linear_lookup(data->hash, key, &val2)) { data->result = Qfalse; return ST_STOP; } + else if (RHASH_TABLE_P(data->hash) && !st_lookup(RHASH(data->hash)->as.ntbl, key, &val2)) { + data->result = Qfalse; + return ST_STOP; + } + if (!(data->eql ? rb_eql(val1, (VALUE)val2) : (int)rb_equal(val1, (VALUE)val2))) { data->result = Qfalse; return ST_STOP; @@ -2350,19 +3104,21 @@ hash_equal(VALUE hash1, VALUE hash2, int eql) } if (RHASH_SIZE(hash1) != RHASH_SIZE(hash2)) return Qfalse; - if (!RHASH(hash1)->ntbl || !RHASH(hash2)->ntbl) - return Qtrue; - if (RHASH(hash1)->ntbl->type != RHASH(hash2)->ntbl->type) - return Qfalse; + if (RHASH_TABLE_NONEMPTY(hash1) && RHASH_TABLE_NONEMPTY(hash2)) { + if (RHASH_TYPE(hash1) != RHASH_TYPE(hash2)) + return Qfalse; + + data.hash = hash2; + data.eql = eql; + return rb_exec_recursive_paired(recursive_eql, hash1, hash2, (VALUE)&data); + } + #if 0 if (!(rb_equal(RHASH_IFNONE(hash1), RHASH_IFNONE(hash2)) && FL_TEST(hash1, HASH_PROC_DEFAULT) == FL_TEST(hash2, HASH_PROC_DEFAULT))) return Qfalse; #endif - - data.tbl = RHASH(hash2)->ntbl; - data.eql = eql; - return rb_exec_recursive_paired(recursive_eql, hash1, hash2, (VALUE)&data); + return Qtrue; } /* @@ -2683,7 +3439,10 @@ static VALUE reset_hash_type(VALUE arg) { struct reset_hash_type_arg *p = (struct reset_hash_type_arg *)arg; - RHASH(p->hash)->ntbl->type = p->orighash; + if (RHASH_ARRAY_P(p->hash)) + RHASH(p->hash)->as.ltbl->type = p->orighash; + else + RHASH(p->hash)->as.ntbl->type = p->orighash; return Qundef; } @@ -2717,12 +3476,19 @@ VALUE rb_hash_assoc(VALUE hash, VALUE key) { st_table *table; + li_table *ltable; const struct st_hash_type *orighash; VALUE args[2]; if (RHASH_EMPTY_P(hash)) return Qnil; - table = RHASH(hash)->ntbl; - orighash = table->type; + if (RHASH_ARRAY_P(hash)) { + ltable = RHASH(hash)->as.ltbl; + orighash = ltable->type; + } + else if (RHASH_TABLE_P(hash)) { + table = RHASH(hash)->as.ntbl; + orighash = table->type; + } if (orighash != &identhash) { VALUE value; @@ -2731,7 +3497,10 @@ rb_hash_assoc(VALUE hash, VALUE key) assochash.compare = assoc_cmp; assochash.hash = orighash->hash; - table->type = &assochash; + if (ltable) + ltable->type = &assochash; + else + table->type = &assochash; args[0] = hash; args[1] = key; ensure_arg.hash = hash; @@ -2898,11 +3667,12 @@ rb_hash_compact(VALUE hash) static VALUE rb_hash_compact_bang(VALUE hash) { + st_index_t n; rb_hash_modify_check(hash); - if (RHASH(hash)->ntbl) { - st_index_t n = RHASH(hash)->ntbl->num_entries; + n = RHASH_SIZE(hash); + if (n) { rb_hash_foreach(hash, delete_if_nil, hash); - if (n != RHASH(hash)->ntbl->num_entries) + if (n != RHASH_SIZE(hash)) return hash; } return Qnil; @@ -2927,16 +3697,30 @@ rb_hash_compact_bang(VALUE hash) static VALUE rb_hash_compare_by_id(VALUE hash) { - st_table *identtable; + VALUE tmp; if (rb_hash_compare_by_id_p(hash)) return hash; rb_hash_modify_check(hash); - identtable = rb_init_identtable_with_size(RHASH_SIZE(hash)); - rb_hash_foreach(hash, rb_hash_rehash_i, (VALUE)identtable); - if (RHASH(hash)->ntbl) - st_free_table(RHASH(hash)->ntbl); - RHASH(hash)->ntbl = identtable; - + if (RHASH_ARRAY_P(hash) || RHASH_TABLE_EMPTY(hash)) { + tmp = hash_alloc(0); + linear_init_identtable(tmp); + rb_hash_foreach(hash, rb_hash_rehash_i, (VALUE)tmp); + if (RHASH_ARRAY_P(hash)) + linear_free_table(hash); + linear_copy(hash, tmp); + linear_free_table(tmp); + } + else if (RHASH_TABLE_P(hash)) { + st_table *identtable; + tmp = hash_alloc(0); + identtable = rb_init_identtable_with_size(RHASH_SIZE(hash)); + RHASH(tmp)->as.ntbl = identtable; + rb_hash_foreach(hash, rb_hash_rehash_i, (VALUE)tmp); + st_free_table(RHASH(hash)->as.ntbl); + RHASH(hash)->as.ntbl = identtable; + RHASH(tmp)->as.ntbl = NULL; + } + rb_gc_force_recycle(tmp); return hash; } @@ -2952,9 +3736,10 @@ rb_hash_compare_by_id(VALUE hash) MJIT_FUNC_EXPORTED VALUE rb_hash_compare_by_id_p(VALUE hash) { - if (!RHASH(hash)->ntbl) - return Qfalse; - if (RHASH(hash)->ntbl->type == &identhash) { + if (RHASH_ARRAY_P(hash) && RHASH(hash)->as.ltbl->type == &identhash) { + return Qtrue; + } + if (RHASH(hash)->as.ntbl && RHASH(hash)->as.ntbl->type == &identhash) { return Qtrue; } return Qfalse; @@ -2964,7 +3749,7 @@ VALUE rb_ident_hash_new(void) { VALUE hash = rb_hash_new(); - RHASH(hash)->ntbl = st_init_table(&identhash); + RHASH(hash)->as.ntbl = st_init_table(&identhash); return hash; } @@ -3221,13 +4006,67 @@ add_new_i(st_data_t *key, st_data_t *val, st_data_t arg, int existing) * returns non-zero if +key+ was contained. */ int -rb_hash_add_new_element(VALUE hash, VALUE key, VALUE val) +rb_hash_add_new_element(VALUE hash, VALUE key, VALUE val) //TODO { - st_table *tbl = rb_hash_tbl_raw(hash); + st_table *tbl; + int ret = 0; VALUE args[2]; args[0] = hash; args[1] = val; + if (RHASH_ARRAY_P(hash) || RHASH_TABLE_EMPTY(hash)) { + hash_ltbl(hash); + ret = linear_update(hash, (st_data_t)key, add_new_i, (st_data_t)args); + if (ret != -1) + return ret; + try_convert_table(hash); + } + tbl = rb_hash_tbl_raw(hash); return st_update(tbl, (st_data_t)key, add_new_i, (st_data_t)args); + +} + +static st_data_t +key_stringify(VALUE key) +{ + return (rb_obj_class(key) == rb_cString && !RB_OBJ_FROZEN(key)) ? + rb_hash_key_str(key) : key; +} + +static void +linear_bulk_insert(VALUE hash, long argc, const VALUE *argv) +{ + long i; + for (i = 0; i < argc; ) { + st_data_t k = key_stringify(argv[i++]); + st_data_t v = argv[i++]; + linear_insert(hash, k, v); + RB_OBJ_WRITTEN(hash, Qundef, k); + RB_OBJ_WRITTEN(hash, Qundef, v); + } +} + +MJIT_FUNC_EXPORTED void +rb_hash_bulk_insert(long argc, const VALUE *argv, VALUE hash) +{ + st_index_t size; + + HASH_ASSERT(argc % 2 == 0); + if (! argc) + return; + size = argc / 2; + if (RHASH_TABLE_EMPTY(hash)) { + if (size <= LINEAR_TABLE_MAX_SIZE) + hash_ltbl(hash); + else + hash_tbl(hash); + } + if (RHASH_ARRAY_P(hash) && + (RHASH_ARRAY_LEN(hash) + size <= LINEAR_TABLE_MAX_SIZE)) { + linear_bulk_insert(hash, argc, argv); + return; + } + + rb_hash_bulk_insert_into_st_table(argc, argv, hash); } static int path_tainted = -1; diff --git a/include/ruby/encoding.h b/include/ruby/encoding.h index 93939ee7db0164..a2b352a01a399a 100644 --- a/include/ruby/encoding.h +++ b/include/ruby/encoding.h @@ -41,7 +41,10 @@ enum ruby_encoding_consts { #define ENCODING_SHIFT RUBY_ENCODING_SHIFT #define ENCODING_MASK RUBY_ENCODING_MASK +int rb_enc_capable(VALUE obj); + #define RB_ENCODING_SET_INLINED(obj,i) do {\ + if (!rb_enc_capable(obj)) rb_bug("RB_ENCODING_SET_INLINED: not capable"); \ RBASIC(obj)->flags &= ~RUBY_ENCODING_MASK;\ RBASIC(obj)->flags |= (VALUE)(i) << RUBY_ENCODING_SHIFT;\ } while (0) diff --git a/include/ruby/intern.h b/include/ruby/intern.h index 9ecd8ce8e21347..78362890e5dc1f 100644 --- a/include/ruby/intern.h +++ b/include/ruby/intern.h @@ -517,6 +517,7 @@ int rb_path_check(const char*); int rb_env_path_tainted(void); VALUE rb_env_clear(void); VALUE rb_hash_size(VALUE); +void rb_hash_free(VALUE); /* io.c */ #define rb_defout rb_stdout RUBY_EXTERN VALUE rb_fs; diff --git a/include/ruby/st.h b/include/ruby/st.h index ede3ff44567fc0..149e0ebaef3945 100644 --- a/include/ruby/st.h +++ b/include/ruby/st.h @@ -143,7 +143,7 @@ CONSTFUNC(st_index_t st_hash_end(st_index_t h)); CONSTFUNC(st_index_t st_hash_start(st_index_t h)); #define st_hash_start(h) ((st_index_t)(h)) -void rb_hash_bulk_insert(long, const VALUE *, VALUE); +void rb_hash_bulk_insert_into_st_table(long, const VALUE *, VALUE); RUBY_SYMBOL_EXPORT_END diff --git a/inits.c b/inits.c index 13fa0692363562..f85b90da070ab3 100644 --- a/inits.c +++ b/inits.c @@ -16,6 +16,7 @@ void rb_call_inits(void) { + CALL(TransientHeap); CALL(Method); CALL(RandomSeedCore); CALL(sym); diff --git a/internal.h b/internal.h index 45c499d8690542..6af6ffde11dfd1 100644 --- a/internal.h +++ b/internal.h @@ -670,9 +670,50 @@ struct RComplex { #define RCOMPLEX_SET_IMAG(cmp, i) RB_OBJ_WRITE((cmp), &((struct RComplex *)(cmp))->imag,(i)) #endif +enum ruby_rhash_flags { + RHASH_ARRAY_LEN_MAX = 8, + RHASH_ARRAY_FLAG = FL_USER3, + RHASH_ARRAY_LEN_MASK = (FL_USER4|FL_USER5|FL_USER6|FL_USER7), + RHASH_ARRAY_LEN_SHIFT = (FL_USHIFT+4), + RHASH_ARRAY_BOUND_MASK = (FL_USER8|FL_USER9|FL_USER10|FL_USER11), + RHASH_ARRAY_BOUND_SHIFT = (FL_USHIFT+8), + + RHASH_ENUM_END +}; + +#define HASH_PROC_DEFAULT FL_USER2 +#define RHASH_ARRAY_FLAG (VALUE)RHASH_ARRAY_FLAG +#define RHASH_ARRAY_P(hash) FL_TEST_RAW((hash), RHASH_ARRAY_FLAG) +#define RHASH_ARRAY_LEN_MASK (VALUE)RHASH_ARRAY_LEN_MASK +#define RHASH_ARRAY_LEN_SHIFT RHASH_ARRAY_LEN_SHIFT +#define RHASH_ARRAY_BOUND_MASK (VALUE)RHASH_ARRAY_BOUND_MASK +#define RHASH_ARRAY_BOUND_SHIFT RHASH_ARRAY_BOUND_SHIFT +#define RHASH_TRANSIENT_FLAG FL_USER14 +#define RHASH_TRANSIENT_P(hash) FL_TEST_RAW((hash), RHASH_TRANSIENT_FLAG) + +#define RHASH_ARRAY_LEN(h) \ + (long)((RBASIC(h)->flags & RHASH_ARRAY_LEN_MASK) >> RHASH_ARRAY_LEN_SHIFT) + +#define LINEAR_TABLE_MAX_SIZE 8 +#define LINEAR_TABLE_BOUND LINEAR_TABLE_MAX_SIZE + +typedef struct li_table_entry { + VALUE hash; + VALUE key; + VALUE record; +} li_table_entry; + +typedef struct LinearTable { + const struct st_hash_type *type; + li_table_entry entries[LINEAR_TABLE_MAX_SIZE]; +} li_table; + struct RHash { struct RBasic basic; - struct st_table *ntbl; /* possibly 0 */ + union { + struct st_table *ntbl; /* possibly 0 */ + struct LinearTable *ltbl; + } as; int iter_lev; const VALUE ifnone; }; @@ -685,7 +726,9 @@ struct RHash { #undef RHASH_SIZE #define RHASH_ITER_LEV(h) (RHASH(h)->iter_lev) #define RHASH_IFNONE(h) (RHASH(h)->ifnone) -#define RHASH_SIZE(h) (RHASH(h)->ntbl ? RHASH(h)->ntbl->num_entries : (st_index_t)0) +#define RHASH_SIZE_NTBL(h) (RHASH(h)->as.ntbl ? RHASH(h)->as.ntbl->num_entries : (st_index_t)0) +#define RHASH_SIZE(h) (RHASH_ARRAY_P(h) ? RHASH_ARRAY_LEN(h) : RHASH_SIZE_NTBL(h)) +#define RHASH_TABLE_P(h) (!RHASH_ARRAY_P(h) && RHASH(h)->as.ntbl) #endif /* missing/setproctitle.c */ @@ -1073,6 +1116,9 @@ VALUE rb_gvar_defined(struct rb_global_entry *); struct vtm; /* defined by timev.h */ /* array.c */ +#define RARRAY_TRANSIENT_FLAG FL_USER13 +#define ARY_TRANSIENT_P(ary) FL_TEST_RAW((ary), RARRAY_TRANSIENT_FLAG) + VALUE rb_ary_last(int, const VALUE *, VALUE); void rb_ary_set_len(VALUE, long); void rb_ary_delete_same(VALUE, VALUE); @@ -1355,8 +1401,11 @@ VALUE rb_hash_key_str(VALUE); VALUE rb_hash_keys(VALUE hash); VALUE rb_hash_values(VALUE hash); VALUE rb_hash_rehash(VALUE hash); +void rb_hash_free(VALUE hash); int rb_hash_add_new_element(VALUE hash, VALUE key, VALUE val); -#define HASH_PROC_DEFAULT FL_USER2 +int linear_foreach(VALUE hash, int (*)(ANYARGS), st_data_t); +int linear_lookup(VALUE hash, st_data_t, st_data_t *); +void rb_hash_bulk_insert(long, const VALUE *, VALUE); /* inits.c */ void rb_call_inits(void); diff --git a/st.c b/st.c index 1a47525707a239..b7506537d33d85 100644 --- a/st.c +++ b/st.c @@ -2281,24 +2281,16 @@ st_insert_generic(st_table *tab, long argc, const VALUE *argv, VALUE hash) st_rehash(tab); } -/* Mimics ruby's { foo => bar } syntax. This function is placed here - because it touches table internals and write barriers at once. */ +/* Mimics ruby's { foo => bar } syntax. This function is subpart + of rb_hash_bulk_insert. */ void -rb_hash_bulk_insert(long argc, const VALUE *argv, VALUE hash) +rb_hash_bulk_insert_into_st_table(long argc, const VALUE *argv, VALUE hash) { - st_index_t n; - st_table *tab = RHASH(hash)->ntbl; - - st_assert(argc % 2 == 0); - if (! argc) - return; - if (! tab) { - VALUE tmp = rb_hash_new_with_size(argc / 2); - RBASIC_CLEAR_CLASS(tmp); - RHASH(hash)->ntbl = tab = RHASH(tmp)->ntbl; - RHASH(tmp)->ntbl = NULL; - } - n = tab->num_entries + argc / 2; + st_index_t n, size = argc / 2; + st_table *tab = RHASH(hash)->as.ntbl; + + tab = rb_hash_tbl_raw(hash); + n = tab->num_entries + size; st_expand_table(tab, n); if (UNLIKELY(tab->num_entries)) st_insert_generic(tab, argc, argv, hash); diff --git a/test/drb/drbtest.rb b/test/drb/drbtest.rb index 2796100280b43f..6a3700bcb89ff3 100644 --- a/test/drb/drbtest.rb +++ b/test/drb/drbtest.rb @@ -276,16 +276,23 @@ def test_10_yield_undumped end def test_11_remote_no_method_error + #tp = TracePoint.new(:line){ GC.verify_transient_heap_internal_consistency } + #tp.enable do assert_raise(DRb::DRbRemoteError) do + GC.verify_transient_heap_internal_consistency @there.remote_no_method_error end begin + GC.verify_transient_heap_internal_consistency @there.remote_no_method_error rescue + GC.verify_transient_heap_internal_consistency error = $! assert_match(/^undefined method .*\(NoMethodError\)/, error.message) assert_equal('NoMethodError', error.reason) + GC.verify_transient_heap_internal_consistency end + #end end end diff --git a/test/lib/leakchecker.rb b/test/lib/leakchecker.rb index af9200bf77a284..75ff5da1b0279b 100644 --- a/test/lib/leakchecker.rb +++ b/test/lib/leakchecker.rb @@ -20,6 +20,7 @@ def check(test_name) check_verbose(test_name), ] GC.start if leaks.any? + # GC.verify_internal_consistency end def check_safe test_name diff --git a/test/ruby/test_time.rb b/test/ruby/test_time.rb index 50ac569c4eee83..0aac07b05d6e4a 100644 --- a/test/ruby/test_time.rb +++ b/test/ruby/test_time.rb @@ -1138,6 +1138,7 @@ def test_memsize case size when 20 then expect = 50 when 40 then expect = 86 + when 48 then expect = 94 else flunk "Unsupported RVALUE_SIZE=#{size}, update test_memsize" end diff --git a/thread.c b/thread.c index 3943cf0fc631e2..656107261c0f18 100644 --- a/thread.c +++ b/thread.c @@ -3506,10 +3506,10 @@ rb_thread_variable_p(VALUE thread, VALUE key) locals = rb_ivar_get(thread, id_locals); - if (!RHASH(locals)->ntbl) - return Qfalse; - - if (st_lookup(RHASH(locals)->ntbl, ID2SYM(id), 0)) { + if (RHASH_ARRAY_P(locals) && linear_lookup(locals, ID2SYM(id), 0)) { + return Qtrue; + } + else if (RHASH_TABLE_P(locals) && st_lookup(RHASH(locals)->as.ntbl, ID2SYM(id), 0)) { return Qtrue; } diff --git a/transient_heap.c b/transient_heap.c new file mode 100644 index 00000000000000..09a5f76a88ee5b --- /dev/null +++ b/transient_heap.c @@ -0,0 +1,841 @@ +#include "ruby/ruby.h" +#include "ruby/debug.h" +#include "vm_debug.h" +#include "gc.h" +#include "internal.h" +#include +#include +#include "ruby_assert.h" +#include "transient_heap.h" +#include + +/* + * 1: enable assertions + * 2: enable verify + */ +#ifndef TRANSIENT_HEAP_CHECK_MODE +#define TRANSIENT_HEAP_CHECK_MODE 0 +#endif +#define TH_ASSERT(expr) RUBY_ASSERT_MESG_WHEN(TRANSIENT_HEAP_CHECK_MODE > 0, expr, #expr) + +/* + * 1: show events + * 2: show dump at events + * 3: show all operations + */ +#define TRANSIENT_HEAP_DEBUG 0 + +/* For Debug: Provide blocks infinitely. + * This mode generates blocks unlimitedly + * and prohibit access free'ed blocks to check invalid access. + */ +#define TRANSIENT_HEAP_DEBUG_INFINITE_BLOCK 0 + +/* For Debug: Prohibit promoting to malloc space. + */ +#define TRANSIENT_HEAP_DEBUG_DONT_PROMOTE 0 + +/* size configuration */ +#define TRANSIENT_HEAP_PROMOTED_DEFAULT_SIZE 1024 + + /* K M */ +#define TRANSIENT_HEAP_BLOCK_SIZE (1024 * 32 ) /* int16_t */ +#define TRANSIENT_HEAP_TOTAL_SIZE (1024 * 1024 * 16) +//#define TRANSIENT_HEAP_TOTAL_SIZE (TRANSIENT_HEAP_BLOCK_SIZE * 2) // (1024 * 1024 * 16) +#define TRANSIENT_HEAP_ALLOC_MAX (1024 * 2 ) +#define TRANSIENT_HEAP_BLOCK_NUM (TRANSIENT_HEAP_TOTAL_SIZE / TRANSIENT_HEAP_BLOCK_SIZE) + +#define TRANSIENT_HEAP_ALLOC_MAGIC 0xfeab +#define TRANSIENT_HEAP_ALLOC_ALIGN RUBY_ALIGNOF(void *) + +#define TRANSIENT_HEAP_ALLOC_MARKING_LAST -1 +#define TRANSIENT_HEAP_ALLOC_MARKING_FREE -2 + + +enum transient_heap_status { + transient_heap_none, + transient_heap_marking, + transient_heap_escaping +}; + +struct transient_heap_block { + struct transient_heap_block_header { + int16_t size; /* sizeof(block) = TRANSIENT_HEAP_BLOCK_SIZE - sizeof(struct transient_heap_block_header) */ + int16_t index; + int16_t last_marked_index; + int16_t objects; + struct transient_heap_block *next_block; + } info; + char buff[TRANSIENT_HEAP_BLOCK_SIZE - sizeof(struct transient_heap_block_header)]; +}; + +struct transient_heap { + struct transient_heap_block *using_blocks; + struct transient_heap_block *marked_blocks; + struct transient_heap_block *free_blocks; + int total_objects; + int total_marked_objects; + int total_blocks; + enum transient_heap_status status; + + VALUE *promoted_objects; + int promoted_objects_size; + int promoted_objects_index; + + struct transient_heap_block *arena; + int arena_index; /* increment only */ +}; + +struct transient_alloc_header { + uint16_t magic; + uint16_t size; + int16_t next_marked_index; + int16_t dummy; + VALUE obj; +}; + +static struct transient_heap global_transient_heap; + +static void transient_heap_promote_add(struct transient_heap* theap, VALUE obj); +static void *transient_heap_ptr(VALUE obj, int error); +static int transient_header_managed_ptr_p(struct transient_heap* theap, const void *ptr); + + +#define ROUND_UP(v, a) (((size_t)(v) + (a) - 1) & ~((a) - 1)) + +static void +transient_heap_block_dump(struct transient_heap* theap, struct transient_heap_block *block) +{ + int i=0, n=0; + struct transient_alloc_header *header = NULL; + + while (iinfo.index) { + header = (void *)&block->buff[i]; + fprintf(stderr, "%4d %8d %p size:%4d next:%4d %s\n", n, i, header, header->size, header->next_marked_index, rb_obj_info(header->obj)); + i += header->size; + n++; + } +} + +static void +transient_heap_blocks_dump(struct transient_heap* theap, struct transient_heap_block *block, const char *type_str) +{ + while (block) { + fprintf(stderr, "- transient_heap_dump: %s:%p index:%d objects:%d last_marked_index:%d next:%p\n", + type_str, block, block->info.index, block->info.objects, block->info.last_marked_index, block->info.next_block); + + transient_heap_block_dump(theap, block); + block = block->info.next_block; + } +} + +static void +transient_heap_dump(struct transient_heap* theap) +{ + fprintf(stderr, "transient_heap_dump objects:%d marked_objects:%d blocks:%d\n", theap->total_objects, theap->total_marked_objects, theap->total_blocks); + transient_heap_blocks_dump(theap, theap->using_blocks, "using_blocks"); + transient_heap_blocks_dump(theap, theap->marked_blocks, "marked_blocks"); + transient_heap_blocks_dump(theap, theap->free_blocks, "free_blocks"); +} + +void +rb_transient_heap_dump(void) +{ + transient_heap_dump(&global_transient_heap); +} + +#if TRANSIENT_HEAP_CHECK_MODE >= 2 +static void +transient_heap_ptr_check(struct transient_heap *theap, VALUE obj) +{ + if (obj != Qundef) { + void *ptr = transient_heap_ptr(obj, FALSE); + TH_ASSERT(ptr == NULL || transient_header_managed_ptr_p(theap, ptr)); + } +} + +static int +transient_heap_block_verify(struct transient_heap *theap, struct transient_heap_block *block) +{ + int i=0, n=0; + struct transient_alloc_header *header; + + while (iinfo.index) { + header = (void *)&block->buff[i]; + TH_ASSERT(header->magic == TRANSIENT_HEAP_ALLOC_MAGIC); + transient_heap_ptr_check(theap, header->obj); + n ++; + i += header->size; + } + TH_ASSERT(block->info.objects == n); + + return n; +} +#endif + +static void +transient_heap_verify(struct transient_heap *theap) +{ +#if TRANSIENT_HEAP_CHECK_MODE >= 2 + struct transient_heap_block *block; + int n=0, block_num=0; + + // using_blocks + block = theap->using_blocks; + while (block) { + n += transient_heap_block_verify(theap, block); + block_num++; + block = block->info.next_block; + } + + // marked_blocks + block = theap->marked_blocks; + while (block) { + n += transient_heap_block_verify(theap, block); + block_num++; + TH_ASSERT(block->info.index > 0); + block = block->info.next_block; + } + + TH_ASSERT(n == theap->total_objects); + TH_ASSERT(n >= theap->total_marked_objects); + TH_ASSERT(block_num == theap->total_blocks); +#endif +} + +void +rb_transient_heap_verify(void) +{ + transient_heap_verify(&global_transient_heap); +} + +static struct transient_heap* +transient_heap_get(void) +{ + struct transient_heap* theap = &global_transient_heap; + transient_heap_verify(theap); + return theap; +} + +static void +reset_block(struct transient_heap_block *block) +{ + block->info.size = TRANSIENT_HEAP_BLOCK_SIZE - sizeof(struct transient_heap_block_header); + block->info.index = 0; + block->info.objects = 0; + block->info.last_marked_index = TRANSIENT_HEAP_ALLOC_MARKING_LAST; + block->info.next_block = NULL; +} + +static void +connect_to_free_blocks(struct transient_heap *theap, struct transient_heap_block *block) +{ + block->info.next_block = theap->free_blocks; + theap->free_blocks = block; +} + +static void +connect_to_using_blocks(struct transient_heap *theap, struct transient_heap_block *block) +{ + block->info.next_block = theap->using_blocks; + theap->using_blocks = block; +} + +#if 0 +static void +connect_to_marked_blocks(struct transient_heap *theap, struct transient_heap_block *block) +{ + block->info.next_block = theap->marked_blocks; + theap->marked_blocks = block; +} +#endif + +static void +append_to_marked_blocks(struct transient_heap *theap, struct transient_heap_block *append_blocks) +{ + if (theap->marked_blocks) { + struct transient_heap_block *block = theap->marked_blocks, *last_block = NULL; + while (block) { + last_block = block; + block = block->info.next_block; + } + + TH_ASSERT(last_block->info.next_block == NULL); + last_block->info.next_block = append_blocks; + } + else { + theap->marked_blocks = append_blocks; + } +} + +static struct transient_heap_block * +transient_heap_block_alloc(struct transient_heap* theap) +{ + struct transient_heap_block *block; +#if TRANSIENT_HEAP_DEBUG_INFINITE_BLOCK + block = mmap(NULL, TRANSIENT_HEAP_BLOCK_SIZE, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, + -1, 0); + if (block == MAP_FAILED) rb_bug("transient_heap_block_alloc: err:%d\n", errno); +#else + if (theap->arena == NULL) { + int err = posix_memalign((void **)&theap->arena, TRANSIENT_HEAP_BLOCK_SIZE, TRANSIENT_HEAP_TOTAL_SIZE); + if (err != 0) rb_bug("transient_heap_block_alloc: posix_memalign error: %d", err); + } + TH_ASSERT(theap->arena_index < TRANSIENT_HEAP_BLOCK_NUM); + block = &theap->arena[theap->arena_index++]; + TH_ASSERT(((intptr_t)block & (TRANSIENT_HEAP_BLOCK_SIZE - 1)) == 0); +#endif + reset_block(block); + + TH_ASSERT(((intptr_t)block->buff & (TRANSIENT_HEAP_ALLOC_ALIGN-1)) == 0); + // fprintf(stderr, "transient_heap_block_alloc: %4d %p\n", theap->total_blocks, block); + return block; +} + + +static struct transient_heap_block * +transient_heap_allocatable_block(struct transient_heap* theap) +{ + struct transient_heap_block *block; + +#if TRANSIENT_HEAP_DEBUG_INFINITE_BLOCK + block = transient_heap_block_alloc(theap); + theap->total_blocks++; +#else + // get one block from free_blocks + block = theap->free_blocks; + if (block) { + theap->free_blocks = block->info.next_block; + block->info.next_block = NULL; + theap->total_blocks++; + } +#endif + + return block; +} + +static struct transient_alloc_header * +transient_heap_allocatable_header(struct transient_heap* theap, size_t size) +{ + struct transient_heap_block *block = theap->using_blocks; + + while (block) { + TH_ASSERT(block->info.size >= block->info.index); + + if (block->info.size - block->info.index >= (int32_t)size) { + struct transient_alloc_header *header = (void *)&block->buff[block->info.index]; + block->info.index += size; + block->info.objects++; + return header; + } + else { + block = transient_heap_allocatable_block(theap); + if (block) connect_to_using_blocks(theap, block); + } + } + + return NULL; +} + +void * +rb_transient_heap_alloc(VALUE obj, size_t req_size) +{ + struct transient_heap* theap = transient_heap_get(); + size_t size = ROUND_UP(req_size + sizeof(struct transient_alloc_header), TRANSIENT_HEAP_ALLOC_ALIGN); + + TH_ASSERT(RB_TYPE_P(obj, T_ARRAY) || RB_TYPE_P(obj, T_HASH)); /* supported types */ + + if (size > TRANSIENT_HEAP_ALLOC_MAX) { + if (TRANSIENT_HEAP_DEBUG >= 3) fprintf(stderr, "rb_transient_heap_alloc: [too big: %ld] %s\n", (long)size, rb_obj_info(obj)); + return NULL; + } +#if TRANSIENT_HEAP_DEBUG_DONT_PROMOTE == 0 + else if (RB_OBJ_PROMOTED_RAW(obj)) { + if (TRANSIENT_HEAP_DEBUG >= 3) fprintf(stderr, "rb_transient_heap_alloc: [promoted object] %s\n", rb_obj_info(obj)); + return NULL; + } +#else + else if (RBASIC_CLASS(obj) == 0) { + if (TRANSIENT_HEAP_DEBUG >= 3) fprintf(stderr, "rb_transient_heap_alloc: [hidden object] %s\n", rb_obj_info(obj)); + return NULL; + } +#endif + else { + struct transient_alloc_header *header = transient_heap_allocatable_header(theap, size); + if (header) { + void *ptr; + + header->size = size; + header->magic = TRANSIENT_HEAP_ALLOC_MAGIC; + header->next_marked_index = TRANSIENT_HEAP_ALLOC_MARKING_FREE; + header->obj = obj; // TODO: for verify + + // stat info + theap->total_objects++; + ptr = header + 1; + +#if TRANSIENT_HEAP_DEBUG_DONT_PROMOTE + if (RB_OBJ_PROMOTED_RAW(obj)) { + transient_heap_promote_add(theap, obj); + } +#endif + if (TRANSIENT_HEAP_DEBUG >= 3) fprintf(stderr, "rb_transient_heap_alloc: header:%p ptr:%p size:%d obj:%s\n", header, ptr, (int)size, rb_obj_info(obj)); + return ptr; + } + else { + if (TRANSIENT_HEAP_DEBUG >= 3) fprintf(stderr, "rb_transient_heap_alloc: [no enough space: %ld] %s\n", (long)size, rb_obj_info(obj)); + return NULL; + } + } +} + +void +Init_TransientHeap(void) +{ + int i, block_num; + struct transient_heap* theap = transient_heap_get(); + +#if TRANSIENT_HEAP_DEBUG_INFINITE_BLOCK + block_num = 0; +#else + TH_ASSERT(TRANSIENT_HEAP_BLOCK_SIZE * TRANSIENT_HEAP_BLOCK_NUM == TRANSIENT_HEAP_TOTAL_SIZE); + block_num = TRANSIENT_HEAP_BLOCK_NUM; +#endif + for (i=0; iusing_blocks = transient_heap_allocatable_block(theap); + + theap->promoted_objects_size = TRANSIENT_HEAP_PROMOTED_DEFAULT_SIZE; + theap->promoted_objects_index = 0; + /* should not use ALLOC_N to be free from GC */ + theap->promoted_objects = malloc(sizeof(VALUE) * theap->promoted_objects_size); + if (theap->promoted_objects == NULL) rb_bug("Init_TransientHeap: malloc failed."); +} + +static struct transient_heap_block * +blocks_alloc_header_to_block(struct transient_heap *theap, struct transient_heap_block *blocks, struct transient_alloc_header *header) +{ + struct transient_heap_block *block = blocks; + + while (block) { + if (block->buff <= (char *)header && (char *)header < block->buff + block->info.size) { + return block; + } + block = block->info.next_block; + } + + return NULL; +} + +static struct transient_heap_block * +alloc_header_to_block_verbose(struct transient_heap *theap, struct transient_alloc_header *header) +{ + struct transient_heap_block *block; + + if ((block = blocks_alloc_header_to_block(theap, theap->marked_blocks, header)) != NULL) { + // if (TRANSIENT_HEAP_DEBUG >= 3) fprintf(stderr, "alloc_header_to_block: found in marked_blocks\n"); + return block; + } + else if ((block = blocks_alloc_header_to_block(theap, theap->using_blocks, header)) != NULL) { + // if (TRANSIENT_HEAP_DEBUG >= 3) fprintf(stderr, "alloc_header_to_block: found in using_blocks\n"); + return block; + } + else { + return NULL; + } + return block; +} + +static struct transient_alloc_header * +ptr_to_alloc_header(const void *ptr) +{ + struct transient_alloc_header *header = (void *)ptr; + header -= 1; + return header; +} + +static int +transient_header_managed_ptr_p(struct transient_heap* theap, const void *ptr) +{ + if (alloc_header_to_block_verbose(theap, ptr_to_alloc_header(ptr))) { + return TRUE; + } + else { + return FALSE; + } +} + + +int +rb_transient_heap_managed_ptr_p(const void *ptr) +{ + return transient_header_managed_ptr_p(transient_heap_get(), ptr); +} + +static struct transient_heap_block * +alloc_header_to_block(struct transient_heap *theap, struct transient_alloc_header *header) +{ + struct transient_heap_block *block; +#if TRANSIENT_HEAP_DEBUG_INFINITE_BLOCK + block = alloc_header_to_block_verbose(theap, header); + if (block == NULL) { + transient_heap_dump(theap); + rb_bug("alloc_header_to_block: not found in mark_blocks (%p)\n", header); + } +#else + block = (void *)((intptr_t)header & ~(TRANSIENT_HEAP_BLOCK_SIZE-1)); + TH_ASSERT(block == alloc_header_to_block_verbose(theap, header)); +#endif + return block; +} + +void +rb_transient_heap_mark(VALUE obj, const void *ptr) +{ + struct transient_alloc_header *header = ptr_to_alloc_header(ptr); + if (header->magic != TRANSIENT_HEAP_ALLOC_MAGIC) rb_bug("rb_transient_heap_mark: wrong header, %s (%p)", rb_obj_info(obj), ptr); + if (TRANSIENT_HEAP_DEBUG >= 3) fprintf(stderr, "rb_transient_heap_mark: %s (%p)\n", rb_obj_info(obj), ptr); + +#if TRANSIENT_HEAP_CHECK_MODE > 0 + { + struct transient_heap* theap = transient_heap_get(); + TH_ASSERT(theap->status == transient_heap_marking); + TH_ASSERT(transient_header_managed_ptr_p(theap, ptr)); + + if (header->magic != TRANSIENT_HEAP_ALLOC_MAGIC) { + transient_heap_dump(theap); + rb_bug("rb_transient_heap_mark: magic is broken"); + } + else if (header->obj != obj) { + // transient_heap_dump(theap); + rb_bug("rb_transient_heap_mark: unmatch (%s is stored, but %s is given)\n", + rb_obj_info(header->obj), rb_obj_info(obj)); + } + } +#endif + + if (header->next_marked_index != TRANSIENT_HEAP_ALLOC_MARKING_FREE) { + // already marked + return; + } + else { + struct transient_heap* theap = transient_heap_get(); + struct transient_heap_block *block = alloc_header_to_block(theap, header); + header->next_marked_index = block->info.last_marked_index; + block->info.last_marked_index = (int)((char *)header - block->buff); + theap->total_marked_objects++; + + transient_heap_verify(theap); + } +} + +static void * +transient_heap_ptr(VALUE obj, int error) +{ + void *ptr; + + switch (BUILTIN_TYPE(obj)) { + case T_ARRAY: + if (ARY_TRANSIENT_P(obj)) { + ptr = (VALUE *)RARRAY_CONST_PTR(obj); + } + else { + ptr = NULL; + } + break; + case T_HASH: + if (RHASH_TRANSIENT_P(obj)) { + ptr = (VALUE *)(RHASH(obj)->as.ltbl); + } + else { + ptr = NULL; + } + break; + default: + if (error) { + rb_bug("transient_heap_ptr: unknown obj %s\n", rb_obj_info(obj)); + } + else { + ptr = NULL; + } + } + + return ptr; +} + +static void +transient_heap_promote_add(struct transient_heap* theap, VALUE obj) +{ + if (TRANSIENT_HEAP_DEBUG >= 3) fprintf(stderr, "rb_transient_heap_promote: %s\n", rb_obj_info(obj)); + + if (TRANSIENT_HEAP_DEBUG_DONT_PROMOTE) { + /* duplicate check */ + int i; + for (i=0; ipromoted_objects_index; i++) { + if (theap->promoted_objects[i] == obj) return; + } + } + + if (theap->promoted_objects_size <= theap->promoted_objects_index) { + theap->promoted_objects_size *= 2; + if (TRANSIENT_HEAP_DEBUG >= 1) fprintf(stderr, "rb_transient_heap_promote: expand table to %d\n", theap->promoted_objects_size); + theap->promoted_objects = realloc(theap->promoted_objects, theap->promoted_objects_size * sizeof(VALUE)); + if (theap->promoted_objects == NULL) rb_bug("rb_transient_heap_promote: realloc failed"); + } + theap->promoted_objects[theap->promoted_objects_index++] = obj; +} + +void +rb_transient_heap_promote(VALUE obj) +{ + + if (transient_heap_ptr(obj, FALSE)) { + struct transient_heap* theap = transient_heap_get(); + transient_heap_promote_add(theap, obj); + } + else { + /* ignore */ + } +} + +static struct transient_alloc_header * +alloc_header(struct transient_heap_block* block, int index) +{ + return (void *)&block->buff[index]; +} + +void rb_ary_transient_heap_promote(VALUE ary, int promote); +void rb_hash_transient_heap_promote(VALUE hash, int promote); + +static void +transient_heap_reset(void) +{ + struct transient_heap* theap = transient_heap_get(); + struct transient_heap_block* block; + + if (TRANSIENT_HEAP_DEBUG >= 1) fprintf(stderr, "!! transient_heap_reset\n"); + + block = theap->marked_blocks; + while (block) { + struct transient_heap_block *next_block = block->info.next_block; + theap->total_objects -= block->info.objects; +#if TRANSIENT_HEAP_DEBUG_INFINITE_BLOCK + // debug mode + if (madvise(block, TRANSIENT_HEAP_BLOCK_SIZE, MADV_DONTNEED) != 0) { + rb_bug("madvise err:%d", errno); + } + if (mprotect(block, TRANSIENT_HEAP_BLOCK_SIZE, PROT_NONE) != 0) { + rb_bug("mprotect err:%d", errno); + } +#else + reset_block(block); + connect_to_free_blocks(theap, block); +#endif + theap->total_blocks--; + block = next_block; + } + + if (TRANSIENT_HEAP_DEBUG >= 1) fprintf(stderr, "!! transient_heap_reset block_num:%d\n", theap->total_blocks); + + theap->marked_blocks = NULL; + theap->total_marked_objects = 0; +} + +static void +transient_heap_block_escape(struct transient_heap* theap, struct transient_heap_block* block) +{ + int marked_index = block->info.last_marked_index; + block->info.last_marked_index = TRANSIENT_HEAP_ALLOC_MARKING_LAST; + + while (marked_index >= 0) { + struct transient_alloc_header *header = alloc_header(block, marked_index); + VALUE obj = header->obj; + TH_ASSERT(header->magic == TRANSIENT_HEAP_ALLOC_MAGIC); + if (header->magic != TRANSIENT_HEAP_ALLOC_MAGIC) rb_bug("rb_transient_heap_mark: wrong header %s\n", rb_obj_info(obj)); + + if (TRANSIENT_HEAP_DEBUG >= 3) fprintf(stderr, " * transient_heap_block_escape %p %s\n", header, rb_obj_info(obj)); + + if (obj != Qnil) { + switch (BUILTIN_TYPE(obj)) { + case T_ARRAY: +#if TRANSIENT_HEAP_DEBUG_DONT_PROMOTE + rb_ary_transient_heap_promote(obj, FALSE); +#else + rb_ary_transient_heap_promote(obj, TRUE); +#endif + break; + case T_HASH: + rb_hash_transient_heap_promote(obj, TRUE); + break; + default: + rb_bug("unsupporeted: %s\n", rb_obj_info(obj)); + } + header->obj = Qundef; // to verify + } + marked_index = header->next_marked_index; + } +} + +static void +transient_heap_update_status(struct transient_heap* theap, enum transient_heap_status status) +{ + TH_ASSERT(theap->status != status); + theap->status = status; +} + +static void +transient_heap_escape(void *dmy) +{ + struct transient_heap* theap = transient_heap_get(); + + if (theap->status == transient_heap_marking) { + if (TRANSIENT_HEAP_DEBUG >= 1) fprintf(stderr, "!! transient_heap_escape: skip while transient_heap_marking\n"); + } + else { + VALUE gc_disabled = rb_gc_disable(); + struct transient_heap_block* block; + + if (TRANSIENT_HEAP_DEBUG >= 1) { + int i; + fprintf(stderr, "!! transient_heap_escape start total_blocks:%d\n", theap->total_blocks); + if (TRANSIENT_HEAP_DEBUG >= 4) { + for (i=0; ipromoted_objects_index; i++) fprintf(stderr, "%4d %s\n", i, rb_obj_info(theap->promoted_objects[i])); + } + } + if (TRANSIENT_HEAP_DEBUG >= 2) transient_heap_dump(theap); + + TH_ASSERT(theap->status == transient_heap_none); + transient_heap_update_status(theap, transient_heap_escaping); + + // escape marked blocks + block = theap->marked_blocks; + while (block) { + transient_heap_block_escape(theap, block); + block = block->info.next_block; + } + + // escape using blocks + // only affect incremental marking + block = theap->using_blocks; + while (block) { + transient_heap_block_escape(theap, block); + block = block->info.next_block; + } + + // all objects in marked_objects are escaped. + transient_heap_reset(); + + if (TRANSIENT_HEAP_DEBUG > 0) { + fprintf(stderr, "!! transient_heap_escape end total_blocks:%d\n", theap->total_blocks); + // transient_heap_dump(theap); + } + + transient_heap_verify(theap); + transient_heap_update_status(theap, transient_heap_none); + if (gc_disabled != Qtrue) rb_gc_enable(); + } +} + +static void +clear_marked_index(struct transient_heap_block* block) +{ + int marked_index = block->info.last_marked_index; + + while (marked_index != TRANSIENT_HEAP_ALLOC_MARKING_LAST) { + struct transient_alloc_header *header = alloc_header(block, marked_index); + TH_ASSERT(marked_index != TRANSIENT_HEAP_ALLOC_MARKING_FREE); + // fprintf(stderr, "clear_marked_index - block:%p mark_index:%d\n", block, marked_index); + + marked_index = header->next_marked_index; + header->next_marked_index = TRANSIENT_HEAP_ALLOC_MARKING_FREE; + } + + block->info.last_marked_index = TRANSIENT_HEAP_ALLOC_MARKING_LAST; +} + +void +rb_transient_heap_start_marking(int full_marking) +{ + struct transient_heap* theap = transient_heap_get(); + struct transient_heap_block* block; + + if (TRANSIENT_HEAP_DEBUG >= 1) fprintf(stderr, "!! rb_transient_heap_start_marking objects:%d blocks:%d promtoed:%d full_marking:%d\n", + theap->total_objects, theap->total_blocks, theap->promoted_objects_index, full_marking); + if (TRANSIENT_HEAP_DEBUG >= 2) transient_heap_dump(theap); + + // clear marking info + block = theap->marked_blocks; + while (block) { + clear_marked_index(block); + block = block->info.next_block; + } + + block = theap->using_blocks; + while (block) { + clear_marked_index(block); + block = block->info.next_block; + } + + if (theap->using_blocks) { + if (theap->using_blocks->info.objects > 0) { + append_to_marked_blocks(theap, theap->using_blocks); + theap->using_blocks = NULL; + } + else { + append_to_marked_blocks(theap, theap->using_blocks->info.next_block); + theap->using_blocks->info.next_block = NULL; + } + } + + if (theap->using_blocks == NULL) { + theap->using_blocks = transient_heap_allocatable_block(theap); + } + + TH_ASSERT(theap->status == transient_heap_none); + transient_heap_update_status(theap, transient_heap_marking); + theap->total_marked_objects = 0; + + if (full_marking) { + theap->promoted_objects_index = 0; + } + else { /* mark promoted objects */ + int i; + for (i=0; ipromoted_objects_index; i++) { + VALUE obj = theap->promoted_objects[i]; + void *ptr = transient_heap_ptr(obj, TRUE); + if (ptr) { + rb_transient_heap_mark(obj, ptr); + } + } + } + + transient_heap_verify(theap); +} + +void +rb_transient_heap_finish_marking(void) +{ + struct transient_heap* theap = transient_heap_get(); + + if (TRANSIENT_HEAP_DEBUG >= 1) fprintf(stderr, "!! rb_transient_heap_finish_marking objects:%d, marked:%d\n", + theap->total_objects, + theap->total_marked_objects); + if (TRANSIENT_HEAP_DEBUG >= 2) transient_heap_dump(theap); + + TH_ASSERT(theap->total_objects >= theap->total_marked_objects); + + TH_ASSERT(theap->status == transient_heap_marking); + transient_heap_update_status(theap, transient_heap_none); + + if (theap->total_marked_objects > 0) { + if (TRANSIENT_HEAP_DEBUG >= 1) fprintf(stderr, "-> rb_transient_heap_finish_marking register escape func.\n"); + rb_postponed_job_register_one(0, transient_heap_escape, NULL); + } + else { + transient_heap_reset(); + } + + transient_heap_verify(theap); +} diff --git a/transient_heap.h b/transient_heap.h new file mode 100644 index 00000000000000..22860413531bb0 --- /dev/null +++ b/transient_heap.h @@ -0,0 +1,13 @@ +#ifndef RUBY_TRANSIENT_HEAP_H +#define RUBY_TRANSIENT_HEAP_H + +void rb_transient_heap_promote(VALUE obj); +void rb_transient_heap_dump(void); +void *rb_transient_heap_alloc(VALUE obj, size_t req_size); +void rb_transient_heap_mark(VALUE obj, const void *ptr); +void rb_transient_heap_start_marking(int full_marking); +void rb_transient_heap_finish_marking(void); +int rb_transient_heap_managed_ptr_p(const void *ptr); +void rb_transient_heap_verify(void); + +#endif diff --git a/vm_eval.c b/vm_eval.c index df93d000dd3b0a..64f03ea3b2c065 100644 --- a/vm_eval.c +++ b/vm_eval.c @@ -2012,7 +2012,7 @@ static void local_var_list_init(struct local_var_list *vars) { vars->tbl = rb_hash_new(); - RHASH(vars->tbl)->ntbl = st_init_numtable(); /* compare_by_identity */ + RHASH(vars->tbl)->as.ntbl = st_init_numtable(); /* compare_by_identity */ RBASIC_CLEAR_CLASS(vars->tbl); }