Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 26 additions & 22 deletions src/object.c
Original file line number Diff line number Diff line change
Expand Up @@ -1202,48 +1202,51 @@ size_t streamRadixTreeMemoryUsage(rax *rax) {
return size;
}

/* Returns the size in bytes consumed by the key's value in RAM.
/* Returns the size in bytes consumed by the object header, key and value in RAM.
* Note that the returned value is just an approximation, especially in the
* case of aggregated data types where only "sample_size" elements
* are checked and averaged to estimate the total size. */
#define OBJ_COMPUTE_SIZE_DEF_SAMPLES 5 /* Default sample size. */
size_t objectComputeSize(robj *key, robj *o, size_t sample_size, int dbid) {
size_t kvobjComputeSize(robj *key, kvobj *o, size_t sample_size, int dbid) {
dict *d;
dictIterator di;
struct dictEntry *de;
size_t asize = 0, elesize = 0, elecount = 0, samples = 0;
size_t elesize = 0, elecount = 0, samples = 0;

/* All kv-objects have at least a kvobj header and an embedded key */
size_t asize = malloc_usable_size((void *)o);

if (o->type == OBJ_STRING) {
if(o->encoding == OBJ_ENCODING_INT) {
asize = sizeof(*o);
/* Value already counted (reuse the "ptr" in header to store int) */
} else if(o->encoding == OBJ_ENCODING_RAW) {
asize = sdsZmallocSize(o->ptr)+sizeof(*o);
asize += sdsZmallocSize(o->ptr);
} else if(o->encoding == OBJ_ENCODING_EMBSTR) {
asize = zmalloc_size((void *)o);
/* Value already counted (Value embedded in the object as well) */
} else {
serverPanic("Unknown string encoding");
}
} else if (o->type == OBJ_LIST) {
if (o->encoding == OBJ_ENCODING_QUICKLIST) {
quicklist *ql = o->ptr;
quicklistNode *node = ql->head;
asize = sizeof(*o)+sizeof(quicklist);
asize += sizeof(quicklist);
do {
elesize += sizeof(quicklistNode)+zmalloc_size(node->entry);
elecount += node->count;
samples++;
} while ((node = node->next) && samples < sample_size);
asize += (double)elesize/elecount*ql->count;
asize += (double)elesize/samples*ql->count;
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Bug: Quicklist averaging formula has unit mismatch after fix

The quicklist memory estimation formula was changed from (double)elesize/elecount*ql->count to (double)elesize/samples*ql->count. This introduces a dimensional mismatch:

  • elesize = total memory of sampled nodes
  • samples = number of sampled nodes
  • ql->count = total number of elements across all nodes

So the new formula computes (average memory per node) × (total elements), which mixes units and will significantly overestimate memory (by a factor of ~elements-per-node).

The old formula was dimensionally consistent: (memory per element avg) × (total elements). If the intent was to change to a per-node estimation, the multiplier should also change from ql->count (total elements) to ql->len (total nodes):

asize += (double)elesize/samples*ql->len;

Both approaches (elesize/elecount*ql->count and elesize/samples*ql->len) are valid estimation strategies but the current code elesize/samples*ql->count is incorrect.

Was this helpful? React with 👍 / 👎

Suggested change
asize += (double)elesize/samples*ql->count;
asize += (double)elesize/samples*ql->len;
  • Apply suggested fix

} else if (o->encoding == OBJ_ENCODING_LISTPACK) {
asize = sizeof(*o)+zmalloc_size(o->ptr);
asize += zmalloc_size(o->ptr);
} else {
serverPanic("Unknown list encoding");
}
} else if (o->type == OBJ_SET) {
if (o->encoding == OBJ_ENCODING_HT) {
d = o->ptr;
dictInitIterator(&di, d);
asize = sizeof(*o)+sizeof(dict)+(sizeof(struct dictEntry*)*dictBuckets(d));
asize += sizeof(dict) + (sizeof(struct dictEntry*) * dictBuckets(d));
while((de = dictNext(&di)) != NULL && samples < sample_size) {
sds ele = dictGetKey(de);
elesize += dictEntryMemUsage(0) + sdsZmallocSize(ele);
Expand All @@ -1252,20 +1255,20 @@ size_t objectComputeSize(robj *key, robj *o, size_t sample_size, int dbid) {
dictResetIterator(&di);
if (samples) asize += (double)elesize/samples*dictSize(d);
} else if (o->encoding == OBJ_ENCODING_INTSET) {
asize = sizeof(*o)+zmalloc_size(o->ptr);
asize += zmalloc_size(o->ptr);
} else if (o->encoding == OBJ_ENCODING_LISTPACK) {
asize = sizeof(*o)+zmalloc_size(o->ptr);
asize += zmalloc_size(o->ptr);
} else {
serverPanic("Unknown set encoding");
}
} else if (o->type == OBJ_ZSET) {
if (o->encoding == OBJ_ENCODING_LISTPACK) {
asize = sizeof(*o)+zmalloc_size(o->ptr);
asize += zmalloc_size(o->ptr);
} else if (o->encoding == OBJ_ENCODING_SKIPLIST) {
d = ((zset*)o->ptr)->dict;
zskiplist *zsl = ((zset*)o->ptr)->zsl;
zskiplistNode *znode = zsl->header->level[0].forward;
asize = sizeof(*o)+sizeof(zset)+sizeof(zskiplist)+sizeof(dict)+
asize += sizeof(zset) + sizeof(zskiplist) + sizeof(dict) +
(sizeof(struct dictEntry*)*dictBuckets(d))+
zmalloc_size(zsl->header);
while(znode != NULL && samples < sample_size) {
Expand All @@ -1280,14 +1283,14 @@ size_t objectComputeSize(robj *key, robj *o, size_t sample_size, int dbid) {
}
} else if (o->type == OBJ_HASH) {
if (o->encoding == OBJ_ENCODING_LISTPACK) {
asize = sizeof(*o)+zmalloc_size(o->ptr);
asize += zmalloc_size(o->ptr);
} else if (o->encoding == OBJ_ENCODING_LISTPACK_EX) {
listpackEx *lpt = o->ptr;
asize = sizeof(*o) + zmalloc_size(lpt) + zmalloc_size(lpt->lp);
asize += zmalloc_size(lpt) + zmalloc_size(lpt->lp);
} else if (o->encoding == OBJ_ENCODING_HT) {
d = o->ptr;
dictInitIterator(&di, d);
asize = sizeof(*o)+sizeof(dict)+(sizeof(struct dictEntry*)*dictBuckets(d));
asize += sizeof(dict) + (sizeof(struct dictEntry*) * dictBuckets(d));
while((de = dictNext(&di)) != NULL && samples < sample_size) {
hfield ele = dictGetKey(de);
sds ele2 = dictGetVal(de);
Expand All @@ -1302,7 +1305,7 @@ size_t objectComputeSize(robj *key, robj *o, size_t sample_size, int dbid) {
}
} else if (o->type == OBJ_STREAM) {
stream *s = o->ptr;
asize = sizeof(*o)+sizeof(*s);
asize += sizeof(*s);
asize += streamRadixTreeMemoryUsage(s->rax);

/* Now we have to add the listpacks. The last listpack is often non
Expand All @@ -1312,7 +1315,8 @@ size_t objectComputeSize(robj *key, robj *o, size_t sample_size, int dbid) {
raxIterator ri;
raxStart(&ri,s->rax);
raxSeek(&ri,"^",NULL,0);
size_t lpsize = 0, samples = 0;
size_t lpsize = 0;
size_t samples = 0;
while(samples < sample_size && raxNext(&ri)) {
unsigned char *lp = ri.data;
/* Use the allocated size, since we overprovision the node initially. */
Expand All @@ -1323,7 +1327,7 @@ size_t objectComputeSize(robj *key, robj *o, size_t sample_size, int dbid) {
asize += lpsize;
} else {
if (samples) lpsize /= samples; /* Compute the average. */
asize += lpsize * (s->rax->numele-1);
asize += lpsize * s->rax->numele;
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Bug: Stream listpack memory double-counts last node

Removing the -1 from s->rax->numele-1 causes the last listpack to be counted twice. The algorithm (as documented in the comment on lines 1311-1314) is:

  1. Sample the first few listpacks (at most sample_size of them) and compute their average size
  2. Use that average to estimate the memory of N-1 of the N listpacks in the radix tree (all except the last)
  3. Seek to the last node ("$") and add its real size (lines 1333-1336)

The old code correctly implemented this:

asize += lpsize * (s->rax->numele - 1);  // estimate all but last
asize += zmalloc_size(ri.data);            // add real size of last

The new code double-counts the last node:

asize += lpsize * s->rax->numele;          // estimates ALL nodes including last
asize += zmalloc_size(ri.data);            // adds last node AGAIN

The -1 was not an off-by-one error — it was intentional to avoid double-counting. The comment right above still describes the correct N-1 algorithm.

Was this helpful? React with 👍 / 👎

Suggested change
asize += lpsize * s->rax->numele;
asize += lpsize * (s->rax->numele-1);
  • Apply suggested fix

/* No need to check if seek succeeded, we enter this branch only
* if there are a few elements in the radix tree. */
raxSeek(&ri,"$",NULL,0);
Expand Down Expand Up @@ -1364,7 +1368,7 @@ size_t objectComputeSize(robj *key, robj *o, size_t sample_size, int dbid) {
raxStop(&ri);
}
} else if (o->type == OBJ_MODULE) {
asize = moduleGetMemUsage(key, o, sample_size, dbid);
asize += moduleGetMemUsage(key, o, sample_size, dbid);
} else {
serverPanic("Unknown object type");
}
Expand Down Expand Up @@ -1780,7 +1784,7 @@ NULL
addReplyNull(c);
return;
}
size_t usage = objectComputeSize(c->argv[2], (robj *)kv, samples, c->db->id);
size_t usage = kvobjComputeSize(c->argv[2], kv, samples, c->db->id);
addReplyLongLong(c,usage);
} else if (!strcasecmp(c->argv[1]->ptr,"stats") && c->argc == 2) {
struct redisMemOverhead *mh = getMemoryOverheadData();
Expand Down
2 changes: 1 addition & 1 deletion tests/unit/type/hash.tcl
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ start_server {tags {"hash"}} {
create_hash myhash $contents
assert_encoding $type myhash

# coverage for objectComputeSize
# coverage for kvobjComputeSize
assert_morethan [memory_usage myhash] 0

test "HRANDFIELD - $type" {
Expand Down
2 changes: 1 addition & 1 deletion tests/unit/type/list.tcl
Original file line number Diff line number Diff line change
Expand Up @@ -2241,7 +2241,7 @@ foreach {pop} {BLPOP BLMPOP_RIGHT} {
set k [r lrange k 0 -1]
set dump [r dump k]

# coverage for objectComputeSize
# coverage for kvobjComputeSize
assert_morethan [memory_usage k] 0

config_set sanitize-dump-payload no mayfail
Expand Down
27 changes: 27 additions & 0 deletions tests/unit/type/string.tcl
Original file line number Diff line number Diff line change
Expand Up @@ -684,6 +684,33 @@ if {[string match {*jemalloc*} [s mem_allocator]]} {
lappend res [r get bar]
} {12 12}

# coverage for kvobjComputeSize
test {MEMORY USAGE - STRINGS} {
    # Key and value lengths to combine; every key length is paired with
    # every value length below.
    set sizes {1 5 8 15 16 17 31 32 33 63 64 65 127 128 129 255 256 257}
    # NOTE(review): presumably the minimal object-header size for the
    # server's pointer width (12 on 32-bit, 16 on 64-bit) - confirm against
    # the kvobj layout.
    set hdrsize [expr {[s arch_bits] == 32 ? 12 : 16}]

    foreach ksize $sizes {
        set key [string repeat "k" $ksize]

        # OBJ_ENCODING_EMBSTR, OBJ_ENCODING_RAW
        foreach vsize $sizes {
            set value [string repeat "v" $vsize]
            r set $key $value
            set memory_used [r memory usage $key]

            # Lower bound: the reported usage must cover at least the
            # header, the key bytes and the value bytes.
            set min [expr {$hdrsize + $ksize + $vsize}]
            assert_lessthan_equal $min $memory_used

            # Upper bound: 64 bytes when the lower bound is below 32,
            # otherwise twice the lower bound.
            set max [expr {32 > $min ? 64 : $min * 2}]
            assert_morethan_equal $max $memory_used
        }

        # OBJ_ENCODING_INT
        foreach value {1 100 10000 10000000} {
            r set $key $value
            # Only the lower bound (header plus key bytes) is checked for
            # integer values.
            set min [expr {$hdrsize + $ksize}]
            assert_lessthan_equal $min [r memory usage $key]
        }
    }
}

if {[string match {*jemalloc*} [s mem_allocator]]} {
test {Check MEMORY USAGE for embedded key strings with jemalloc} {

Expand Down
Loading