Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
128 changes: 77 additions & 51 deletions notebooks/01-blob-inclusion.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
"import altair as alt\n",
"import numpy as np\n",
"import pandas as pd\n",
"import polars as pl\n",
"import plotly.express as px\n",
"import plotly.graph_objects as go\n",
"\n",
Expand Down Expand Up @@ -118,13 +119,16 @@
"metadata": {},
"outputs": [],
"source": [
"df_blocks_blob_epoch = load_parquet(\"blocks_blob_epoch\", target_date)\n",
"df_blocks_blob_epoch_pd = load_parquet(\"blocks_blob_epoch\", target_date)\n",
"df_blocks_blob_epoch = pl.from_pandas(df_blocks_blob_epoch_pd)\n",
"\n",
"# Format blob count as \"XX blobs\" for display (moved from SQL for cleaner queries)\n",
"df_blocks_blob_epoch[\"series\"] = df_blocks_blob_epoch[\"blob_count\"].apply(lambda x: f\"{int(x):02d} blobs\")\n",
"df_blocks_blob_epoch = df_blocks_blob_epoch.with_columns(\n",
" pl.col(\"blob_count\").cast(pl.Int64).map_elements(lambda x: f\"{x:02d} blobs\", return_dtype=pl.Utf8).alias(\"series\")\n",
")\n",
"\n",
"chart = (\n",
" alt.Chart(df_blocks_blob_epoch)\n",
" alt.Chart(df_blocks_blob_epoch.to_pandas())\n",
" .mark_bar()\n",
" .encode(\n",
" x=alt.X(\"time:T\"),\n",
Expand Down Expand Up @@ -177,23 +181,32 @@
"metadata": {},
"outputs": [],
"source": [
"df_blob_popularity = load_parquet(\"blob_popularity\", target_date)\n",
"df_blob_popularity_pd = load_parquet(\"blob_popularity\", target_date)\n",
"df_blob_popularity = pl.from_pandas(df_blob_popularity_pd)\n",
"\n",
"# Pivot for heatmap\n",
"df_pivot = df_blob_popularity.pivot(index=\"blob_count\", columns=\"time\", values=\"count\").fillna(0)\n",
"df_pivot = df_blob_popularity.pivot(on=\"time\", index=\"blob_count\", values=\"count\").fill_null(0)\n",
"\n",
"# Extract column order (time columns)\n",
"time_cols = [c for c in df_pivot.columns if c != \"blob_count\"]\n",
"blob_counts = df_pivot[\"blob_count\"].to_list()\n",
"\n",
"# Create epoch lookup for hover data\n",
"epoch_lookup = df_blob_popularity.drop_duplicates(subset=[\"time\"]).set_index(\"time\")[\"epoch\"].to_dict()\n",
"df_epoch_lookup = df_blob_popularity.unique(subset=[\"time\"]).select([\"time\", \"epoch\"])\n",
"epoch_lookup = dict(zip(df_epoch_lookup[\"time\"].to_list(), df_epoch_lookup[\"epoch\"].to_list()))\n",
"\n",
"# Extract z values as numpy array\n",
"z_values = df_pivot.select(time_cols).to_numpy()\n",
"\n",
"fig = go.Figure(\n",
" data=go.Heatmap(\n",
" z=df_pivot.values,\n",
" x=df_pivot.columns,\n",
" y=[str(int(b)) for b in df_pivot.index],\n",
" z=z_values,\n",
" x=time_cols,\n",
" y=[str(int(b)) for b in blob_counts],\n",
" colorscale=\"inferno\",\n",
" reversescale=False,\n",
" colorbar=dict(title=\"Block Count\"),\n",
" customdata=[[epoch_lookup.get(t, \"\") for t in df_pivot.columns] for _ in df_pivot.index],\n",
" customdata=[[epoch_lookup.get(t, \"\") for t in time_cols] for _ in blob_counts],\n",
" hovertemplate=\"<b>Epoch Time:</b> %{x}<br><b>Epoch:</b> %{customdata}<br><b>Blob Count:</b> %{y}<br><b>Block Count:</b> %{z}<extra></extra>\",\n",
" ),\n",
")\n",
Expand Down Expand Up @@ -237,27 +250,36 @@
"metadata": {},
"outputs": [],
"source": [
"df_slot_in_epoch = load_parquet(\"slot_in_epoch\", target_date)\n",
"df_slot_in_epoch_pd = load_parquet(\"slot_in_epoch\", target_date)\n",
"df_slot_in_epoch = pl.from_pandas(df_slot_in_epoch_pd)\n",
"\n",
"df_pivot = df_slot_in_epoch.pivot(index=\"slot_in_epoch\", columns=\"time\", values=\"blob_count\").fillna(0)\n",
"# Pivot for heatmap\n",
"df_pivot = df_slot_in_epoch.pivot(on=\"time\", index=\"slot_in_epoch\", values=\"blob_count\").fill_null(0)\n",
"df_slot_pivot = df_slot_in_epoch.pivot(on=\"time\", index=\"slot_in_epoch\", values=\"slot\").fill_null(0)\n",
"\n",
"# Create slot lookup for hover data (slot number for each cell)\n",
"df_slot_pivot = df_slot_in_epoch.pivot(index=\"slot_in_epoch\", columns=\"time\", values=\"slot\").fillna(0)\n",
"# Extract column order (time columns) and index values\n",
"time_cols = [c for c in df_pivot.columns if c != \"slot_in_epoch\"]\n",
"slot_in_epoch_vals = df_pivot[\"slot_in_epoch\"].to_list()\n",
"\n",
"# Create epoch lookup for hover data\n",
"epoch_lookup = df_slot_in_epoch.drop_duplicates(subset=[\"time\"]).set_index(\"time\")[\"epoch\"].to_dict()\n",
"df_epoch_lookup = df_slot_in_epoch.unique(subset=[\"time\"]).select([\"time\", \"epoch\"])\n",
"epoch_lookup = dict(zip(df_epoch_lookup[\"time\"].to_list(), df_epoch_lookup[\"epoch\"].to_list()))\n",
"\n",
"# Extract z and slot values as numpy arrays\n",
"z_values = df_pivot.select(time_cols).to_numpy()\n",
"slot_values = df_slot_pivot.select(time_cols).to_numpy()\n",
"\n",
"# Build customdata with [slot, epoch] for each cell\n",
"customdata = np.dstack([\n",
" df_slot_pivot.values.T,\n",
" [[epoch_lookup.get(t, \"\") for _ in df_pivot.index] for t in df_pivot.columns]\n",
" slot_values.T,\n",
" [[epoch_lookup.get(t, \"\") for _ in slot_in_epoch_vals] for t in time_cols]\n",
"])\n",
"\n",
"fig = go.Figure(\n",
" data=go.Heatmap(\n",
" z=df_pivot.values.T,\n",
" x=[str(int(s)) for s in df_pivot.index],\n",
" y=df_pivot.columns,\n",
" z=z_values.T,\n",
" x=[str(int(s)) for s in slot_in_epoch_vals],\n",
" y=time_cols,\n",
" colorscale=\"thermal\",\n",
" reversescale=True,\n",
" colorbar=dict(\n",
Expand Down Expand Up @@ -298,59 +320,63 @@
"metadata": {},
"outputs": [],
"source": [
"df_pivot = df_slot_in_epoch.pivot(index=\"slot_in_epoch\", columns=\"time\", values=\"blob_count\").fillna(0)\n",
"df_slot_pivot = df_slot_in_epoch.pivot(index=\"slot_in_epoch\", columns=\"time\", values=\"slot\").fillna(0)\n",
"epoch_lookup = df_slot_in_epoch.drop_duplicates(subset=[\"time\"]).set_index(\"time\")[\"epoch\"].to_dict()\n",
"# Pivot data using polars (reusing df_slot_in_epoch from previous cell)\n",
"df_pivot = df_slot_in_epoch.pivot(on=\"time\", index=\"slot_in_epoch\", values=\"blob_count\").fill_null(0)\n",
"df_slot_pivot = df_slot_in_epoch.pivot(on=\"time\", index=\"slot_in_epoch\", values=\"slot\").fill_null(0)\n",
"\n",
"# Extract column order (time columns) and index values\n",
"time_cols = [c for c in df_pivot.columns if c != \"slot_in_epoch\"]\n",
"slot_in_epoch_vals = df_pivot[\"slot_in_epoch\"].to_list()\n",
"\n",
"# Create epoch lookup for hover data\n",
"df_epoch_lookup = df_slot_in_epoch.unique(subset=[\"time\"]).select([\"time\", \"epoch\"])\n",
"epoch_lookup = dict(zip(df_epoch_lookup[\"time\"].to_list(), df_epoch_lookup[\"epoch\"].to_list()))\n",
"\n",
"# Extract values as numpy arrays and transpose for row-based processing\n",
"z_matrix = df_pivot.select(time_cols).to_numpy().T # shape: (n_times, n_slots)\n",
"slot_matrix = df_slot_pivot.select(time_cols).to_numpy().T # shape: (n_times, n_slots)\n",
"\n",
"# Parameters\n",
"n_columns = 4\n",
"n_rows = len(df_pivot.columns)\n",
"n_rows = len(time_cols)\n",
"rows_per_chunk = n_rows // n_columns\n",
"n_slots = len(slot_in_epoch_vals)\n",
"\n",
"# Reshape: stack chunks horizontally\n",
"chunks = []\n",
"z_chunks = []\n",
"slot_chunks = []\n",
"for i in range(n_columns):\n",
" chunk = df_pivot.T.iloc[i*rows_per_chunk:(i+1)*rows_per_chunk, :]\n",
" chunk = chunk.reset_index(drop=True)\n",
" chunks.append(chunk)\n",
" \n",
" slot_chunk = df_slot_pivot.T.iloc[i*rows_per_chunk:(i+1)*rows_per_chunk, :]\n",
" slot_chunk = slot_chunk.reset_index(drop=True)\n",
" slot_chunks.append(slot_chunk)\n",
" start_idx = i * rows_per_chunk\n",
" end_idx = (i + 1) * rows_per_chunk\n",
" z_chunks.append(z_matrix[start_idx:end_idx, :])\n",
" slot_chunks.append(slot_matrix[start_idx:end_idx, :])\n",
"\n",
"# Concatenate horizontally (side by side)\n",
"df_combined = pd.concat(chunks, axis=1, ignore_index=True)\n",
"df_slot_combined = pd.concat(slot_chunks, axis=1, ignore_index=True)\n",
"df_combined = np.hstack(z_chunks)\n",
"df_slot_combined = np.hstack(slot_chunks)\n",
"\n",
"# Build epoch array matching combined layout\n",
"epoch_combined = []\n",
"for row_idx in range(rows_per_chunk):\n",
" epoch_row = []\n",
" for chunk_idx in range(n_columns):\n",
" time_idx = chunk_idx * rows_per_chunk + row_idx\n",
" if time_idx < len(df_pivot.columns):\n",
" time_val = df_pivot.columns[time_idx]\n",
" epoch_row.extend([epoch_lookup.get(time_val, \"\")] * len(df_pivot.index))\n",
" if time_idx < len(time_cols):\n",
" time_val = time_cols[time_idx]\n",
" epoch_row.extend([epoch_lookup.get(time_val, \"\")] * n_slots)\n",
" else:\n",
" epoch_row.extend([\"\"] * len(df_pivot.index))\n",
" epoch_row.extend([\"\"] * n_slots)\n",
" epoch_combined.append(epoch_row)\n",
"\n",
"customdata = np.dstack([df_slot_combined.values, epoch_combined])\n",
"customdata = np.dstack([df_slot_combined, epoch_combined])\n",
"\n",
"# Create x-axis labels with dividers\n",
"n_slots = len(df_pivot.index)\n",
"x_labels = list(range(n_slots)) * n_columns\n",
"\n",
"y_labels = []\n",
"for row_idx in range(rows_per_chunk):\n",
" time_val = df_pivot.columns[row_idx]\n",
" y_labels.append(str(time_val))\n",
"# Create y-axis labels\n",
"y_labels = [str(time_cols[row_idx]) for row_idx in range(rows_per_chunk)]\n",
"\n",
"fig = go.Figure(\n",
" data=go.Heatmap(\n",
" z=df_combined.values,\n",
" x=list(range(len(df_combined.columns))),\n",
" z=df_combined,\n",
" x=list(range(df_combined.shape[1])),\n",
" y=y_labels,\n",
" colorscale=\"thermal\",\n",
" reversescale=True,\n",
Expand Down Expand Up @@ -381,8 +407,8 @@
" yaxis_title=\"Epoch\",\n",
" yaxis=dict(autorange=\"reversed\"),\n",
" xaxis=dict(\n",
" tickvals=list(range(len(df_combined.columns))),\n",
" ticktext=[str(i % n_slots) for i in range(len(df_combined.columns))],\n",
" tickvals=list(range(df_combined.shape[1])),\n",
" ticktext=[str(i % n_slots) for i in range(df_combined.shape[1])],\n",
" tickangle=90,\n",
" tickfont=dict(size=6),\n",
" ),\n",
Expand Down
Loading