From 08e4c99c675e9bf1aaaf1eb81a9674ee6cbed00b Mon Sep 17 00:00:00 2001
From: Erdem Sariyuce <sariyuce@amazon.com>
Date: Tue, 10 Jun 2025 18:17:02 -0400
Subject: [PATCH 01/11] Implementing %degreeDistribution magic command

---
 .gitignore                               | 102 ++++++
 ChangeLog.md                             |   1 +
 pyproject.toml                           |   1 +
 requirements.txt                         |   1 +
 src/graph_notebook/magics/graph_magic.py | 415 ++++++++++++++++++++++-
 src/graph_notebook/neptune/client.py     |   1 +
 6 files changed, 520 insertions(+), 1 deletion(-)

diff --git a/.gitignore b/.gitignore
index eeee3ce5..38d2af96 100644
--- a/.gitignore
+++ b/.gitignore
@@ -31,3 +31,105 @@ src/graph_notebook/widgets/package-lock.json
 blazegraph.jnl
 rules.log
 *.env
+notebook/destination/dir/About-the-Neptune-Notebook.ipynb
+notebook/destination/dir/Overview.ipynb
+notebook/destination/dir/Untitled.ipynb
+notebook/destination/dir/Untitled1.ipynb
+notebook/destination/dir/.ipynb_checkpoints/Untitled-checkpoint.ipynb
+notebook/destination/dir/.ipynb_checkpoints/Untitled1-checkpoint.ipynb
+notebook/destination/dir/01-Neptune-Database/01-Getting-Started/01-About-the-Neptune-Notebook.ipynb
+notebook/destination/dir/01-Neptune-Database/01-Getting-Started/02-Using-Gremlin-to-Access-the-Graph.ipynb
+notebook/destination/dir/01-Neptune-Database/01-Getting-Started/03-Using-RDF-and-SPARQL-to-Access-the-Graph.ipynb
+notebook/destination/dir/01-Neptune-Database/01-Getting-Started/04-Social-Network-Recommendations-with-Gremlin.ipynb
+notebook/destination/dir/01-Neptune-Database/01-Getting-Started/05-Dining-By-Friends-in-Amazon-Neptune.ipynb
+notebook/destination/dir/01-Neptune-Database/02-Visualization/Air-Routes-Gremlin.ipynb
+notebook/destination/dir/01-Neptune-Database/02-Visualization/Air-Routes-openCypher.ipynb
+notebook/destination/dir/01-Neptune-Database/02-Visualization/Air-Routes-SPARQL.ipynb
+notebook/destination/dir/01-Neptune-Database/02-Visualization/Blog Workbench Visualization.ipynb
+notebook/destination/dir/01-Neptune-Database/02-Visualization/EPL-Gremlin.ipynb
+notebook/destination/dir/01-Neptune-Database/02-Visualization/EPL-openCypher.ipynb
+notebook/destination/dir/01-Neptune-Database/02-Visualization/EPL-SPARQL.ipynb
+notebook/destination/dir/01-Neptune-Database/02-Visualization/Grouping-and-Appearance-Customization-Gremlin.ipynb
+notebook/destination/dir/01-Neptune-Database/03-Sample-Applications/00-Sample-Applications-Overview.ipynb
+notebook/destination/dir/01-Neptune-Database/03-Sample-Applications/README.md
+notebook/destination/dir/01-Neptune-Database/03-Sample-Applications/01-Fraud-Graphs/01-Building-a-Fraud-Graph-Application.ipynb
+notebook/destination/dir/01-Neptune-Database/03-Sample-Applications/02-Knowledge-Graphs/Building-a-Knowledge-Graph-Application-Gremlin.ipynb
+notebook/destination/dir/01-Neptune-Database/03-Sample-Applications/02-Knowledge-Graphs/Building-a-Knowledge-Graph-Application-openCypher.ipynb
+notebook/destination/dir/01-Neptune-Database/03-Sample-Applications/03-Identity-Graphs/01-Building-an-Identity-Graph-Application.ipynb
+notebook/destination/dir/01-Neptune-Database/03-Sample-Applications/03-Identity-Graphs/02-Data-Modeling-for-Identity-Graphs.ipynb
+notebook/destination/dir/01-Neptune-Database/03-Sample-Applications/03-Identity-Graphs/03-Jumpstart-Identity-Graphs-Using-Canonical-Model-and-ETL/03-Jumpstart-Identity-Graphs-Using-Canonical-Model-and-ETL.ipynb
+notebook/destination/dir/01-Neptune-Database/03-Sample-Applications/03-Identity-Graphs/03-Jumpstart-Identity-Graphs-Using-Canonical-Model-and-ETL/glue_utils.py
+notebook/destination/dir/01-Neptune-Database/03-Sample-Applications/03-Identity-Graphs/03-Jumpstart-Identity-Graphs-Using-Canonical-Model-and-ETL/script/neptune-glue-demographics.py
+notebook/destination/dir/01-Neptune-Database/03-Sample-Applications/03-Identity-Graphs/03-Jumpstart-Identity-Graphs-Using-Canonical-Model-and-ETL/script/neptune-glue-telemetry.py
+notebook/destination/dir/01-Neptune-Database/03-Sample-Applications/03-Identity-Graphs/03-Jumpstart-Identity-Graphs-Using-Canonical-Model-and-ETL/script/neptune-glue-transactions.py
+notebook/destination/dir/01-Neptune-Database/03-Sample-Applications/04-Security-Graphs/01-Building-a-Security-Graph-Application-with-Gremlin.ipynb
+notebook/destination/dir/01-Neptune-Database/03-Sample-Applications/04-Security-Graphs/01-Building-a-Security-Graph-Application-with-openCypher.ipynb
+notebook/destination/dir/01-Neptune-Database/03-Sample-Applications/05-Healthcare-and-Life-Sciences-Graphs/01-Modeling-Molecular-Structures-as-Graph-Data-Gremlin.ipynb
+notebook/destination/dir/01-Neptune-Database/03-Sample-Applications/06-Data-Science-Samples/01-Identifying-Fraud-Rings-Using-Social-Network-Analytics.ipynb
+notebook/destination/dir/01-Neptune-Database/03-Sample-Applications/06-Data-Science-Samples/02-Identifying-1st-Person-Synthetic-Identity-Fraud-Using-Graph-Similarity.ipynb
+notebook/destination/dir/01-Neptune-Database/03-Sample-Applications/06-Data-Science-Samples/03-Logistics-Analysis-using-a-Transportation-Network.ipynb
+notebook/destination/dir/01-Neptune-Database/03-Sample-Applications/07-Games-Industry-Graphs/01-Building-a-Social-Network-for-Games-Gremlin.ipynb
+notebook/destination/dir/02-Neptune-Analytics/01-Getting-Started/01-Getting-Started-With-Neptune-Analytics.ipynb
+notebook/destination/dir/02-Neptune-Analytics/02-Graph-Algorithms/00-Amazon-Neptune-Analytics-Algorithm-Support.pdf
+notebook/destination/dir/02-Neptune-Analytics/02-Graph-Algorithms/01-Getting-Started-With-Graph-Algorithms.ipynb
+notebook/destination/dir/02-Neptune-Analytics/02-Graph-Algorithms/02-Path-Finding-Algorithms.ipynb
+notebook/destination/dir/02-Neptune-Analytics/02-Graph-Algorithms/03-Centrality-Algorithms.ipynb
+notebook/destination/dir/02-Neptune-Analytics/02-Graph-Algorithms/04-Community-Detection-Algorithms.ipynb
+notebook/destination/dir/02-Neptune-Analytics/02-Graph-Algorithms/05-Similarity-Algorithms.ipynb
+notebook/destination/dir/02-Neptune-Analytics/02-Graph-Algorithms/06-Vector-Similarity-Algorithms.ipynb
+notebook/destination/dir/02-Neptune-Analytics/02-Graph-Algorithms/.ipynb_checkpoints/03-Centrality-Algorithms-checkpoint.ipynb
+notebook/destination/dir/02-Neptune-Analytics/03-Sample-Use-Cases/Overview.ipynb
+notebook/destination/dir/02-Neptune-Analytics/03-Sample-Use-Cases/01-FinTech/01-Fraud-Ring-Identifcation.ipynb
+notebook/destination/dir/02-Neptune-Analytics/03-Sample-Use-Cases/02-Investment-Analysis/01-EDGAR-Competitor-Analysis-using-Knowledge-Graph-Graph-Algorithms-and-Vector-Search.ipynb
+notebook/destination/dir/02-Neptune-Analytics/03-Sample-Use-Cases/03-Software-Bill-Of-Materials/00-Intro-to-Software-Bill-Of-Materials.ipynb
+notebook/destination/dir/02-Neptune-Analytics/03-Sample-Use-Cases/03-Software-Bill-Of-Materials/01-SBOM-Dependency-Analysis.ipynb
+notebook/destination/dir/02-Neptune-Analytics/03-Sample-Use-Cases/03-Software-Bill-Of-Materials/02-SBOM-Vulnerability-Analysis.ipynb
+notebook/destination/dir/02-Neptune-Analytics/03-Sample-Use-Cases/03-Software-Bill-Of-Materials/nodestream_template.yaml
+notebook/destination/dir/02-Neptune-Analytics/03-Sample-Use-Cases/03-Software-Bill-Of-Materials/example_sboms/01/aws-sdk-pandas_aws_de5d1610d6d4ea3be44a01ab3f09b64e291a4ab7.json
+notebook/destination/dir/02-Neptune-Analytics/03-Sample-Use-Cases/03-Software-Bill-Of-Materials/example_sboms/01/boto3_boto_6bbdf83ee00b749587f0fe54778fbec5411147b5.json
+notebook/destination/dir/02-Neptune-Analytics/03-Sample-Use-Cases/03-Software-Bill-Of-Materials/example_sboms/01/graph-explorer_aws_39eed2c8bae4afc1b38fa7975c720461a7c7c3a6.json
+notebook/destination/dir/02-Neptune-Analytics/03-Sample-Use-Cases/03-Software-Bill-Of-Materials/example_sboms/01/graph-notebook_aws_bb96dd8d0d9ef9d0e9060f8c5e26a042a3db40c4.json
+notebook/destination/dir/02-Neptune-Analytics/03-Sample-Use-Cases/03-Software-Bill-Of-Materials/example_sboms/02/aws-cli-2-0-6.json
+notebook/destination/dir/02-Neptune-Analytics/03-Sample-Use-Cases/03-Software-Bill-Of-Materials/example_sboms/02/gremlin-console-3-7-1_cydx.json
+notebook/destination/dir/02-Neptune-Analytics/03-Sample-Use-Cases/03-Software-Bill-Of-Materials/example_sboms/02/gremlin-server-3-7-1-cydx.json
+notebook/destination/dir/02-Neptune-Analytics/03-Sample-Use-Cases/03-Software-Bill-Of-Materials/sbom_code/sbom_helper.py
+notebook/destination/dir/02-Neptune-Analytics/04-OpenCypher-Over-RDF/01-Air-Routes.ipynb
+notebook/destination/dir/02-Neptune-Analytics/04-OpenCypher-Over-RDF/02-Air-Routes-GeoNames.ipynb
+notebook/destination/dir/02-Neptune-Analytics/04-OpenCypher-Over-RDF/Air-Routes-Ontology-Diagram.png
+notebook/destination/dir/03-Neptune-ML/neptune_ml_utils.py
+notebook/destination/dir/03-Neptune-ML/neptune-ml-pretrained-model-config.json
+notebook/destination/dir/03-Neptune-ML/01-Gremlin/01-Getting-Started-with-Neptune-ML-Gremlin.ipynb
+notebook/destination/dir/03-Neptune-ML/01-Gremlin/02-Introduction-to-Node-Classification-Gremlin.ipynb
+notebook/destination/dir/03-Neptune-ML/01-Gremlin/03-Introduction-to-Node-Regression-Gremlin.ipynb
+notebook/destination/dir/03-Neptune-ML/01-Gremlin/04-Introduction-to-Link-Prediction-Gremlin.ipynb
+notebook/destination/dir/03-Neptune-ML/01-Gremlin/05-Introduction-to-Edge-Classification-Gremlin.ipynb
+notebook/destination/dir/03-Neptune-ML/01-Gremlin/06-Introduction-to-Edge-Regression-Gremlin.ipynb
+notebook/destination/dir/03-Neptune-ML/02-SPARQL/neptune_ml_sparql_utils.py
+notebook/destination/dir/03-Neptune-ML/02-SPARQL/Neptune-ML-00-Getting-Started-with-Neptune-ML-SPARQL.ipynb
+notebook/destination/dir/03-Neptune-ML/02-SPARQL/Neptune-ML-01-Introduction-to-Object-Classification-SPARQL.ipynb
+notebook/destination/dir/03-Neptune-ML/02-SPARQL/Neptune-ML-02-Introduction-to-Object-Regression-SPARQL.ipynb
+notebook/destination/dir/03-Neptune-ML/02-SPARQL/Neptune-ML-03-Introduction-to-Link-Prediction-SPARQL.ipynb
+notebook/destination/dir/03-Neptune-ML/02-SPARQL/neptune-ml-pretrained-rdf-model-config.json
+notebook/destination/dir/03-Neptune-ML/03-Sample-Applications/02-Job-Recommendation-Text-Encoding.ipynb
+notebook/destination/dir/03-Neptune-ML/03-Sample-Applications/03-Real-Time-Fraud-Detection-Using-Inductive-Inference.ipynb
+notebook/destination/dir/03-Neptune-ML/03-Sample-Applications/01-People-Analytics/People-Analytics-using-Neptune-ML.ipynb
+notebook/destination/dir/03-Neptune-ML/03-Sample-Applications/01-People-Analytics/WA_Fn-UseC_-HR-Employee-Attrition.csv
+notebook/destination/dir/03-Neptune-ML/03-Sample-Applications/04-Telco-Networks/1a-Use-case.ipynb
+notebook/destination/dir/03-Neptune-ML/03-Sample-Applications/04-Telco-Networks/1b-Graph_init.ipynb
+notebook/destination/dir/03-Neptune-ML/03-Sample-Applications/04-Telco-Networks/2a-GraphQueryGremlin.ipynb
+notebook/destination/dir/03-Neptune-ML/03-Sample-Applications/04-Telco-Networks/2b-GraphQueryLLM.ipynb
+notebook/destination/dir/03-Neptune-ML/03-Sample-Applications/04-Telco-Networks/3a-TransductiveMode-CellPrediction.ipynb
+notebook/destination/dir/03-Neptune-ML/03-Sample-Applications/04-Telco-Networks/3b-InductiveModeCell-Prediction.ipynb
+notebook/destination/dir/03-Neptune-ML/03-Sample-Applications/04-Telco-Networks/neptune_ml_utils.py
+notebook/destination/dir/03-Neptune-ML/03-Sample-Applications/04-Telco-Networks/Transform2Neptune.py
+notebook/destination/dir/04-Language-Tutorials/01-Gremlin/01-Basic-Read-Queries.ipynb
+notebook/destination/dir/04-Language-Tutorials/01-Gremlin/02-Loops-Repeats.ipynb
+notebook/destination/dir/04-Language-Tutorials/01-Gremlin/03-Ordering-Functions-Grouping.ipynb
+notebook/destination/dir/04-Language-Tutorials/01-Gremlin/04-Creating-Updating-Deleting-Queries.ipynb
+notebook/destination/dir/04-Language-Tutorials/01-Gremlin/Gremlin-Exercises-Answer-Sheet.ipynb
+notebook/destination/dir/04-Language-Tutorials/02-openCypher/01-Basic-Read-Queries.ipynb
+notebook/destination/dir/04-Language-Tutorials/02-openCypher/02-Variable-Length-Paths.ipynb
+notebook/destination/dir/04-Language-Tutorials/02-openCypher/03-Ordering-Functions-Grouping.ipynb
+notebook/destination/dir/04-Language-Tutorials/02-openCypher/04-Creating-Updating-Delete-Queries.ipynb
+notebook/destination/dir/04-Language-Tutorials/02-openCypher/openCypher-Exercises-Answer-Key.ipynb
+notebook/destination/dir/04-Language-Tutorials/03-SPARQL/01-SPARQL-Basics.ipynb
diff --git a/ChangeLog.md b/ChangeLog.md
index af0d7cbe..af079d5e 100644
--- a/ChangeLog.md
+++ b/ChangeLog.md
@@ -3,6 +3,7 @@
 Starting with v1.31.6, this file will contain a record of major features and updates made in each release of graph-notebook.
 
 ## Upcoming
+- Added %degreeDistribution magic command ([Link to PR](https://github.com/aws/graph-notebook/pull/749))
 - Locked numba dependency to 0.60.0 to avoid numpy conflict ([Link to PR](https://github.com/aws/graph-notebook/pull/735))
 - Fixed library target for nbclassic nbextension for graph_notebook_widget ([Link to PR](https://github.com/aws/graph-notebook/pull/739))
   
diff --git a/pyproject.toml b/pyproject.toml
index e7a95ccf..391ba89c 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -46,6 +46,7 @@ dependencies = [
     'networkx==2.4',
     'numpy>=1.23.5,<1.24.0',
     'pandas>=2.1.0,<=2.2.2',
+    'matplotlib>=3.9.4',
 
     # Graph databases and query languages
     'gremlinpython>=3.5.1,<=3.7.2',
diff --git a/requirements.txt b/requirements.txt
index a80fd44e..7e1f7328 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -18,6 +18,7 @@ itables>=2.0.0,<=2.1.0
 networkx==2.4
 numpy>=1.23.5,<1.24.0
 pandas>=2.1.0,<=2.2.2
+matplotlib>=3.9.4
 
 # Graph databases and query languages
 gremlinpython>=3.5.1,<=3.7.2
diff --git a/src/graph_notebook/magics/graph_magic.py b/src/graph_notebook/magics/graph_magic.py
index acf1a0e3..fba4d15d 100644
--- a/src/graph_notebook/magics/graph_magic.py
+++ b/src/graph_notebook/magics/graph_magic.py
@@ -16,6 +16,12 @@
 import ast
 import re
 
+import numpy as np
+import matplotlib.pyplot as plt
+
+import numpy as np
+import matplotlib.pyplot as plt
+
 from ipyfilechooser import FileChooser
 from enum import Enum
 from copy import copy
@@ -53,7 +59,8 @@
     STATISTICS_LANGUAGE_INPUTS, STATISTICS_LANGUAGE_INPUTS_SPARQL, STATISTICS_MODES, SUMMARY_MODES, \
     SPARQL_EXPLAIN_MODES, OPENCYPHER_EXPLAIN_MODES, GREMLIN_EXPLAIN_MODES, \
     OPENCYPHER_PLAN_CACHE_MODES, OPENCYPHER_DEFAULT_TIMEOUT, OPENCYPHER_STATUS_STATE_MODES, \
-    normalize_service_name, NEPTUNE_DB_SERVICE_NAME, NEPTUNE_ANALYTICS_SERVICE_NAME, GRAPH_PG_INFO_METRICS, \
+    normalize_service_name, NEPTUNE_DB_SERVICE_NAME, NEPTUNE_ANALYTICS_SERVICE_NAME, GRAPH_PG_INFO_METRICS, TRAVERSAL_DIRECTIONS, \
+    normalize_service_name, NEPTUNE_DB_SERVICE_NAME, NEPTUNE_ANALYTICS_SERVICE_NAME, GRAPH_PG_INFO_METRICS, TRAVERSAL_DIRECTIONS, \
     GREMLIN_PROTOCOL_FORMATS, DEFAULT_HTTP_PROTOCOL, DEFAULT_WS_PROTOCOL, GRAPHSONV4_UNTYPED, \
     GREMLIN_SERIALIZERS_WS, get_gremlin_serializer_mime, normalize_protocol_name, generate_snapshot_name)
 from graph_notebook.network import SPARQLNetwork
@@ -3920,3 +3927,409 @@ def handle_opencypher_status(self, line, local_ns):
             store_to_ns(args.store_to, js, local_ns)
             if not args.silent:
                 print(json.dumps(js, indent=2))
+
+
+
+
+
+    # %degreeDistribution magic command.
+    # It obtains the degree distribution of a graph in the form of a visual histogram in notebook. Histogram simply
+    # shows the number of vertices with a given degree, where degree is shown on the x-axis and the count on y-axis.
+    # It takes traversalDirection [both (default), inbound, outbound], vertexLabels [default is empty list],
+    # edgeLabels parameters [default is empty list], and then gives the histogram for the specified degree
+    # (both/in/out) distribution of the vertices in the graph filtered by the specified vertex labels and edge
+    # labels. Parameters can be defined as command line argument and/or through the dropdown widgets.
+    # Example usages:
+    # > %degreeDistribution
+    # > %degreeDistribution --traversalDirection inbound
+    # > %degreeDistribution --traversalDirection inbound --vertexLabels airport country
+
+    # TODO: Error handling
+
+    @line_magic
+    @needs_local_scope
+    @display_exceptions
+    @neptune_graph_only
+    def degreeDistribution(self, line, local_ns: dict = None):
+        """Show parameter widgets for the degree distribution and plot it when Submit is clicked.
+
+        ``line`` may carry --traversalDirection, --vertexLabels and --edgeLabels; the parsed values only
+        pre-select the widgets. On a non-Analytics endpoint a notice is printed and nothing else happens.
+        """
+        if not self.client.is_analytics_domain():
+            print("This command is only supported for Neptune Analytics domains.")
+            return
+
+        parser = argparse.ArgumentParser()
+
+        # traversalDirection: Type of the degree computed:
+        # - inbound: Counts only the incoming edges for each vertex
+        # - outbound: Counts only the outgoing edges for each vertex
+        # - both [default]: Counts both the incoming and outgoing edges for each vertex.
+        parser.add_argument('--traversalDirection', nargs='?', type=str.lower, default='both',
+                            help=f'Type of the degree for which the distribution is shown. Valid inputs: {TRAVERSAL_DIRECTIONS}. '
+                                 f'Default: both.',
+                            choices=TRAVERSAL_DIRECTIONS)
+
+        # vertexLabels: space-separated vertex labels; an empty list (default) means any vertex label.
+        parser.add_argument('--vertexLabels', nargs='*', default=[],
+                            help="The vertex labels for which the induced graph is considered and the degree distribution is shown. "
+                                 "If not supplied, we will default to using all the vertex labels.")
+
+        # edgeLabels: space-separated edge labels; an empty list (default) means any edge label.
+        parser.add_argument('--edgeLabels', nargs='*', default=[],
+                            help="The edge labels for which the degree distribution is shown. If not supplied, "
+                                 "we will default to using all the edge labels.")
+
+        # TODO: Additional parameter (e.g. --export-to <path>) for saving the visualization?
+        # Parse before contacting the service so that a bad argument fails without a network round trip.
+        args = parser.parse_args(line.split())
+
+        # Get the vertexLabels and edgeLabels from graph summary, to be shown in the widgets for selection.
+        try:
+            summary_res = self.client.statistics("propertygraph", True, "detailed", True)
+            summary_res.raise_for_status()
+            graph_summary = summary_res.json()['graphSummary']
+            available_vertex_labels = sorted(graph_summary['nodeLabels'])
+            available_edge_labels = sorted(graph_summary['edgeLabels'])
+        except Exception as e:
+            print(f"Error retrieving graph summary: {e}")
+            return
+
+        def known_labels(requested, available, kind):
+            # SelectMultiple rejects values that are not among its options, so drop unknown labels with a notice.
+            unknown = [label for label in requested if label not in available]
+            if unknown:
+                print(f"Ignoring {kind} labels not present in the graph: {', '.join(unknown)}")
+            return tuple(label for label in requested if label in available)
+
+        # The command-line traversalDirection is pre-selected; a bare flag parses to None and falls back to 'both'.
+        td_dropdown = widgets.Dropdown(
+            options=TRAVERSAL_DIRECTIONS,
+            description='Traversal direction:',
+            disabled=False,
+            style=SEED_WIDGET_STYLE,
+            value=args.traversalDirection or 'both'
+        )
+
+        # Labels that exist in the graph are offered for selection and the ones given on the command line are
+        # pre-selected. An empty selection means that all labels are considered in the computation.
+        vertex_labels_select = widgets.SelectMultiple(
+            options=available_vertex_labels,
+            description='Vertex labels:',
+            disabled=False,
+            style=SEED_WIDGET_STYLE,
+            value=known_labels(args.vertexLabels, available_vertex_labels, 'vertex')
+        )
+
+        edge_labels_select = widgets.SelectMultiple(
+            options=available_edge_labels,
+            description='Edge labels:',
+            disabled=False,
+            style=SEED_WIDGET_STYLE,
+            value=known_labels(args.edgeLabels, available_edge_labels, 'edge')
+        )
+
+        submit_button = widgets.Button(description="Submit")
+        output = widgets.Output()
+
+        # Display widgets
+        display(td_dropdown, vertex_labels_select, edge_labels_select, submit_button, output)
+
+        def on_button_clicked(b):
+            # Get the selected parameters
+            td = td_dropdown.value
+            vlabels = list(vertex_labels_select.value)
+            elabels = list(edge_labels_select.value)
+
+            # Clear the output widget before displaying new content
+            output.clear_output(wait=True)
+
+            # Everything below prints or plots into the output widget, including failure notices.
+            with output:
+                try:
+                    res = self.execute_degree_distribution_query(td, vlabels, elabels, local_ns)
+                    algo_output = res['results'][0]['output']
+                    pairs = np.array(algo_output['distribution'])
+                    stats = algo_output['statistics']
+                    max_deg, median_deg, mean_deg = stats['maxDeg'], stats['p50'], stats['mean']
+                except Exception as e:
+                    # This callback runs after the decorated magic has returned, so report the failure here.
+                    print(f"Error retrieving degree distribution: {e}")
+                    return
+
+                # An empty distribution becomes a 1-D array that cannot be split into degree/count columns.
+                if pairs.ndim != 2 or pairs.shape[0] == 0:
+                    print("No vertices matched the selected labels; nothing to plot.")
+                    return
+
+                # Create the interactive visualization from the degree and count columns
+                self.plot_interactive_degree_distribution(pairs[:, 0], pairs[:, 1],
+                                                          max_deg, median_deg, mean_deg)
+
+        submit_button.on_click(on_button_clicked)
+
+    def execute_degree_distribution_query(self, td, vlabels, elabels, local_ns):
+        """Run neptune.algo.degreeDistribution through openCypher and return what the handler stores as 'js'.
+
+        td is the traversalDirection value; vlabels/elabels are label filters (empty means no filter).
+        local_ns is only copied, so an existing ``js`` variable in the notebook is never overwritten.
+        """
+        # json.dumps yields a double-quoted literal with quotes and backslashes escaped, so a label
+        # containing such characters cannot terminate the string early and alter the query.
+        query_parts = [f'traversalDirection: {json.dumps(td, ensure_ascii=False)}']
+        if vlabels:
+            query_parts.append(f'vertexLabels: [{", ".join(json.dumps(v, ensure_ascii=False) for v in vlabels)}]')
+        if elabels:
+            query_parts.append(f'edgeLabels: [{", ".join(json.dumps(e, ensure_ascii=False) for e in elabels)}]')
+        query = "CALL neptune.algo.degreeDistribution({" + ", ".join(query_parts) + "}) YIELD output RETURN output"
+
+        # Query against a copy without any stale 'js' so a run that stores nothing cannot return an older result.
+        scratch_ns = {k: v for k, v in (local_ns or {}).items() if k != 'js'}
+        self.handle_opencypher_query('--store-to js --silent', query, scratch_ns)
+        return scratch_ns['js']
+
+
+    def plot_interactive_degree_distribution(self, unique_degrees, counts, max_deg, median_deg, mean_deg):
+        """Display an interactive degree-distribution plot together with a statistics panel.
+
+        unique_degrees and counts are parallel sequences (number of vertices per degree). max_deg bounds
+        the bins and sliders; median_deg and mean_deg are only printed. Widgets control the axis scales,
+        the binning, the axis ranges and the min/max degree marker lines.
+        """
+        # Accept plain sequences as well as arrays; the boolean masking below needs numpy arrays.
+        unique_degrees = np.asarray(unique_degrees)
+        counts = np.asarray(counts)
+
+        def update_plot(scale_type, bin_type, bin_width, y_max, x_range, show_mindeg, show_maxdeg):
+            # Start timing
+            start_time = time.time()
+
+            alpha = 1
+            log_x = scale_type in ('Log-Log', 'Log(x)-Linear(y)')
+            plt.clf()
+
+            # Get zero degree count
+            zero_idx = np.where(unique_degrees == 0)[0]
+            zero_degree_count = counts[zero_idx[0]] if len(zero_idx) > 0 else 0
+
+            isolateds_exist = zero_degree_count > 0
+            # Get non-zero degrees and counts
+            mask = unique_degrees > 0
+            filtered_degrees = unique_degrees[mask]
+            filtered_counts = counts[mask]
+
+            # Obtain the minimum non-zero degree, unless it's all zero degrees
+            min_deg = np.min(filtered_degrees) if len(filtered_degrees) > 0 else 0
+
+            n_bins = 1
+            # Create histogram only if there are non-zero degree nodes
+            if len(filtered_degrees) > 0:
+                if bin_type != 'Raw':
+                    # Arrange the bins for a given bin_width
+                    if bin_type == 'Linear':
+                        n_bins = max(1, int((max_deg - min_deg) / bin_width))
+                        bins = np.linspace(min_deg, max_deg, n_bins + 1)
+                    else:  # Logarithmic
+                        min_deg_log = np.log10(min_deg) if min_deg > 0 else 0
+                        max_deg_log = np.log10(max_deg) if max_deg > 0 else 1
+                        n_bins = max(1, int((max_deg_log - min_deg_log) / np.log10(bin_width+0.01)))
+                        bins = np.logspace(min_deg_log, max_deg_log, n_bins + 1)
+
+                    # Weight each degree by its count instead of expanding one array entry per vertex;
+                    # the bin totals are the same without memory proportional to the number of vertices.
+                    plt.hist(filtered_degrees, bins=bins, weights=filtered_counts, density=False,
+                             alpha=alpha, histtype='bar', color='#000080')
+                else:
+                    # For raw data, create bars at each unique degree
+                    plt.bar(filtered_degrees, filtered_counts, alpha=alpha,
+                            label='Raw', color='#000000')
+
+            # Plot zero degree node count separately
+            if isolateds_exist:
+                # Use a special x position for zero degree nodes in log scale
+                plt.bar(0.1 if log_x else 0, zero_degree_count, color='red',
+                        label='Isolated', alpha=alpha, width=0.1 if log_x else 2)
+
+            plt.xlim(x_range[0], x_range[1])
+
+            # Set scales based on selection
+            if log_x:
+                plt.xscale('log')
+                # A log axis cannot start at 0, so use a small positive lower bound.
+                plt.xlim(0.05 if isolateds_exist else x_range[0]+0.05, x_range[1])
+            if scale_type in ('Log-Log', 'Linear(x)-Log(y)'):
+                plt.yscale('log')
+
+            plt.gca().set_ylim(top=y_max)
+
+            # Add vertical dashed lines for min and max degree, if enabled
+            if show_mindeg and min_deg > 0:
+                plt.axvline(x=min_deg, color='darkgreen', linestyle='--', linewidth=2, label=f'Min non-zero degree: {min_deg}')
+
+            if show_maxdeg:
+                plt.axvline(x=max_deg, color='darkred', linestyle='--', linewidth=2, label=f'Max degree: {max_deg}')
+
+            plt.grid(True, which="both", ls="-", alpha=0.2)
+            plt.xlabel('Degree')
+            plt.ylabel('Number of nodes')
+            # Only build a legend when something is labelled; an empty legend just emits a warning.
+            if plt.gca().get_legend_handles_labels()[0]:
+                plt.legend()
+
+            plt.title('Degree Distribution')
+
+            # End timing and display
+            end_time = time.time()
+            runtime = end_time - start_time
+
+            # Update statistics
+            with stats_output:
+                stats_output.clear_output(wait=True)
+                total_nodes = counts.sum()
+                # NOTE(review): halving assumes every edge is counted at both endpoints (direction 'both');
+                # for inbound/outbound degrees the degree sum presumably equals the edge count - confirm.
+                total_edges = np.dot(unique_degrees, counts) // 2
+
+                print(f"Render time: {runtime:.3f} seconds")
+                print("--------------------")
+
+                print(f"Number of nodes: {total_nodes}")
+                print(f"Number of edges: {total_edges}")
+                print(f"Number of isolated nodes: {zero_degree_count}")
+                print(f"Average degree: {mean_deg:.2f}")
+                print(f"Median degree: {median_deg:.2f}")
+                print(f"Max degree: {max_deg}")
+                if min_deg > 0:
+                    print(f"Min non-zero degree: {min_deg}")
+                if bin_type != 'Raw':
+                    print(f"Number of bins: {n_bins}")
+
+
+        # Largest per-degree count; used to size the y-axis slider (0 when there is no data at all).
+        max_count = counts.max() if counts.size else 0
+
+        # Scale widget, four options
+        scale_widget = widgets.Dropdown(
+            options=['Linear-Linear', 'Log-Log', 'Log(x)-Linear(y)', 'Linear(x)-Log(y)'],
+            value='Linear-Linear',
+            description='Scale:'
+        )
+
+        # Binning widget, three options
+        bin_widget = widgets.Dropdown(
+            options=['Raw', 'Linear', 'Logarithmic'],
+            value='Linear',
+            description='Binning:'
+        )
+
+        # Largest linear bin width; kept at or above the slider minimum of 1, which a small max_deg would violate.
+        linear_width_max = max(1, (max_deg+2)/10)
+
+        # Define a function to update bin_width_widget based on bin_type
+        def update_bin_width_widget(change):
+            if change['new'] == 'Logarithmic':
+                # For logarithmic binning, use a FloatSlider with smaller values
+                bin_width_widget.min = 1.00
+                bin_width_widget.max = 10.00
+                bin_width_widget.step = 0.01
+                bin_width_widget.value = 1.00
+                bin_width_widget.readout_format = '.2f'
+                bin_width_widget.disabled = False
+            elif change['new'] == 'Raw':
+                # For raw binning, disable the widget
+                bin_width_widget.value = 1
+                bin_width_widget.disabled = True
+            else:
+                # For linear binning, use integer values
+                bin_width_widget.min = 1
+                bin_width_widget.max = linear_width_max
+                bin_width_widget.step = 1
+                bin_width_widget.value = 1
+                bin_width_widget.readout_format = 'd'
+                bin_width_widget.disabled = False
+
+        def update_y_max_widget(change):
+            # Heuristic ceiling for the y slider, clamped so that max never drops below the slider minimum.
+            if bin_widget.value == 'Raw':
+                # For raw data, use the original max count
+                ceiling = max_count * 1.1
+            elif bin_widget.value == 'Linear':
+                ceiling = max_count * bin_width_widget.value * 0.5
+            else:  # 'Logarithmic'
+                ceiling = max_count * (10 ** bin_width_widget.value) * 0.5
+            y_max_widget.max = max(1, int(ceiling))
+            y_max_widget.value = y_max_widget.max
+
+        # Bin width widget; the linear range is [1, linear_width_max]
+        bin_width_widget = widgets.FloatSlider(
+            value=1,
+            min=1,
+            max=linear_width_max,
+            step=1,
+            description='Bin width:',
+            tooltip=('For linear binning: actual width\n'
+                    'For log binning: multiplicative factor')
+        )
+
+        # Register the bin-width reconfiguration ahead of the y-axis refresh for binning-type changes.
+        bin_widget.observe(update_bin_width_widget, names='value')
+        bin_width_widget.observe(update_y_max_widget, names='value')
+        bin_widget.observe(update_y_max_widget, names='value')
+
+        # Upper limit for y-axis range, enables zooming (lower limit is always zero)
+        y_max_ceiling = max(1, int(max_count * 1.1))
+        y_max_widget = widgets.IntSlider(
+            value=y_max_ceiling,
+            min=1,
+            max=y_max_ceiling,
+            step=1,
+            description='y-max:',
+        )
+
+        # Range slider for x-axis, enables zooming
+        x_range_widget = widgets.FloatRangeSlider(
+            min=0,
+            max=max_deg * 1.1 + 5,
+            value=[0, max_deg * 1.1 + 5],
+            step=1,
+            description='x-axis range:',
+            disabled=False,
+            continuous_update=True,
+            readout=True,
+            readout_format='.0f',
+        )
+
+        # Toggle switches for min/max degree lines
+        show_mindeg_widget = widgets.Checkbox(
+            value=True,
+            description='Show Min Degree Line',
+            disabled=False
+        )
+
+        show_maxdeg_widget = widgets.Checkbox(
+            value=True,
+            description='Show Max Degree Line',
+            disabled=False
+        )
+
+        # Output widget for statistics
+        stats_output = widgets.Output()
+
+        # Interactive plot
+        interactive_plot = widgets.interactive(
+            update_plot,
+            scale_type=scale_widget,
+            bin_type=bin_widget,
+            bin_width=bin_width_widget,
+            y_max=y_max_widget,
+            x_range=x_range_widget,
+            show_mindeg=show_mindeg_widget,
+            show_maxdeg=show_maxdeg_widget
+        )
+
+        # Vertical box layout
+        vbox = widgets.VBox([interactive_plot, stats_output])
+
+        # Display the interactive plot and stats
+        display(vbox)
diff --git a/src/graph_notebook/neptune/client.py b/src/graph_notebook/neptune/client.py
index 2a683452..b5b94d83 100644
--- a/src/graph_notebook/neptune/client.py
+++ b/src/graph_notebook/neptune/client.py
@@ -174,6 +174,7 @@
 
 GRAPH_PG_INFO_METRICS = {'numVertices', 'numEdges', 'numVertexProperties', 'numEdgeProperties'}
 
+TRAVERSAL_DIRECTIONS = ['both', 'inbound', 'outbound']
 
 def is_allowed_neptune_host(hostname: str, host_allowlist: list):
     for host_snippet in host_allowlist:

From 718dc36770569bad51e3c4646e11881f814c5e55 Mon Sep 17 00:00:00 2001
From: Erdem Sariyuce <sariyuce@amazon.com>
Date: Tue, 17 Jun 2025 17:15:12 -0400
Subject: [PATCH 02/11] Error handling, faster plotting by step-plots, a few
 bug fixes, spinning wheel for progress, plot is added as the first tab

---
 src/graph_notebook/magics/graph_magic.py | 687 ++++++++++++++---------
 1 file changed, 421 insertions(+), 266 deletions(-)

diff --git a/src/graph_notebook/magics/graph_magic.py b/src/graph_notebook/magics/graph_magic.py
index fba4d15d..af4586d1 100644
--- a/src/graph_notebook/magics/graph_magic.py
+++ b/src/graph_notebook/magics/graph_magic.py
@@ -19,9 +19,6 @@
 import numpy as np
 import matplotlib.pyplot as plt
 
-import numpy as np
-import matplotlib.pyplot as plt
-
 from ipyfilechooser import FileChooser
 from enum import Enum
 from copy import copy
@@ -3548,9 +3545,10 @@ def neptune_ml(self, line, cell='', local_ns: dict = None):
         with main_output:
             print(message)
 
-    def handle_opencypher_query(self, line, cell, local_ns):
+    def handle_opencypher_query(self, line, cell, local_ns, return_tabs=False):
         """
-        This method in its own handler so that the magics %%opencypher and %%oc can both call it
+        This logic lives in its own handler method so that the magics %%opencypher and %%oc can both call it.
+        return_tabs: If True, return the titles and children lists instead of displaying the tab (which are later displayed by the caller function).
         """
         parser = argparse.ArgumentParser()
         parser.add_argument('-pc', '--plan-cache', type=str.lower, default='auto',
@@ -3773,7 +3771,8 @@ def handle_opencypher_query(self, line, cell, local_ns):
 
             for i in range(len(titles)):
                 tab.set_title(i, titles[i])
-            display(tab)
+            if not return_tabs:
+                display(tab)
 
             with metadata_output:
                 display(HTML(oc_metadata.to_html()))
@@ -3851,6 +3850,9 @@ def handle_opencypher_query(self, line, cell, local_ns):
 
         store_to_ns(args.store_to, stored_results, local_ns)
 
+        if return_tabs:
+            return {'titles': titles, 'children': children}
+
     def handle_opencypher_status(self, line, local_ns):
         """
         This is refactored into its own handler method so that we can invoke it from
@@ -3944,8 +3946,6 @@ def handle_opencypher_status(self, line, local_ns):
     # > %degreeDistribution --traversalDirection inbound
     # > %degreeDistribution --traversalDirection inbound --vertexLabels airport country
 
-    # TODO: Error handling
-
     @line_magic
     @needs_local_scope
     @display_exceptions
@@ -4036,10 +4036,11 @@ def degreeDistribution(self, line, local_ns: dict = None):
         )
 
         submit_button = widgets.Button(description="Submit")
+        status_output = widgets.Output()
         output = widgets.Output()
-        
+
         # Display widgets
-        display(td_dropdown, vertex_labels_select, edge_labels_select, submit_button, output)
+        display(td_dropdown, vertex_labels_select, edge_labels_select, submit_button, status_output, output)
         
         def on_button_clicked(b):
             # Get the selected parameters
@@ -4049,287 +4050,441 @@ def on_button_clicked(b):
 
             # Clear the output widget before displaying new content
             output.clear_output(wait=True)
-            
+            status_output.clear_output(wait=True)
+
             # Call the function with the selected parameters
+            with status_output:
+                display_html(HTML(loading_wheel_html))               
+            
             with output:
-                res = self.execute_degree_distribution_query(td, vlabels, elabels, local_ns)
-                
-                # Retrieve the distribution
-                pairs = np.array(res['results'][0]['output']['distribution'])
-                keys = pairs[:,0]
-                values = pairs[:,1]
-
-                # Retrieve some statistics
-                max_deg = res['results'][0]['output']['statistics']['maxDeg']
-                median_deg = res['results'][0]['output']['statistics']['p50']
-                mean_deg = res['results'][0]['output']['statistics']['mean']
-
-                # Create the interactive visualization
-                self.plot_interactive_degree_distribution(keys, values, max_deg, median_deg, mean_deg)
+                try:
+                    tabs_data = self.execute_degree_distribution_query(td, vlabels, elabels, local_ns)            
+                    res = local_ns['js']
+
+                    # Get the titles and children
+                    titles = tabs_data['titles']
+                    children = tabs_data['children']
+
+                    # Clear the wheel display
+                    status_output.close()
+
+                    
+                    # Plot is the first tab
+                    plot_output = widgets.Output(layout=DEFAULT_LAYOUT)
+                    titles.insert(0, 'Plot')
+                    children.insert(0, plot_output)
+
+                    # Retrieve the distribution
+                    pairs = np.array(res['results'][0]['output']['distribution'])
+                    keys = pairs[:,0]
+                    values = pairs[:,1]
+
+                    # Retrieve some statistics
+                    max_deg = res['results'][0]['output']['statistics']['maxDeg']
+                    median_deg = res['results'][0]['output']['statistics']['p50']
+                    mean_deg = res['results'][0]['output']['statistics']['mean']
+
+                    # Create the interactive visualization
+                    with plot_output:
+                        self.plot_interactive_degree_distribution(keys, values, max_deg, median_deg, mean_deg)
+
+                    # Set up the tab widget                                       
+                    tab = widgets.Tab()
+                    tab.children = children
+                    for i, title in enumerate(titles):
+                        tab.set_title(i, title)
+                    
+                    display(tab)
+                except KeyError as e:
+                    print(f"Missing expected data in query results: {e}")
+                except IndexError as e:
+                    print(f"Unexpected result format: {e}")
+                except Exception as e:
+                    print(f"Error processing degree distribution: {e}")
         
         submit_button.on_click(on_button_clicked)
 
     def execute_degree_distribution_query (self, td, vlabels, elabels, local_ns):
-        query_parts = [f'traversalDirection: "{td}"']
-        
-        if vlabels:
-            vertex_str = ", ".join([f'"{v}"' for v in vlabels])
-            query_parts.append(f'vertexLabels: [{vertex_str}]')
-            
-        if elabels:
-            edge_str = ", ".join([f'"{e}"' for e in elabels])
-            query_parts.append(f'edgeLabels: [{edge_str}]')
+        try:
+            query_parts = [f'traversalDirection: "{td}"']
             
-        # Construct the query
-        line = "CALL neptune.algo.degreeDistribution({" + ", ".join(query_parts) + "}) YIELD output RETURN output"
+            if vlabels:
+                vertex_str = ", ".join([f'"{v}"' for v in vlabels])
+                query_parts.append(f'vertexLabels: [{vertex_str}]')
+                
+            if elabels:
+                edge_str = ", ".join([f'"{e}"' for e in elabels])
+                query_parts.append(f'edgeLabels: [{edge_str}]')
+                
+            # Construct the query
+            line = "CALL neptune.algo.degreeDistribution({" + ", ".join(query_parts) + "}) YIELD output RETURN output"
 
-        oc_rebuild_args = (f"{f'--store-to js --silent'}")
-        
-        self.handle_opencypher_query(oc_rebuild_args, line, local_ns)
+            # oc_rebuild_args = (f"{f'--store-to js --silent'}")
+            oc_rebuild_args = (f"{f'--store-to js'}")
+            tabs_data = self.handle_opencypher_query(oc_rebuild_args, line, local_ns, True)
+
+            if 'js' not in local_ns:
+               raise ValueError("Query execution failed to store results")
+            return tabs_data
+        except Exception as e:
+            print(f"Error executing degree distribution query: {e}")
+            return None
         
-        return local_ns['js']
 
+    def suggest_degree_distribution_params (self, n, min_nonzero_count, max_count, min_deg, max_deg):
+        """Pick initial plot settings; returns a dict with 'x_scale', 'y_scale', 'bin_type' and 'initial_bin_width'."""
-    def plot_interactive_degree_distribution(self, unique_degrees, counts, max_deg, median_deg, mean_deg):
+        degree_spread = max_deg / min_deg  # assumes min_deg > 0 (it is the divisor) - confirm at call sites
+        count_spread = max_count / min_nonzero_count  # assumes min_nonzero_count > 0 (it is the divisor)
+
+        # X-axis scale decision: log scale and log bins only when max_deg/min_deg >= 100 and n > 1000
+        if degree_spread >= 100 and n > 1000:
+            x_scale = 'Log'
+            bin_type = 'Logarithmic'
+            initial_bin_width = (np.log10(max_deg) - np.log10(min_deg)) / 20
+        else:
+            x_scale = 'Linear'
+            bin_type = 'Linear'
+            initial_bin_width = (max_deg - min_deg) / 20 if max_deg - min_deg > 0 else 1
+
+        # Y-axis scale decision: log scale only when max_count/min_nonzero_count >= 50 and n > 1000
+        if count_spread >= 50 and n > 1000:
+            y_scale = 'Log'
+        else:
+            y_scale = 'Linear'
+
+        # Small graph override: n <= 1000 always gets linear scales, linear bins and bin width 1
+        if n <= 1000:
+            x_scale = 'Linear'
+            y_scale = 'Linear'
+            bin_type = 'Linear'
+            initial_bin_width = 1
+
+        return {
+            'x_scale': x_scale,
+            'y_scale': y_scale,
+            'bin_type': bin_type,
+            'initial_bin_width': initial_bin_width
+        }
 
+    def plot_interactive_degree_distribution(self, unique_degrees, counts, max_deg, median_deg, mean_deg):
         min_deg = 0
+        try:
+            if unique_degrees is None or counts is None or len(unique_degrees) == 0:
+                print("No data available to visualize")
+                return
 
-        def update_plot(scale_type, bin_type, bin_width, y_max, x_range, show_mindeg, show_maxdeg):
-            # Start timing
-            start_time = time.time()
+            max_count = max(counts)
+            min_deg = min(unique_degrees)
             
-            alpha = 1
-            plt.clf()
-                        
-            # Get zero degree count
-            zero_idx = np.where(unique_degrees == 0)[0]
-            zero_degree_count = counts[zero_idx[0]] if len(zero_idx) > 0 else 0
-
-            isolateds_exist = zero_degree_count > 0
-            # Get non-zero degrees and counts
-            mask = unique_degrees > 0
-            filtered_degrees = unique_degrees[mask]
-            filtered_counts = counts[mask]
-            
-            # Obtain the minimum non-zero degree, unless it's all zero degrees
-            if len(filtered_degrees) == 0:
-                min_deg = 0
-            else:
-                min_deg = np.min(filtered_degrees)                
-
-            n_bins = 1
-            # Create histogram only if there are non-zero degree nodes
-            if len(filtered_degrees) > 0:
-                if bin_type != 'Raw':
-                    # Arrange the bins for a given bin_width
-                    if bin_type == 'Linear':
-                        n_bins = max(1, int((max_deg - min_deg) / bin_width))
-                        bins = np.linspace(min_deg, max_deg, n_bins + 1)
-                    else:  # Logarithmic
-                        min_deg_log = np.log10(min_deg) if min_deg > 0 else 0
-                        max_deg_log = np.log10(max_deg) if max_deg > 0 else 1
-                        n_bins = max(1, int((max_deg_log - min_deg_log) / np.log10(bin_width+0.01)))
-                        bins = np.logspace(min_deg_log, max_deg_log, n_bins + 1)
-                    
-                    all_degrees = np.repeat(filtered_degrees, filtered_counts)
-
-                    plt.hist(all_degrees, bins=bins, density=False, alpha=alpha,
-                            histtype='bar', color='#000080')
-                else:
-                    # For raw data, create bars at each unique degree
-                    plt.bar(filtered_degrees, filtered_counts, alpha=alpha,
-                        label='Raw', color='#000000')
+            # Scale widget, four options
+            scale_widget = widgets.Dropdown(
+                options=['Linear-Linear', 'Log-Log', 'Log(x)-Linear(y)', 'Linear(x)-Log(y)'],
+                value='Linear-Linear',
+                description='Scale:'
+            )
             
-            # Plot zero degree node count separately
-            if isolateds_exist:
-                # Use a special x position for zero degree nodes in log scale
-                zero_x_pos = 0.1 if scale_type in ['Log-Log', 'Log(x)-Linear(y)'] else 0
-                plt.bar(zero_x_pos, zero_degree_count, color='red', 
-                    label='Isolated', alpha=alpha, width=0.1 if scale_type in ['Log-Log', 'Log(x)-Linear(y)'] else 2)
-
-            plt.xlim(x_range[0], x_range[1])
+            # Binning widget, three options
+            bin_widget = widgets.Dropdown(
+                options=['Raw', 'Linear', 'Logarithmic'],
+                value='Linear',
+                description='Binning:'
+            )
 
-            if isolateds_exist:
-                plt.xlim(x_range[0], x_range[1])
+            # Bin width widget, integer steps in the [1, (max_deg - min_deg) // 5] interval
+            bin_width_widget = widgets.FloatSlider(
+                min=1,
+                max=(max_deg - min_deg) // 5,
+                step=1,
+                value=1,
+                readout_format = 'd',
+                description='Bin width:',
+                tooltip=('For linear binning: actual width\n'
+                        'For log binning: multiplicative factor')
+            )
 
-            # Set scales based on selection
-            if scale_type == 'Log-Log':
-                plt.xscale('log')
-                plt.yscale('log')
-                if isolateds_exist:
-                    plt.xlim(0.05, x_range[1])
-                else:
-                    plt.xlim(x_range[0]+0.05, x_range[1])
+            # Upper limit for y-axis range, enables zooming (lower limit is always zero)
+            y_max_widget = widgets.IntSlider(
+                value=max_count * 1.1,
+                min=1,
+                max=max_count * 1.1,
+                step=1,
+                description='y-max:',
+            )
 
-            elif scale_type == 'Log(x)-Linear(y)':
-                plt.xscale('log')
-                if isolateds_exist:
-                    plt.xlim(0.05, x_range[1])
-                else:
-                    plt.xlim(x_range[0]+0.05, x_range[1])
-            elif scale_type == 'Linear(x)-Log(y)':
-                plt.yscale('log')
-            
-            plt.gca().set_ylim(top=y_max)
+            # Range slider for x-axis, enables zooming
+            x_range_widget = widgets.FloatRangeSlider(            
+                min=0,
+                max=max_deg * 1.1 + 5,
+                value=[0, max_deg * 1.1 + 5],
+                step=1,
+                description='x-axis range:',
+                disabled=False,
+                continuous_update=True,
+                readout=True,
+                readout_format='.0f',
+            )
             
-            # Add vertical dashed lines for min and max degree, if enabled
-            if show_mindeg and min_deg > 0:
-                plt.axvline(x=min_deg, color='darkgreen', linestyle='--', linewidth=2, label=f'Min non-zero degree: {min_deg}')
+            # Toggle switches for min/max degree lines
+            show_mindeg_widget = widgets.Checkbox(
+                value=True,
+                description='Show Min Degree Line',
+                disabled=False
+            )
             
-            if show_maxdeg:
-                plt.axvline(x=max_deg, color='darkred', linestyle='--', linewidth=2, label=f'Max degree: {max_deg}')
-                
-            plt.grid(True, which="both", ls="-", alpha=0.2)
-            plt.xlabel('Degree')
-            plt.ylabel('Number of nodes')
-            plt.legend()   
-                
-            plt.title(f'Degree Distribution')
+            show_maxdeg_widget = widgets.Checkbox(
+                value=True,
+                description='Show Max Degree Line',
+                disabled=False
+            )
 
-            # End timing and display
-            end_time = time.time()
-            runtime = end_time - start_time
-                        
-            # Update statistics
-            with stats_output:
-                stats_output.clear_output(wait=True)
-                total_nodes = sum(counts)
-                total_edges = sum(d * c for d, c in zip(unique_degrees, counts)) // 2
-                avg_degree = sum(d * c for d, c in zip(unique_degrees, counts)) / total_nodes
-                
-                print(f"Render time: {runtime:.3f} seconds")
-                print(f"--------------------")
-
-                print(f"Number of nodes: {total_nodes}")
-                print(f"Number of edges: {total_edges}")
-                print(f"Number of isolated nodes: {zero_degree_count}")
-                print(f"Average degree: {mean_deg:.2f}")
-                print(f"Median degree: {median_deg:.2f}")
-                print(f"Max degree: {max_deg}")
-                if min_deg > 0:
-                    print(f"Min non-zero degree: {min_deg}")            
-                if bin_type != 'Raw':
-                    print(f"Number of bins: {n_bins}")
-        
-        
-        max_count = np.max(counts)
-        
-        # Scale widget, four options
-        scale_widget = widgets.Dropdown(
-            options=['Linear-Linear', 'Log-Log', 'Log(x)-Linear(y)', 'Linear(x)-Log(y)'],
-            value='Linear-Linear',
-            description='Scale:'
-        )
-        
-        # Binning widget, three options
-        bin_widget = widgets.Dropdown(
-            options=['Raw', 'Linear', 'Logarithmic'],
-            value='Linear',
-            description='Binning:'
-        )
-        
-        # Define a function to update bin_width_widget based on bin_type
-        def update_bin_width_widget(change):
-            if change['new'] == 'Logarithmic':
-                # For logarithmic binning, use a FloatSlider with smaller values
-                bin_width_widget.min = 1.00
-                bin_width_widget.max = 10.00
+            min_nonzero_count = min(counts[counts > 0])
+            min_nonzero_deg = min(unique_degrees[unique_degrees > 0])
+            params = self.suggest_degree_distribution_params (sum(counts), min_nonzero_count, max(counts), min_nonzero_deg, max_deg)
+
+            # Set scale_widget value based on params
+            if params['x_scale'] == 'Log' and params['y_scale'] == 'Log':
+                scale_widget.value = 'Log-Log'
+            elif params['x_scale'] == 'Log' and params['y_scale'] == 'Linear':
+                scale_widget.value = 'Log(x)-Linear(y)'
+            elif params['x_scale'] == 'Linear' and params['y_scale'] == 'Log':
+                scale_widget.value = 'Linear(x)-Log(y)'
+            else:  # Both linear
+                scale_widget.value = 'Linear-Linear'
+
+            # Set bin_widget and bin_width_widget based on params
+            bin_widget.value = params['bin_type']          
+            bin_width_widget.value = params['initial_bin_width']
+            if params['bin_type'] == 'Logarithmic':
                 bin_width_widget.step = 0.01
-                bin_width_widget.value = 1.00
                 bin_width_widget.readout_format = '.2f'
-                bin_width_widget.disabled = False
-            elif change['new'] == 'Raw':
-                # For raw binning, disable the widget
-                bin_width_widget.value = 1
-                bin_width_widget.disabled = True
-            else:
-                # For linear binning, use integer values
-                bin_width_widget.min = 1
-                bin_width_widget.max = (max_deg+2)/10
-                bin_width_widget.step = 1
-                bin_width_widget.value = 1
-                bin_width_widget.readout_format = 'd'
-                bin_width_widget.disabled = False
-
-        def update_y_max_widget(change):
-            if bin_widget.value == 'Raw':
-                # For raw data, use the original max count
-                y_max_widget.max = max_count * 1.1
-                y_max_widget.value = max_count * 1.1
-            elif bin_widget.value == 'Linear':
-                y_max_widget.max = max_count * bin_width_widget.value * 0.5
-                y_max_widget.value = max_count * bin_width_widget.value * 0.5
-            else: # 'Logarithmic'
-                y_max_widget.max = max_count * (10 ** bin_width_widget.value) * 0.5
-                y_max_widget.value = max_count * (10 ** bin_width_widget.value) * 0.5
-                    
-        # Bin width widget, integer options in [1, 1+(max_deg/2)] interval 
-        bin_width_widget = widgets.FloatSlider(
-            value=1,
-            min=1,
-            max=(max_deg+2)/10,
-            step=1,
-            description='Bin width:',
-            tooltip=('For linear binning: actual width\n'
-                    'For log binning: multiplicative factor')
-        )
 
-        # Observe changes to bin_width_widget and bin_widget
-        bin_width_widget.observe(update_y_max_widget, names='value')
-        bin_widget.observe(update_y_max_widget, names='value')
+            def update_plot(scale_type, bin_type, bin_width, y_max, x_range, show_mindeg, show_maxdeg):
+                # Start timing
+                start_time = time.time()
+                
+                alpha = 1
+                plt.clf()
+                            
+                # Get zero degree count
+                zero_idx = np.where(unique_degrees == 0)[0]
+                zero_degree_count = counts[zero_idx[0]] if len(zero_idx) > 0 else 0
+
+                isolateds_exist = zero_degree_count > 0
+                # Get non-zero degrees and counts
+                mask = unique_degrees > 0
+                filtered_degrees = unique_degrees[mask]
+                filtered_counts = counts[mask]
+                
+                # Obtain the minimum non-zero degree, unless it's all zero degrees
+                if len(filtered_degrees) == 0:
+                    min_deg = 0
+                else:
+                    min_deg = np.min(filtered_degrees)                
+
+                n_bins = 1
+
+                # Create histogram only if there are non-zero degree nodes
+                if len(filtered_degrees) > 0:
+                    if bin_type != 'Raw':
+                        # Arrange the bins for a given bin_width
+                        if bin_type == 'Linear':
+                            n_bins = max(1, int((max_deg - min_deg) / bin_width))
+                            bins = np.linspace(min_deg, max_deg, n_bins + 1)
+                        else:  # Logarithmic
+                            min_deg_log = np.log10(min_deg) if min_deg > 0 else 0
+                            max_deg_log = np.log10(max_deg) if max_deg > 0 else 1
+                            n_bins = max(1, int((max_deg_log - min_deg_log) / np.log10(bin_width+0.01)))
+                            bins = np.logspace(min_deg_log, max_deg_log, n_bins + 1)
+                        
+                        # Efficient binning using digitize instead of histogram
+                        bin_indices = np.digitize(filtered_degrees, bins) - 1
+                        
+                        # Build the histogram by adding each degree's count to its bin
+                        hist_counts = np.zeros(len(bins)-1)
+                        for i, count in zip(bin_indices, filtered_counts):
+                            if 0 <= i < len(hist_counts):  # Ensure index is valid
+                                hist_counts[i] += count
+                        
+                        bin_centers = bins[:-1]
+
+                        # If needed, downsample by averaging neighboring bins
+                        # if len(hist_counts) > 100000:
+                        #     downsample_factor = len(hist_counts) // 500 + 1
+                        #     downsampled_counts = np.zeros(len(hist_counts) // downsample_factor)
+                        #     downsampled_centers = np.zeros(len(hist_counts) // downsample_factor)
+                            
+                        #     for i in range(len(downsampled_counts)):
+                        #         start_idx = i * downsample_factor
+                        #         end_idx = min((i+1) * downsample_factor, len(hist_counts))
+                        #         downsampled_counts[i] = np.mean(hist_counts[start_idx:end_idx])
+                        #         downsampled_centers[i] = bin_centers[start_idx]
+                            
+                        #     plt.plot(downsampled_centers, downsampled_counts, 
+                        #             alpha=alpha, color='#000080', linewidth=1.5)
+
+                        # Create a step plot that looks like bars
+                        # Duplicate x values to create vertical lines
+                        x_steps = np.zeros(2 * len(bin_centers))
+                        x_steps[0::2] = bin_centers
+                        if bin_type == 'Linear':
+                            # For linear bins, use constant width
+                            x_steps[1::2] = bin_centers + (bins[1]-bins[0])
+                        else:  # Logarithmic
+                            # For logarithmic bins, use actual bin edges
+                            x_steps[1::2] = bins[1:]
+                        
+                        # Duplicate y values to create horizontal lines
+                        y_steps = np.zeros(2 * len(hist_counts))
+                        y_steps[0::2] = hist_counts
+                        y_steps[1::2] = hist_counts
+                        
+                        # Plot as a line with steps
+                        plt.plot(x_steps, y_steps, drawstyle='steps-pre', 
+                                alpha=alpha, color='#000080', linewidth=1.5)
+
+                        # Fill the area below the step plot
+                        plt.fill_between(x_steps, y_steps, 0, 
+                                            alpha=alpha*0.5, color='#000080', step='pre')
+
+                        # Costly way of plotting histogram:
+                        # all_degrees = np.repeat(filtered_degrees, filtered_counts)
+                        # plt.hist(all_degrees, bins=bins, density=False, alpha=alpha,
+                        #         histtype='bar', color='#000080')
+                    else:
+                        # For raw data, create bars at each unique degree
+                        plt.bar(filtered_degrees, filtered_counts, alpha=alpha,
+                            label='Raw', color='#000000')
+
+                # Plot zero degree node count separately
+                if isolateds_exist:
+                    # Use a special x position for zero degree nodes in log scale
+                    zero_x_pos = 0.1 if scale_type in ['Log-Log', 'Log(x)-Linear(y)'] else 0
+                    plt.bar(zero_x_pos, zero_degree_count, color='red', 
+                        label='Isolated', alpha=alpha, width=0.1 if scale_type in ['Log-Log', 'Log(x)-Linear(y)'] else 2)
 
-        # Upper limit for y-axis range, enables zooming (lower limit is always zero)
-        y_max_widget = widgets.IntSlider(
-            value=max_count * 1.1,
-            min=1,
-            max=max_count * 1.1,
-            step=1,
-            description='y-max:',
-        )
+                plt.xlim(x_range[0], x_range[1])
 
-        # Range slider for x-axis, enables zooming
-        x_range_widget = widgets.FloatRangeSlider(            
-            min=0,
-            max=max_deg * 1.1 + 5,
-            value=[0, max_deg * 1.1 + 5],
-            step=1,
-            description='x-axis range:',
-            disabled=False,
-            continuous_update=True,
-            readout=True,
-            readout_format='.0f',
-        )
-        
-        # Toggle switches for min/max degree lines
-        show_mindeg_widget = widgets.Checkbox(
-            value=True,
-            description='Show Min Degree Line',
-            disabled=False
-        )
-        
-        show_maxdeg_widget = widgets.Checkbox(
-            value=True,
-            description='Show Max Degree Line',
-            disabled=False
-        )
-        
-        # Output widget for statistics
-        stats_output = widgets.Output()
-
-        # Interactive plot
-        interactive_plot = widgets.interactive(
-            update_plot,
-            scale_type=scale_widget,
-            bin_type=bin_widget,
-            bin_width=bin_width_widget,
-            y_max=y_max_widget,
-            x_range=x_range_widget,
-            show_mindeg=show_mindeg_widget,
-            show_maxdeg=show_maxdeg_widget
-        )
-        
-        # Vertical box layout
-        vbox = widgets.VBox([interactive_plot, stats_output])
+                if isolateds_exist:
+                    plt.xlim(max(x_range[0], 0.05), x_range[1])
+
+                # Set scales based on selection
+                if scale_type == 'Log-Log':
+                    plt.xscale('log')
+                    plt.yscale('log')
+                    if isolateds_exist:
+                        plt.xlim(max(x_range[0], 0.05), x_range[1])
+                    else:
+                        plt.xlim(x_range[0]+0.05, x_range[1])
+
+                elif scale_type == 'Log(x)-Linear(y)':
+                    plt.xscale('log')
+                    if isolateds_exist:
+                        plt.xlim(max(x_range[0], 0.05), x_range[1])
+                    else:
+                        plt.xlim(x_range[0]+0.05, x_range[1])
+                elif scale_type == 'Linear(x)-Log(y)':
+                    plt.yscale('log')
+
+                plt.gca().set_ylim(top=y_max)
+                            
+                # Add vertical dashed lines for min and max degree, if enabled
+                if show_mindeg and min_deg > 0:
+                    plt.axvline(x=min_deg, color='darkgreen', linestyle='--', linewidth=2, label=f'Min non-zero degree: {min_deg}')
+                
+                if show_maxdeg:
+                    plt.axvline(x=max_deg, color='darkred', linestyle='--', linewidth=2, label=f'Max degree: {max_deg}')
+
+                plt.grid(True, which="both", ls="-", alpha=0.2)
+                plt.xlabel('Degree')
+                plt.ylabel('Number of nodes')
+                plt.legend()   
+                    
+                plt.title(f'Degree Distribution')
+
+                # End timing and display
+                end_time = time.time()
+                runtime = end_time - start_time
+
+                # Update statistics
+                with stats_output:
+                    stats_output.clear_output(wait=True)
+                    total_nodes = sum(counts)
+                    total_edges = sum(d * c for d, c in zip(unique_degrees, counts)) // 2
+                    print(f"Render time: {runtime:.3f} seconds passed")
+                    print(f"Number of nodes: {total_nodes:,}")
+                    print(f"Number of edges: {total_edges:,}")
+                    print(f"Number of isolated nodes: {zero_degree_count:,}")
+                    print(f"Average degree: {mean_deg:.2f}")
+                    print(f"Median degree: {median_deg:.2f}")
+                    print(f"Max degree: {max_deg:,}")
+                    if min_deg > 0:
+                        print(f"Min non-zero degree: {min_deg:,}")            
+                    if bin_type != 'Raw':
+                        print(f"Number of bins: {n_bins:,}")
         
-        # Display the interactive plot and stats
-        display(vbox)
+            max_count = np.max(counts)
+            total_nodes = sum(counts) 
+            
+            # Define a function to update bin_width_widget based on bin_type
+            def update_bin_width_widget(change):
+                if change['new'] == 'Logarithmic':
+                    # For logarithmic binning, use a FloatSlider with smaller values
+                    bin_width_widget.value = 1.00
+                    bin_width_widget.min = 1.00
+                    bin_width_widget.max = (np.log10(max_deg) - np.log10(min_deg+0.01)) / 2
+                    bin_width_widget.step = 0.01                    
+                    bin_width_widget.readout_format = '.2f'
+                    bin_width_widget.disabled = False
+                elif change['new'] == 'Raw':
+                    # For raw binning, disable the widget
+                    bin_width_widget.value = 1
+                    bin_width_widget.readout_format = 'd'
+                    bin_width_widget.disabled = True
+                elif change['new'] == 'Linear':
+                    # For linear binning, use integer values
+                    bin_width_widget.value = 1
+                    bin_width_widget.min = 1
+                    bin_width_widget.max = (max_deg - min_deg) // 5
+                    bin_width_widget.step = 1                    
+                    bin_width_widget.readout_format = 'd'
+                    bin_width_widget.disabled = False
+
+            def update_y_max_widget(change):                
+                if bin_widget.value == 'Raw':
+                    # For raw data, use the original max count
+                    y_max_widget.max = y_max_widget.value = 1.1 * max_count
+                elif bin_widget.value == 'Linear':
+                    y_max_widget.max = y_max_widget.value = 1.1 * min (total_nodes, max_count * (bin_width_widget.value ** 0.85))
+                elif bin_widget.value == 'Logarithmic':
+                    y_max_widget.max = y_max_widget.value = 1.1 * min (total_nodes, max_count * (10 ** (bin_width_widget.value ** 0.25)))
+
+            # Observe changes to bin_width_widget and bin_widget
+            bin_width_widget.observe(update_y_max_widget, names='value')
+            bin_widget.observe(update_y_max_widget, names='value')
+
+            bin_width_widget.observe(update_bin_width_widget, names='value')
+            bin_widget.observe(update_bin_width_widget, names='value')
+    
+            # Output widget for statistics
+            stats_output = widgets.Output()
+
+            # Interactive plot
+            interactive_plot = widgets.interactive(
+                update_plot,
+                scale_type=scale_widget,
+                bin_type=bin_widget,
+                bin_width=bin_width_widget,
+                y_max=y_max_widget,
+                x_range=x_range_widget,
+                show_mindeg=show_mindeg_widget,
+                show_maxdeg=show_maxdeg_widget
+            )
+            
+            # Vertical box layout
+            vbox = widgets.VBox([interactive_plot, stats_output])
+            
+            # Display the interactive plot and stats
+            display(vbox)
+     
+        except Exception as e:
+            print(f"Error creating visualization: {e}")

From dfe524a8a91221bbc8296ed850bb87bdccfcd465 Mon Sep 17 00:00:00 2001
From: Erdem Sariyuce <sariyuce@amazon.com>
Date: Tue, 17 Jun 2025 17:51:55 -0400
Subject: [PATCH 03/11] Enlarge the degree distribution tab and output widgets

---
 src/graph_notebook/magics/graph_magic.py | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/src/graph_notebook/magics/graph_magic.py b/src/graph_notebook/magics/graph_magic.py
index af4586d1..29d6b8cd 100644
--- a/src/graph_notebook/magics/graph_magic.py
+++ b/src/graph_notebook/magics/graph_magic.py
@@ -4088,8 +4088,22 @@ def on_button_clicked(b):
                     with plot_output:
                         self.plot_interactive_degree_distribution(keys, values, max_deg, median_deg, mean_deg)
 
+                    # Define a larger layout
+                    large_layout = widgets.Layout(
+                        width='100%',
+                        height='1000px',  # Increase the height as needed
+                        overflow='auto'
+                    )
+
+                    # Apply to the tab widget
+                    tab = widgets.Tab(layout=large_layout)
+
+                    # Apply to each output widget
+                    for i in range(len(children)):
+                        children[i].layout = large_layout
+
+                    # Apply to each output widget
                     # Set up the tab widget                                       
-                    tab = widgets.Tab()
                     tab.children = children
                     for i, title in enumerate(titles):
                         tab.set_title(i, title)

From e7af1402c9c64ad76a6707ee95fbb3801e9b0bdc Mon Sep 17 00:00:00 2001
From: Erdem Sariyuce <sariyuce@amazon.com>
Date: Thu, 19 Jun 2025 19:06:49 -0400
Subject: [PATCH 04/11] Replace matplotlib with plotly for the degree distribution plot

---
 pyproject.toml                           |   2 +-
 requirements.txt                         |   2 +-
 src/graph_notebook/magics/graph_magic.py | 229 ++++++++++++++++-------
 3 files changed, 168 insertions(+), 65 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 8d17ed5a..f678a191 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -46,7 +46,7 @@ dependencies = [
     'networkx==2.4',
     'numpy>=1.23.5,<1.24.0',
     'pandas>=2.1.0,<=2.2.2',
-    'matplotlib>=3.9.4',
+    'plotly>=6.1.2',
 
     # Graph databases and query languages
     'gremlinpython>=3.5.1,<=3.7.2',
diff --git a/requirements.txt b/requirements.txt
index 7e1f7328..b29e8f7d 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -18,7 +18,7 @@ itables>=2.0.0,<=2.1.0
 networkx==2.4
 numpy>=1.23.5,<1.24.0
 pandas>=2.1.0,<=2.2.2
-matplotlib>=3.9.4
+plotly>=6.1.2
 
 # Graph databases and query languages
 gremlinpython>=3.5.1,<=3.7.2
diff --git a/src/graph_notebook/magics/graph_magic.py b/src/graph_notebook/magics/graph_magic.py
index 29d6b8cd..6bff924d 100644
--- a/src/graph_notebook/magics/graph_magic.py
+++ b/src/graph_notebook/magics/graph_magic.py
@@ -17,7 +17,10 @@
 import re
 
 import numpy as np
-import matplotlib.pyplot as plt
+import plotly.graph_objects as go
+from plotly.subplots import make_subplots
+from IPython.display import display
+
 
 from ipyfilechooser import FileChooser
 from enum import Enum
@@ -3946,6 +3949,7 @@ def handle_opencypher_status(self, line, local_ns):
     # > %degreeDistribution --traversalDirection inbound
     # > %degreeDistribution --traversalDirection inbound --vertexLabels airport country
 
+    # TODO: exit if graph is empty (no nodes)
     @line_magic
     @needs_local_scope
     @display_exceptions
@@ -3989,10 +3993,6 @@ def degreeDistribution(self, line, local_ns: dict = None):
                             help="The edge labels for which the degree distribution is shown. If not supplied, "
                                  "we will default to using all the edge labels.")
         
-
-        # TODO: Additional parameter for saving the visualization?
-        # parser.add_argument('--export-to', type=str, default='',
-        #                     help='Export the degree distribution results to the provided file path.')
         
         args = parser.parse_args(line.split())
         
@@ -4091,7 +4091,7 @@ def on_button_clicked(b):
                     # Define a larger layout
                     large_layout = widgets.Layout(
                         width='100%',
-                        height='1000px',  # Increase the height as needed
+                        height='1200px',  # Increase the height as needed
                         overflow='auto'
                     )
 
@@ -4277,7 +4277,10 @@ def update_plot(scale_type, bin_type, bin_width, y_max, x_range, show_mindeg, sh
                 start_time = time.time()
                 
                 alpha = 1
-                plt.clf()
+
+                fig = go.Figure()    
+                fig.data = []
+                fig.layout = go.Layout()
                             
                 # Get zero degree count
                 zero_idx = np.where(unique_degrees == 0)[0]
@@ -4321,21 +4324,6 @@ def update_plot(scale_type, bin_type, bin_width, y_max, x_range, show_mindeg, sh
                         
                         bin_centers = bins[:-1]
 
-                        # If needed, downsample by averaging neighboring bins
-                        # if len(hist_counts) > 100000:
-                        #     downsample_factor = len(hist_counts) // 500 + 1
-                        #     downsampled_counts = np.zeros(len(hist_counts) // downsample_factor)
-                        #     downsampled_centers = np.zeros(len(hist_counts) // downsample_factor)
-                            
-                        #     for i in range(len(downsampled_counts)):
-                        #         start_idx = i * downsample_factor
-                        #         end_idx = min((i+1) * downsample_factor, len(hist_counts))
-                        #         downsampled_counts[i] = np.mean(hist_counts[start_idx:end_idx])
-                        #         downsampled_centers[i] = bin_centers[start_idx]
-                            
-                        #     plt.plot(downsampled_centers, downsampled_counts, 
-                        #             alpha=alpha, color='#000080', linewidth=1.5)
-
                         # Create a step plot that looks like bars
                         # Duplicate x values to create vertical lines
                         x_steps = np.zeros(2 * len(bin_centers))
@@ -4353,67 +4341,180 @@ def update_plot(scale_type, bin_type, bin_width, y_max, x_range, show_mindeg, sh
                         y_steps[1::2] = hist_counts
                         
                         # Plot as a line with steps
-                        plt.plot(x_steps, y_steps, drawstyle='steps-pre', 
-                                alpha=alpha, color='#000080', linewidth=1.5)
+                        fig = go.Figure(data=go.Scatter(
+                            x=x_steps, 
+                            y=y_steps, 
+                            mode='lines', 
+                            line=dict(color='#000080', width=1.5, shape='hv'), 
+                            opacity=alpha,
+                            showlegend=False))
 
                         # Fill the area below the step plot
-                        plt.fill_between(x_steps, y_steps, 0, 
-                                            alpha=alpha*0.5, color='#000080', step='pre')
-
-                        # Costly way of plotting histogram:
-                        # all_degrees = np.repeat(filtered_degrees, filtered_counts)
-                        # plt.hist(all_degrees, bins=bins, density=False, alpha=alpha,
-                        #         histtype='bar', color='#000080')
+                        fig.add_trace(
+                            go.Scatter(
+                                x=x_steps,
+                                y=y_steps,
+                                mode='lines',
+                                line=dict(shape='hv', width=0),
+                                fill='tozeroy',
+                                fillcolor='rgba(0, 0, 128, ' + str(alpha*0.5) + ')',
+                                opacity=alpha,
+                                showlegend=False
+                            ))
                     else:
-                        # For raw data, create bars at each unique degree
-                        plt.bar(filtered_degrees, filtered_counts, alpha=alpha,
-                            label='Raw', color='#000000')
+                    # For raw data, create bars at each unique degree
+                        fig.add_trace(
+                            go.Bar(
+                                x=filtered_degrees,
+                                y=filtered_counts,
+                                name='Raw',
+                                marker_color='#000000',
+                                opacity=alpha,
+                                width=0.1,
+                            ))
 
                 # Plot zero degree node count separately
                 if isolateds_exist:
                     # Use a special x position for zero degree nodes in log scale
                     zero_x_pos = 0.1 if scale_type in ['Log-Log', 'Log(x)-Linear(y)'] else 0
-                    plt.bar(zero_x_pos, zero_degree_count, color='red', 
-                        label='Isolated', alpha=alpha, width=0.1 if scale_type in ['Log-Log', 'Log(x)-Linear(y)'] else 2)
+                    fig.add_trace(
+                        go.Bar(
+                            x=[zero_x_pos],
+                            y=[zero_degree_count],
+                            name='Isolated',
+                            marker_color='red',
+                            opacity=alpha,
+                            width=0.1,
+                        ))
+
+                fig.update_layout(
+                    # Size
+                    width=1200,
+                    height=600,
+                    # Margins
+                    margin=dict(
+                        l=80,    # left margin
+                        r=50,    # right margin
+                        b=80,    # bottom margin
+                        t=100,   # top margin
+                        pad=4    # padding
+                    ),
+                    xaxis=dict(
+                        showgrid=True,
+                        gridwidth=1,
+                        gridcolor='rgba(0,0,0,0.2)',
+                        minor=dict(
+                            tickmode='auto',
+                            tickcolor='gray',
+                            showgrid=True,
+                            gridwidth=1,
+                            gridcolor='lightgray',
+                            ticks=""
+                        )
+                    ),
+                    yaxis=dict(
+                        showgrid=True,
+                        gridwidth=1,
+                        gridcolor='rgba(0,0,0,0.2)',
+                    ),
+                    yaxis_range=[0.05, y_max],
+                    xaxis_title='Degree',
+                    yaxis_title='Number of nodes',
+                    title={
+                        'text': 'Degree Distribution',
+                        'x': 0.5,
+                        'xanchor': 'center',
+                        'yanchor': 'top'
+                    },    
+                    legend=dict(
+                        orientation="v",  # Vertical orientation
+                        yanchor="auto", # Let Plotly pick the vertical anchor from the y position
+                        xanchor="auto",  # Let Plotly pick the horizontal anchor from the x position
+                        x=1               # Right edge of the plotting area
+                    )
+                )
 
-                plt.xlim(x_range[0], x_range[1])
+                x_min = x_range[0]
+                x_max = x_range[1]
 
                 if isolateds_exist:
-                    plt.xlim(max(x_range[0], 0.05), x_range[1])
+                    x_min = max(x_min, 0.05)
 
                 # Set scales based on selection
                 if scale_type == 'Log-Log':
-                    plt.xscale('log')
-                    plt.yscale('log')
+                    fig.update_layout(
+                        xaxis_type="log",  # Set x-axis to log scale
+                        yaxis_type="log",  # Set y-axis to log scale
+                        yaxis_range=[0.05, np.log10(y_max)],
+                        yaxis=dict(                        
+                            exponentformat='power',  # Use × 10ⁿ notation
+                            showexponent='all',
+                        ),
+                        xaxis=dict(                        
+                            exponentformat='power',  # Use × 10ⁿ notation
+                            showexponent='all',
+                        )
+                    )
+                    
                     if isolateds_exist:
-                        plt.xlim(max(x_range[0], 0.05), x_range[1])
+                        x_min = max(x_min, 0.05)
                     else:
-                        plt.xlim(x_range[0]+0.05, x_range[1])
-
+                        x_min = x_min+0.05
+                    x_min = np.log10(x_min)
+                    x_max = np.log10(x_max)
                 elif scale_type == 'Log(x)-Linear(y)':
-                    plt.xscale('log')
+                    fig.update_layout(
+                        xaxis_type="log",  # Set x-axis to log scale
+                        xaxis=dict(                        
+                            exponentformat='power',  # Use × 10ⁿ notation
+                            showexponent='all',
+                        )
+                    )
                     if isolateds_exist:
-                        plt.xlim(max(x_range[0], 0.05), x_range[1])
+                        x_min = max(x_min, 0.05)
                     else:
-                        plt.xlim(x_range[0]+0.05, x_range[1])
+                        x_min = x_min+0.05
+                    x_min = np.log10(x_min)
+                    x_max = np.log10(x_max)
                 elif scale_type == 'Linear(x)-Log(y)':
-                    plt.yscale('log')
+                    fig.update_layout(
+                        yaxis_type="log",  # Set y-axis to log scale
+                        yaxis_range=[0.05, np.log10(y_max)],
+                        yaxis=dict(                        
+                            exponentformat='power',  # Use × 10ⁿ notation
+                            showexponent='all',
+                        )
+                    )                   
+
+                fig.update_layout(
+                    xaxis_range=[x_min, x_max]
+                )
+
 
-                plt.gca().set_ylim(top=y_max)
-                            
                 # Add vertical dashed lines for min and max degree, if enabled
                 if show_mindeg and min_deg > 0:
-                    plt.axvline(x=min_deg, color='darkgreen', linestyle='--', linewidth=2, label=f'Min non-zero degree: {min_deg}')
-                
-                if show_maxdeg:
-                    plt.axvline(x=max_deg, color='darkred', linestyle='--', linewidth=2, label=f'Max degree: {max_deg}')
+                    fig.add_trace(
+                        go.Scatter(
+                            x=[min_deg, min_deg],
+                            y=[0, y_max],
+                            mode='lines',
+                            line=dict(color="darkgreen", width=2, dash="dash"),
+                            name = f'Min non-zero degree:{min_deg}'
+                        )
+                    )
 
-                plt.grid(True, which="both", ls="-", alpha=0.2)
-                plt.xlabel('Degree')
-                plt.ylabel('Number of nodes')
-                plt.legend()   
-                    
-                plt.title(f'Degree Distribution')
+                if show_maxdeg:                    
+                    fig.add_trace(
+                        go.Scatter(
+                            x=[max_deg, max_deg],
+                            y=[0, y_max],
+                            mode='lines',
+                            line=dict(color="darkred", width=2, dash="dash"),
+                            name = f'Max degree:{max_deg}'
+                        )
+                    )
+
+                fig.show()
 
                 # End timing and display
                 end_time = time.time()
@@ -4424,7 +4525,6 @@ def update_plot(scale_type, bin_type, bin_width, y_max, x_range, show_mindeg, sh
                     stats_output.clear_output(wait=True)
                     total_nodes = sum(counts)
                     total_edges = sum(d * c for d, c in zip(unique_degrees, counts)) // 2
-                    print(f"Render time: {runtime:.3f} seconds passed")
                     print(f"Number of nodes: {total_nodes:,}")
                     print(f"Number of edges: {total_edges:,}")
                     print(f"Number of isolated nodes: {zero_degree_count:,}")
@@ -4464,13 +4564,16 @@ def update_bin_width_widget(change):
                     bin_width_widget.disabled = False
 
             def update_y_max_widget(change):                
+                factor = 1
                 if bin_widget.value == 'Raw':
                     # For raw data, use the original max count
-                    y_max_widget.max = y_max_widget.value = 1.1 * max_count
+                    factor = max_count
                 elif bin_widget.value == 'Linear':
-                    y_max_widget.max = y_max_widget.value = 1.1 * min (total_nodes, max_count * (bin_width_widget.value ** 0.85))
+                    factor = min (total_nodes, max_count * (bin_width_widget.value ** 0.85))
                 elif bin_widget.value == 'Logarithmic':
-                    y_max_widget.max = y_max_widget.value = 1.1 * min (total_nodes, max_count * (10 ** (bin_width_widget.value ** 0.25)))
+                    factor = min (total_nodes, max_count * (10 ** (bin_width_widget.value ** 0.25)))
+                
+                y_max_widget.max = y_max_widget.value = 1.1 * factor
 
             # Observe changes to bin_width_widget and bin_widget
             bin_width_widget.observe(update_y_max_widget, names='value')

From 5d9793721858c48d0a7ed62163daba60d7f0f52f Mon Sep 17 00:00:00 2001
From: Erdem Sariyuce <sariyuce@amazon.com>
Date: Thu, 31 Jul 2025 14:02:02 -0400
Subject: [PATCH 05/11] Several changes to improve usability and appearance

---
 src/graph_notebook/magics/graph_magic.py | 430 ++++++++++++++---------
 1 file changed, 261 insertions(+), 169 deletions(-)

diff --git a/src/graph_notebook/magics/graph_magic.py b/src/graph_notebook/magics/graph_magic.py
index 6bff924d..b1e4e190 100644
--- a/src/graph_notebook/magics/graph_magic.py
+++ b/src/graph_notebook/magics/graph_magic.py
@@ -15,6 +15,8 @@
 import uuid
 import ast
 import re
+import sys
+import math
 
 import numpy as np
 import plotly.graph_objects as go
@@ -3933,10 +3935,6 @@ def handle_opencypher_status(self, line, local_ns):
             if not args.silent:
                 print(json.dumps(js, indent=2))
 
-
-
-
-
     # %degreeDistribution magic command.
     # It obtains the degree distribution of a graph in the form of a visual histogram in notebook. Histogram simply
     # shows the number of vertices with a given degree, where degree is shown on the x-axis and the count on y-axis.
@@ -3949,7 +3947,6 @@ def handle_opencypher_status(self, line, local_ns):
     # > %degreeDistribution --traversalDirection inbound
     # > %degreeDistribution --traversalDirection inbound --vertexLabels airport country
 
-    # TODO: exit if graph is empty (no nodes)
     @line_magic
     @needs_local_scope
     @display_exceptions
@@ -4003,7 +4000,7 @@ def degreeDistribution(self, line, local_ns: dict = None):
 
         td_dropdown = widgets.Dropdown(
             options=TRAVERSAL_DIRECTIONS,
-            description='Traversal direction:',
+            description='Degree type:',
             disabled=False,
             style=SEED_WIDGET_STYLE,
             value = td_val
@@ -4059,62 +4056,62 @@ def on_button_clicked(b):
             with output:
                 try:
                     tabs_data = self.execute_degree_distribution_query(td, vlabels, elabels, local_ns)            
-                    res = local_ns['js']
-
-                    # Get the titles and children
-                    titles = tabs_data['titles']
-                    children = tabs_data['children']
-
-                    # Clear the wheel display
-                    status_output.close()
-
-                    
-                    # Plot is the first tab
-                    plot_output = widgets.Output(layout=DEFAULT_LAYOUT)
-                    titles.insert(0, 'Plot')
-                    children.insert(0, plot_output)
-
-                    # Retrieve the distribution
-                    pairs = np.array(res['results'][0]['output']['distribution'])
-                    keys = pairs[:,0]
-                    values = pairs[:,1]
-
-                    # Retrieve some statistics
-                    max_deg = res['results'][0]['output']['statistics']['maxDeg']
-                    median_deg = res['results'][0]['output']['statistics']['p50']
-                    mean_deg = res['results'][0]['output']['statistics']['mean']
-
-                    # Create the interactive visualization
-                    with plot_output:
-                        self.plot_interactive_degree_distribution(keys, values, max_deg, median_deg, mean_deg)
-
-                    # Define a larger layout
-                    large_layout = widgets.Layout(
-                        width='100%',
-                        height='1200px',  # Increase the height as needed
-                        overflow='auto'
-                    )
-
-                    # Apply to the tab widget
-                    tab = widgets.Tab(layout=large_layout)
-
-                    # Apply to each output widget
-                    for i in range(len(children)):
-                        children[i].layout = large_layout
-
-                    # Apply to each output widget
-                    # Set up the tab widget                                       
-                    tab.children = children
-                    for i, title in enumerate(titles):
-                        tab.set_title(i, title)
-                    
-                    display(tab)
                 except KeyError as e:
                     print(f"Missing expected data in query results: {e}")
                 except IndexError as e:
                     print(f"Unexpected result format: {e}")
                 except Exception as e:
-                    print(f"Error processing degree distribution: {e}")
+                    print(f"Error processing degree distribution: {e}")    
+
+                res = local_ns['js']
+
+                # Clear the wheel display
+                status_output.clear_output()
+                
+                # Retrieve the distribution
+                pairs = np.array(res['results'][0]['output']['distribution'])
+                keys = pairs[:,0]
+                values = pairs[:,1]
+
+                # Retrieve some statistics
+                max_deg = res['results'][0]['output']['statistics']['maxDeg']
+                median_deg = res['results'][0]['output']['statistics']['p50']
+                mean_deg = res['results'][0]['output']['statistics']['mean']
+
+                # Get the titles and children
+                titles = tabs_data['titles']
+                children = tabs_data['children']
+
+                # Plot is the first tab
+                plot_output = widgets.Output(layout=DEFAULT_LAYOUT)
+                titles.insert(0, 'Plot')
+                children.insert(0, plot_output)
+
+                # Create the interactive visualization
+                with plot_output:
+                    self.plot_interactive_degree_distribution(keys, values, max_deg, median_deg, mean_deg, 
+                                                                td, vlabels, elabels, len(available_vertex_labels), len(available_edge_labels))
+
+                # Define a larger layout
+                large_layout = widgets.Layout(
+                    width='100%',
+                    height='1200px',  # Increase the height as needed
+                    overflow='auto'
+                )
+
+                # Apply to the tab widget
+                tab = widgets.Tab(layout=large_layout)
+
+                # Apply to each output widget
+                for i in range(len(children)):
+                    children[i].layout = large_layout
+
+                # Set up the tab widget                                       
+                tab.children = children
+                for i, title in enumerate(titles):
+                    tab.set_title(i, title)
+                
+                display(tab)
         
         submit_button.on_click(on_button_clicked)
 
@@ -4133,20 +4130,19 @@ def execute_degree_distribution_query (self, td, vlabels, elabels, local_ns):
             # Construct the query
             line = "CALL neptune.algo.degreeDistribution({" + ", ".join(query_parts) + "}) YIELD output RETURN output"
 
-            # oc_rebuild_args = (f"{f'--store-to js --silent'}")
             oc_rebuild_args = (f"{f'--store-to js'}")
             tabs_data = self.handle_opencypher_query(oc_rebuild_args, line, local_ns, True)
 
             if 'js' not in local_ns:
                raise ValueError("Query execution failed to store results")
+
             return tabs_data
         except Exception as e:
             print(f"Error executing degree distribution query: {e}")
             return None
         
-
-    def suggest_degree_distribution_params (self, n, min_nonzero_count, max_count, min_deg, max_deg):
-
+    def suggest_degree_distribution_params (self, expected_nbins, n, min_nonzero_count, max_count, min_deg, max_deg):
+        
         degree_spread = max_deg / min_deg
         count_spread = max_count / min_nonzero_count
 
@@ -4154,11 +4150,11 @@ def suggest_degree_distribution_params (self, n, min_nonzero_count, max_count, m
         if degree_spread >= 100 and n > 1000:
             x_scale = 'Log'
             bin_type = 'Logarithmic'
-            initial_bin_width = (np.log10(max_deg) - np.log10(min_deg)) / 20
+            initial_bin_width = (np.log10(max_deg) - np.log10(min_deg)) / np.log10(expected_nbins)
         else:
             x_scale = 'Linear'
             bin_type = 'Linear'
-            initial_bin_width = (max_deg - min_deg) / 20 if max_deg - min_deg > 0 else 1
+            initial_bin_width = (max_deg - min_deg) / expected_nbins if max_deg - min_deg > 0 else 1
 
         # Y-axis scale decision
         if count_spread >= 50 and n > 1000:
@@ -4180,7 +4176,9 @@ def suggest_degree_distribution_params (self, n, min_nonzero_count, max_count, m
             'initial_bin_width': initial_bin_width
         }
 
-    def plot_interactive_degree_distribution(self, unique_degrees, counts, max_deg, median_deg, mean_deg):
+    def plot_interactive_degree_distribution(self, unique_degrees, counts, max_deg, median_deg, mean_deg,
+                                             td, vlabels, elabels, num_vlabels, num_elabels):
+        expected_nbins = 20
         min_deg = 0
         try:
             if unique_degrees is None or counts is None or len(unique_degrees) == 0:
@@ -4204,10 +4202,10 @@ def plot_interactive_degree_distribution(self, unique_degrees, counts, max_deg,
                 description='Binning:'
             )
 
-            # Bin width widget, integer options in [1, 1+(max_deg/2)] interval 
+            # Bin width widget, integer/float options for linear/log scale
             bin_width_widget = widgets.FloatSlider(
                 min=1,
-                max=(max_deg - min_deg) // 5,
+                max=max(1, (max_deg - min_deg) // 5),
                 step=1,
                 value=1,
                 readout_format = 'd',
@@ -4216,14 +4214,36 @@ def plot_interactive_degree_distribution(self, unique_degrees, counts, max_deg,
                         'For log binning: multiplicative factor')
             )
 
-            # Upper limit for y-axis range, enables zooming (lower limit is always zero)
+            # Y-axis maximum control: slider for quick selection, text for custom values
             y_max_widget = widgets.IntSlider(
-                value=max_count * 1.1,
+                value=int(max_count * 1.1),
                 min=1,
-                max=max_count * 1.1,
+                max=max(10, int(max_count * 1.1)),
                 step=1,
                 description='y-max:',
             )
+            
+            # Extend the y-max limit
+            y_max_text_widget = widgets.BoundedIntText(
+                value=int(max_count * 1.1),
+                min=1,
+                max=sys.maxsize,
+                step=1,
+                description='Extend y-max:',
+                style={
+                    'description_width': '50%',
+                    'readout_width': '30%'
+                },
+            )
+
+            # Sync slider and text input
+            def handle_text_change(change):
+                new_max = int(change.new)
+                if new_max > y_max_widget.max:
+                    y_max_widget.max = new_max
+                y_max_widget.value = new_max
+                
+            y_max_text_widget.observe(handle_text_change, names='value')
 
             # Range slider for x-axis, enables zooming
             x_range_widget = widgets.FloatRangeSlider(            
@@ -4238,46 +4258,71 @@ def plot_interactive_degree_distribution(self, unique_degrees, counts, max_deg,
                 readout_format='.0f',
             )
             
+            # Add CSS to style the slider readout with a border
+            display(HTML("""
+            <style>
+            .widget-readout {
+                border: 1px solid #ccc !important;
+                border-radius: 3px !important;
+                padding: 1px 4px !important;
+            }
+            </style>
+            """))
+
             # Toggle switches for min/max degree lines
             show_mindeg_widget = widgets.Checkbox(
                 value=True,
-                description='Show Min Degree Line',
-                disabled=False
+                description='Show Min Degree',
+                disabled=False,
             )
             
             show_maxdeg_widget = widgets.Checkbox(
                 value=True,
-                description='Show Max Degree Line',
-                disabled=False
+                description='Show Max Degree',
+                disabled=False,
             )
 
-            min_nonzero_count = min(counts[counts > 0])
-            min_nonzero_deg = min(unique_degrees[unique_degrees > 0])
-            params = self.suggest_degree_distribution_params (sum(counts), min_nonzero_count, max(counts), min_nonzero_deg, max_deg)
-
-            # Set scale_widget value based on params
-            if params['x_scale'] == 'Log' and params['y_scale'] == 'Log':
-                scale_widget.value = 'Log-Log'
-            elif params['x_scale'] == 'Log' and params['y_scale'] == 'Linear':
-                scale_widget.value = 'Log(x)-Linear(y)'
-            elif params['x_scale'] == 'Linear' and params['y_scale'] == 'Log':
-                scale_widget.value = 'Linear(x)-Log(y)'
-            else:  # Both linear
+            max_deg = max(unique_degrees)
+            if max_deg <= 1:
                 scale_widget.value = 'Linear-Linear'
-
-            # Set bin_widget and bin_widthz_widget based on params
-            bin_widget.value = params['bin_type']          
-            bin_width_widget.value = params['initial_bin_width']
-            if params['bin_type'] == 'Logarithmic':
-                bin_width_widget.step = 0.01
-                bin_width_widget.readout_format = '.2f'
+                bin_widget.value = 'Linear'         
+                bin_width_widget.value = 1
+            else:                
+                min_nonzero_count = min(counts[counts > 0])
+                min_nonzero_deg = min(unique_degrees[unique_degrees > 0])
+                params = self.suggest_degree_distribution_params (expected_nbins, sum(counts), min_nonzero_count, max(counts), min_nonzero_deg, max_deg)
+
+                # Set scale_widget value based on params
+                if params['x_scale'] == 'Log' and params['y_scale'] == 'Log':
+                    scale_widget.value = 'Log-Log'
+                elif params['x_scale'] == 'Log' and params['y_scale'] == 'Linear':
+                    scale_widget.value = 'Log(x)-Linear(y)'
+                elif params['x_scale'] == 'Linear' and params['y_scale'] == 'Log':
+                    scale_widget.value = 'Linear(x)-Log(y)'
+                else:  # Both linear
+                    scale_widget.value = 'Linear-Linear'
+
+                # Set bin_widget and bin_width_widget based on params
+                bin_widget.value = params['bin_type']          
+                bin_width_widget.value = params['initial_bin_width']
+                if params['bin_type'] == 'Logarithmic':
+                    bin_width_widget.step = 0.01
+                    bin_width_widget.readout_format = '.2f'
+                    bin_width_widget.max = max (1, (np.log10(max_deg) - np.log10(min_nonzero_deg)) / np.log10(5))
+
+            # Adjust the initial y-max limit
+            total_nodes = sum(counts)
+            factor = 1
+            if bin_widget.value == 'Raw': # For raw data, use the original max count                
+                factor = max_count
+            elif bin_widget.value == 'Linear':
+                factor = min (total_nodes, max_count * (bin_width_widget.value ** 0.85))
+            elif bin_widget.value == 'Logarithmic':
+                factor = min (total_nodes, max_count * (10 ** (bin_width_widget.value ** 0.25)))
+            y_max_text_widget.value = y_max_widget.max = y_max_widget.value = max (1.1 * factor, y_max_text_widget.value)
 
             def update_plot(scale_type, bin_type, bin_width, y_max, x_range, show_mindeg, show_maxdeg):
-                # Start timing
-                start_time = time.time()
-                
                 alpha = 1
-
                 fig = go.Figure()    
                 fig.data = []
                 fig.layout = go.Layout()
@@ -4285,8 +4330,8 @@ def update_plot(scale_type, bin_type, bin_width, y_max, x_range, show_mindeg, sh
                 # Get zero degree count
                 zero_idx = np.where(unique_degrees == 0)[0]
                 zero_degree_count = counts[zero_idx[0]] if len(zero_idx) > 0 else 0
-
                 isolateds_exist = zero_degree_count > 0
+
                 # Get non-zero degrees and counts
                 mask = unique_degrees > 0
                 filtered_degrees = unique_degrees[mask]
@@ -4300,55 +4345,51 @@ def update_plot(scale_type, bin_type, bin_width, y_max, x_range, show_mindeg, sh
 
                 n_bins = 1
 
-                # Create histogram only if there are non-zero degree nodes
+                # Create histogram only if there is at least one non-zero degree node
                 if len(filtered_degrees) > 0:
                     if bin_type != 'Raw':
                         # Arrange the bins for a given bin_width
                         if bin_type == 'Linear':
-                            n_bins = max(1, int((max_deg - min_deg) / bin_width))
-                            bins = np.linspace(min_deg, max_deg, n_bins + 1)
+                            n_bins = int((max_deg - min_deg) / bin_width)
+                            bins = np.linspace(min_deg, max_deg, n_bins+1)
                         else:  # Logarithmic
                             min_deg_log = np.log10(min_deg) if min_deg > 0 else 0
                             max_deg_log = np.log10(max_deg) if max_deg > 0 else 1
-                            n_bins = max(1, int((max_deg_log - min_deg_log) / np.log10(bin_width+0.01)))
-                            bins = np.logspace(min_deg_log, max_deg_log, n_bins + 1)
+                            n_bins = int (10 ** ((max_deg_log - min_deg_log) / (bin_width)))
+                            bins = np.logspace(min_deg_log, max_deg_log, n_bins+1)
                         
                         # Efficient binning using digitize instead of histogram
                         bin_indices = np.digitize(filtered_degrees, bins) - 1
                         
                         # Create histogram using prefix sum approach
-                        hist_counts = np.zeros(len(bins)-1)
+                        hist_counts = np.zeros(len(bins))
                         for i, count in zip(bin_indices, filtered_counts):
                             if 0 <= i < len(hist_counts):  # Ensure index is valid
                                 hist_counts[i] += count
                         
-                        bin_centers = bins[:-1]
-
-                        # Create a step plot that looks like bars
-                        # Duplicate x values to create vertical lines
-                        x_steps = np.zeros(2 * len(bin_centers))
-                        x_steps[0::2] = bin_centers
-                        if bin_type == 'Linear':
-                            # For linear bins, use constant width
-                            x_steps[1::2] = bin_centers + (bins[1]-bins[0])
-                        else:  # Logarithmic
-                            # For logarithmic bins, use actual bin edges
+                        # Create a step plot that looks like bars, duplicate x values to create vertical lines
+                        x_steps = np.zeros(2 * len(bins))
+                        x_steps[0::2] = bins
+                        if bin_type == 'Linear': # For linear bins, use constant width                            
+                            x_steps[1::2] = bins + bin_width
+                        else: # For logarithmic bins, use actual bin edges   
+                            bins = np.concatenate((bins, np.array([bins[-1]+bin_width])))                         
                             x_steps[1::2] = bins[1:]
                         
                         # Duplicate y values to create horizontal lines
                         y_steps = np.zeros(2 * len(hist_counts))
                         y_steps[0::2] = hist_counts
                         y_steps[1::2] = hist_counts
-                        
-                        # Plot as a line with steps
+
                         fig = go.Figure(data=go.Scatter(
                             x=x_steps, 
                             y=y_steps, 
                             mode='lines', 
                             line=dict(color='#000080', width=1.5, shape='hv'), 
                             opacity=alpha,
+                            hovertemplate='<extra></extra>',
                             showlegend=False))
-
+                        
                         # Fill the area below the step plot
                         fig.add_trace(
                             go.Scatter(
@@ -4359,10 +4400,11 @@ def update_plot(scale_type, bin_type, bin_width, y_max, x_range, show_mindeg, sh
                                 fill='tozeroy',
                                 fillcolor='rgba(0, 0, 128, ' + str(alpha*0.5) + ')',
                                 opacity=alpha,
-                                showlegend=False
-                            ))
-                    else:
-                    # For raw data, create bars at each unique degree
+                                # no text on hover because the line chart (that mimics the bars) has
+                                # multiple values for an x when it goes up/down, even when bin width = 1
+                                hovertemplate='<extra></extra>',
+                                showlegend=False))
+                    else: # For raw data, create bars at each unique degree                    
                         fig.add_trace(
                             go.Bar(
                                 x=filtered_degrees,
@@ -4370,13 +4412,24 @@ def update_plot(scale_type, bin_type, bin_width, y_max, x_range, show_mindeg, sh
                                 name='Raw',
                                 marker_color='#000000',
                                 opacity=alpha,
-                                width=0.1,
+                                # text on hover only for raw data
+                                hovertemplate='<b>%{y}</b> nodes of degree <b>%{x}</b><extra></extra>',
+                                width=0.9,
                             ))
 
                 # Plot zero degree node count separately
                 if isolateds_exist:
                     # Use a special x position for zero degree nodes in log scale
                     zero_x_pos = 0.1 if scale_type in ['Log-Log', 'Log(x)-Linear(y)'] else 0
+
+                    # Adjust width based on binning type
+                    if bin_type == 'Linear':
+                        bar_width = 1  # bin width
+                    elif bin_type == 'Logarithmic':
+                        bar_width = 0.05  # Fixed small width for log scale
+                    else:  # Raw
+                        bar_width = 0.9  # Standard width for raw data
+                    
                     fig.add_trace(
                         go.Bar(
                             x=[zero_x_pos],
@@ -4384,10 +4437,40 @@ def update_plot(scale_type, bin_type, bin_width, y_max, x_range, show_mindeg, sh
                             name='Isolated',
                             marker_color='red',
                             opacity=alpha,
-                            width=0.1,
+                            hovertemplate='<b>%{y}</b> nodes of degree zero<extra></extra>',
+                            width=bar_width,
                         ))
 
+                # Build title components
+                degree_type_map = {
+                    'both': 'Total',
+                    'inbound': 'In', 
+                    'outbound': 'Out'
+                }
+                
+                degree_display = degree_type_map.get(td, td.title())
+                
+                # Build vertex labels part
+                if not vlabels or len(vlabels) == num_vlabels: # if no label is selected, then all labels considered
+                    vertex_part = "for <b>all</b> vertex labels"
+                else:
+                    bold_vlabels = [f"<b>{label}</b>" for label in vlabels]
+                    vertex_part = f"for {', '.join(bold_vlabels)} vertices"
+                    
+                # Build edge labels part
+                if not elabels or len(elabels) == num_elabels:# if no label is selected, then all labels considered
+                    edge_part = "via <b>all</b> edge labels"
+                else:
+                    bold_elabels = [f"<b>{label}</b>" for label in elabels]
+                    edge_part = f"via {', '.join(bold_elabels)} edges"
+                    
+                title_text = f"<b>{degree_display}</b> degree distribution<br>{vertex_part}<br>{edge_part}"
+
                 fig.update_layout(
+                    hoverlabel=dict(
+                        bgcolor='rgba(0, 0, 128, ' + str(alpha*0.5) + ')',
+                        font_color="white"
+                    ),
                     # Size
                     width=1200,
                     height=600,
@@ -4421,15 +4504,15 @@ def update_plot(scale_type, bin_type, bin_width, y_max, x_range, show_mindeg, sh
                     xaxis_title='Degree',
                     yaxis_title='Number of nodes',
                     title={
-                        'text': 'Degree Distribution',
+                        'text': title_text,
                         'x': 0.5,
                         'xanchor': 'center',
                         'yanchor': 'top'
                     },    
                     legend=dict(
                         orientation="v",  # Horizontal orientation
-                        yanchor="auto", # Anchor legend to its bottom edge
-                        xanchor="auto",  # Anchor legend to its right edge
+                        yanchor="auto",   # Anchor legend to its bottom edge
+                        xanchor="auto",   # Anchor legend to its right edge
                         x=1               # Position to the right of the plot
                     )
                 )
@@ -4437,9 +4520,6 @@ def update_plot(scale_type, bin_type, bin_width, y_max, x_range, show_mindeg, sh
                 x_min = x_range[0]
                 x_max = x_range[1]
 
-                if isolateds_exist:
-                    x_min = max(x_min, 0.05)
-
                 # Set scales based on selection
                 if scale_type == 'Log-Log':
                     fig.update_layout(
@@ -4447,33 +4527,30 @@ def update_plot(scale_type, bin_type, bin_width, y_max, x_range, show_mindeg, sh
                         yaxis_type="log",  # Set y-axis to log scale
                         yaxis_range=[0.05, np.log10(y_max)],
                         yaxis=dict(                        
-                            exponentformat='power',  # Use × 10ⁿ notation
+                            exponentformat='power',  # Use scientific notation
                             showexponent='all',
+                            dtick=1  # Show only powers of 10
                         ),
                         xaxis=dict(                        
-                            exponentformat='power',  # Use × 10ⁿ notation
+                            exponentformat='power',  # Use scientific notation
                             showexponent='all',
+                            dtick=1  # Show only powers of 10
                         )
                     )
                     
-                    if isolateds_exist:
-                        x_min = max(x_min, 0.05)
-                    else:
-                        x_min = x_min+0.05
+                    x_min = max(x_min, 0.05)
                     x_min = np.log10(x_min)
                     x_max = np.log10(x_max)
                 elif scale_type == 'Log(x)-Linear(y)':
                     fig.update_layout(
                         xaxis_type="log",  # Set x-axis to log scale
                         xaxis=dict(                        
-                            exponentformat='power',  # Use × 10ⁿ notation
+                            exponentformat='power',  # Use scientific notation
                             showexponent='all',
+                            dtick=1  # Show only powers of 10
                         )
                     )
-                    if isolateds_exist:
-                        x_min = max(x_min, 0.05)
-                    else:
-                        x_min = x_min+0.05
+                    x_min = max(x_min, 0.05)
                     x_min = np.log10(x_min)
                     x_max = np.log10(x_max)
                 elif scale_type == 'Linear(x)-Log(y)':
@@ -4481,8 +4558,9 @@ def update_plot(scale_type, bin_type, bin_width, y_max, x_range, show_mindeg, sh
                         yaxis_type="log",  # Set y-axis to log scale
                         yaxis_range=[0.05, np.log10(y_max)],
                         yaxis=dict(                        
-                            exponentformat='power',  # Use × 10ⁿ notation
+                            exponentformat='power',  # Use scientific notation
                             showexponent='all',
+                            dtick=1  # Show only powers of 10
                         )
                     )                   
 
@@ -4490,7 +4568,6 @@ def update_plot(scale_type, bin_type, bin_width, y_max, x_range, show_mindeg, sh
                     xaxis_range=[x_min, x_max]
                 )
 
-
                 # Add vertical dashed lines for min and max degree, if enabled
                 if show_mindeg and min_deg > 0:
                     fig.add_trace(
@@ -4499,6 +4576,7 @@ def update_plot(scale_type, bin_type, bin_width, y_max, x_range, show_mindeg, sh
                             y=[0, y_max],
                             mode='lines',
                             line=dict(color="darkgreen", width=2, dash="dash"),
+                            hovertemplate='Min non-zero degree is <b>%{x}</b><extra></extra>',
                             name = f'Min non-zero degree:{min_deg}'
                         )
                     )
@@ -4510,70 +4588,71 @@ def update_plot(scale_type, bin_type, bin_width, y_max, x_range, show_mindeg, sh
                             y=[0, y_max],
                             mode='lines',
                             line=dict(color="darkred", width=2, dash="dash"),
+                            hovertemplate='Max degree is <b>%{x}</b><extra></extra>',
                             name = f'Max degree:{max_deg}'
                         )
                     )
 
                 fig.show()
 
-                # End timing and display
-                end_time = time.time()
-                runtime = end_time - start_time
-
                 # Update statistics
                 with stats_output:
                     stats_output.clear_output(wait=True)
                     total_nodes = sum(counts)
                     total_edges = sum(d * c for d, c in zip(unique_degrees, counts)) // 2
-                    print(f"Number of nodes: {total_nodes:,}")
-                    print(f"Number of edges: {total_edges:,}")
-                    print(f"Number of isolated nodes: {zero_degree_count:,}")
-                    print(f"Average degree: {mean_deg:.2f}")
-                    print(f"Median degree: {median_deg:.2f}")
-                    print(f"Max degree: {max_deg:,}")
+
+                    stats_data = {
+                        'Metric': ['Nodes', 'Edges', 'Isolated nodes', 'Average degree', 'Median degree', 'Max degree'],
+                        'Value': [f"{total_nodes:,}", f"{total_edges:,}", f"{zero_degree_count:,}", f"{mean_deg:.2f}", f"{median_deg:.2f}", f"{max_deg:,}"]
+                    }
+                    
                     if min_deg > 0:
-                        print(f"Min non-zero degree: {min_deg:,}")            
+                        stats_data['Metric'].append('Min non-zero degree')
+                        stats_data['Value'].append(f"{min_deg:,}")
+                    
                     if bin_type != 'Raw':
-                        print(f"Number of bins: {n_bins:,}")
+                        stats_data['Metric'].append('Number of bins')
+                        stats_data['Value'].append(f"{n_bins:,}")
+                    
+                    df = pd.DataFrame(stats_data)
+                    display(df.style.hide(axis="index").set_table_styles([
+                        {'selector': 'th', 'props': [('background-color', '#f8f9fa'), ('font-weight', 'bold'), ('text-align', 'left')]},
+                        {'selector': 'td', 'props': [('padding', '8px'), ('text-align', 'left')]}
+                    ]))
         
             max_count = np.max(counts)
             total_nodes = sum(counts) 
             
             # Define a function to update bin_width_widget based on bin_type
             def update_bin_width_widget(change):
-                if change['new'] == 'Logarithmic':
-                    # For logarithmic binning, use a FloatSlider with smaller values
+                if change['new'] == 'Logarithmic':                    
                     bin_width_widget.value = 1.00
                     bin_width_widget.min = 1.00
-                    bin_width_widget.max = (np.log10(max_deg) - np.log10(min_deg+0.01)) / 2
+                    bin_width_widget.max = max(1, (np.log10(max_deg) - np.log10(min_deg)) / np.log10(5))
                     bin_width_widget.step = 0.01                    
                     bin_width_widget.readout_format = '.2f'
                     bin_width_widget.disabled = False
                 elif change['new'] == 'Raw':
-                    # For raw binning, disable the widget
                     bin_width_widget.value = 1
                     bin_width_widget.readout_format = 'd'
                     bin_width_widget.disabled = True
                 elif change['new'] == 'Linear':
-                    # For linear binning, use integer values
                     bin_width_widget.value = 1
                     bin_width_widget.min = 1
-                    bin_width_widget.max = (max_deg - min_deg) // 5
+                    bin_width_widget.max = max(1, (max_deg - min_deg) // 5)
                     bin_width_widget.step = 1                    
                     bin_width_widget.readout_format = 'd'
                     bin_width_widget.disabled = False
 
             def update_y_max_widget(change):                
                 factor = 1
-                if bin_widget.value == 'Raw':
-                    # For raw data, use the original max count
+                if bin_widget.value == 'Raw': # For raw data, use the original max count                    
                     factor = max_count
                 elif bin_widget.value == 'Linear':
                     factor = min (total_nodes, max_count * (bin_width_widget.value ** 0.85))
                 elif bin_widget.value == 'Logarithmic':
                     factor = min (total_nodes, max_count * (10 ** (bin_width_widget.value ** 0.25)))
-                
-                y_max_widget.max = y_max_widget.value = 1.1 * factor
+                y_max_text_widget.value = y_max_widget.max = y_max_widget.value = max (1.1 * factor, y_max_text_widget.value)
 
             # Observe changes to bin_width_widget and bin_widget
             bin_width_widget.observe(update_y_max_widget, names='value')
@@ -4585,6 +4664,9 @@ def update_y_max_widget(change):
             # Output widget for statistics
             stats_output = widgets.Output()
 
+            # Create y_max container with both widgets
+            y_max_container = widgets.VBox([y_max_widget, y_max_text_widget])
+            
             # Interactive plot
             interactive_plot = widgets.interactive(
                 update_plot,
@@ -4596,12 +4678,22 @@ def update_y_max_widget(change):
                 show_mindeg=show_mindeg_widget,
                 show_maxdeg=show_maxdeg_widget
             )
-            
-            # Vertical box layout
-            vbox = widgets.VBox([interactive_plot, stats_output])
-            
+
+            # Collect the control widgets: every child of interactive_plot except the last one (the output)
+            controls = list(interactive_plot.children[:-1])
+
+            # Replace y_max_widget with y_max_container
+            y_max_index = controls.index(y_max_widget)
+            controls[y_max_index] = y_max_container
+
+            # Create a new VBox with the modified controls and the output
+            modified_interactive = widgets.VBox(controls + [interactive_plot.children[-1]])
+
+            # Vertical box layout with stats
+            vbox = widgets.VBox([modified_interactive, stats_output])
+
             # Display the interactive plot and stats
             display(vbox)
-     
+
         except Exception as e:
             print(f"Error creating visualization: {e}")

From c79ebbe9dd3956ea263e43871380b54137f72615 Mon Sep 17 00:00:00 2001
From: Erdem Sariyuce <sariyuce@amazon.com>
Date: Fri, 1 Aug 2025 12:58:33 -0400
Subject: [PATCH 06/11] Bug fix to prevent log10(0) errors

---
 src/graph_notebook/magics/graph_magic.py | 20 +++++++++-----------
 1 file changed, 9 insertions(+), 11 deletions(-)

diff --git a/src/graph_notebook/magics/graph_magic.py b/src/graph_notebook/magics/graph_magic.py
index b1e4e190..f4c47d44 100644
--- a/src/graph_notebook/magics/graph_magic.py
+++ b/src/graph_notebook/magics/graph_magic.py
@@ -4150,7 +4150,7 @@ def suggest_degree_distribution_params (self, expected_nbins, n, min_nonzero_cou
         if degree_spread >= 100 and n > 1000:
             x_scale = 'Log'
             bin_type = 'Logarithmic'
-            initial_bin_width = (np.log10(max_deg) - np.log10(min_deg)) / np.log10(expected_nbins)
+            initial_bin_width = (np.log10(max_deg+1) - np.log10(min_deg+1)) / np.log10(expected_nbins)
         else:
             x_scale = 'Linear'
             bin_type = 'Linear'
@@ -4308,7 +4308,7 @@ def handle_text_change(change):
                 if params['bin_type'] == 'Logarithmic':
                     bin_width_widget.step = 0.01
                     bin_width_widget.readout_format = '.2f'
-                    bin_width_widget.max = max (1, (np.log10(max_deg) - np.log10(min_nonzero_deg)) / np.log10(5))
+                    bin_width_widget.max = max (1, (np.log10(max_deg+1) - np.log10(min_nonzero_deg+1)) / np.log10(5))
 
             # Adjust the initial y-max limit
             total_nodes = sum(counts)
@@ -4525,7 +4525,7 @@ def update_plot(scale_type, bin_type, bin_width, y_max, x_range, show_mindeg, sh
                     fig.update_layout(
                         xaxis_type="log",  # Set x-axis to log scale
                         yaxis_type="log",  # Set y-axis to log scale
-                        yaxis_range=[0.05, np.log10(y_max)],
+                        yaxis_range=[0.05, np.log10(y_max+1)],
                         yaxis=dict(                        
                             exponentformat='power',  # Use scientific notation
                             showexponent='all',
@@ -4538,9 +4538,8 @@ def update_plot(scale_type, bin_type, bin_width, y_max, x_range, show_mindeg, sh
                         )
                     )
                     
-                    x_min = max(x_min, 0.05)
-                    x_min = np.log10(x_min)
-                    x_max = np.log10(x_max)
+                    x_min = np.log10(x_min+1)
+                    x_max = np.log10(x_max+1)
                 elif scale_type == 'Log(x)-Linear(y)':
                     fig.update_layout(
                         xaxis_type="log",  # Set x-axis to log scale
@@ -4550,13 +4549,12 @@ def update_plot(scale_type, bin_type, bin_width, y_max, x_range, show_mindeg, sh
                             dtick=1  # Show only powers of 10
                         )
                     )
-                    x_min = max(x_min, 0.05)
-                    x_min = np.log10(x_min)
-                    x_max = np.log10(x_max)
+                    x_min = np.log10(x_min+1)
+                    x_max = np.log10(x_max+1)
                 elif scale_type == 'Linear(x)-Log(y)':
                     fig.update_layout(
                         yaxis_type="log",  # Set y-axis to log scale
-                        yaxis_range=[0.05, np.log10(y_max)],
+                        yaxis_range=[0.05, np.log10(y_max+1)],
                         yaxis=dict(                        
                             exponentformat='power',  # Use scientific notation
                             showexponent='all',
@@ -4628,7 +4626,7 @@ def update_bin_width_widget(change):
                 if change['new'] == 'Logarithmic':                    
                     bin_width_widget.value = 1.00
                     bin_width_widget.min = 1.00
-                    bin_width_widget.max = max(1, (np.log10(max_deg) - np.log10(min_deg)) / np.log10(5))
+                    bin_width_widget.max = max(1, (np.log10(max_deg+1) - np.log10(min_deg+1)) / np.log10(5))
                     bin_width_widget.step = 0.01                    
                     bin_width_widget.readout_format = '.2f'
                     bin_width_widget.disabled = False

From 393431abc9112c01280b41aae82ef1a66b81e8b8 Mon Sep 17 00:00:00 2001
From: Erdem Sariyuce <sariyuce@amazon.com>
Date: Mon, 18 Aug 2025 17:00:01 -0400
Subject: [PATCH 07/11] fixed .gitignore

---
 .gitignore | 102 -----------------------------------------------------
 1 file changed, 102 deletions(-)

diff --git a/.gitignore b/.gitignore
index 38d2af96..eeee3ce5 100644
--- a/.gitignore
+++ b/.gitignore
@@ -31,105 +31,3 @@ src/graph_notebook/widgets/package-lock.json
 blazegraph.jnl
 rules.log
 *.env
-notebook/destination/dir/About-the-Neptune-Notebook.ipynb
-notebook/destination/dir/Overview.ipynb
-notebook/destination/dir/Untitled.ipynb
-notebook/destination/dir/Untitled1.ipynb
-notebook/destination/dir/.ipynb_checkpoints/Untitled-checkpoint.ipynb
-notebook/destination/dir/.ipynb_checkpoints/Untitled1-checkpoint.ipynb
-notebook/destination/dir/01-Neptune-Database/01-Getting-Started/01-About-the-Neptune-Notebook.ipynb
-notebook/destination/dir/01-Neptune-Database/01-Getting-Started/02-Using-Gremlin-to-Access-the-Graph.ipynb
-notebook/destination/dir/01-Neptune-Database/01-Getting-Started/03-Using-RDF-and-SPARQL-to-Access-the-Graph.ipynb
-notebook/destination/dir/01-Neptune-Database/01-Getting-Started/04-Social-Network-Recommendations-with-Gremlin.ipynb
-notebook/destination/dir/01-Neptune-Database/01-Getting-Started/05-Dining-By-Friends-in-Amazon-Neptune.ipynb
-notebook/destination/dir/01-Neptune-Database/02-Visualization/Air-Routes-Gremlin.ipynb
-notebook/destination/dir/01-Neptune-Database/02-Visualization/Air-Routes-openCypher.ipynb
-notebook/destination/dir/01-Neptune-Database/02-Visualization/Air-Routes-SPARQL.ipynb
-notebook/destination/dir/01-Neptune-Database/02-Visualization/Blog Workbench Visualization.ipynb
-notebook/destination/dir/01-Neptune-Database/02-Visualization/EPL-Gremlin.ipynb
-notebook/destination/dir/01-Neptune-Database/02-Visualization/EPL-openCypher.ipynb
-notebook/destination/dir/01-Neptune-Database/02-Visualization/EPL-SPARQL.ipynb
-notebook/destination/dir/01-Neptune-Database/02-Visualization/Grouping-and-Appearance-Customization-Gremlin.ipynb
-notebook/destination/dir/01-Neptune-Database/03-Sample-Applications/00-Sample-Applications-Overview.ipynb
-notebook/destination/dir/01-Neptune-Database/03-Sample-Applications/README.md
-notebook/destination/dir/01-Neptune-Database/03-Sample-Applications/01-Fraud-Graphs/01-Building-a-Fraud-Graph-Application.ipynb
-notebook/destination/dir/01-Neptune-Database/03-Sample-Applications/02-Knowledge-Graphs/Building-a-Knowledge-Graph-Application-Gremlin.ipynb
-notebook/destination/dir/01-Neptune-Database/03-Sample-Applications/02-Knowledge-Graphs/Building-a-Knowledge-Graph-Application-openCypher.ipynb
-notebook/destination/dir/01-Neptune-Database/03-Sample-Applications/03-Identity-Graphs/01-Building-an-Identity-Graph-Application.ipynb
-notebook/destination/dir/01-Neptune-Database/03-Sample-Applications/03-Identity-Graphs/02-Data-Modeling-for-Identity-Graphs.ipynb
-notebook/destination/dir/01-Neptune-Database/03-Sample-Applications/03-Identity-Graphs/03-Jumpstart-Identity-Graphs-Using-Canonical-Model-and-ETL/03-Jumpstart-Identity-Graphs-Using-Canonical-Model-and-ETL.ipynb
-notebook/destination/dir/01-Neptune-Database/03-Sample-Applications/03-Identity-Graphs/03-Jumpstart-Identity-Graphs-Using-Canonical-Model-and-ETL/glue_utils.py
-notebook/destination/dir/01-Neptune-Database/03-Sample-Applications/03-Identity-Graphs/03-Jumpstart-Identity-Graphs-Using-Canonical-Model-and-ETL/script/neptune-glue-demographics.py
-notebook/destination/dir/01-Neptune-Database/03-Sample-Applications/03-Identity-Graphs/03-Jumpstart-Identity-Graphs-Using-Canonical-Model-and-ETL/script/neptune-glue-telemetry.py
-notebook/destination/dir/01-Neptune-Database/03-Sample-Applications/03-Identity-Graphs/03-Jumpstart-Identity-Graphs-Using-Canonical-Model-and-ETL/script/neptune-glue-transactions.py
-notebook/destination/dir/01-Neptune-Database/03-Sample-Applications/04-Security-Graphs/01-Building-a-Security-Graph-Application-with-Gremlin.ipynb
-notebook/destination/dir/01-Neptune-Database/03-Sample-Applications/04-Security-Graphs/01-Building-a-Security-Graph-Application-with-openCypher.ipynb
-notebook/destination/dir/01-Neptune-Database/03-Sample-Applications/05-Healthcare-and-Life-Sciences-Graphs/01-Modeling-Molecular-Structures-as-Graph-Data-Gremlin.ipynb
-notebook/destination/dir/01-Neptune-Database/03-Sample-Applications/06-Data-Science-Samples/01-Identifying-Fraud-Rings-Using-Social-Network-Analytics.ipynb
-notebook/destination/dir/01-Neptune-Database/03-Sample-Applications/06-Data-Science-Samples/02-Identifying-1st-Person-Synthetic-Identity-Fraud-Using-Graph-Similarity.ipynb
-notebook/destination/dir/01-Neptune-Database/03-Sample-Applications/06-Data-Science-Samples/03-Logistics-Analysis-using-a-Transportation-Network.ipynb
-notebook/destination/dir/01-Neptune-Database/03-Sample-Applications/07-Games-Industry-Graphs/01-Building-a-Social-Network-for-Games-Gremlin.ipynb
-notebook/destination/dir/02-Neptune-Analytics/01-Getting-Started/01-Getting-Started-With-Neptune-Analytics.ipynb
-notebook/destination/dir/02-Neptune-Analytics/02-Graph-Algorithms/00-Amazon-Neptune-Analytics-Algorithm-Support.pdf
-notebook/destination/dir/02-Neptune-Analytics/02-Graph-Algorithms/01-Getting-Started-With-Graph-Algorithms.ipynb
-notebook/destination/dir/02-Neptune-Analytics/02-Graph-Algorithms/02-Path-Finding-Algorithms.ipynb
-notebook/destination/dir/02-Neptune-Analytics/02-Graph-Algorithms/03-Centrality-Algorithms.ipynb
-notebook/destination/dir/02-Neptune-Analytics/02-Graph-Algorithms/04-Community-Detection-Algorithms.ipynb
-notebook/destination/dir/02-Neptune-Analytics/02-Graph-Algorithms/05-Similarity-Algorithms.ipynb
-notebook/destination/dir/02-Neptune-Analytics/02-Graph-Algorithms/06-Vector-Similarity-Algorithms.ipynb
-notebook/destination/dir/02-Neptune-Analytics/02-Graph-Algorithms/.ipynb_checkpoints/03-Centrality-Algorithms-checkpoint.ipynb
-notebook/destination/dir/02-Neptune-Analytics/03-Sample-Use-Cases/Overview.ipynb
-notebook/destination/dir/02-Neptune-Analytics/03-Sample-Use-Cases/01-FinTech/01-Fraud-Ring-Identifcation.ipynb
-notebook/destination/dir/02-Neptune-Analytics/03-Sample-Use-Cases/02-Investment-Analysis/01-EDGAR-Competitor-Analysis-using-Knowledge-Graph-Graph-Algorithms-and-Vector-Search.ipynb
-notebook/destination/dir/02-Neptune-Analytics/03-Sample-Use-Cases/03-Software-Bill-Of-Materials/00-Intro-to-Software-Bill-Of-Materials.ipynb
-notebook/destination/dir/02-Neptune-Analytics/03-Sample-Use-Cases/03-Software-Bill-Of-Materials/01-SBOM-Dependency-Analysis.ipynb
-notebook/destination/dir/02-Neptune-Analytics/03-Sample-Use-Cases/03-Software-Bill-Of-Materials/02-SBOM-Vulnerability-Analysis.ipynb
-notebook/destination/dir/02-Neptune-Analytics/03-Sample-Use-Cases/03-Software-Bill-Of-Materials/nodestream_template.yaml
-notebook/destination/dir/02-Neptune-Analytics/03-Sample-Use-Cases/03-Software-Bill-Of-Materials/example_sboms/01/aws-sdk-pandas_aws_de5d1610d6d4ea3be44a01ab3f09b64e291a4ab7.json
-notebook/destination/dir/02-Neptune-Analytics/03-Sample-Use-Cases/03-Software-Bill-Of-Materials/example_sboms/01/boto3_boto_6bbdf83ee00b749587f0fe54778fbec5411147b5.json
-notebook/destination/dir/02-Neptune-Analytics/03-Sample-Use-Cases/03-Software-Bill-Of-Materials/example_sboms/01/graph-explorer_aws_39eed2c8bae4afc1b38fa7975c720461a7c7c3a6.json
-notebook/destination/dir/02-Neptune-Analytics/03-Sample-Use-Cases/03-Software-Bill-Of-Materials/example_sboms/01/graph-notebook_aws_bb96dd8d0d9ef9d0e9060f8c5e26a042a3db40c4.json
-notebook/destination/dir/02-Neptune-Analytics/03-Sample-Use-Cases/03-Software-Bill-Of-Materials/example_sboms/02/aws-cli-2-0-6.json
-notebook/destination/dir/02-Neptune-Analytics/03-Sample-Use-Cases/03-Software-Bill-Of-Materials/example_sboms/02/gremlin-console-3-7-1_cydx.json
-notebook/destination/dir/02-Neptune-Analytics/03-Sample-Use-Cases/03-Software-Bill-Of-Materials/example_sboms/02/gremlin-server-3-7-1-cydx.json
-notebook/destination/dir/02-Neptune-Analytics/03-Sample-Use-Cases/03-Software-Bill-Of-Materials/sbom_code/sbom_helper.py
-notebook/destination/dir/02-Neptune-Analytics/04-OpenCypher-Over-RDF/01-Air-Routes.ipynb
-notebook/destination/dir/02-Neptune-Analytics/04-OpenCypher-Over-RDF/02-Air-Routes-GeoNames.ipynb
-notebook/destination/dir/02-Neptune-Analytics/04-OpenCypher-Over-RDF/Air-Routes-Ontology-Diagram.png
-notebook/destination/dir/03-Neptune-ML/neptune_ml_utils.py
-notebook/destination/dir/03-Neptune-ML/neptune-ml-pretrained-model-config.json
-notebook/destination/dir/03-Neptune-ML/01-Gremlin/01-Getting-Started-with-Neptune-ML-Gremlin.ipynb
-notebook/destination/dir/03-Neptune-ML/01-Gremlin/02-Introduction-to-Node-Classification-Gremlin.ipynb
-notebook/destination/dir/03-Neptune-ML/01-Gremlin/03-Introduction-to-Node-Regression-Gremlin.ipynb
-notebook/destination/dir/03-Neptune-ML/01-Gremlin/04-Introduction-to-Link-Prediction-Gremlin.ipynb
-notebook/destination/dir/03-Neptune-ML/01-Gremlin/05-Introduction-to-Edge-Classification-Gremlin.ipynb
-notebook/destination/dir/03-Neptune-ML/01-Gremlin/06-Introduction-to-Edge-Regression-Gremlin.ipynb
-notebook/destination/dir/03-Neptune-ML/02-SPARQL/neptune_ml_sparql_utils.py
-notebook/destination/dir/03-Neptune-ML/02-SPARQL/Neptune-ML-00-Getting-Started-with-Neptune-ML-SPARQL.ipynb
-notebook/destination/dir/03-Neptune-ML/02-SPARQL/Neptune-ML-01-Introduction-to-Object-Classification-SPARQL.ipynb
-notebook/destination/dir/03-Neptune-ML/02-SPARQL/Neptune-ML-02-Introduction-to-Object-Regression-SPARQL.ipynb
-notebook/destination/dir/03-Neptune-ML/02-SPARQL/Neptune-ML-03-Introduction-to-Link-Prediction-SPARQL.ipynb
-notebook/destination/dir/03-Neptune-ML/02-SPARQL/neptune-ml-pretrained-rdf-model-config.json
-notebook/destination/dir/03-Neptune-ML/03-Sample-Applications/02-Job-Recommendation-Text-Encoding.ipynb
-notebook/destination/dir/03-Neptune-ML/03-Sample-Applications/03-Real-Time-Fraud-Detection-Using-Inductive-Inference.ipynb
-notebook/destination/dir/03-Neptune-ML/03-Sample-Applications/01-People-Analytics/People-Analytics-using-Neptune-ML.ipynb
-notebook/destination/dir/03-Neptune-ML/03-Sample-Applications/01-People-Analytics/WA_Fn-UseC_-HR-Employee-Attrition.csv
-notebook/destination/dir/03-Neptune-ML/03-Sample-Applications/04-Telco-Networks/1a-Use-case.ipynb
-notebook/destination/dir/03-Neptune-ML/03-Sample-Applications/04-Telco-Networks/1b-Graph_init.ipynb
-notebook/destination/dir/03-Neptune-ML/03-Sample-Applications/04-Telco-Networks/2a-GraphQueryGremlin.ipynb
-notebook/destination/dir/03-Neptune-ML/03-Sample-Applications/04-Telco-Networks/2b-GraphQueryLLM.ipynb
-notebook/destination/dir/03-Neptune-ML/03-Sample-Applications/04-Telco-Networks/3a-TransductiveMode-CellPrediction.ipynb
-notebook/destination/dir/03-Neptune-ML/03-Sample-Applications/04-Telco-Networks/3b-InductiveModeCell-Prediction.ipynb
-notebook/destination/dir/03-Neptune-ML/03-Sample-Applications/04-Telco-Networks/neptune_ml_utils.py
-notebook/destination/dir/03-Neptune-ML/03-Sample-Applications/04-Telco-Networks/Transform2Neptune.py
-notebook/destination/dir/04-Language-Tutorials/01-Gremlin/01-Basic-Read-Queries.ipynb
-notebook/destination/dir/04-Language-Tutorials/01-Gremlin/02-Loops-Repeats.ipynb
-notebook/destination/dir/04-Language-Tutorials/01-Gremlin/03-Ordering-Functions-Grouping.ipynb
-notebook/destination/dir/04-Language-Tutorials/01-Gremlin/04-Creating-Updating-Deleting-Queries.ipynb
-notebook/destination/dir/04-Language-Tutorials/01-Gremlin/Gremlin-Exercises-Answer-Sheet.ipynb
-notebook/destination/dir/04-Language-Tutorials/02-openCypher/01-Basic-Read-Queries.ipynb
-notebook/destination/dir/04-Language-Tutorials/02-openCypher/02-Variable-Length-Paths.ipynb
-notebook/destination/dir/04-Language-Tutorials/02-openCypher/03-Ordering-Functions-Grouping.ipynb
-notebook/destination/dir/04-Language-Tutorials/02-openCypher/04-Creating-Updating-Delete-Queries.ipynb
-notebook/destination/dir/04-Language-Tutorials/02-openCypher/openCypher-Exercises-Answer-Key.ipynb
-notebook/destination/dir/04-Language-Tutorials/03-SPARQL/01-SPARQL-Basics.ipynb

From 739fe3fa2407f48452711d9b4fbd320342f3c3fb Mon Sep 17 00:00:00 2001
From: Erdem Sariyuce <sariyuce@amazon.com>
Date: Fri, 22 Aug 2025 14:32:29 -0400
Subject: [PATCH 08/11] edited changelog

---
 ChangeLog.md | 1 -
 1 file changed, 1 deletion(-)

diff --git a/ChangeLog.md b/ChangeLog.md
index b5da410e..49e3435a 100644
--- a/ChangeLog.md
+++ b/ChangeLog.md
@@ -3,7 +3,6 @@
 Starting with v1.31.6, this file will contain a record of major features and updates made in each release of graph-notebook.
 
 ## Upcoming
-- Added %degreeDistribution magic command ([PR](https://github.com/aws/graph-notebook/pull/749)) TODO: add to the specific release below when it's released
 
 ## Release 5.0.1 (May 19, 2025)
 - Locked numba dependency to 0.60.0 to avoid numpy conflict ([Link to PR](https://github.com/aws/graph-notebook/pull/735))

From 85641ab96dad2c9f1a52d92183358798f0b86c42 Mon Sep 17 00:00:00 2001
From: Erdem Sariyuce <sariyuce@amazon.com>
Date: Fri, 22 Aug 2025 14:38:40 -0400
Subject: [PATCH 09/11] updated ChangeLog

---
 ChangeLog.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/ChangeLog.md b/ChangeLog.md
index 49e3435a..b5da410e 100644
--- a/ChangeLog.md
+++ b/ChangeLog.md
@@ -3,6 +3,7 @@
 Starting with v1.31.6, this file will contain a record of major features and updates made in each release of graph-notebook.
 
 ## Upcoming
+- Added %degreeDistribution magic command ([Link to PR](https://github.com/aws/graph-notebook/pull/749)) <!-- TODO: move this entry under the specific release below once it is released -->
 
 ## Release 5.0.1 (May 19, 2025)
 - Locked numba dependency to 0.60.0 to avoid numpy conflict ([Link to PR](https://github.com/aws/graph-notebook/pull/735))

From c7ed4b2aa133d2133a281784c5795a3fae8a5c1a Mon Sep 17 00:00:00 2001
From: Erdem Sariyuce <sariyuce@amazon.com>
Date: Fri, 22 Aug 2025 15:26:45 -0400
Subject: [PATCH 10/11] %degreeDistribution documentation

---
 additional-databases/neptune/README.md | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/additional-databases/neptune/README.md b/additional-databases/neptune/README.md
index bf6e138d..6e5bdad3 100644
--- a/additional-databases/neptune/README.md
+++ b/additional-databases/neptune/README.md
@@ -80,3 +80,24 @@ https://aws.amazon.com/amazon-linux-2/
 <b>DNS flush: </b> after setting your hosts file, do a DNS flush to make sure your changes are reflected in the client-side DNS.
 
 <b> HTTPS: </b> Check that when you connect to Neptune via a browser that you're using `https://` in the URL.
+
+## Magic commands specific to Neptune Analytics
+
+`%degreeDistribution` - Creates an interactive histogram visualizing the degree distribution of vertices in a graph. The histogram displays the number of vertices (y-axis) that have a specific degree (x-axis).
+
+**Parameters**
+-  **traversalDirection**: Specifies which edge direction is counted toward each vertex's degree
+	-  `both` (default): Total degree
+	-  `inbound`: In-degree
+	-  `outbound`: Out-degree
+-  **vertexLabels**: Filter vertices by label (default: empty list)
+-  **edgeLabels**: Filter edges by label (default: empty list)
+
+**Features**
+- Parameters can be set via command-line arguments or dropdown widgets
+- Dropdowns are automatically populated with existing vertex/edge labels
+- Interactive controls for adjusting:
+	- Bin type and width
+	- Axis scales and limits
+
+For detailed examples and output formats, see the [documentation](https://docs.aws.amazon.com/neptune-analytics/latest/userguide/degreedistribution.html).

From cbea7f926b5ba199b32f2fb6c942bc33ba080ba6 Mon Sep 17 00:00:00 2001
From: Erdem Sariyuce <sariyuce@amazon.com>
Date: Fri, 22 Aug 2025 15:41:51 -0400
Subject: [PATCH 11/11] typos

---
 additional-databases/neptune/README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/additional-databases/neptune/README.md b/additional-databases/neptune/README.md
index 6e5bdad3..a3986840 100644
--- a/additional-databases/neptune/README.md
+++ b/additional-databases/neptune/README.md
@@ -100,4 +100,4 @@ https://aws.amazon.com/amazon-linux-2/
 	- Bin type and width
 	- Axis scales and limits
 
-For detailed examples and output formats, see the [documentation](https://docs.aws.amazon.com/neptune-analytics/latest/userguide/degreedistribution.html).
+For detailed examples and output formats, see the [documentation](https://docs.aws.amazon.com/neptune-analytics/latest/userguide/degree-distribution.html).