From 2de570238b8d11d9468ca9c011b23f96fc8cea7a Mon Sep 17 00:00:00 2001 From: cr1speecr3m3 <166568593+cr1speecr3m3@users.noreply.github.com> Date: Thu, 31 Jul 2025 17:48:21 -0400 Subject: [PATCH 01/11] Update spatialtool.py --- knime_extension/src/nodes/spatialtool.py | 178 +++++++++++++++++++++++ 1 file changed, 178 insertions(+) diff --git a/knime_extension/src/nodes/spatialtool.py b/knime_extension/src/nodes/spatialtool.py index fb7e8413..eca92071 100644 --- a/knime_extension/src/nodes/spatialtool.py +++ b/knime_extension/src/nodes/spatialtool.py @@ -1692,3 +1692,181 @@ def execute(self, exec_context: knext.ExecutionContext, input_table1, input_tabl # append region id column gdf[self._COL_ID] = range(1, (gdf.shape[0] + 1)) return knut.to_table(gdf, exec_context) + + + +############################################ +# Mapclassify +############################################ + +@knext.node( + name="Mapclassifier", + node_type=knext.NodeType.MANIPULATOR, + icon_path=__NODE_ICON_PATH + "Mapclassifier.png", + category=__category, + after="", +) +@knext.input_table( + name="Input Table", + description="Input table with targeted columns for classification.", +) +@knext.output_table( + name="Output Table", + description="Output table with classified result.", +) +class Mapclassifier: + class ClassModes(knext.EnumParameterOptions): + BOXPLOT = ( # mapclassify.BoxPlot(y[, hinge]) + "Boxplot", + """PURPOSE: Creates class breaks based on the statistical properties of a box plot distribution. + HOW IT WORKS: Uses the quartiles (Q1, median, Q3) and interquartile range (IQR) to identify outliers and create meaningful breaks. Typically creates 6 classes: lower outlier, < Q1, Q1-median, median-Q3, > Q3, and upper outlier. + BEST FOR: Identifying and highlighting outliers in your data while maintaining interpretable breaks based on statistical distribution.""", + ) + EQUALINTERVAL = ( # mapclassify.EqualInterval(y[, k]) + "EqualInterval", + """PURPOSE: Divides the data range into equal-sized intervals. + HOW IT WORKS: Takes the difference between maximum and minimum values, then divides by the number of desired classes to create intervals of equal width. + BEST FOR: Data that is relatively evenly distributed and when you want consistent interval sizes for easy interpretation.""", + ) + FISHERJ = ( # mapclassify.FisherJenks(y[, k]) + "FisherJenks", + """PURPOSE: Finds optimal class breaks that minimize within-class variance while maximizing between-class variance. + HOW IT WORKS: Uses dynamic programming to find the optimal groupings that create the most homogeneous classes possible. + BEST FOR: Most types of data as it adapts to the natural clustering in your dataset. Considered one of the most statistically robust methods.""", + ) + FISHERJ_SAMPLED = ( # mapclassify.FisherJenksSampled(y[, k, pct, ...]) + "FisherJanksSampled", + """PURPOSE: Same optimization as FisherJenks but uses a random sample for computational efficiency. + HOW IT WORKS: Applies the Fisher-Jenks algorithm to a subset of the data, making it faster for large datasets. + BEST FOR: Large datasets where standard Fisher-Jenks would be computationally expensive but you still want optimal breaks.""", + ) + GREEDY = ( + "Greedy", + """PURPOSE: Colors geographic areas using graph coloring strategies to ensure adjacent areas have different colors. + HOW IT WORKS: Implements topological coloring algorithms (various strategies available) to minimize color conflicts between neighboring polygons. + BEST FOR: Categorical data or when you need to ensure visual distinction between adjacent geographic units regardless of data values.""" + ) + HEADT_BREAKS = ( + "HeadTailBreaks", + """PURPOSE: Recursively divides data around the mean, designed specifically for heavy-tailed distributions. + HOW IT WORKS: Splits data at the arithmetic mean, then recursively applies the same process to the "head" (above-mean values) until stopping criteria are met. + BEST FOR: Highly skewed data with heavy tails, such as city populations, income distributions, or social media network data.""" + ) + JENKS_CAS = ( + "JenksCaspall", + """PURPOSE: An iterative optimization method that moves class boundaries to minimize within-class variance. + HOW IT WORKS: Starts with initial class breaks and iteratively moves boundaries to improve the goodness of variance fit. + BEST FOR: When you want optimized breaks similar to Fisher-Jenks but prefer an iterative approach that can be stopped at any point.""" + + ) + JENKS_CASFORCED = ( + "JenksCaspallForced", + """PURPOSE: Similar to JenksCaspall but allows forcing specific values to be class boundaries. + HOW IT WORKS: Performs the iterative optimization while ensuring certain predetermined values remain as class breaks. + BEST FOR: When you have meaningful breakpoints (like 0, poverty line, etc.) that must be preserved while optimizing the remaining breaks.""" + ) + JENKS_CASSAMPLED = ( + "JenksCaspallSampled", + """PURPOSE: Applies JenksCaspall optimization to a random sample of the data. + HOW IT WORKS: Uses sampling to make the iterative process computationally feasible for large datasets. + BEST FOR: Large datasets where full JenksCaspall would be too slow but you want iteratively optimized breaks.""" + ) + MAXP = ( + "MaxP", + """PURPOSE: Creates the maximum number of classes possible while maintaining a minimum population threshold per class. + HOW IT WORKS: Aggregates spatial units to ensure each class meets minimum size requirements while maximizing the number of classes. + BEST FOR: Spatial analysis where you need to balance detail (number of classes) with statistical reliability (minimum sample sizes).""" + ) + MAXIMUMBREAKS = ( + "MaximumBreaks", + """PURPOSE: Places class breaks at the largest gaps in the sorted data values. + HOW IT WORKS: Identifies the biggest jumps between consecutive values and uses these as natural breaking points. + BEST FOR: Data with clear natural clusters or gaps, where you want breaks at the most obvious discontinuities.""" + ) + PERCENTILES = ( + "Percentiles", + """PURPOSE: Creates class breaks at specified percentile values. + HOW IT WORKS: Divides data based on percentile ranks (e.g., quintiles at 20th, 40th, 60th, 80th percentiles). + BEST FOR: When you want equal numbers of observations in each class, or when working with data where relative position matters more than absolute values.""" + ) + PRETTYBREAKS = ( + "PrettyBreaks", + """PURPOSE: Creates "nice" round numbers as class breaks for improved readability. + HOW IT WORKS: Chooses aesthetically pleasing break points (round numbers) that are close to optimal statistical breaks. + BEST FOR: Maps intended for general audiences where readability and round numbers are more important than statistical optimization.""" + ) + QUANTILES = ( + "Quantiles", + """PURPOSE: Divides data so each class contains an equal number of observations. + HOW IT WORKS: Sorts data and creates breaks at quantile boundaries to ensure equal sample sizes per class. + BEST FOR: Comparing relative rankings across areas, or when you want to ensure balanced representation across all classes.""" + ) + STDMEAN = ( + "StdMean", + """PURPOSE: Creates classes based on standard deviations from the mean. + HOW IT WORKS: Sets breaks at intervals of standard deviations above and below the mean (e.g., mean±1σ, mean±2σ). + BEST FOR: Normally distributed data where you want to highlight areas that are statistically typical vs. unusual relative to the average.""" + ) + USERDEFINED = ( + "UserDefined", + """PURPOSE: Allows manual specification of class break values. + HOW IT WORKS: Uses exactly the break points you provide, giving complete control over classification. + BEST FOR: When you have domain knowledge about meaningful thresholds, need to match existing standards, or want to compare across multiple maps with consistent breaks.""" + ) + + @classmethod + def get_default(cls): + return cls.FISHERJENKS + + + class_col = knext.ColumnFilterParameter( + "Targeted columns", + """The zoom level of the grid from 0 to 15 (default value is 8). The bigger the zoom level, the smaller the + hexagon. If the zoom level is too small, the hexagon might be too big to fit in the input polygon which will + result in an error. A very small zoom level might result in a very large output table even for smaller + input polygons. + For more details about the zoom levels refer to + [Tables of Cell Statistics Across Resolutions.](https://h3geo.org/docs/core-library/restable/) + """, + default_value="", + ) + + n_cluster = knext.IntParameter( + "Number of classification", + """The zoom level of the grid from 0 to 15 (default value is 8). The bigger the zoom level, the smaller the + hexagon. If the zoom level is too small, the hexagon might be too big to fit in the input polygon which will + result in an error. A very small zoom level might result in a very large output table even for smaller + input polygons. + For more details about the zoom levels refer to + [Tables of Cell Statistics Across Resolutions.](https://h3geo.org/docs/core-library/restable/) + """, + default_value=5, + min_value=2, + ) + + classifier_param = knext.EnumParameter( + label="Classifier Selection", + description="Select the type of coffee you like to drink.", + default_value=ClassModes.get_default().name, + enum=ClassModes, + ) + + + def configure(self, configure_context, input_schema): + + return None + + def execute(self, exec_context: knext.ExecutionContext, input_table): + import mapclassify as mc + + k=self.n_cluster + gdf = input_table.to_pandas() + + if self.classifier_param=="EqualInterval": + y = gdf[self.class_col] + grid = mc.EqualInterval(y, k) + elif self.classifier_param=="FisherJenks": + y = gdf[self.class_col] + grid = mc.FisherJenks(y, k) + + return knut.to_table(grid, exec_context) \ No newline at end of file From 8f5160fd4caf667fd72ea1cdfb3952c12050d12d Mon Sep 17 00:00:00 2001 From: cr1speecr3m3 <166568593+cr1speecr3m3@users.noreply.github.com> Date: Tue, 5 Aug 2025 15:46:56 -0400 Subject: [PATCH 02/11] Update spatialtool.py --- knime_extension/src/nodes/spatialtool.py | 107 ++++++++++++++++++----- 1 file changed, 86 insertions(+), 21 deletions(-) diff --git a/knime_extension/src/nodes/spatialtool.py b/knime_extension/src/nodes/spatialtool.py index eca92071..a6afaa8b 100644 --- a/knime_extension/src/nodes/spatialtool.py +++ b/knime_extension/src/nodes/spatialtool.py @@ -1699,6 +1699,7 @@ def execute(self, exec_context: knext.ExecutionContext, input_table1, input_tabl # Mapclassify ############################################ + @knext.node( name="Mapclassifier", node_type=knext.NodeType.MANIPULATOR, @@ -1740,74 +1741,78 @@ class ClassModes(knext.EnumParameterOptions): HOW IT WORKS: Applies the Fisher-Jenks algorithm to a subset of the data, making it faster for large datasets. BEST FOR: Large datasets where standard Fisher-Jenks would be computationally expensive but you still want optimal breaks.""", ) - GREEDY = ( + GREEDY = ( # mapclassify.greedy(gdf[, strategy, balance, ...]) "Greedy", """PURPOSE: Colors geographic areas using graph coloring strategies to ensure adjacent areas have different colors. HOW IT WORKS: Implements topological coloring algorithms (various strategies available) to minimize color conflicts between neighboring polygons. BEST FOR: Categorical data or when you need to ensure visual distinction between adjacent geographic units regardless of data values.""" ) - HEADT_BREAKS = ( + HEADT_BREAKS = ( # mapclassify.HeadTailBreaks(y) "HeadTailBreaks", """PURPOSE: Recursively divides data around the mean, designed specifically for heavy-tailed distributions. HOW IT WORKS: Splits data at the arithmetic mean, then recursively applies the same process to the "head" (above-mean values) until stopping criteria are met. BEST FOR: Highly skewed data with heavy tails, such as city populations, income distributions, or social media network data.""" ) - JENKS_CAS = ( + JENKS_CAS = ( # mapclassify.JenksCaspall(y[, k]) "JenksCaspall", """PURPOSE: An iterative optimization method that moves class boundaries to minimize within-class variance. HOW IT WORKS: Starts with initial class breaks and iteratively moves boundaries to improve the goodness of variance fit. BEST FOR: When you want optimized breaks similar to Fisher-Jenks but prefer an iterative approach that can be stopped at any point.""" ) - JENKS_CASFORCED = ( + JENKS_CASFORCED = ( # mapclassify.JenksCaspallForced(y[, k]) "JenksCaspallForced", """PURPOSE: Similar to JenksCaspall but allows forcing specific values to be class boundaries. HOW IT WORKS: Performs the iterative optimization while ensuring certain predetermined values remain as class breaks. BEST FOR: When you have meaningful breakpoints (like 0, poverty line, etc.) that must be preserved while optimizing the remaining breaks.""" ) - JENKS_CASSAMPLED = ( + JENKS_CASSAMPLED = ( # mapclassify.JenksCaspallSampled(y[, k, pct]) "JenksCaspallSampled", """PURPOSE: Applies JenksCaspall optimization to a random sample of the data. HOW IT WORKS: Uses sampling to make the iterative process computationally feasible for large datasets. BEST FOR: Large datasets where full JenksCaspall would be too slow but you want iteratively optimized breaks.""" ) - MAXP = ( + MAXP = ( # mapclassify.MaxP(y[, k, initial, seed1, seed2]) "MaxP", """PURPOSE: Creates the maximum number of classes possible while maintaining a minimum population threshold per class. HOW IT WORKS: Aggregates spatial units to ensure each class meets minimum size requirements while maximizing the number of classes. BEST FOR: Spatial analysis where you need to balance detail (number of classes) with statistical reliability (minimum sample sizes).""" ) - MAXIMUMBREAKS = ( + MAXIMUMBREAKS = ( # mapclassify.MaximumBreaks(y[, k, mindiff]) "MaximumBreaks", """PURPOSE: Places class breaks at the largest gaps in the sorted data values. HOW IT WORKS: Identifies the biggest jumps between consecutive values and uses these as natural breaking points. BEST FOR: Data with clear natural clusters or gaps, where you want breaks at the most obvious discontinuities.""" ) - PERCENTILES = ( + NATURALBREAKS = ( # mapclassify.NaturalBreaks(y[, k, initial, ...]) needs to be revised + "NaturalBreaks", + """PURPOSE: Identifies class breaks that minimize variance within classes while maximizing variance between classes""" + ) + PERCENTILES = ( # mapclassify.Percentiles(y[, pct]) "Percentiles", """PURPOSE: Creates class breaks at specified percentile values. HOW IT WORKS: Divides data based on percentile ranks (e.g., quintiles at 20th, 40th, 60th, 80th percentiles). BEST FOR: When you want equal numbers of observations in each class, or when working with data where relative position matters more than absolute values.""" ) - PRETTYBREAKS = ( + PRETTYBREAKS = ( # mapclassify.PrettyBreaks(y[, k]) "PrettyBreaks", """PURPOSE: Creates "nice" round numbers as class breaks for improved readability. HOW IT WORKS: Chooses aesthetically pleasing break points (round numbers) that are close to optimal statistical breaks. BEST FOR: Maps intended for general audiences where readability and round numbers are more important than statistical optimization.""" ) - QUANTILES = ( + QUANTILES = ( # mapclassify.Quantiles(y[, k]) "Quantiles", """PURPOSE: Divides data so each class contains an equal number of observations. HOW IT WORKS: Sorts data and creates breaks at quantile boundaries to ensure equal sample sizes per class. BEST FOR: Comparing relative rankings across areas, or when you want to ensure balanced representation across all classes.""" ) - STDMEAN = ( + STDMEAN = ( # mapclassify.StdMean(y[, multiples, anchor]) "StdMean", """PURPOSE: Creates classes based on standard deviations from the mean. HOW IT WORKS: Sets breaks at intervals of standard deviations above and below the mean (e.g., mean±1σ, mean±2σ). BEST FOR: Normally distributed data where you want to highlight areas that are statistically typical vs. unusual relative to the average.""" ) - USERDEFINED = ( + USERDEFINED = ( # mapclassify.UserDefined(y, bins[, lowest]) "UserDefined", """PURPOSE: Allows manual specification of class break values. HOW IT WORKS: Uses exactly the break points you provide, giving complete control over classification. @@ -1816,10 +1821,9 @@ class ClassModes(knext.EnumParameterOptions): @classmethod def get_default(cls): - return cls.FISHERJENKS + return cls.FISHERJ - - class_col = knext.ColumnFilterParameter( + class_col = knext.MultiColumnParameter( "Targeted columns", """The zoom level of the grid from 0 to 15 (default value is 8). The bigger the zoom level, the smaller the hexagon. If the zoom level is too small, the hexagon might be too big to fit in the input polygon which will @@ -1828,7 +1832,7 @@ def get_default(cls): For more details about the zoom levels refer to [Tables of Cell Statistics Across Resolutions.](https://h3geo.org/docs/core-library/restable/) """, - default_value="", + column_filter=knut.is_numeric, ) n_cluster = knext.IntParameter( @@ -1850,10 +1854,8 @@ def get_default(cls): default_value=ClassModes.get_default().name, enum=ClassModes, ) - def configure(self, configure_context, input_schema): - return None def execute(self, exec_context: knext.ExecutionContext, input_table): @@ -1861,12 +1863,75 @@ def execute(self, exec_context: knext.ExecutionContext, input_table): k=self.n_cluster gdf = input_table.to_pandas() + gdf1 = gdf.copy() - if self.classifier_param=="EqualInterval": + # equal interval + if self.classifier_param == "EqualInterval": y = gdf[self.class_col] grid = mc.EqualInterval(y, k) - elif self.classifier_param=="FisherJenks": + gdf1["class"] = grid.yb.tolist() + # fisher jenks + if self.classifier_param == "FisherJenks": + y = gdf[self.class_col] + grid = mc.FisherJenks(y, k) + gdf1['class'] = grid.yb.tolist() + # fisher jenks sampled + elif self.classifier_param=="FisherJenksSampled": + y = gdf[self.class_col] + grid = mc.FisherJenksSampled(y, k) + # boxplot + elif self.classifier_param == "Boxplot": + y = gdf[self.class_col] + grid = mc.BoxPlot(y, hinge) + gdf1["class"] = grid.yb.tolist() + # greedy + elif self.classifier_param=="Greedy": + y = gdf[self.class_col] + grid = mc.greedy(y, k) + # head tail breaks + elif self.classifier_param=="HeadTailBreaks": + y = gdf[self.class_col] + grid = mc.HeadTailBreaks(y, k) + # jenks caspall + elif self.classifier_param=="JenksCaspall": + y = gdf[self.class_col] + grid = mc.JenksCaspall(y, k) + gdf1["class"] = grid.yb.tolist() + # jenks caspall forced + elif self.classifier_param=="JenksCaspallForced": + y = gdf[self.class_col] + grid = mc.JenksCaspallForced(y, k) + # jenks caspall sampled + elif self.classifier_param=="JenksCaspallSampled": + y = gdf[self.class_col] + grid = mc.JenksCaspallSampled(y, k) + # maxp + elif self.classifier_param=="MaxP": + y = gdf[self.class_col] + grid = mc.MaxP(y, k) + # maximum breaks + elif self.classifier_param=="MaximumBreaks": + y = gdf[self.class_col] + grid = mc.FisherJenks(y, k) + # percentiles + elif self.classifier_param=="Percentiles": + y = gdf[self.class_col] + grid = mc.FisherJenks(y, k) + # pretty breaks + elif self.classifier_param=="PrettyBreaks": + y = gdf[self.class_col] + grid = mc.FisherJenks(y, k) + # quantiles + elif self.classifier_param=="Quantiles": + y = gdf[self.class_col] + grid = mc.FisherJenks(y, k) + # std mean + elif self.classifier_param=="StdMean": + y = gdf[self.class_col] + grid = mc.FisherJenks(y, k) + # user defined + else: y = gdf[self.class_col] grid = mc.FisherJenks(y, k) - return knut.to_table(grid, exec_context) \ No newline at end of file + return knut.to_table(gdf1, exec_context) \ No newline at end of file From 476c932a95a375d9f8082e5c67eeb07047190b31 Mon Sep 17 00:00:00 2001 From: cr1speecr3m3 <166568593+cr1speecr3m3@users.noreply.github.com> Date: Wed, 6 Aug 2025 11:40:55 -0400 Subject: [PATCH 03/11] Update spatialtool.py --- knime_extension/src/nodes/spatialtool.py | 148 +++++++++++++++-------- 1 file changed, 96 insertions(+), 52 deletions(-) diff --git a/knime_extension/src/nodes/spatialtool.py b/knime_extension/src/nodes/spatialtool.py index a6afaa8b..43f0899d 100644 --- a/knime_extension/src/nodes/spatialtool.py +++ b/knime_extension/src/nodes/spatialtool.py @@ -1716,6 +1716,7 @@ def execute(self, exec_context: knext.ExecutionContext, input_table1, input_tabl description="Output table with classified result.", ) class Mapclassifier: + class ClassModes(knext.EnumParameterOptions): BOXPLOT = ( # mapclassify.BoxPlot(y[, hinge]) "Boxplot", @@ -1832,7 +1833,14 @@ def get_default(cls): For more details about the zoom levels refer to [Tables of Cell Statistics Across Resolutions.](https://h3geo.org/docs/core-library/restable/) """, - column_filter=knut.is_numeric, + column_filter = knut.is_numeric, + ) + + append_replace = knext.BoolParameter( + "Append Classification Results", + """If checked, the node will append the classification result to the input table. + If unchecked, the node will replace the input table with the classification result.""", + default_value = False ) n_cluster = knext.IntParameter( @@ -1844,15 +1852,15 @@ def get_default(cls): For more details about the zoom levels refer to [Tables of Cell Statistics Across Resolutions.](https://h3geo.org/docs/core-library/restable/) """, - default_value=5, - min_value=2, + default_value = 5, + min_value = 2, ) classifier_param = knext.EnumParameter( - label="Classifier Selection", - description="Select the type of coffee you like to drink.", - default_value=ClassModes.get_default().name, - enum=ClassModes, + label = "Classifier Selection", + description = "Select the type of coffee you like to drink.", + default_value = ClassModes.get_default().name, + enum = ClassModes ) def configure(self, configure_context, input_schema): @@ -1861,77 +1869,113 @@ def configure(self, configure_context, input_schema): def execute(self, exec_context: knext.ExecutionContext, input_table): import mapclassify as mc - k=self.n_cluster + ar = self.append_replace + k = self.n_cluster gdf = input_table.to_pandas() - gdf1 = gdf.copy() # equal interval - if self.classifier_param == "EqualInterval": - y = gdf[self.class_col] - grid = mc.EqualInterval(y, k) - gdf1["class"] = grid.yb.tolist() + if self.classifier_param == self.ClassModes.EQUALINTERVAL.name: + for col in self.class_col: + grid = mc.EqualInterval(gdf[col], k) + if (ar): + gdf[f'{col}_class'] = grid.yb.tolist() + else: + gdf[col] = grid.yb.tolist() + # fisher jenks - if self.classifier_param == "FisherJenks": - y = gdf[self.class_col] - grid = mc.FisherJenks(y, k) - gdf1['class'] = grid.yb.tolist() - # fisher jenks sampled - elif self.classifier_param=="FisherJenksSampled": - y = gdf[self.class_col] - grid = mc.FisherJenksSampled(y, k) + elif self.classifier_param == self.ClassModes.FISHERJ.name: + for col in self.class_col: + grid = mc.FisherJenkins(gdf[col], k) + if (ar): + gdf[f'{col}_class'] = grid.yb.tolist() + else: + gdf[col] = grid.yb.tolist() + + # jenks caspall + elif self.classifier_param == self.ClassModes.JENKS_CAS.name: + for col in self.class_col: + grid = mc.JenksCaspall(gdf[col], k) + if (ar): + gdf[f'{col}_class'] = grid.yb.tolist() + else: + gdf[col] = grid.yb.tolist() + + # jenks caspall forced + elif self.classifier_param == self.ClassModes.JENKS_CASFORCED.name: + for col in self.class_col: + grid = mc.JenksCaspallForced(gdf[col], k) + if (ar): + gdf[f'{col}_class'] = grid.yb.tolist() + else: + gdf[col] = grid.yb.tolist() + + # pretty breaks + elif self.classifier_param == self.ClassModes.PRETTYBREAKS.name: + for col in self.class_col: + grid = mc.PrettyBreaks(gdf[col], k) + if (ar): + gdf[f'{col}_class'] = grid.yb.tolist() + else: + gdf[col] = grid.yb.tolist() + + # quantiles + elif self.classifier_param == self.ClassModes.QUANTILES.name: + for col in self.class_col: + grid = mc.Quanitles(gdf[col], k) + if (ar): + gdf[f'{col}_class'] = grid.yb.tolist() + else: + gdf[col] = grid.yb.tolist() # boxplot - elif self.classifier_param == "Boxplot": + elif self.classifier_param == self.ClassModes.BOXPLOT.name: y = gdf[self.class_col] grid = mc.BoxPlot(y, hinge) - gdf1["class"] = grid.yb.tolist() + gdf["class"] = grid.yb.tolist() + +# fisher jenks sampled + elif self.classifier_param == self.ClassModes.FISHERJ_SAMPLED.name: + y = gdf[self.class_col] + grid = mc.FisherJenksSampled(y, k) + # greedy - elif self.classifier_param=="Greedy": + elif self.classifier_param == self.ClassModes.GREEDY.name: y = gdf[self.class_col] grid = mc.greedy(y, k) + # head tail breaks - elif self.classifier_param=="HeadTailBreaks": + elif self.classifier_param == self.ClassModes.HEADT_BREAKS.name: y = gdf[self.class_col] grid = mc.HeadTailBreaks(y, k) - # jenks caspall - elif self.classifier_param=="JenksCaspall": - y = gdf[self.class_col] - grid = mc.JenksCaspall(y, k) - gdf1["class"] = grid.yb.tolist() - # jenks caspall forced - elif self.classifier_param=="JenksCaspallForced": - y = gdf[self.class_col] - grid = mc.JenksCaspallForced(y, k) + # jenks caspall sampled - elif self.classifier_param=="JenksCaspallSampled": + elif self.classifier_param == self.ClassModes.JENKS_CASSAMPLED.name: y = gdf[self.class_col] grid = mc.JenksCaspallSampled(y, k) + # maxp - elif self.classifier_param=="MaxP": + elif self.classifier_param == self.ClassModes.MAXP.name: y = gdf[self.class_col] grid = mc.MaxP(y, k) + # maximum breaks - elif self.classifier_param=="MaximumBreaks": + elif self.classifier_param == self.ClassModes.MAXIMUMBREAKS.name: y = gdf[self.class_col] - grid = mc.FisherJenks(y, k) + grid = mc.NaturalBreaks(y, k) + # percentiles - elif self.classifier_param=="Percentiles": - y = gdf[self.class_col] - grid = mc.FisherJenks(y, k) - # pretty breaks - elif self.classifier_param=="PrettyBreaks": + elif self.classifier_param == self.ClassModes.PERCENTILES.name: y = gdf[self.class_col] - grid = mc.FisherJenks(y, k) - # quantiles - elif self.classifier_param=="Quantiles": - y = gdf[self.class_col] - grid = mc.FisherJenks(y, k) + grid = mc.Percentiles(y, k) + # std mean - elif self.classifier_param=="StdMean": + elif self.classifier_param == self.ClassModes.STDMEAN.name: y = gdf[self.class_col] - grid = mc.FisherJenks(y, k) + grid = mc.StdMean(y, k) + # user defined else: y = gdf[self.class_col] - grid = mc.FisherJenks(y, k) + grid = mc.UserDefined(y, 3) + gdf['class'] = grid.yb.tolist() - return knut.to_table(gdf1, exec_context) \ No newline at end of file + return knut.to_table(gdf, exec_context) \ No newline at end of file From a0bc49a01d1592113b8479123fa44216d9dfe9a6 Mon Sep 17 00:00:00 2001 From: cr1speecr3m3 <166568593+cr1speecr3m3@users.noreply.github.com> Date: Thu, 7 Aug 2025 17:36:02 -0400 Subject: [PATCH 04/11] Update spatialtool.py --- knime_extension/src/nodes/spatialtool.py | 99 ++++++++++++++++-------- 1 file changed, 66 insertions(+), 33 deletions(-) diff --git a/knime_extension/src/nodes/spatialtool.py b/knime_extension/src/nodes/spatialtool.py index 43f0899d..cb83f9d4 100644 --- a/knime_extension/src/nodes/spatialtool.py +++ b/knime_extension/src/nodes/spatialtool.py @@ -1824,6 +1824,7 @@ class ClassModes(knext.EnumParameterOptions): def get_default(cls): return cls.FISHERJ + # classifier selection parameter class_col = knext.MultiColumnParameter( "Targeted columns", """The zoom level of the grid from 0 to 15 (default value is 8). The bigger the zoom level, the smaller the @@ -1833,19 +1834,21 @@ def get_default(cls): For more details about the zoom levels refer to [Tables of Cell Statistics Across Resolutions.](https://h3geo.org/docs/core-library/restable/) """, - column_filter = knut.is_numeric, + column_filter = knut.is_numeric ) - append_replace = knext.BoolParameter( - "Append Classification Results", - """If checked, the node will append the classification result to the input table. - If unchecked, the node will replace the input table with the classification result.""", - default_value = False + # classifier_param for selecting the classification method + classifier_param = knext.EnumParameter( + label = "Classifier Selection", + description = "Select the classifier that you want to apply to the targeted columns.", + default_value = ClassModes.get_default().name, + enum = ClassModes ) - n_cluster = knext.IntParameter( - "Number of classification", - """The zoom level of the grid from 0 to 15 (default value is 8). The bigger the zoom level, the smaller the + # k_param for all classifiers + k_param = knext.IntParameter( + label = "Number of classification", + description = """The zoom level of the grid from 0 to 15 (default value is 8). The bigger the zoom level, the smaller the hexagon. If the zoom level is too small, the hexagon might be too big to fit in the input polygon which will result in an error. A very small zoom level might result in a very large output table even for smaller input polygons. @@ -1854,13 +1857,35 @@ def get_default(cls): """, default_value = 5, min_value = 2, - ) + ).rule(knext.OneOf(classifier_param, [ClassModes.FISHERJ.name]), knext.Effect.HIDE) + + # pct_param for JenksCaspallSampled + pct_param = knext.DoubleParameter( + label = "Natural Breaks Percentage for FisherJenks Sampled", + description = """The percentage of the data to be randomly sampled to determine the natural breaks.""", + default_value = 0.10 + ).rule(knext.OneOf(classifier_param, [ClassModes.JENKS_CASFORCED.name]), knext.Effect.SHOW) + + # hinge_param for Boxplot + hinge_param = knext.DoubleParameter( + label = "Hinge prompt for Boxplot", + description = """The hinge value is used to determine the lower and upper quartiles of the data.""", + default_value = 1.5 + ).rule(knext.OneOf(classifier_param, [ClassModes.BOXPLOT.name]), knext.Effect.SHOW) + + # determines if the output is to truncate the classification result to the number of classes specified by the k parameter + jc_sampledtruncate = knext.BoolParameter( + "Truncate for JenksCaspall Sampled", + """If checked, the node will truncate the classification result to the number of classes specified by the k parameter.""", + default_value = False + ).rule(knext.OneOf(classifier_param, [ClassModes.JENKS_CASSAMPLED.name]), knext.Effect.SHOW) - classifier_param = knext.EnumParameter( - label = "Classifier Selection", - description = "Select the type of coffee you like to drink.", - default_value = ClassModes.get_default().name, - enum = ClassModes + # determines if the output is to replace the original columns or append them + append_replace = knext.BoolParameter( + "Append Classification Results", + """If checked, the node will append the classification result to the input table. + If unchecked, the node will replace the input table with the classification result.""", + default_value = False ) def configure(self, configure_context, input_schema): @@ -1869,15 +1894,13 @@ def configure(self, configure_context, input_schema): def execute(self, exec_context: knext.ExecutionContext, input_table): import mapclassify as mc - ar = self.append_replace - k = self.n_cluster gdf = input_table.to_pandas() # equal interval if self.classifier_param == self.ClassModes.EQUALINTERVAL.name: for col in self.class_col: - grid = mc.EqualInterval(gdf[col], k) - if (ar): + grid = mc.EqualInterval(gdf[col], self.k_param) + if (self.append_replace): gdf[f'{col}_class'] = grid.yb.tolist() else: gdf[col] = grid.yb.tolist() @@ -1885,8 +1908,8 @@ def execute(self, exec_context: knext.ExecutionContext, input_table): # fisher jenks elif self.classifier_param == self.ClassModes.FISHERJ.name: for col in self.class_col: - grid = mc.FisherJenkins(gdf[col], k) - if (ar): + grid = mc.FisherJenkins(gdf[col], self.k_param) + if (self.append_replace): gdf[f'{col}_class'] = grid.yb.tolist() else: gdf[col] = grid.yb.tolist() @@ -1894,8 +1917,8 @@ def execute(self, exec_context: knext.ExecutionContext, input_table): # jenks caspall elif self.classifier_param == self.ClassModes.JENKS_CAS.name: for col in self.class_col: - grid = mc.JenksCaspall(gdf[col], k) - if (ar): + grid = mc.JenksCaspall(gdf[col], self.k_param) + if (self.append_replace): gdf[f'{col}_class'] = grid.yb.tolist() else: gdf[col] = grid.yb.tolist() @@ -1903,8 +1926,8 @@ def execute(self, exec_context: knext.ExecutionContext, input_table): # jenks caspall forced elif self.classifier_param == self.ClassModes.JENKS_CASFORCED.name: for col in self.class_col: - grid = mc.JenksCaspallForced(gdf[col], k) - if (ar): + grid = mc.JenksCaspallForced(gdf[col], self.k_param) + if (self.append_replace): gdf[f'{col}_class'] = grid.yb.tolist() else: gdf[col] = grid.yb.tolist() @@ -1912,8 +1935,8 @@ def execute(self, exec_context: knext.ExecutionContext, input_table): # pretty breaks elif self.classifier_param == self.ClassModes.PRETTYBREAKS.name: for col in self.class_col: - grid = mc.PrettyBreaks(gdf[col], k) - if (ar): + grid = mc.PrettyBreaks(gdf[col], self.k_param) + if (self.append_replace): gdf[f'{col}_class'] = grid.yb.tolist() else: gdf[col] = grid.yb.tolist() @@ -1921,18 +1944,23 @@ def execute(self, exec_context: knext.ExecutionContext, input_table): # quantiles elif self.classifier_param == self.ClassModes.QUANTILES.name: for col in self.class_col: - grid = mc.Quanitles(gdf[col], k) - if (ar): + grid = mc.Quanitles(gdf[col], self.k_param) + if (self.append_replace): gdf[f'{col}_class'] = grid.yb.tolist() else: gdf[col] = grid.yb.tolist() + # boxplot elif self.classifier_param == self.ClassModes.BOXPLOT.name: y = gdf[self.class_col] - grid = mc.BoxPlot(y, hinge) - gdf["class"] = grid.yb.tolist() + for col in self.class_col: + grid = mc.BoxPlot(y, self.hinge_param) + if (self.append_replace): + gdf[f'{col}_class'] = grid.yb.tolist() + else: + gdf[col] = grid.yb.tolist() -# fisher jenks sampled + # fisher jenks sampled elif self.classifier_param == self.ClassModes.FISHERJ_SAMPLED.name: y = gdf[self.class_col] grid = mc.FisherJenksSampled(y, k) @@ -1950,7 +1978,12 @@ def execute(self, exec_context: knext.ExecutionContext, input_table): # jenks caspall sampled elif self.classifier_param == self.ClassModes.JENKS_CASSAMPLED.name: y = gdf[self.class_col] - grid = mc.JenksCaspallSampled(y, k) + for col in self.class_col: + grid = mc.JenksCaspallSampled(y, k, self.pct_param, self.jc_sampledtruncate) + if (ar): + gdf[f'{col}_class'] = grid.yb.tolist() + else: + gdf[col] = grid.yb.tolist() # maxp elif self.classifier_param == self.ClassModes.MAXP.name: From 6ce705f6fffa0f7c683e35fb787ff22f482c6613 Mon Sep 17 00:00:00 2001 From: cr1speecr3m3 <166568593+cr1speecr3m3@users.noreply.github.com> Date: Sun, 10 Aug 2025 13:06:49 -0400 Subject: [PATCH 05/11] Update spatialtool.py --- knime_extension/src/nodes/spatialtool.py | 93 ++++++++++++++++++++++-- 1 file changed, 87 insertions(+), 6 deletions(-) diff --git a/knime_extension/src/nodes/spatialtool.py b/knime_extension/src/nodes/spatialtool.py index cb83f9d4..3103be38 100644 --- a/knime_extension/src/nodes/spatialtool.py +++ b/knime_extension/src/nodes/spatialtool.py @@ -1694,7 +1694,6 @@ def execute(self, exec_context: knext.ExecutionContext, input_table1, input_tabl return knut.to_table(gdf, exec_context) - ############################################ # Mapclassify ############################################ @@ -1822,8 +1821,8 @@ class ClassModes(knext.EnumParameterOptions): @classmethod def get_default(cls): - return cls.FISHERJ - + return cls.FISHERJ + # classifier selection parameter class_col = knext.MultiColumnParameter( "Targeted columns", @@ -1856,9 +1855,84 @@ def get_default(cls): [Tables of Cell Statistics Across Resolutions.](https://h3geo.org/docs/core-library/restable/) """, default_value = 5, - min_value = 2, + min_value = 2 ).rule(knext.OneOf(classifier_param, [ClassModes.FISHERJ.name]), knext.Effect.HIDE) + # strategy options for Greedy, sopme will require additional package: networkx.greedy_color + # see: https://networkx.github.io/documentation/stable/reference/algorithms/generated/networkx.algorithms.coloring.greedy_color.html + class Strategies(knext.EnumParameterOptions): + BALANCED = ( + "balanced", + ) + + LARGEST_FIRST = ( + "largest_first", + ) + + RANDOM_SEQUENTIAL = ( + "random_sequential", + ) + + SMALLEST_LAST = ( + "smallest_last", + ) + + CONNECTED_SEQUENTIAL_BFS = ( + "connected_sequential_bfs", + ) + + CONNECTED_SEQUENTIAL_DFS = ( + "connected_sequential_dfs", + ) + + CONNECTED_SEQUENTIAL = ( + "connected_sequential", + ) + + DASTUR = ( + "DASTUR", + ) + + @classmethod + def get_default(cls): + return cls.BALANCED + + # strategies_param for Greedy + strategy_param = knext.EnumParameter( + label = "Strategy Selection", + description = """Select the strategy to use for the greedy algorithm.""", + default_value = strategies.get_default().name + ).rule(knext.OneOf(classifier_param, [ClassModes.GREEDY.name]), knext.Effect.SHOW) + + # strategy_param for Greedy + strategy_param = knext.EnumParameter( + label = "Greedy Strategy", + description = """The strategy to use for the greedy algorithm.""", + default_value = Strategies.get_default().name + ).rule(knext.OneOf(classifier_param, [ClassModes.GREEDY.name]), knext.Effect.SHOW) + + class Balances(knext.EnumParameterOptions): + BALANCED = ('balanced') + + COUNT = ('count') + + AREA = ('area') + + CENTROID = ('centroid') + + DISTANCE = ('distance') + + @classmethod + def get_default(cls): + return cls.BALANCED + + # balance_param for Greedy + balance_param = knext.EnumParameter( + label = "Balance Strategy", + description = """The balance strategy to use for the greedy algorithm.""", + default_value = Balances.get_default().name + ).rule(knext.OneOf(classifier_param, [ClassModes.GREEDY.name]), knext.Effect.SHOW) + # pct_param for JenksCaspallSampled pct_param = knext.DoubleParameter( label = "Natural Breaks Percentage for FisherJenks Sampled", @@ -1950,6 +2024,8 @@ def execute(self, exec_context: knext.ExecutionContext, input_table): else: gdf[col] = grid.yb.tolist() + # ***BEGINNING OF COMMPLEX CLASSIFIERS*** + # boxplot elif self.classifier_param == self.ClassModes.BOXPLOT.name: y = gdf[self.class_col] @@ -1963,7 +2039,12 @@ def execute(self, exec_context: knext.ExecutionContext, input_table): # fisher jenks sampled elif self.classifier_param == self.ClassModes.FISHERJ_SAMPLED.name: y = gdf[self.class_col] - grid = mc.FisherJenksSampled(y, k) + for col in self.class_col: + grid = mc.FisherJenksSampled(y, self.k_param, self.pct_param, self.jc_sampledtruncate) + if (self.append_replace): + gdf[f'{col}_class'] = grid.yb.tolist() + else: + gdf[col] = grid.yb.tolist() # greedy elif self.classifier_param == self.ClassModes.GREEDY.name: @@ -1980,7 +2061,7 @@ def execute(self, exec_context: knext.ExecutionContext, input_table): y = gdf[self.class_col] for col in self.class_col: grid = mc.JenksCaspallSampled(y, k, self.pct_param, self.jc_sampledtruncate) - if (ar): + if (self.append_replace): gdf[f'{col}_class'] = grid.yb.tolist() else: gdf[col] = grid.yb.tolist() From 797d27eb7493c711d583fc4ef5a5d43cde1f49be Mon Sep 17 00:00:00 2001 From: cr1speecr3m3 <166568593+cr1speecr3m3@users.noreply.github.com> Date: Sun, 10 Aug 2025 13:08:07 -0400 Subject: [PATCH 06/11] Update spatialtool.py --- knime_extension/src/nodes/spatialtool.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/knime_extension/src/nodes/spatialtool.py b/knime_extension/src/nodes/spatialtool.py index 3103be38..02fab519 100644 --- a/knime_extension/src/nodes/spatialtool.py +++ b/knime_extension/src/nodes/spatialtool.py @@ -1901,7 +1901,8 @@ def get_default(cls): strategy_param = knext.EnumParameter( label = "Strategy Selection", description = """Select the strategy to use for the greedy algorithm.""", - default_value = strategies.get_default().name + default_value = Strategies.get_default().name, + enum = Strategies ).rule(knext.OneOf(classifier_param, [ClassModes.GREEDY.name]), knext.Effect.SHOW) # strategy_param for Greedy @@ -1930,7 +1931,8 @@ def get_default(cls): balance_param = knext.EnumParameter( label = "Balance Strategy", description = """The balance strategy to use for the greedy algorithm.""", - default_value = Balances.get_default().name + default_value = Balances.get_default().name, + enum = Balances ).rule(knext.OneOf(classifier_param, [ClassModes.GREEDY.name]), knext.Effect.SHOW) # pct_param for JenksCaspallSampled From acf1204f58a51709f11ae24e418371ec7c904363 Mon Sep 17 00:00:00 2001 From: cr1speecr3m3 <166568593+cr1speecr3m3@users.noreply.github.com> Date: Sun, 10 Aug 2025 16:54:37 -0400 Subject: [PATCH 07/11] Update spatialtool.py --- knime_extension/src/nodes/spatialtool.py | 56 ++++++++++++++++-------- 1 file changed, 38 insertions(+), 18 deletions(-) diff --git a/knime_extension/src/nodes/spatialtool.py b/knime_extension/src/nodes/spatialtool.py index 02fab519..989d394f 100644 --- a/knime_extension/src/nodes/spatialtool.py +++ b/knime_extension/src/nodes/spatialtool.py @@ -1856,80 +1856,100 @@ def get_default(cls): """, default_value = 5, min_value = 2 - ).rule(knext.OneOf(classifier_param, [ClassModes.FISHERJ.name]), knext.Effect.HIDE) + ).rule(knext.OneOf(classifier_param, [ClassModes.BOXPLOT.name, + ClassModes.GREEDY.name, + ClassModes.PERCENTILES.name, + ClassModes.STDMEAN.name, + ClassModes.USERDEFINED.name]), knext.Effect.HIDE) # strategy options for Greedy, sopme will require additional package: networkx.greedy_color # see: https://networkx.github.io/documentation/stable/reference/algorithms/generated/networkx.algorithms.coloring.greedy_color.html class Strategies(knext.EnumParameterOptions): BALANCED = ( "balanced", + """""" ) LARGEST_FIRST = ( "largest_first", + """""" ) RANDOM_SEQUENTIAL = ( "random_sequential", + """""" ) SMALLEST_LAST = ( "smallest_last", + """""" ) CONNECTED_SEQUENTIAL_BFS = ( "connected_sequential_bfs", + """""" ) CONNECTED_SEQUENTIAL_DFS = ( "connected_sequential_dfs", + """""" ) CONNECTED_SEQUENTIAL = ( "connected_sequential", + """""" ) DASTUR = ( "DASTUR", + """""" ) @classmethod def get_default(cls): return cls.BALANCED - # strategies_param for Greedy - strategy_param = knext.EnumParameter( - label = "Strategy Selection", - description = """Select the strategy to use for the greedy algorithm.""", - default_value = Strategies.get_default().name, - enum = Strategies - ).rule(knext.OneOf(classifier_param, [ClassModes.GREEDY.name]), knext.Effect.SHOW) - # strategy_param for Greedy strategy_param = knext.EnumParameter( - label = "Greedy Strategy", + label = "Strategy", description = """The strategy to use for the greedy algorithm.""", - default_value = Strategies.get_default().name + default_value = Strategies.get_default().name, + enum = Strategies ).rule(knext.OneOf(classifier_param, [ClassModes.GREEDY.name]), knext.Effect.SHOW) class Balances(knext.EnumParameterOptions): - BALANCED = ('balanced') + BALANCEDb = ( + "balanced", + """""" + ) - COUNT = ('count') + COUNT = ( + "count", + """""" + ) - AREA = ('area') + AREA = ( + "area", + """""" + ) - CENTROID = ('centroid') + CENTROID = ( + "centroid", + """""" + ) - DISTANCE = ('distance') + DISTANCE = ( + "distance", + """""" + ) @classmethod def get_default(cls): - return cls.BALANCED + return cls.BALANCEDb # balance_param for Greedy balance_param = knext.EnumParameter( - label = "Balance Strategy", + label = "Balance", description = """The balance strategy to use for the greedy algorithm.""", default_value = Balances.get_default().name, enum = Balances From 598f792de1fb603f6efe2ba7db4ea122885cbcd9 Mon Sep 17 00:00:00 2001 From: cr1speecr3m3 <166568593+cr1speecr3m3@users.noreply.github.com> Date: Tue, 12 Aug 2025 18:13:24 -0400 Subject: [PATCH 08/11] Update spatialtool.py --- knime_extension/src/nodes/spatialtool.py | 88 +++++++++++++++++++++--- 1 file changed, 77 insertions(+), 11 deletions(-) diff --git a/knime_extension/src/nodes/spatialtool.py b/knime_extension/src/nodes/spatialtool.py index 989d394f..1a47c2e5 100644 --- a/knime_extension/src/nodes/spatialtool.py +++ b/knime_extension/src/nodes/spatialtool.py @@ -1862,6 +1862,8 @@ def get_default(cls): ClassModes.STDMEAN.name, ClassModes.USERDEFINED.name]), knext.Effect.HIDE) + # *** SETTINGS FOR GREEDY CLASSIFIER *** + # strategy options for Greedy, sopme will require additional package: networkx.greedy_color # see: https://networkx.github.io/documentation/stable/reference/algorithms/generated/networkx.algorithms.coloring.greedy_color.html class Strategies(knext.EnumParameterOptions): @@ -1918,7 +1920,7 @@ def get_default(cls): ).rule(knext.OneOf(classifier_param, [ClassModes.GREEDY.name]), knext.Effect.SHOW) class Balances(knext.EnumParameterOptions): - BALANCEDb = ( + BALANCED = ( "balanced", """""" ) @@ -1945,7 +1947,7 @@ class Balances(knext.EnumParameterOptions): @classmethod def get_default(cls): - return cls.BALANCEDb + return cls.BALANCED # balance_param for Greedy balance_param = knext.EnumParameter( @@ -1955,12 +1957,62 @@ def get_default(cls): enum = Balances ).rule(knext.OneOf(classifier_param, [ClassModes.GREEDY.name]), knext.Effect.SHOW) - # pct_param for JenksCaspallSampled - pct_param = knext.DoubleParameter( - label = "Natural Breaks Percentage for FisherJenks Sampled", - description = """The percentage of the data to be randomly sampled to determine the natural breaks.""", - default_value = 0.10 - ).rule(knext.OneOf(classifier_param, [ClassModes.JENKS_CASFORCED.name]), knext.Effect.SHOW) + # min_colors_param for Greedy + min_colors_param = knext.IntParameter( + label = "Minimum number of colors", + description = """The minimum number of colors to use for the greedy algorithm.""", + default_value = 4, + min_value = 2 + ).rule(knext.OneOf(balance_param, [Balances.BALANCED.name]), knext.Effect.SHOW) + + class SWOptions(knext.EnumParameterOptions): + + QUEEN = ( + "queen", + """""" + ) + + ROOK = ( + "rook", + """""" + ) + + LIBYSAL = ( + "libysal.weights.W", + """""" + ) + + @classmethod + def get_default(cls): + return cls.QUEEN + + # sw_param for Greedy + sw_param = knext.EnumParameter( + label = "Use spatial weights", + description = """If checked, the node will use spatial weights to determine the adjacency of the polygons.""", + default_value = None + ).rule(knext.OneOf(classifier_param, [ClassModes.GREEDY.name]), knext.Effect.SHOW) + + # min_distance_param for Greedy + min_distance_param = knext.DoubleParameter( + label = "Minimum distance", + description = """The minimum distance to consider two polygons as adjacent. Only used if spatial weights is checked.""", + default_value = 0.0, + ).rule(knext.OneOf(classifier_param, [ClassModes.GREEDY.name]), knext.Effect.SHOW) + + silence_warn_param = knext.BoolParameter( + label = "Silence warnings", + description = """If checked, the node will silence the warnings from the greedy algorithm.""", + default_value = False + ).rule(knext.OneOf(classifier_param, [ClassModes.GREEDY.name]), knext.Effect.SHOW) + + interchange_param = knext.BoolParameter( + label = "Interchange", + description = """If checked, the node will use the interchange algorithm to improve the coloring result.""", + default_value = False + ).rule(knext.OneOf(classifier_param, [ClassModes.GREEDY.name]), knext.Effect.SHOW) + + # *** SETTINGS FOR BOXPLOT CLASSIFIER *** # hinge_param for Boxplot hinge_param = knext.DoubleParameter( @@ -1969,20 +2021,31 @@ def get_default(cls): default_value = 1.5 ).rule(knext.OneOf(classifier_param, [ClassModes.BOXPLOT.name]), knext.Effect.SHOW) + # SETTINGS FOR FISHER JENKS SAMPLED CLASSIFIER *** + + # pct_param for JenksCaspallSampled + pct_param = knext.DoubleParameter( + label = "Natural Breaks Percentage for FisherJenks Sampled", + description = """The percentage of the data to be randomly sampled to determine the natural breaks.""", + default_value = 0.10 + ).rule(knext.OneOf(classifier_param, [ClassModes.JENKS_CASFORCED.name]), knext.Effect.SHOW) + # determines if the output is to truncate the classification result to the number of classes specified by the k parameter jc_sampledtruncate = knext.BoolParameter( - "Truncate for JenksCaspall Sampled", + "Truncate for FisherJenksSampled", """If checked, the node will truncate the classification result to the number of classes specified by the k parameter.""", default_value = False ).rule(knext.OneOf(classifier_param, [ClassModes.JENKS_CASSAMPLED.name]), knext.Effect.SHOW) + # *** Formatting Settings *** + # determines if the output is to replace the original columns or append them append_replace = knext.BoolParameter( "Append Classification Results", """If checked, the node will append the classification result to the input table. If unchecked, the node will replace the input table with the classification result.""", default_value = False - ) + ).rule(knext.OneOf(classifier_param, [ClassModes.GREEDY.name]), knext.Effect.HIDE) def configure(self, configure_context, input_schema): return None @@ -2071,7 +2134,10 @@ def execute(self, exec_context: knext.ExecutionContext, input_table): # greedy elif self.classifier_param == self.ClassModes.GREEDY.name: y = gdf[self.class_col] - grid = mc.greedy(y, k) + grid = mc.greedy(gdf, self.strategy_param, self.balance_param, self.min_colors_param, + self.sw_param, self.min_distance_param, self.silence_warn_param, + self.interchange_param) + # head tail breaks elif self.classifier_param == self.ClassModes.HEADT_BREAKS.name: From 5aa64a1a402810ad508406ae362138a665f70678 Mon Sep 17 00:00:00 2001 From: cr1speecr3m3 <166568593+cr1speecr3m3@users.noreply.github.com> Date: Wed, 13 Aug 2025 15:28:53 -0400 Subject: [PATCH 09/11] Update spatialtool.py --- knime_extension/src/nodes/spatialtool.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/knime_extension/src/nodes/spatialtool.py b/knime_extension/src/nodes/spatialtool.py index 1a47c2e5..41438f44 100644 --- a/knime_extension/src/nodes/spatialtool.py +++ b/knime_extension/src/nodes/spatialtool.py @@ -2134,10 +2134,15 @@ def execute(self, exec_context: knext.ExecutionContext, input_table): # greedy elif self.classifier_param == self.ClassModes.GREEDY.name: y = gdf[self.class_col] - grid = mc.greedy(gdf, self.strategy_param, self.balance_param, self.min_colors_param, + for col in self.class_col: + grid = mc.greedy(gdf, self.strategy_param, self.balance_param, self.min_colors_param, self.sw_param, self.min_distance_param, self.silence_warn_param, self.interchange_param) - + if (self.append_replace): + gdf[f'{col}_geoclass'] = grid.yb.tolist() + else: + gdf[col] = grid.yb.tolist() + # head tail breaks elif self.classifier_param == self.ClassModes.HEADT_BREAKS.name: From b2ccff31388d3450ad96e1005ad4fecefb71716b Mon Sep 17 00:00:00 2001 From: cr1speecr3m3 <166568593+cr1speecr3m3@users.noreply.github.com> Date: Wed, 20 Aug 2025 16:50:59 -0400 Subject: [PATCH 10/11] Update spatialtool.py --- knime_extension/src/nodes/spatialtool.py | 80 ++++++++++++++---------- 1 file changed, 47 insertions(+), 33 deletions(-) diff --git a/knime_extension/src/nodes/spatialtool.py b/knime_extension/src/nodes/spatialtool.py index 41438f44..999b84e1 100644 --- a/knime_extension/src/nodes/spatialtool.py +++ b/knime_extension/src/nodes/spatialtool.py @@ -1716,6 +1716,7 @@ def execute(self, exec_context: knext.ExecutionContext, input_table1, input_tabl ) class Mapclassifier: + # classmode_param options for mapclassify class ClassModes(knext.EnumParameterOptions): BOXPLOT = ( # mapclassify.BoxPlot(y[, hinge]) "Boxplot", @@ -1822,6 +1823,14 @@ class ClassModes(knext.EnumParameterOptions): @classmethod def get_default(cls): return cls.FISHERJ + + # classifier_param for selecting the classification method + classifier_param = knext.EnumParameter( + label = "Classifier Selection", + description = "Select the classifier that you want to apply to the targeted columns.", + default_value = ClassModes.get_default().name, + enum = ClassModes + ) # classifier selection parameter class_col = knext.MultiColumnParameter( @@ -1834,15 +1843,7 @@ def get_default(cls): [Tables of Cell Statistics Across Resolutions.](https://h3geo.org/docs/core-library/restable/) """, column_filter = knut.is_numeric - ) - - # classifier_param for selecting the classification method - classifier_param = knext.EnumParameter( - label = "Classifier Selection", - description = "Select the classifier that you want to apply to the targeted columns.", - default_value = ClassModes.get_default().name, - enum = ClassModes - ) + ).rule(knext.OneOf(classifier_param, [ClassModes.GREEDY.name]), knext.Effect.HIDE) # k_param for all classifiers k_param = knext.IntParameter( @@ -1862,9 +1863,11 @@ def get_default(cls): ClassModes.STDMEAN.name, ClassModes.USERDEFINED.name]), knext.Effect.HIDE) + + ''' # *** SETTINGS FOR GREEDY CLASSIFIER *** - # strategy options for Greedy, sopme will require additional package: networkx.greedy_color + # strategy_param options for Greedy, sopme will require additional package: networkx.greedy_color # see: https://networkx.github.io/documentation/stable/reference/algorithms/generated/networkx.algorithms.coloring.greedy_color.html class Strategies(knext.EnumParameterOptions): BALANCED = ( @@ -1919,6 +1922,7 @@ def get_default(cls): enum = Strategies ).rule(knext.OneOf(classifier_param, [ClassModes.GREEDY.name]), knext.Effect.SHOW) + # balance_param options for Greedy class Balances(knext.EnumParameterOptions): BALANCED = ( "balanced", @@ -1965,6 +1969,7 @@ def get_default(cls): min_value = 2 ).rule(knext.OneOf(balance_param, [Balances.BALANCED.name]), knext.Effect.SHOW) + # sw_options for Greedy class SWOptions(knext.EnumParameterOptions): QUEEN = ( @@ -1988,9 +1993,10 @@ def get_default(cls): # sw_param for Greedy sw_param = knext.EnumParameter( - label = "Use spatial weights", + label = "Spatial weights", description = """If checked, the node will use spatial weights to determine the adjacency of the polygons.""", - default_value = None + default_value = SWOptions.get_default().name, + enum = SWOptions ).rule(knext.OneOf(classifier_param, [ClassModes.GREEDY.name]), knext.Effect.SHOW) # min_distance_param for Greedy @@ -2000,17 +2006,21 @@ def get_default(cls): default_value = 0.0, ).rule(knext.OneOf(classifier_param, [ClassModes.GREEDY.name]), knext.Effect.SHOW) + # silence_warn_param for Greedy silence_warn_param = knext.BoolParameter( label = "Silence warnings", description = """If checked, the node will silence the warnings from the greedy algorithm.""", default_value = False ).rule(knext.OneOf(classifier_param, [ClassModes.GREEDY.name]), knext.Effect.SHOW) + # interchange_param for Greedy interchange_param = knext.BoolParameter( label = "Interchange", description = """If checked, the node will use the interchange algorithm to improve the coloring result.""", default_value = False ).rule(knext.OneOf(classifier_param, [ClassModes.GREEDY.name]), knext.Effect.SHOW) + ''' + # *** SETTINGS FOR BOXPLOT CLASSIFIER *** @@ -2052,6 +2062,7 @@ def configure(self, configure_context, input_schema): def execute(self, exec_context: knext.ExecutionContext, input_table): import mapclassify as mc + import geopandas as gpd gdf = input_table.to_pandas() @@ -2060,7 +2071,7 @@ def execute(self, exec_context: knext.ExecutionContext, input_table): for col in self.class_col: grid = mc.EqualInterval(gdf[col], self.k_param) if (self.append_replace): - gdf[f'{col}_class'] = grid.yb.tolist() + gdf[f'{col}applied'] = grid.yb.tolist() else: gdf[col] = grid.yb.tolist() @@ -2069,7 +2080,7 @@ def execute(self, exec_context: knext.ExecutionContext, input_table): for col in self.class_col: grid = mc.FisherJenkins(gdf[col], self.k_param) if (self.append_replace): - gdf[f'{col}_class'] = grid.yb.tolist() + gdf[f'{col}applied'] = grid.yb.tolist() else: gdf[col] = grid.yb.tolist() @@ -2078,7 +2089,7 @@ def execute(self, exec_context: knext.ExecutionContext, input_table): for col in self.class_col: grid = mc.JenksCaspall(gdf[col], self.k_param) if (self.append_replace): - gdf[f'{col}_class'] = grid.yb.tolist() + gdf[f'{col}applied'] = grid.yb.tolist() else: gdf[col] = grid.yb.tolist() @@ -2087,7 +2098,7 @@ def execute(self, exec_context: knext.ExecutionContext, input_table): for col in self.class_col: grid = mc.JenksCaspallForced(gdf[col], self.k_param) if (self.append_replace): - gdf[f'{col}_class'] = grid.yb.tolist() + gdf[f'{col}applied'] = grid.yb.tolist() else: gdf[col] = grid.yb.tolist() @@ -2096,7 +2107,7 @@ def execute(self, exec_context: knext.ExecutionContext, input_table): for col in self.class_col: grid = mc.PrettyBreaks(gdf[col], self.k_param) if (self.append_replace): - gdf[f'{col}_class'] = grid.yb.tolist() + gdf[f'{col}applied'] = grid.yb.tolist() else: gdf[col] = grid.yb.tolist() @@ -2105,7 +2116,7 @@ def execute(self, exec_context: knext.ExecutionContext, input_table): for col in self.class_col: grid = mc.Quanitles(gdf[col], self.k_param) if (self.append_replace): - gdf[f'{col}_class'] = grid.yb.tolist() + gdf[f'{col}applied'] = grid.yb.tolist() else: gdf[col] = grid.yb.tolist() @@ -2117,7 +2128,7 @@ def execute(self, exec_context: knext.ExecutionContext, input_table): for col in self.class_col: grid = mc.BoxPlot(y, self.hinge_param) if (self.append_replace): - gdf[f'{col}_class'] = grid.yb.tolist() + gdf[f'{col}_applied'] = grid.yb.tolist() else: gdf[col] = grid.yb.tolist() @@ -2127,42 +2138,45 @@ def execute(self, exec_context: knext.ExecutionContext, input_table): for col in self.class_col: grid = mc.FisherJenksSampled(y, self.k_param, self.pct_param, self.jc_sampledtruncate) if (self.append_replace): - gdf[f'{col}_class'] = grid.yb.tolist() + gdf[f'{col}_applied'] = grid.yb.tolist() else: gdf[col] = grid.yb.tolist() # greedy elif self.classifier_param == self.ClassModes.GREEDY.name: - y = gdf[self.class_col] - for col in self.class_col: - grid = mc.greedy(gdf, self.strategy_param, self.balance_param, self.min_colors_param, - self.sw_param, self.min_distance_param, self.silence_warn_param, - self.interchange_param) - if (self.append_replace): - gdf[f'{col}_geoclass'] = grid.yb.tolist() - else: - gdf[col] = grid.yb.tolist() + grid = mc.greedy(gdf, self.strategy_param, self.balance_param, self.min_colors_param, + self.sw_param, self.min_distance_param, self.silence_warn_param, + self.interchange_param) + if (self.append_replace): + grid[f'{col}_greedy_colors'] = grid.yb.tolist() + else: + grid["greedy_colors"] = grid.yb.tolist() # head tail breaks elif self.classifier_param == self.ClassModes.HEADT_BREAKS.name: y = gdf[self.class_col] - grid = mc.HeadTailBreaks(y, k) + grid = mc.HeadTailBreaks(y, self.k_param) # jenks caspall sampled elif self.classifier_param == self.ClassModes.JENKS_CASSAMPLED.name: y = gdf[self.class_col] for col in self.class_col: - grid = mc.JenksCaspallSampled(y, k, self.pct_param, self.jc_sampledtruncate) + grid = mc.JenksCaspallSampled(y, self.k_param, self.pct_param, self.jc_sampledtruncate) if (self.append_replace): - gdf[f'{col}_class'] = grid.yb.tolist() + gdf[f'{col}applied'] = grid.yb.tolist() else: gdf[col] = grid.yb.tolist() # maxp elif self.classifier_param == self.ClassModes.MAXP.name: y = gdf[self.class_col] - grid = mc.MaxP(y, k) + grid = mc.MaxP(self.cal_param) + for col in self.class_col: + if (self.append_replace): + gdf[f'{col}applied'] = grid.yb.tolist() + else: + gdf[col] = grid.yb.tolist() # maximum breaks elif self.classifier_param == self.ClassModes.MAXIMUMBREAKS.name: From 4437846eb6c5ae7746156bddc69d29d4646c1b6e Mon Sep 17 00:00:00 2001 From: Lingbo Liu Date: Thu, 13 Nov 2025 16:43:26 -0500 Subject: [PATCH 11/11] update some error in MapClassifer --- knime_extension/src/nodes/spatialtool.py | 554 ++++++++--------------- 1 file changed, 185 insertions(+), 369 deletions(-) diff --git a/knime_extension/src/nodes/spatialtool.py b/knime_extension/src/nodes/spatialtool.py index 999b84e1..438132c7 100644 --- a/knime_extension/src/nodes/spatialtool.py +++ b/knime_extension/src/nodes/spatialtool.py @@ -1714,124 +1714,119 @@ def execute(self, exec_context: knext.ExecutionContext, input_table1, input_tabl name="Output Table", description="Output table with classified result.", ) +@knut.geo_node_description( + short_description="Classifies numeric columns using Mapclassify algorithms.", + description="""Apply different classification schemes provided by the + [mapclassify](https://pysal.org/mapclassify/) library to selected numeric columns. + Choose a classifier, configure its parameters, and either replace the original + values with class labels or append new columns with the classification result. + The iterative classifiers `JenksCaspall`, `JenksCaspallForced`, and `JenksCaspallSampled` + may take noticeably longer on large datasets because they refine class boundaries + through multiple passes. The `HeadTailBreaks` classifier automatically determines + the number of classes based on the data distribution.""", + references={ + "mapclassify documentation": "https://pysal.org/mapclassify/", + }, +) class Mapclassifier: # classmode_param options for mapclassify class ClassModes(knext.EnumParameterOptions): - BOXPLOT = ( # mapclassify.BoxPlot(y[, hinge]) + BOXPLOT = ( # mapclassify.BoxPlot(y[, hinge]) "Boxplot", """PURPOSE: Creates class breaks based on the statistical properties of a box plot distribution. HOW IT WORKS: Uses the quartiles (Q1, median, Q3) and interquartile range (IQR) to identify outliers and create meaningful breaks. Typically creates 6 classes: lower outlier, < Q1, Q1-median, median-Q3, > Q3, and upper outlier. BEST FOR: Identifying and highlighting outliers in your data while maintaining interpretable breaks based on statistical distribution.""", ) - EQUALINTERVAL = ( # mapclassify.EqualInterval(y[, k]) + EQUALINTERVAL = ( # mapclassify.EqualInterval(y[, k]) "EqualInterval", """PURPOSE: Divides the data range into equal-sized intervals. HOW IT WORKS: Takes the difference between maximum and minimum values, then divides by the number of desired classes to create intervals of equal width. BEST FOR: Data that is relatively evenly distributed and when you want consistent interval sizes for easy interpretation.""", ) - FISHERJ = ( # mapclassify.FisherJenks(y[, k]) + FISHERJ = ( # mapclassify.FisherJenks(y[, k]) "FisherJenks", """PURPOSE: Finds optimal class breaks that minimize within-class variance while maximizing between-class variance. HOW IT WORKS: Uses dynamic programming to find the optimal groupings that create the most homogeneous classes possible. BEST FOR: Most types of data as it adapts to the natural clustering in your dataset. Considered one of the most statistically robust methods.""", ) - FISHERJ_SAMPLED = ( # mapclassify.FisherJenksSampled(y[, k, pct, ...]) + FISHERJ_SAMPLED = ( # mapclassify.FisherJenksSampled(y[, k, pct, ...]) "FisherJanksSampled", """PURPOSE: Same optimization as FisherJenks but uses a random sample for computational efficiency. HOW IT WORKS: Applies the Fisher-Jenks algorithm to a subset of the data, making it faster for large datasets. - BEST FOR: Large datasets where standard Fisher-Jenks would be computationally expensive but you still want optimal breaks.""", - ) - GREEDY = ( # mapclassify.greedy(gdf[, strategy, balance, ...]) - "Greedy", - """PURPOSE: Colors geographic areas using graph coloring strategies to ensure adjacent areas have different colors. - HOW IT WORKS: Implements topological coloring algorithms (various strategies available) to minimize color conflicts between neighboring polygons. - BEST FOR: Categorical data or when you need to ensure visual distinction between adjacent geographic units regardless of data values.""" - ) - HEADT_BREAKS = ( # mapclassify.HeadTailBreaks(y) + BEST FOR: Large datasets where standard Fisher-Jenks would be computationally expensive but you still want optimal breaks.""", + ) + HEADT_BREAKS = ( # mapclassify.HeadTailBreaks(y) "HeadTailBreaks", """PURPOSE: Recursively divides data around the mean, designed specifically for heavy-tailed distributions. HOW IT WORKS: Splits data at the arithmetic mean, then recursively applies the same process to the "head" (above-mean values) until stopping criteria are met. - BEST FOR: Highly skewed data with heavy tails, such as city populations, income distributions, or social media network data.""" + BEST FOR: Highly skewed data with heavy tails, such as city populations, income distributions, or social media network data.""", ) - JENKS_CAS = ( # mapclassify.JenksCaspall(y[, k]) + JENKS_CAS = ( # mapclassify.JenksCaspall(y[, k]) "JenksCaspall", """PURPOSE: An iterative optimization method that moves class boundaries to minimize within-class variance. HOW IT WORKS: Starts with initial class breaks and iteratively moves boundaries to improve the goodness of variance fit. - BEST FOR: When you want optimized breaks similar to Fisher-Jenks but prefer an iterative approach that can be stopped at any point.""" - + BEST FOR: When you want optimized breaks similar to Fisher-Jenks but prefer an iterative approach that can be stopped at any point.""", ) - JENKS_CASFORCED = ( # mapclassify.JenksCaspallForced(y[, k]) + JENKS_CASFORCED = ( # mapclassify.JenksCaspallForced(y[, k]) "JenksCaspallForced", """PURPOSE: Similar to JenksCaspall but allows forcing specific values to be class boundaries. HOW IT WORKS: Performs the iterative optimization while ensuring certain predetermined values remain as class breaks. - BEST FOR: When you have meaningful breakpoints (like 0, poverty line, etc.) that must be preserved while optimizing the remaining breaks.""" + BEST FOR: When you have meaningful breakpoints (like 0, poverty line, etc.) that must be preserved while optimizing the remaining breaks.""", ) - JENKS_CASSAMPLED = ( # mapclassify.JenksCaspallSampled(y[, k, pct]) + JENKS_CASSAMPLED = ( # mapclassify.JenksCaspallSampled(y[, k, pct]) "JenksCaspallSampled", """PURPOSE: Applies JenksCaspall optimization to a random sample of the data. HOW IT WORKS: Uses sampling to make the iterative process computationally feasible for large datasets. - BEST FOR: Large datasets where full JenksCaspall would be too slow but you want iteratively optimized breaks.""" - ) - MAXP = ( # mapclassify.MaxP(y[, k, initial, seed1, seed2]) - "MaxP", - """PURPOSE: Creates the maximum number of classes possible while maintaining a minimum population threshold per class. - HOW IT WORKS: Aggregates spatial units to ensure each class meets minimum size requirements while maximizing the number of classes. - BEST FOR: Spatial analysis where you need to balance detail (number of classes) with statistical reliability (minimum sample sizes).""" + BEST FOR: Large datasets where full JenksCaspall would be too slow but you want iteratively optimized breaks.""", ) - MAXIMUMBREAKS = ( # mapclassify.MaximumBreaks(y[, k, mindiff]) + MAXIMUMBREAKS = ( # mapclassify.MaximumBreaks(y[, k, mindiff]) "MaximumBreaks", """PURPOSE: Places class breaks at the largest gaps in the sorted data values. HOW IT WORKS: Identifies the biggest jumps between consecutive values and uses these as natural breaking points. - BEST FOR: Data with clear natural clusters or gaps, where you want breaks at the most obvious discontinuities.""" + BEST FOR: Data with clear natural clusters or gaps, where you want breaks at the most obvious discontinuities.""", ) - NATURALBREAKS = ( # mapclassify.NaturalBreaks(y[, k, initial, ...]) needs to be revised + NATURALBREAKS = ( # mapclassify.NaturalBreaks(y[, k, initial, ...]) needs to be revised "NaturalBreaks", - """PURPOSE: Identifies class breaks that minimize variance within classes while maximizing variance between classes""" + """PURPOSE: Identifies class breaks that minimize variance within classes while maximizing variance between classes""", ) - PERCENTILES = ( # mapclassify.Percentiles(y[, pct]) + PERCENTILES = ( # mapclassify.Percentiles(y[, pct]) "Percentiles", """PURPOSE: Creates class breaks at specified percentile values. HOW IT WORKS: Divides data based on percentile ranks (e.g., quintiles at 20th, 40th, 60th, 80th percentiles). - BEST FOR: When you want equal numbers of observations in each class, or when working with data where relative position matters more than absolute values.""" + BEST FOR: When you want equal numbers of observations in each class, or when working with data where relative position matters more than absolute values.""", ) - PRETTYBREAKS = ( # mapclassify.PrettyBreaks(y[, k]) + PRETTYBREAKS = ( # mapclassify.PrettyBreaks(y[, k]) "PrettyBreaks", """PURPOSE: Creates "nice" round numbers as class breaks for improved readability. HOW IT WORKS: Chooses aesthetically pleasing break points (round numbers) that are close to optimal statistical breaks. - BEST FOR: Maps intended for general audiences where readability and round numbers are more important than statistical optimization.""" + BEST FOR: Maps intended for general audiences where readability and round numbers are more important than statistical optimization.""", ) - QUANTILES = ( # mapclassify.Quantiles(y[, k]) + QUANTILES = ( # mapclassify.Quantiles(y[, k]) "Quantiles", """PURPOSE: Divides data so each class contains an equal number of observations. HOW IT WORKS: Sorts data and creates breaks at quantile boundaries to ensure equal sample sizes per class. - BEST FOR: Comparing relative rankings across areas, or when you want to ensure balanced representation across all classes.""" + BEST FOR: Comparing relative rankings across areas, or when you want to ensure balanced representation across all classes.""", ) - STDMEAN = ( # mapclassify.StdMean(y[, multiples, anchor]) + STDMEAN = ( # mapclassify.StdMean(y[, multiples, anchor]) "StdMean", """PURPOSE: Creates classes based on standard deviations from the mean. HOW IT WORKS: Sets breaks at intervals of standard deviations above and below the mean (e.g., mean±1σ, mean±2σ). - BEST FOR: Normally distributed data where you want to highlight areas that are statistically typical vs. unusual relative to the average.""" - ) - USERDEFINED = ( # mapclassify.UserDefined(y, bins[, lowest]) - "UserDefined", - """PURPOSE: Allows manual specification of class break values. - HOW IT WORKS: Uses exactly the break points you provide, giving complete control over classification. - BEST FOR: When you have domain knowledge about meaningful thresholds, need to match existing standards, or want to compare across multiple maps with consistent breaks.""" + BEST FOR: Normally distributed data where you want to highlight areas that are statistically typical vs. unusual relative to the average.""", ) @classmethod def get_default(cls): return cls.FISHERJ - + # classifier_param for selecting the classification method classifier_param = knext.EnumParameter( - label = "Classifier Selection", - description = "Select the classifier that you want to apply to the targeted columns.", - default_value = ClassModes.get_default().name, - enum = ClassModes + label="Classifier Selection", + description="Select the classifier that you want to apply to the targeted columns.", + default_value=ClassModes.get_default().name, + enum=ClassModes, ) - + # classifier selection parameter class_col = knext.MultiColumnParameter( "Targeted columns", @@ -1842,210 +1837,68 @@ def get_default(cls): For more details about the zoom levels refer to [Tables of Cell Statistics Across Resolutions.](https://h3geo.org/docs/core-library/restable/) """, - column_filter = knut.is_numeric - ).rule(knext.OneOf(classifier_param, [ClassModes.GREEDY.name]), knext.Effect.HIDE) + column_filter=knut.is_numeric, + ) # k_param for all classifiers k_param = knext.IntParameter( - label = "Number of classification", - description = """The zoom level of the grid from 0 to 15 (default value is 8). The bigger the zoom level, the smaller the + label="Number of classification", + description="""The zoom level of the grid from 0 to 15 (default value is 8). The bigger the zoom level, the smaller the hexagon. If the zoom level is too small, the hexagon might be too big to fit in the input polygon which will result in an error. A very small zoom level might result in a very large output table even for smaller input polygons. For more details about the zoom levels refer to [Tables of Cell Statistics Across Resolutions.](https://h3geo.org/docs/core-library/restable/) """, - default_value = 5, - min_value = 2 - ).rule(knext.OneOf(classifier_param, [ClassModes.BOXPLOT.name, - ClassModes.GREEDY.name, - ClassModes.PERCENTILES.name, - ClassModes.STDMEAN.name, - ClassModes.USERDEFINED.name]), knext.Effect.HIDE) - - - ''' - # *** SETTINGS FOR GREEDY CLASSIFIER *** - - # strategy_param options for Greedy, sopme will require additional package: networkx.greedy_color - # see: https://networkx.github.io/documentation/stable/reference/algorithms/generated/networkx.algorithms.coloring.greedy_color.html - class Strategies(knext.EnumParameterOptions): - BALANCED = ( - "balanced", - """""" - ) - - LARGEST_FIRST = ( - "largest_first", - """""" - ) - - RANDOM_SEQUENTIAL = ( - "random_sequential", - """""" - ) - - SMALLEST_LAST = ( - "smallest_last", - """""" - ) - - CONNECTED_SEQUENTIAL_BFS = ( - "connected_sequential_bfs", - """""" - ) - - CONNECTED_SEQUENTIAL_DFS = ( - "connected_sequential_dfs", - """""" - ) - - CONNECTED_SEQUENTIAL = ( - "connected_sequential", - """""" - ) - - DASTUR = ( - "DASTUR", - """""" - ) - - @classmethod - def get_default(cls): - return cls.BALANCED - - # strategy_param for Greedy - strategy_param = knext.EnumParameter( - label = "Strategy", - description = """The strategy to use for the greedy algorithm.""", - default_value = Strategies.get_default().name, - enum = Strategies - ).rule(knext.OneOf(classifier_param, [ClassModes.GREEDY.name]), knext.Effect.SHOW) - - # balance_param options for Greedy - class Balances(knext.EnumParameterOptions): - BALANCED = ( - "balanced", - """""" - ) - - COUNT = ( - "count", - """""" - ) - - AREA = ( - "area", - """""" - ) - - CENTROID = ( - "centroid", - """""" - ) - - DISTANCE = ( - "distance", - """""" - ) - - @classmethod - def get_default(cls): - return cls.BALANCED - - # balance_param for Greedy - balance_param = knext.EnumParameter( - label = "Balance", - description = """The balance strategy to use for the greedy algorithm.""", - default_value = Balances.get_default().name, - enum = Balances - ).rule(knext.OneOf(classifier_param, [ClassModes.GREEDY.name]), knext.Effect.SHOW) - - # min_colors_param for Greedy - min_colors_param = knext.IntParameter( - label = "Minimum number of colors", - description = """The minimum number of colors to use for the greedy algorithm.""", - default_value = 4, - min_value = 2 - ).rule(knext.OneOf(balance_param, [Balances.BALANCED.name]), knext.Effect.SHOW) - - # sw_options for Greedy - class SWOptions(knext.EnumParameterOptions): - - QUEEN = ( - "queen", - """""" - ) - - ROOK = ( - "rook", - """""" - ) - - LIBYSAL = ( - "libysal.weights.W", - """""" - ) - - @classmethod - def get_default(cls): - return cls.QUEEN - - # sw_param for Greedy - sw_param = knext.EnumParameter( - label = "Spatial weights", - description = """If checked, the node will use spatial weights to determine the adjacency of the polygons.""", - default_value = SWOptions.get_default().name, - enum = SWOptions - ).rule(knext.OneOf(classifier_param, [ClassModes.GREEDY.name]), knext.Effect.SHOW) - - # min_distance_param for Greedy - min_distance_param = knext.DoubleParameter( - label = "Minimum distance", - description = """The minimum distance to consider two polygons as adjacent. Only used if spatial weights is checked.""", - default_value = 0.0, - ).rule(knext.OneOf(classifier_param, [ClassModes.GREEDY.name]), knext.Effect.SHOW) - - # silence_warn_param for Greedy - silence_warn_param = knext.BoolParameter( - label = "Silence warnings", - description = """If checked, the node will silence the warnings from the greedy algorithm.""", - default_value = False - ).rule(knext.OneOf(classifier_param, [ClassModes.GREEDY.name]), knext.Effect.SHOW) - - # interchange_param for Greedy - interchange_param = knext.BoolParameter( - label = "Interchange", - description = """If checked, the node will use the interchange algorithm to improve the coloring result.""", - default_value = False - ).rule(knext.OneOf(classifier_param, [ClassModes.GREEDY.name]), knext.Effect.SHOW) - ''' - - - # *** SETTINGS FOR BOXPLOT CLASSIFIER *** + default_value=5, + min_value=2, + ).rule( + knext.OneOf( + classifier_param, + [ + ClassModes.BOXPLOT.name, + ClassModes.PERCENTILES.name, + ClassModes.STDMEAN.name, + ClassModes.HEADT_BREAKS.name, + ], + ), + knext.Effect.HIDE, + ) # hinge_param for Boxplot hinge_param = knext.DoubleParameter( - label = "Hinge prompt for Boxplot", - description = """The hinge value is used to determine the lower and upper quartiles of the data.""", - default_value = 1.5 + label="Hinge prompt for Boxplot", + description="""The hinge value is used to determine the lower and upper quartiles of the data.""", + default_value=1.5, ).rule(knext.OneOf(classifier_param, [ClassModes.BOXPLOT.name]), knext.Effect.SHOW) # SETTINGS FOR FISHER JENKS SAMPLED CLASSIFIER *** # pct_param for JenksCaspallSampled pct_param = knext.DoubleParameter( - label = "Natural Breaks Percentage for FisherJenks Sampled", - description = """The percentage of the data to be randomly sampled to determine the natural breaks.""", - default_value = 0.10 - ).rule(knext.OneOf(classifier_param, [ClassModes.JENKS_CASFORCED.name]), knext.Effect.SHOW) + label="Natural Breaks Percentage for FisherJenks Sampled", + description="""The percentage of the data to be randomly sampled to determine the natural breaks.""", + default_value=0.10, + ).rule( + knext.OneOf( + classifier_param, + [ + ClassModes.FISHERJ_SAMPLED.name, + ClassModes.JENKS_CASSAMPLED.name, + ], + ), + knext.Effect.SHOW, + ) # determines if the output is to truncate the classification result to the number of classes specified by the k parameter jc_sampledtruncate = knext.BoolParameter( "Truncate for FisherJenksSampled", """If checked, the node will truncate the classification result to the number of classes specified by the k parameter.""", - default_value = False - ).rule(knext.OneOf(classifier_param, [ClassModes.JENKS_CASSAMPLED.name]), knext.Effect.SHOW) + default_value=False, + ).rule( + knext.OneOf(classifier_param, [ClassModes.JENKS_CASSAMPLED.name]), + knext.Effect.SHOW, + ) # *** Formatting Settings *** @@ -2054,149 +1907,112 @@ def get_default(cls): "Append Classification Results", """If checked, the node will append the classification result to the input table. If unchecked, the node will replace the input table with the classification result.""", - default_value = False - ).rule(knext.OneOf(classifier_param, [ClassModes.GREEDY.name]), knext.Effect.HIDE) + default_value=False, + ) def configure(self, configure_context, input_schema): return None def execute(self, exec_context: knext.ExecutionContext, input_table): import mapclassify as mc - import geopandas as gpd + import numpy as np + import pandas as pd + + df = input_table.to_pandas() - gdf = input_table.to_pandas() - - # equal interval - if self.classifier_param == self.ClassModes.EQUALINTERVAL.name: - for col in self.class_col: - grid = mc.EqualInterval(gdf[col], self.k_param) - if (self.append_replace): - gdf[f'{col}applied'] = grid.yb.tolist() - else: - gdf[col] = grid.yb.tolist() - - # fisher jenks - elif self.classifier_param == self.ClassModes.FISHERJ.name: - for col in self.class_col: - grid = mc.FisherJenkins(gdf[col], self.k_param) - if (self.append_replace): - gdf[f'{col}applied'] = grid.yb.tolist() - else: - gdf[col] = grid.yb.tolist() - - # jenks caspall - elif self.classifier_param == self.ClassModes.JENKS_CAS.name: - for col in self.class_col: - grid = mc.JenksCaspall(gdf[col], self.k_param) - if (self.append_replace): - gdf[f'{col}applied'] = grid.yb.tolist() - else: - gdf[col] = grid.yb.tolist() - - # jenks caspall forced - elif self.classifier_param == self.ClassModes.JENKS_CASFORCED.name: - for col in self.class_col: - grid = mc.JenksCaspallForced(gdf[col], self.k_param) - if (self.append_replace): - gdf[f'{col}applied'] = grid.yb.tolist() - else: - gdf[col] = grid.yb.tolist() - - # pretty breaks - elif self.classifier_param == self.ClassModes.PRETTYBREAKS.name: - for col in self.class_col: - grid = mc.PrettyBreaks(gdf[col], self.k_param) - if (self.append_replace): - gdf[f'{col}applied'] = grid.yb.tolist() - else: - gdf[col] = grid.yb.tolist() - - # quantiles - elif self.classifier_param == self.ClassModes.QUANTILES.name: - for col in self.class_col: - grid = mc.Quanitles(gdf[col], self.k_param) - if (self.append_replace): - gdf[f'{col}applied'] = grid.yb.tolist() - else: - gdf[col] = grid.yb.tolist() - - # ***BEGINNING OF COMMPLEX CLASSIFIERS*** - - # boxplot - elif self.classifier_param == self.ClassModes.BOXPLOT.name: - y = gdf[self.class_col] - for col in self.class_col: - grid = mc.BoxPlot(y, self.hinge_param) - if (self.append_replace): - gdf[f'{col}_applied'] = grid.yb.tolist() - else: - gdf[col] = grid.yb.tolist() - - # fisher jenks sampled - elif self.classifier_param == self.ClassModes.FISHERJ_SAMPLED.name: - y = gdf[self.class_col] - for col in self.class_col: - grid = mc.FisherJenksSampled(y, self.k_param, self.pct_param, self.jc_sampledtruncate) - if (self.append_replace): - gdf[f'{col}_applied'] = grid.yb.tolist() - else: - gdf[col] = grid.yb.tolist() - - # greedy - elif self.classifier_param == self.ClassModes.GREEDY.name: - grid = mc.greedy(gdf, self.strategy_param, self.balance_param, self.min_colors_param, - self.sw_param, self.min_distance_param, self.silence_warn_param, - self.interchange_param) - if (self.append_replace): - grid[f'{col}_greedy_colors'] = grid.yb.tolist() + if not self.class_col: + LOGGER.warning("No target columns selected for Mapclassifier node.") + return knut.to_table(df, exec_context) + + selected_mode = self.classifier_param + + exec_context.set_progress(0.1, "Preparing classification.") + + def std_mean_multiples(k: int) -> list: + if k < 2: + return [-1, 0, 1] + half = k // 2 + multiples = list(range(-half, half + 1)) + if k % 2 == 0 and 0 in multiples: + multiples.remove(0) + return multiples + + def classify_series(series: pd.Series) -> pd.Series: + valid = series.dropna() + if valid.empty: + return pd.Series(pd.NA, index=series.index, dtype="Int64") + + values = valid.to_numpy() + mode = selected_mode + + if mode == self.ClassModes.EQUALINTERVAL.name: + classifier = mc.EqualInterval(values, k=self.k_param) + elif mode == self.ClassModes.FISHERJ.name: + classifier = mc.FisherJenks(values, k=self.k_param) + elif mode == self.ClassModes.FISHERJ_SAMPLED.name: + classifier = mc.FisherJenksSampled( + values, k=self.k_param, pct=self.pct_param + ) + elif mode == self.ClassModes.JENKS_CAS.name: + classifier = mc.JenksCaspall(values, k=self.k_param) + elif mode == self.ClassModes.JENKS_CASFORCED.name: + classifier = mc.JenksCaspallForced(values, k=self.k_param) + elif mode == self.ClassModes.JENKS_CASSAMPLED.name: + classifier = mc.JenksCaspallSampled( + values, + k=self.k_param, + pct=self.pct_param, + truncate=self.jc_sampledtruncate, + ) + elif mode == self.ClassModes.PRETTYBREAKS.name: + classifier = mc.PrettyBreaks(values, k=self.k_param) + elif mode == self.ClassModes.QUANTILES.name: + classifier = mc.Quantiles(values, k=self.k_param) + elif mode == self.ClassModes.BOXPLOT.name: + classifier = mc.BoxPlot(values, hinge=self.hinge_param) + elif mode == self.ClassModes.HEADT_BREAKS.name: + classifier = mc.HeadTailBreaks(values) + elif mode == self.ClassModes.MAXIMUMBREAKS.name: + classifier = mc.MaximumBreaks(values, k=self.k_param) + elif mode == self.ClassModes.NATURALBREAKS.name: + classifier = mc.NaturalBreaks(values, k=self.k_param) + elif mode == self.ClassModes.PERCENTILES.name: + if self.k_param < 2: + raise ValueError( + "Percentiles classifier requires at least 2 classes." + ) + percentiles = list(np.linspace(0, 100, self.k_param + 1)[1:-1]) + classifier = mc.Percentiles(values, pct=percentiles) + elif mode == self.ClassModes.STDMEAN.name: + multiples = std_mean_multiples(self.k_param) + classifier = mc.StdMean(values, multiples=multiples) else: - grid["greedy_colors"] = grid.yb.tolist() - - - # head tail breaks - elif self.classifier_param == self.ClassModes.HEADT_BREAKS.name: - y = gdf[self.class_col] - grid = mc.HeadTailBreaks(y, self.k_param) - - # jenks caspall sampled - elif self.classifier_param == self.ClassModes.JENKS_CASSAMPLED.name: - y = gdf[self.class_col] - for col in self.class_col: - grid = mc.JenksCaspallSampled(y, self.k_param, self.pct_param, self.jc_sampledtruncate) - if (self.append_replace): - gdf[f'{col}applied'] = grid.yb.tolist() - else: - gdf[col] = grid.yb.tolist() - - # maxp - elif self.classifier_param == self.ClassModes.MAXP.name: - y = gdf[self.class_col] - grid = mc.MaxP(self.cal_param) - for col in self.class_col: - if (self.append_replace): - gdf[f'{col}applied'] = grid.yb.tolist() - else: - gdf[col] = grid.yb.tolist() - - # maximum breaks - elif self.classifier_param == self.ClassModes.MAXIMUMBREAKS.name: - y = gdf[self.class_col] - grid = mc.NaturalBreaks(y, k) - - # percentiles - elif self.classifier_param == self.ClassModes.PERCENTILES.name: - y = gdf[self.class_col] - grid = mc.Percentiles(y, k) - - # std mean - elif self.classifier_param == self.ClassModes.STDMEAN.name: - y = gdf[self.class_col] - grid = mc.StdMean(y, k) - - # user defined - else: - y = gdf[self.class_col] - grid = mc.UserDefined(y, 3) - gdf['class'] = grid.yb.tolist() + raise ValueError(f"Unsupported classifier mode: {mode}") + + classified = pd.Series(classifier.yb, index=valid.index, dtype="int64") + result = classified.reindex(series.index).astype("Int64") + return result + + result_df = df.copy() + existing_columns = set(result_df.columns) + + for idx, col in enumerate(self.class_col, start=1): + knut.check_canceled(exec_context) + exec_context.set_progress( + 0.1 + 0.8 * idx / len(self.class_col), + f"Classifying column '{col}' ({idx}/{len(self.class_col)})", + ) + classified_series = classify_series(df[col]) + + if self.append_replace: + new_col_name = knut.get_unique_name( + f"{col}_classified", list(existing_columns) + ) + existing_columns.add(new_col_name) + result_df[new_col_name] = classified_series + else: + result_df[col] = classified_series + + exec_context.set_progress(1.0, "Classification complete.") - return knut.to_table(gdf, exec_context) \ No newline at end of file + return knut.to_table(result_df, exec_context)