From be04d88a1d7b98daafe8c16fd56a69c451a2ccce Mon Sep 17 00:00:00 2001
From: Herbie Bradley <mail@herbiebradley.com>
Date: Fri, 15 Jul 2022 23:26:27 +0100
Subject: [PATCH 1/4] update pre-commit config

---
 .pre-commit-config.yaml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 538b35e..43b804e 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,10 +1,10 @@
 repos:
   - repo: https://github.com/psf/black
-    rev: 20.8b1
+    rev: 22.6.0
     hooks:
       - id: black
         language_version: python3.8
   - repo: https://github.com/pycqa/pylint
-    rev: pylint-2.6.0
+    rev: v2.14.4
     hooks:
       - id: pylint

From 1b827a0192e191b2a47a1d7a3ac9738272418d00 Mon Sep 17 00:00:00 2001
From: Herbie Bradley <mail@herbiebradley.com>
Date: Fri, 15 Jul 2022 23:43:04 +0100
Subject: [PATCH 2/4] fix: improve geograph loading perf.

---
 geograph/geograph.py | 124 ++++++++++++++++++++++---------------------
 pylintrc             |  12 -----
 2 files changed, 64 insertions(+), 72 deletions(-)

diff --git a/geograph/geograph.py b/geograph/geograph.py
index efd3e9f..104d85d 100644
--- a/geograph/geograph.py
+++ b/geograph/geograph.py
@@ -412,48 +412,57 @@ def _load_from_dataframe(
 
         # Reset index to ensure consistent indices
         df = df.reset_index(drop=True)
-        # Using this list and iterating through it is slightly faster than
-        # iterating through df due to the dataframe overhead
-        geom: List[shapely.Polygon] = df["geometry"].tolist()
-        # this dict maps polygon row numbers in df to a list
-        # of neighbouring polygon row numbers
-        graph_dict = {}
-
-        if tolerance > 0:
-            # Expand the borders of the polygons by `tolerance```
-            new_polygons: List[shapely.Polygon] = (
-                df["geometry"].buffer(tolerance).tolist()
-            )
+        # pylint: disable=protected-access
+        if gpd._compat.USE_PYGEOS:
+            if tolerance > 0:
+                neighbour_arr = df.sindex.query_bulk(
+                    df["geometry"].buffer(tolerance), predicate="intersects"
+                ).transpose()
+            else:
+                neighbour_arr = df.sindex.query_bulk(
+                    df["geometry"], predicate="intersects"
+                ).transpose()
+        else:
+            # this dict maps polygon row numbers in df to a list
+            # of neighbouring polygon row numbers
+            graph_dict = {}
+            if tolerance > 0:
+                # Expand the borders of the polygons by `tolerance```
+                new_polygons: gpd.GeoSeries = df["geometry"].buffer(tolerance)
         # Creating nodes (=vertices) and finding neighbors
         for index, polygon in tqdm(
-            enumerate(geom),
+            enumerate(df["geometry"]),
             desc="Step 1 of 2: Creating nodes and finding neighbours",
-            total=len(geom),
+            total=len(df),
         ):
-            if tolerance > 0:
-                # find the indexes of all polygons which intersect with this one
-                neighbours = df.sindex.query(
-                    new_polygons[index], predicate="intersects"
-                )
-            else:
-                neighbours = df.sindex.query(polygon, predicate="intersects")
-
-            graph_dict[index] = neighbours
-            # add each polygon as a node to the graph with useful attributes
-            self.graph.add_node(
-                index,
-                rep_point=polygon.representative_point(),
-                area=polygon.area,
-                perimeter=polygon.length,
-                bounds=polygon.bounds,
-            )
+            # pylint: disable=protected-access
+            if not gpd._compat.USE_PYGEOS:
+                if tolerance > 0:
+                    # find the indexes of all polygons which intersect with this one
+                    neighbours = df.sindex.query(
+                        new_polygons[index], predicate="intersects"
+                    )
+                else:
+                    neighbours = df.sindex.query(polygon, predicate="intersects")
+
+                graph_dict[index] = neighbours
+            # TODO: factor out for use_pygeos
+            self.graph.add_node(index)
         # iterate through the dict and add edges between neighbouring polygons
-        for polygon_id, neighbours in tqdm(
-            graph_dict.items(), desc="Step 2 of 2: Adding edges"
-        ):
-            for neighbour_id in neighbours:
-                if polygon_id != neighbour_id:
-                    self.graph.add_edge(polygon_id, neighbour_id)
+        # pylint: disable=protected-access
+        if gpd._compat.USE_PYGEOS:
+            for index, neighbour in tqdm(
+                neighbour_arr, desc="Step 2 of 2: Adding edges"
+            ):
+                if index != neighbour:
+                    self.graph.add_edge(index, neighbour)
+        else:
+            for polygon_id, neighbours in tqdm(
+                graph_dict.items(), desc="Step 2 of 2: Adding edges"
+            ):
+                for neighbour_id in neighbours:
+                    if polygon_id != neighbour_id:
+                        self.graph.add_edge(polygon_id, neighbour_id)
 
         # add index name
         df.index.name = "node_index"
@@ -677,12 +686,12 @@ def add_habitat(
                         # barrier polygon. If this results in multiple polygons,
                         # then the barrier polygon fully cuts the buffered polygon.
                         #
-                        # We find the resulting buffered polygon fragment that contains 
-                        # the original node polygon, by selecting the polygon that 
-                        # contains a representative point sampled from the original 
-                        # polygon. 
-                        # We then check if that polygon fragment intersects the 
-                        # neighbour polygon we're trying to reach. 
+                        # We find the resulting buffered polygon fragment that contains
+                        # the original node polygon, by selecting the polygon that
+                        # contains a representative point sampled from the original
+                        # polygon.
+                        # We then check if that polygon fragment intersects the
+                        # neighbour polygon we're trying to reach.
                         # If it does not, there is no path.
                         # If it does, there may be a path or there may not - it
                         # would require complex pathfinding code to discover, but
@@ -792,7 +801,9 @@ def get_graph_components(
         """
         components: List[set] = list(nx.connected_components(self.graph))
         if calc_polygons:
-            geom = [self.df["geometry"].loc[comp].unary_union for comp in components]
+            geom = [
+                self.df["geometry"].loc[list(comp)].unary_union for comp in components
+            ]
             gdf = gpd.GeoDataFrame(
                 {"geometry": geom, "class_label": -1}, crs=self.df.crs
             )
@@ -840,7 +851,7 @@ def get_metric(
                 result = metrics._get_metric(
                     name=name, geo_graph=self, class_value=class_value, **metric_kwargs
                 )
-                if name in self.class_metrics.keys():
+                if name in self.class_metrics:
                     self.class_metrics[name][class_value] = result
                 else:
                     self.class_metrics[name] = {class_value: result}
@@ -958,14 +969,7 @@ def _add_node(
         node_data = dict(data.items())
 
         # Add node to graph
-        self.graph.add_node(
-            node_id,
-            rep_point=node_data["geometry"].representative_point(),
-            area=node_data["geometry"].area,
-            perimeter=node_data["geometry"].length,
-            class_label=node_data["class_label"],
-            bounds=node_data["geometry"].bounds,
-        )
+        self.graph.add_node(node_id)
 
         # Add node data to dataframe
         missing_cols = {
@@ -1111,25 +1115,25 @@ class label in `valid_classes`, as long as they are less than
             f"and {self.graph.number_of_edges()} edges.",
         )
 
-    def _load_from_graph_path(self, load_path: pathlib.Path) -> None:
+    def _load_from_graph_path(self, graph_path: pathlib.Path) -> None:
         """
         Load networkx graph and dataframe objects from a pickle file.
 
         Args:
-            load_path (pathlib.Path): Path to a pickle file. Can be compressed
+            graph_path (pathlib.Path): Path to a pickle file. Can be compressed
             with gzip or bz2.
 
         Returns:
             gpd.GeoDataFrame: The dataframe containing polygon objects.
         """
-        if load_path.suffix == ".bz2":
-            with bz2.BZ2File(load_path, "rb") as bz2_file:
+        if graph_path.suffix == ".bz2":
+            with bz2.BZ2File(graph_path, "rb") as bz2_file:
                 data = pickle.load(bz2_file)
-        elif load_path.suffix == ".gz":
-            with gzip.GzipFile(load_path, "rb") as gz_file:
+        elif graph_path.suffix == ".gz":
+            with gzip.GzipFile(graph_path, "rb") as gz_file:
                 data = pickle.loads(gz_file.read())
         else:
-            with open(load_path, "rb") as file:
+            with open(graph_path, "rb") as file:
                 data = pickle.load(file)
         self.df = data["dataframe"]
         self.name = data["name"]
diff --git a/pylintrc b/pylintrc
index cbac165..c015dac 100644
--- a/pylintrc
+++ b/pylintrc
@@ -160,12 +160,6 @@ disable=abstract-method,
 # mypackage.mymodule.MyReporterClass.
 output-format=text
 
-# Put messages in a separate file for each module / package specified on the
-# command line instead of printing them on stdout. Reports (if any) will be
-# written in a file name "pylint_global.[txt|html]". This option is deprecated
-# and it will be removed in Pylint 2.0.
-files-output=no
-
 # Tells whether to display a full report or only the messages
 reports=no
 
@@ -284,12 +278,6 @@ ignore-long-lines=(?x)(
 # else.
 single-line-if-stmt=yes
 
-# List of optional constructs for which whitespace checking is disabled. `dict-
-# separator` is used to allow tabulation in dicts, etc.: {1  : 1,\n222: 2}.
-# `trailing-comma` allows a space between comma and closing bracket: (a, ).
-# `empty-line` allows space-only lines.
-no-space-check=
-
 # Maximum number of lines in a module
 max-module-lines=99999
 

From 92e8bc03733be455805053a4d50f5e75da0b96ec Mon Sep 17 00:00:00 2001
From: Herbie Bradley <mail@herbiebradley.com>
Date: Fri, 15 Jul 2022 23:59:16 +0100
Subject: [PATCH 3/4] fix: remove unnecessary node attributes from comps

---
 geograph/geograph.py | 10 ----------
 1 file changed, 10 deletions(-)

diff --git a/geograph/geograph.py b/geograph/geograph.py
index 104d85d..d696c83 100644
--- a/geograph/geograph.py
+++ b/geograph/geograph.py
@@ -1257,16 +1257,6 @@ def _load_from_dataframe(
             self.graph = nx.complete_graph(len(df))
         else:
             self.graph = nx.empty_graph(len(df))
-        # Add node attributes
-        for node in tqdm(
-            self.graph.nodes, desc="Constructing graph", total=len(self.graph)
-        ):
-            polygon = geom[node]
-            self.graph.nodes[node]["rep_point"] = polygon.representative_point()
-            self.graph.nodes[node]["area"] = polygon.area
-            self.graph.nodes[node]["perimeter"] = polygon.length
-            self.graph.nodes[node]["bounds"] = polygon.bounds
-
         # Add edge attributes if necessary
         if self.has_distance_edges:
             for u, v, attrs in tqdm(

From 3226af14aefa8b8878efcf250bbc26115a32e091 Mon Sep 17 00:00:00 2001
From: Herbie Bradley <mail@herbiebradley.com>
Date: Mon, 3 Oct 2022 14:06:58 +0100
Subject: [PATCH 4/4] Metrics update

---
 geograph/metrics.py | 55 ++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 50 insertions(+), 5 deletions(-)

diff --git a/geograph/metrics.py b/geograph/metrics.py
index eadc68c..81da538 100644
--- a/geograph/metrics.py
+++ b/geograph/metrics.py
@@ -2,7 +2,8 @@
 from __future__ import annotations
 
 from dataclasses import dataclass
-from typing import TYPE_CHECKING, Any, Optional, Union
+from itertools import combinations
+from typing import TYPE_CHECKING, Any, Dict, Optional, Union
 
 import networkx as nx
 import numpy as np
@@ -10,7 +11,9 @@
 if TYPE_CHECKING:
     import geograph
 
-
+# TODO: refactor this file to return a tuple with the values to put inside the
+# metric such that the metric is only created once by the calling GeoGraph
+# since dataclass creation is slow
 # define a metric dataclass with < <= => > == comparisons that work as you would
 # expect intuitively
 @dataclass()
@@ -52,6 +55,8 @@ def __ge__(self, o: object) -> bool:
 ########################################################################################
 # 1. Landscape level metrics
 ########################################################################################
+
+
 def _num_patches(geo_graph: geograph.GeoGraph) -> Metric:
     return Metric(
         value=len(geo_graph.df),
@@ -164,7 +169,7 @@ def _simpson_diversity_index(geo_graph: geograph.GeoGraph) -> Metric:
     )
 
     return Metric(
-        value=1 - np.sum(class_prop_of_landscape ** 2),
+        value=1 - np.sum(class_prop_of_landscape**2),
         name="simpson_diversity_index",
         description=description,
         variant="conventional",
@@ -391,7 +396,7 @@ def _class_effective_mesh_size(
     )
 
     return Metric(
-        value=np.sum(class_areas ** 2) / total_area,
+        value=np.sum(class_areas**2) / total_area,
         name=f"effective_mesh_size_class={class_value}",
         description=description,
         variant="conventional",
@@ -413,7 +418,7 @@ def _class_effective_mesh_size(
 }
 
 ########################################################################################
-# 3. Habitat componment level metrics
+# 3. Habitat component level metrics
 ########################################################################################
 
 
@@ -482,10 +487,49 @@ def _avg_component_isolation(geo_graph: geograph.GeoGraph) -> Metric:
     )
 
 
+def _habitat_iic(
+    geo_graph: geograph.GeoGraph,
+    get_total_area: bool = False,
+    shortest_path_cutoff: Optional[int] = None,
+) -> Metric:
+    if get_total_area:
+        # Most efficient way to get area of convex hull of the GeoGraph
+        total_area = geo_graph.components.df.dissolve().convex_hull.values[0].area
+    iic = 0.0
+    idx_dict = dict(zip(geo_graph.df.index.values, range(len(geo_graph.df))))
+    path_lengths: Dict = dict(
+        nx.all_pairs_shortest_path_length(geo_graph.graph, cutoff=shortest_path_cutoff)
+    )
+    for x in combinations(geo_graph.df.index.values, 2):
+        if x[1] not in path_lengths[x[0]]:
+            continue
+        iic += (
+            geo_graph.graph.nodes[x[0]]["area"] * geo_graph.graph.nodes[x[1]]["area"]
+        ) / (1 + path_lengths[x[0]][x[1]])
+    if get_total_area:
+        # for node in geo_graph.graph.nodes:
+        #    iic += geo_graph.graph.nodes[node]["area"] ** 2
+        return Metric(
+            value=iic / total_area,
+            name="habitat_iic",
+            description="The habitat IIC metric",
+            variant="component",
+            unit="dimensionless",
+        )
+    return Metric(
+        value=iic,
+        name="habitat_iic",
+        description="The habitat IIC metric",
+        variant="component",
+        unit="dimensionless",
+    )
+
+
 COMPONENT_METRICS_DICT = {
     "num_components": _num_components,
     "avg_component_area": _avg_component_area,
     "avg_component_isolation": _avg_component_isolation,
+    "habitat_iic": _habitat_iic,
 }
 
 
@@ -493,6 +537,7 @@ def _avg_component_isolation(geo_graph: geograph.GeoGraph) -> Metric:
 # 4. Define access interface for GeoGraph
 ########################################################################################
 
+
 STANDARD_METRICS = ["num_components", "avg_patch_area", "total_area"]