-
- Read the Docs
- v: ${config.versions.current.slug}
-
-
-
-
- ${renderLanguages(config)}
- ${renderVersions(config)}
- ${renderDownloads(config)}
-
- On Read the Docs
-
- Project Home
-
-
- Builds
-
-
- Downloads
-
-
-
- Search
-
-
-
-
-
-
- Hosted by Read the Docs
-
-
-
- `;
-
- // Inject the generated flyout into the body HTML element.
- document.body.insertAdjacentHTML("beforeend", flyout);
-
- // Trigger the Read the Docs Addons Search modal when clicking on the "Search docs" input from inside the flyout.
- document
- .querySelector("#flyout-search-form")
- .addEventListener("focusin", () => {
- const event = new CustomEvent("readthedocs-search-show");
- document.dispatchEvent(event);
- });
- })
-}
-
-if (themeLanguageSelector || themeVersionSelector) {
- function onSelectorSwitch(event) {
- const option = event.target.selectedIndex;
- const item = event.target.options[option];
- window.location.href = item.dataset.url;
- }
-
- document.addEventListener("readthedocs-addons-data-ready", function (event) {
- const config = event.detail.data();
-
- const versionSwitch = document.querySelector(
- "div.switch-menus > div.version-switch",
- );
- if (themeVersionSelector) {
- let versions = config.versions.active;
- if (config.versions.current.hidden || config.versions.current.type === "external") {
- versions.unshift(config.versions.current);
- }
- const versionSelect = `
-
- ${versions
- .map(
- (version) => `
-
- ${version.slug}
- `,
- )
- .join("\n")}
-
- `;
-
- versionSwitch.innerHTML = versionSelect;
- versionSwitch.firstElementChild.addEventListener("change", onSelectorSwitch);
- }
-
- const languageSwitch = document.querySelector(
- "div.switch-menus > div.language-switch",
- );
-
- if (themeLanguageSelector) {
- if (config.projects.translations.length) {
- // Add the current language to the options on the selector
- let languages = config.projects.translations.concat(
- config.projects.current,
- );
- languages = languages.sort((a, b) =>
- a.language.name.localeCompare(b.language.name),
- );
-
- const languageSelect = `
-
- ${languages
- .map(
- (language) => `
-
- ${language.language.name}
- `,
- )
- .join("\n")}
-
- `;
-
- languageSwitch.innerHTML = languageSelect;
- languageSwitch.firstElementChild.addEventListener("change", onSelectorSwitch);
- }
- else {
- languageSwitch.remove();
- }
- }
- });
-}
-
-document.addEventListener("readthedocs-addons-data-ready", function (event) {
- // Trigger the Read the Docs Addons Search modal when clicking on "Search docs" input from the topnav.
- document
- .querySelector("[role='search'] input")
- .addEventListener("focusin", () => {
- const event = new CustomEvent("readthedocs-search-show");
- document.dispatchEvent(event);
- });
-});
\ No newline at end of file
diff --git a/docs/_build/html/_static/language_data.js b/docs/_build/html/_static/language_data.js
deleted file mode 100644
index c7fe6c6f..00000000
--- a/docs/_build/html/_static/language_data.js
+++ /dev/null
@@ -1,192 +0,0 @@
-/*
- * This script contains the language-specific data used by searchtools.js,
- * namely the list of stopwords, stemmer, scorer and splitter.
- */
-
-var stopwords = ["a", "and", "are", "as", "at", "be", "but", "by", "for", "if", "in", "into", "is", "it", "near", "no", "not", "of", "on", "or", "such", "that", "the", "their", "then", "there", "these", "they", "this", "to", "was", "will", "with"];
-
-
-/* Non-minified version is copied as a separate JS file, if available */
-
-/**
- * Porter Stemmer
- */
-var Stemmer = function() {
-
- var step2list = {
- ational: 'ate',
- tional: 'tion',
- enci: 'ence',
- anci: 'ance',
- izer: 'ize',
- bli: 'ble',
- alli: 'al',
- entli: 'ent',
- eli: 'e',
- ousli: 'ous',
- ization: 'ize',
- ation: 'ate',
- ator: 'ate',
- alism: 'al',
- iveness: 'ive',
- fulness: 'ful',
- ousness: 'ous',
- aliti: 'al',
- iviti: 'ive',
- biliti: 'ble',
- logi: 'log'
- };
-
- var step3list = {
- icate: 'ic',
- ative: '',
- alize: 'al',
- iciti: 'ic',
- ical: 'ic',
- ful: '',
- ness: ''
- };
-
- var c = "[^aeiou]"; // consonant
- var v = "[aeiouy]"; // vowel
- var C = c + "[^aeiouy]*"; // consonant sequence
- var V = v + "[aeiou]*"; // vowel sequence
-
- var mgr0 = "^(" + C + ")?" + V + C; // [C]VC... is m>0
- var meq1 = "^(" + C + ")?" + V + C + "(" + V + ")?$"; // [C]VC[V] is m=1
- var mgr1 = "^(" + C + ")?" + V + C + V + C; // [C]VCVC... is m>1
- var s_v = "^(" + C + ")?" + v; // vowel in stem
-
- this.stemWord = function (w) {
- var stem;
- var suffix;
- var firstch;
- var origword = w;
-
- if (w.length < 3)
- return w;
-
- var re;
- var re2;
- var re3;
- var re4;
-
- firstch = w.substr(0,1);
- if (firstch == "y")
- w = firstch.toUpperCase() + w.substr(1);
-
- // Step 1a
- re = /^(.+?)(ss|i)es$/;
- re2 = /^(.+?)([^s])s$/;
-
- if (re.test(w))
- w = w.replace(re,"$1$2");
- else if (re2.test(w))
- w = w.replace(re2,"$1$2");
-
- // Step 1b
- re = /^(.+?)eed$/;
- re2 = /^(.+?)(ed|ing)$/;
- if (re.test(w)) {
- var fp = re.exec(w);
- re = new RegExp(mgr0);
- if (re.test(fp[1])) {
- re = /.$/;
- w = w.replace(re,"");
- }
- }
- else if (re2.test(w)) {
- var fp = re2.exec(w);
- stem = fp[1];
- re2 = new RegExp(s_v);
- if (re2.test(stem)) {
- w = stem;
- re2 = /(at|bl|iz)$/;
- re3 = new RegExp("([^aeiouylsz])\\1$");
- re4 = new RegExp("^" + C + v + "[^aeiouwxy]$");
- if (re2.test(w))
- w = w + "e";
- else if (re3.test(w)) {
- re = /.$/;
- w = w.replace(re,"");
- }
- else if (re4.test(w))
- w = w + "e";
- }
- }
-
- // Step 1c
- re = /^(.+?)y$/;
- if (re.test(w)) {
- var fp = re.exec(w);
- stem = fp[1];
- re = new RegExp(s_v);
- if (re.test(stem))
- w = stem + "i";
- }
-
- // Step 2
- re = /^(.+?)(ational|tional|enci|anci|izer|bli|alli|entli|eli|ousli|ization|ation|ator|alism|iveness|fulness|ousness|aliti|iviti|biliti|logi)$/;
- if (re.test(w)) {
- var fp = re.exec(w);
- stem = fp[1];
- suffix = fp[2];
- re = new RegExp(mgr0);
- if (re.test(stem))
- w = stem + step2list[suffix];
- }
-
- // Step 3
- re = /^(.+?)(icate|ative|alize|iciti|ical|ful|ness)$/;
- if (re.test(w)) {
- var fp = re.exec(w);
- stem = fp[1];
- suffix = fp[2];
- re = new RegExp(mgr0);
- if (re.test(stem))
- w = stem + step3list[suffix];
- }
-
- // Step 4
- re = /^(.+?)(al|ance|ence|er|ic|able|ible|ant|ement|ment|ent|ou|ism|ate|iti|ous|ive|ize)$/;
- re2 = /^(.+?)(s|t)(ion)$/;
- if (re.test(w)) {
- var fp = re.exec(w);
- stem = fp[1];
- re = new RegExp(mgr1);
- if (re.test(stem))
- w = stem;
- }
- else if (re2.test(w)) {
- var fp = re2.exec(w);
- stem = fp[1] + fp[2];
- re2 = new RegExp(mgr1);
- if (re2.test(stem))
- w = stem;
- }
-
- // Step 5
- re = /^(.+?)e$/;
- if (re.test(w)) {
- var fp = re.exec(w);
- stem = fp[1];
- re = new RegExp(mgr1);
- re2 = new RegExp(meq1);
- re3 = new RegExp("^" + C + v + "[^aeiouwxy]$");
- if (re.test(stem) || (re2.test(stem) && !(re3.test(stem))))
- w = stem;
- }
- re = /ll$/;
- re2 = new RegExp(mgr1);
- if (re.test(w) && re2.test(w)) {
- re = /.$/;
- w = w.replace(re,"");
- }
-
- // and turn initial Y back to y
- if (firstch == "y")
- w = firstch.toLowerCase() + w.substr(1);
- return w;
- }
-}
-
diff --git a/docs/_build/html/_static/minus.png b/docs/_build/html/_static/minus.png
deleted file mode 100644
index d96755fd..00000000
Binary files a/docs/_build/html/_static/minus.png and /dev/null differ
diff --git a/docs/_build/html/_static/plus.png b/docs/_build/html/_static/plus.png
deleted file mode 100644
index 7107cec9..00000000
Binary files a/docs/_build/html/_static/plus.png and /dev/null differ
diff --git a/docs/_build/html/_static/pygments.css b/docs/_build/html/_static/pygments.css
deleted file mode 100644
index 84ab3030..00000000
--- a/docs/_build/html/_static/pygments.css
+++ /dev/null
@@ -1,75 +0,0 @@
-pre { line-height: 125%; }
-td.linenos .normal { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; }
-span.linenos { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; }
-td.linenos .special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; }
-span.linenos.special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; }
-.highlight .hll { background-color: #ffffcc }
-.highlight { background: #f8f8f8; }
-.highlight .c { color: #3D7B7B; font-style: italic } /* Comment */
-.highlight .err { border: 1px solid #FF0000 } /* Error */
-.highlight .k { color: #008000; font-weight: bold } /* Keyword */
-.highlight .o { color: #666666 } /* Operator */
-.highlight .ch { color: #3D7B7B; font-style: italic } /* Comment.Hashbang */
-.highlight .cm { color: #3D7B7B; font-style: italic } /* Comment.Multiline */
-.highlight .cp { color: #9C6500 } /* Comment.Preproc */
-.highlight .cpf { color: #3D7B7B; font-style: italic } /* Comment.PreprocFile */
-.highlight .c1 { color: #3D7B7B; font-style: italic } /* Comment.Single */
-.highlight .cs { color: #3D7B7B; font-style: italic } /* Comment.Special */
-.highlight .gd { color: #A00000 } /* Generic.Deleted */
-.highlight .ge { font-style: italic } /* Generic.Emph */
-.highlight .ges { font-weight: bold; font-style: italic } /* Generic.EmphStrong */
-.highlight .gr { color: #E40000 } /* Generic.Error */
-.highlight .gh { color: #000080; font-weight: bold } /* Generic.Heading */
-.highlight .gi { color: #008400 } /* Generic.Inserted */
-.highlight .go { color: #717171 } /* Generic.Output */
-.highlight .gp { color: #000080; font-weight: bold } /* Generic.Prompt */
-.highlight .gs { font-weight: bold } /* Generic.Strong */
-.highlight .gu { color: #800080; font-weight: bold } /* Generic.Subheading */
-.highlight .gt { color: #0044DD } /* Generic.Traceback */
-.highlight .kc { color: #008000; font-weight: bold } /* Keyword.Constant */
-.highlight .kd { color: #008000; font-weight: bold } /* Keyword.Declaration */
-.highlight .kn { color: #008000; font-weight: bold } /* Keyword.Namespace */
-.highlight .kp { color: #008000 } /* Keyword.Pseudo */
-.highlight .kr { color: #008000; font-weight: bold } /* Keyword.Reserved */
-.highlight .kt { color: #B00040 } /* Keyword.Type */
-.highlight .m { color: #666666 } /* Literal.Number */
-.highlight .s { color: #BA2121 } /* Literal.String */
-.highlight .na { color: #687822 } /* Name.Attribute */
-.highlight .nb { color: #008000 } /* Name.Builtin */
-.highlight .nc { color: #0000FF; font-weight: bold } /* Name.Class */
-.highlight .no { color: #880000 } /* Name.Constant */
-.highlight .nd { color: #AA22FF } /* Name.Decorator */
-.highlight .ni { color: #717171; font-weight: bold } /* Name.Entity */
-.highlight .ne { color: #CB3F38; font-weight: bold } /* Name.Exception */
-.highlight .nf { color: #0000FF } /* Name.Function */
-.highlight .nl { color: #767600 } /* Name.Label */
-.highlight .nn { color: #0000FF; font-weight: bold } /* Name.Namespace */
-.highlight .nt { color: #008000; font-weight: bold } /* Name.Tag */
-.highlight .nv { color: #19177C } /* Name.Variable */
-.highlight .ow { color: #AA22FF; font-weight: bold } /* Operator.Word */
-.highlight .w { color: #bbbbbb } /* Text.Whitespace */
-.highlight .mb { color: #666666 } /* Literal.Number.Bin */
-.highlight .mf { color: #666666 } /* Literal.Number.Float */
-.highlight .mh { color: #666666 } /* Literal.Number.Hex */
-.highlight .mi { color: #666666 } /* Literal.Number.Integer */
-.highlight .mo { color: #666666 } /* Literal.Number.Oct */
-.highlight .sa { color: #BA2121 } /* Literal.String.Affix */
-.highlight .sb { color: #BA2121 } /* Literal.String.Backtick */
-.highlight .sc { color: #BA2121 } /* Literal.String.Char */
-.highlight .dl { color: #BA2121 } /* Literal.String.Delimiter */
-.highlight .sd { color: #BA2121; font-style: italic } /* Literal.String.Doc */
-.highlight .s2 { color: #BA2121 } /* Literal.String.Double */
-.highlight .se { color: #AA5D1F; font-weight: bold } /* Literal.String.Escape */
-.highlight .sh { color: #BA2121 } /* Literal.String.Heredoc */
-.highlight .si { color: #A45A77; font-weight: bold } /* Literal.String.Interpol */
-.highlight .sx { color: #008000 } /* Literal.String.Other */
-.highlight .sr { color: #A45A77 } /* Literal.String.Regex */
-.highlight .s1 { color: #BA2121 } /* Literal.String.Single */
-.highlight .ss { color: #19177C } /* Literal.String.Symbol */
-.highlight .bp { color: #008000 } /* Name.Builtin.Pseudo */
-.highlight .fm { color: #0000FF } /* Name.Function.Magic */
-.highlight .vc { color: #19177C } /* Name.Variable.Class */
-.highlight .vg { color: #19177C } /* Name.Variable.Global */
-.highlight .vi { color: #19177C } /* Name.Variable.Instance */
-.highlight .vm { color: #19177C } /* Name.Variable.Magic */
-.highlight .il { color: #666666 } /* Literal.Number.Integer.Long */
\ No newline at end of file
diff --git a/docs/_build/html/_static/searchtools.js b/docs/_build/html/_static/searchtools.js
deleted file mode 100644
index 2c774d17..00000000
--- a/docs/_build/html/_static/searchtools.js
+++ /dev/null
@@ -1,632 +0,0 @@
-/*
- * Sphinx JavaScript utilities for the full-text search.
- */
-"use strict";
-
-/**
- * Simple result scoring code.
- */
-if (typeof Scorer === "undefined") {
- var Scorer = {
- // Implement the following function to further tweak the score for each result
- // The function takes a result array [docname, title, anchor, descr, score, filename]
- // and returns the new score.
- /*
- score: result => {
- const [docname, title, anchor, descr, score, filename, kind] = result
- return score
- },
- */
-
- // query matches the full name of an object
- objNameMatch: 11,
- // or matches in the last dotted part of the object name
- objPartialMatch: 6,
- // Additive scores depending on the priority of the object
- objPrio: {
- 0: 15, // used to be importantResults
- 1: 5, // used to be objectResults
- 2: -5, // used to be unimportantResults
- },
- // Used when the priority is not in the mapping.
- objPrioDefault: 0,
-
- // query found in title
- title: 15,
- partialTitle: 7,
- // query found in terms
- term: 5,
- partialTerm: 2,
- };
-}
-
-// Global search result kind enum, used by themes to style search results.
-class SearchResultKind {
- static get index() { return "index"; }
- static get object() { return "object"; }
- static get text() { return "text"; }
- static get title() { return "title"; }
-}
-
-const _removeChildren = (element) => {
- while (element && element.lastChild) element.removeChild(element.lastChild);
-};
-
-/**
- * See https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions#escaping
- */
-const _escapeRegExp = (string) =>
- string.replace(/[.*+\-?^${}()|[\]\\]/g, "\\$&"); // $& means the whole matched string
-
-const _displayItem = (item, searchTerms, highlightTerms) => {
- const docBuilder = DOCUMENTATION_OPTIONS.BUILDER;
- const docFileSuffix = DOCUMENTATION_OPTIONS.FILE_SUFFIX;
- const docLinkSuffix = DOCUMENTATION_OPTIONS.LINK_SUFFIX;
- const showSearchSummary = DOCUMENTATION_OPTIONS.SHOW_SEARCH_SUMMARY;
- const contentRoot = document.documentElement.dataset.content_root;
-
- const [docName, title, anchor, descr, score, _filename, kind] = item;
-
- let listItem = document.createElement("li");
- // Add a class representing the item's type:
- // can be used by a theme's CSS selector for styling
- // See SearchResultKind for the class names.
- listItem.classList.add(`kind-${kind}`);
- let requestUrl;
- let linkUrl;
- if (docBuilder === "dirhtml") {
- // dirhtml builder
- let dirname = docName + "/";
- if (dirname.match(/\/index\/$/))
- dirname = dirname.substring(0, dirname.length - 6);
- else if (dirname === "index/") dirname = "";
- requestUrl = contentRoot + dirname;
- linkUrl = requestUrl;
- } else {
- // normal html builders
- requestUrl = contentRoot + docName + docFileSuffix;
- linkUrl = docName + docLinkSuffix;
- }
- let linkEl = listItem.appendChild(document.createElement("a"));
- linkEl.href = linkUrl + anchor;
- linkEl.dataset.score = score;
- linkEl.innerHTML = title;
- if (descr) {
- listItem.appendChild(document.createElement("span")).innerHTML =
- " (" + descr + ")";
- // highlight search terms in the description
- if (SPHINX_HIGHLIGHT_ENABLED) // set in sphinx_highlight.js
- highlightTerms.forEach((term) => _highlightText(listItem, term, "highlighted"));
- }
- else if (showSearchSummary)
- fetch(requestUrl)
- .then((responseData) => responseData.text())
- .then((data) => {
- if (data)
- listItem.appendChild(
- Search.makeSearchSummary(data, searchTerms, anchor)
- );
- // highlight search terms in the summary
- if (SPHINX_HIGHLIGHT_ENABLED) // set in sphinx_highlight.js
- highlightTerms.forEach((term) => _highlightText(listItem, term, "highlighted"));
- });
- Search.output.appendChild(listItem);
-};
-const _finishSearch = (resultCount) => {
- Search.stopPulse();
- Search.title.innerText = _("Search Results");
- if (!resultCount)
- Search.status.innerText = Documentation.gettext(
- "Your search did not match any documents. Please make sure that all words are spelled correctly and that you've selected enough categories."
- );
- else
- Search.status.innerText = Documentation.ngettext(
- "Search finished, found one page matching the search query.",
- "Search finished, found ${resultCount} pages matching the search query.",
- resultCount,
- ).replace('${resultCount}', resultCount);
-};
-const _displayNextItem = (
- results,
- resultCount,
- searchTerms,
- highlightTerms,
-) => {
- // results left, load the summary and display it
- // this is intended to be dynamic (don't sub resultsCount)
- if (results.length) {
- _displayItem(results.pop(), searchTerms, highlightTerms);
- setTimeout(
- () => _displayNextItem(results, resultCount, searchTerms, highlightTerms),
- 5
- );
- }
- // search finished, update title and status message
- else _finishSearch(resultCount);
-};
-// Helper function used by query() to order search results.
-// Each input is an array of [docname, title, anchor, descr, score, filename, kind].
-// Order the results by score (in opposite order of appearance, since the
-// `_displayNextItem` function uses pop() to retrieve items) and then alphabetically.
-const _orderResultsByScoreThenName = (a, b) => {
- const leftScore = a[4];
- const rightScore = b[4];
- if (leftScore === rightScore) {
- // same score: sort alphabetically
- const leftTitle = a[1].toLowerCase();
- const rightTitle = b[1].toLowerCase();
- if (leftTitle === rightTitle) return 0;
- return leftTitle > rightTitle ? -1 : 1; // inverted is intentional
- }
- return leftScore > rightScore ? 1 : -1;
-};
-
-/**
- * Default splitQuery function. Can be overridden in ``sphinx.search`` with a
- * custom function per language.
- *
- * The regular expression works by splitting the string on consecutive characters
- * that are not Unicode letters, numbers, underscores, or emoji characters.
- * This is the same as ``\W+`` in Python, preserving the surrogate pair area.
- */
-if (typeof splitQuery === "undefined") {
- var splitQuery = (query) => query
- .split(/[^\p{Letter}\p{Number}_\p{Emoji_Presentation}]+/gu)
- .filter(term => term) // remove remaining empty strings
-}
-
-/**
- * Search Module
- */
-const Search = {
- _index: null,
- _queued_query: null,
- _pulse_status: -1,
-
- htmlToText: (htmlString, anchor) => {
- const htmlElement = new DOMParser().parseFromString(htmlString, 'text/html');
- for (const removalQuery of [".headerlink", "script", "style"]) {
- htmlElement.querySelectorAll(removalQuery).forEach((el) => { el.remove() });
- }
- if (anchor) {
- const anchorContent = htmlElement.querySelector(`[role="main"] ${anchor}`);
- if (anchorContent) return anchorContent.textContent;
-
- console.warn(
- `Anchored content block not found. Sphinx search tries to obtain it via DOM query '[role=main] ${anchor}'. Check your theme or template.`
- );
- }
-
- // if anchor not specified or not found, fall back to main content
- const docContent = htmlElement.querySelector('[role="main"]');
- if (docContent) return docContent.textContent;
-
- console.warn(
- "Content block not found. Sphinx search tries to obtain it via DOM query '[role=main]'. Check your theme or template."
- );
- return "";
- },
-
- init: () => {
- const query = new URLSearchParams(window.location.search).get("q");
- document
- .querySelectorAll('input[name="q"]')
- .forEach((el) => (el.value = query));
- if (query) Search.performSearch(query);
- },
-
- loadIndex: (url) =>
- (document.body.appendChild(document.createElement("script")).src = url),
-
- setIndex: (index) => {
- Search._index = index;
- if (Search._queued_query !== null) {
- const query = Search._queued_query;
- Search._queued_query = null;
- Search.query(query);
- }
- },
-
- hasIndex: () => Search._index !== null,
-
- deferQuery: (query) => (Search._queued_query = query),
-
- stopPulse: () => (Search._pulse_status = -1),
-
- startPulse: () => {
- if (Search._pulse_status >= 0) return;
-
- const pulse = () => {
- Search._pulse_status = (Search._pulse_status + 1) % 4;
- Search.dots.innerText = ".".repeat(Search._pulse_status);
- if (Search._pulse_status >= 0) window.setTimeout(pulse, 500);
- };
- pulse();
- },
-
- /**
- * perform a search for something (or wait until index is loaded)
- */
- performSearch: (query) => {
- // create the required interface elements
- const searchText = document.createElement("h2");
- searchText.textContent = _("Searching");
- const searchSummary = document.createElement("p");
- searchSummary.classList.add("search-summary");
- searchSummary.innerText = "";
- const searchList = document.createElement("ul");
- searchList.setAttribute("role", "list");
- searchList.classList.add("search");
-
- const out = document.getElementById("search-results");
- Search.title = out.appendChild(searchText);
- Search.dots = Search.title.appendChild(document.createElement("span"));
- Search.status = out.appendChild(searchSummary);
- Search.output = out.appendChild(searchList);
-
- const searchProgress = document.getElementById("search-progress");
- // Some themes don't use the search progress node
- if (searchProgress) {
- searchProgress.innerText = _("Preparing search...");
- }
- Search.startPulse();
-
- // index already loaded, the browser was quick!
- if (Search.hasIndex()) Search.query(query);
- else Search.deferQuery(query);
- },
-
- _parseQuery: (query) => {
- // stem the search terms and add them to the correct list
- const stemmer = new Stemmer();
- const searchTerms = new Set();
- const excludedTerms = new Set();
- const highlightTerms = new Set();
- const objectTerms = new Set(splitQuery(query.toLowerCase().trim()));
- splitQuery(query.trim()).forEach((queryTerm) => {
- const queryTermLower = queryTerm.toLowerCase();
-
- // maybe skip this "word"
- // stopwords array is from language_data.js
- if (
- stopwords.indexOf(queryTermLower) !== -1 ||
- queryTerm.match(/^\d+$/)
- )
- return;
-
- // stem the word
- let word = stemmer.stemWord(queryTermLower);
- // select the correct list
- if (word[0] === "-") excludedTerms.add(word.substr(1));
- else {
- searchTerms.add(word);
- highlightTerms.add(queryTermLower);
- }
- });
-
- if (SPHINX_HIGHLIGHT_ENABLED) { // set in sphinx_highlight.js
- localStorage.setItem("sphinx_highlight_terms", [...highlightTerms].join(" "))
- }
-
- // console.debug("SEARCH: searching for:");
- // console.info("required: ", [...searchTerms]);
- // console.info("excluded: ", [...excludedTerms]);
-
- return [query, searchTerms, excludedTerms, highlightTerms, objectTerms];
- },
-
- /**
- * execute search (requires search index to be loaded)
- */
- _performSearch: (query, searchTerms, excludedTerms, highlightTerms, objectTerms) => {
- const filenames = Search._index.filenames;
- const docNames = Search._index.docnames;
- const titles = Search._index.titles;
- const allTitles = Search._index.alltitles;
- const indexEntries = Search._index.indexentries;
-
- // Collect multiple result groups to be sorted separately and then ordered.
- // Each is an array of [docname, title, anchor, descr, score, filename, kind].
- const normalResults = [];
- const nonMainIndexResults = [];
-
- _removeChildren(document.getElementById("search-progress"));
-
- const queryLower = query.toLowerCase().trim();
- for (const [title, foundTitles] of Object.entries(allTitles)) {
- if (title.toLowerCase().trim().includes(queryLower) && (queryLower.length >= title.length/2)) {
- for (const [file, id] of foundTitles) {
- const score = Math.round(Scorer.title * queryLower.length / title.length);
- const boost = titles[file] === title ? 1 : 0; // add a boost for document titles
- normalResults.push([
- docNames[file],
- titles[file] !== title ? `${titles[file]} > ${title}` : title,
- id !== null ? "#" + id : "",
- null,
- score + boost,
- filenames[file],
- SearchResultKind.title,
- ]);
- }
- }
- }
-
- // search for explicit entries in index directives
- for (const [entry, foundEntries] of Object.entries(indexEntries)) {
- if (entry.includes(queryLower) && (queryLower.length >= entry.length/2)) {
- for (const [file, id, isMain] of foundEntries) {
- const score = Math.round(100 * queryLower.length / entry.length);
- const result = [
- docNames[file],
- titles[file],
- id ? "#" + id : "",
- null,
- score,
- filenames[file],
- SearchResultKind.index,
- ];
- if (isMain) {
- normalResults.push(result);
- } else {
- nonMainIndexResults.push(result);
- }
- }
- }
- }
-
- // lookup as object
- objectTerms.forEach((term) =>
- normalResults.push(...Search.performObjectSearch(term, objectTerms))
- );
-
- // lookup as search terms in fulltext
- normalResults.push(...Search.performTermsSearch(searchTerms, excludedTerms));
-
- // let the scorer override scores with a custom scoring function
- if (Scorer.score) {
- normalResults.forEach((item) => (item[4] = Scorer.score(item)));
- nonMainIndexResults.forEach((item) => (item[4] = Scorer.score(item)));
- }
-
- // Sort each group of results by score and then alphabetically by name.
- normalResults.sort(_orderResultsByScoreThenName);
- nonMainIndexResults.sort(_orderResultsByScoreThenName);
-
- // Combine the result groups in (reverse) order.
- // Non-main index entries are typically arbitrary cross-references,
- // so display them after other results.
- let results = [...nonMainIndexResults, ...normalResults];
-
- // remove duplicate search results
- // note the reversing of results, so that in the case of duplicates, the highest-scoring entry is kept
- let seen = new Set();
- results = results.reverse().reduce((acc, result) => {
- let resultStr = result.slice(0, 4).concat([result[5]]).map(v => String(v)).join(',');
- if (!seen.has(resultStr)) {
- acc.push(result);
- seen.add(resultStr);
- }
- return acc;
- }, []);
-
- return results.reverse();
- },
-
- query: (query) => {
- const [searchQuery, searchTerms, excludedTerms, highlightTerms, objectTerms] = Search._parseQuery(query);
- const results = Search._performSearch(searchQuery, searchTerms, excludedTerms, highlightTerms, objectTerms);
-
- // for debugging
- //Search.lastresults = results.slice(); // a copy
- // console.info("search results:", Search.lastresults);
-
- // print the results
- _displayNextItem(results, results.length, searchTerms, highlightTerms);
- },
-
- /**
- * search for object names
- */
- performObjectSearch: (object, objectTerms) => {
- const filenames = Search._index.filenames;
- const docNames = Search._index.docnames;
- const objects = Search._index.objects;
- const objNames = Search._index.objnames;
- const titles = Search._index.titles;
-
- const results = [];
-
- const objectSearchCallback = (prefix, match) => {
- const name = match[4]
- const fullname = (prefix ? prefix + "." : "") + name;
- const fullnameLower = fullname.toLowerCase();
- if (fullnameLower.indexOf(object) < 0) return;
-
- let score = 0;
- const parts = fullnameLower.split(".");
-
- // check for different match types: exact matches of full name or
- // "last name" (i.e. last dotted part)
- if (fullnameLower === object || parts.slice(-1)[0] === object)
- score += Scorer.objNameMatch;
- else if (parts.slice(-1)[0].indexOf(object) > -1)
- score += Scorer.objPartialMatch; // matches in last name
-
- const objName = objNames[match[1]][2];
- const title = titles[match[0]];
-
- // If more than one term searched for, we require other words to be
- // found in the name/title/description
- const otherTerms = new Set(objectTerms);
- otherTerms.delete(object);
- if (otherTerms.size > 0) {
- const haystack = `${prefix} ${name} ${objName} ${title}`.toLowerCase();
- if (
- [...otherTerms].some((otherTerm) => haystack.indexOf(otherTerm) < 0)
- )
- return;
- }
-
- let anchor = match[3];
- if (anchor === "") anchor = fullname;
- else if (anchor === "-") anchor = objNames[match[1]][1] + "-" + fullname;
-
- const descr = objName + _(", in ") + title;
-
- // add custom score for some objects according to scorer
- if (Scorer.objPrio.hasOwnProperty(match[2]))
- score += Scorer.objPrio[match[2]];
- else score += Scorer.objPrioDefault;
-
- results.push([
- docNames[match[0]],
- fullname,
- "#" + anchor,
- descr,
- score,
- filenames[match[0]],
- SearchResultKind.object,
- ]);
- };
- Object.keys(objects).forEach((prefix) =>
- objects[prefix].forEach((array) =>
- objectSearchCallback(prefix, array)
- )
- );
- return results;
- },
-
- /**
- * search for full-text terms in the index
- */
- performTermsSearch: (searchTerms, excludedTerms) => {
- // prepare search
- const terms = Search._index.terms;
- const titleTerms = Search._index.titleterms;
- const filenames = Search._index.filenames;
- const docNames = Search._index.docnames;
- const titles = Search._index.titles;
-
- const scoreMap = new Map();
- const fileMap = new Map();
-
- // perform the search on the required terms
- searchTerms.forEach((word) => {
- const files = [];
- const arr = [
- { files: terms[word], score: Scorer.term },
- { files: titleTerms[word], score: Scorer.title },
- ];
- // add support for partial matches
- if (word.length > 2) {
- const escapedWord = _escapeRegExp(word);
- if (!terms.hasOwnProperty(word)) {
- Object.keys(terms).forEach((term) => {
- if (term.match(escapedWord))
- arr.push({ files: terms[term], score: Scorer.partialTerm });
- });
- }
- if (!titleTerms.hasOwnProperty(word)) {
- Object.keys(titleTerms).forEach((term) => {
- if (term.match(escapedWord))
- arr.push({ files: titleTerms[term], score: Scorer.partialTitle });
- });
- }
- }
-
- // no match but word was a required one
- if (arr.every((record) => record.files === undefined)) return;
-
- // found search word in contents
- arr.forEach((record) => {
- if (record.files === undefined) return;
-
- let recordFiles = record.files;
- if (recordFiles.length === undefined) recordFiles = [recordFiles];
- files.push(...recordFiles);
-
- // set score for the word in each file
- recordFiles.forEach((file) => {
- if (!scoreMap.has(file)) scoreMap.set(file, {});
- scoreMap.get(file)[word] = record.score;
- });
- });
-
- // create the mapping
- files.forEach((file) => {
- if (!fileMap.has(file)) fileMap.set(file, [word]);
- else if (fileMap.get(file).indexOf(word) === -1) fileMap.get(file).push(word);
- });
- });
-
- // now check if the files don't contain excluded terms
- const results = [];
- for (const [file, wordList] of fileMap) {
- // check if all requirements are matched
-
- // as search terms with length < 3 are discarded
- const filteredTermCount = [...searchTerms].filter(
- (term) => term.length > 2
- ).length;
- if (
- wordList.length !== searchTerms.size &&
- wordList.length !== filteredTermCount
- )
- continue;
-
- // ensure that none of the excluded terms is in the search result
- if (
- [...excludedTerms].some(
- (term) =>
- terms[term] === file ||
- titleTerms[term] === file ||
- (terms[term] || []).includes(file) ||
- (titleTerms[term] || []).includes(file)
- )
- )
- break;
-
- // select one (max) score for the file.
- const score = Math.max(...wordList.map((w) => scoreMap.get(file)[w]));
- // add result to the result list
- results.push([
- docNames[file],
- titles[file],
- "",
- null,
- score,
- filenames[file],
- SearchResultKind.text,
- ]);
- }
- return results;
- },
-
- /**
- * helper function to return a node containing the
- * search summary for a given text. keywords is a list
- * of stemmed words.
- */
- makeSearchSummary: (htmlText, keywords, anchor) => {
- const text = Search.htmlToText(htmlText, anchor);
- if (text === "") return null;
-
- const textLower = text.toLowerCase();
- const actualStartPosition = [...keywords]
- .map((k) => textLower.indexOf(k.toLowerCase()))
- .filter((i) => i > -1)
- .slice(-1)[0];
- const startWithContext = Math.max(actualStartPosition - 120, 0);
-
- const top = startWithContext === 0 ? "" : "...";
- const tail = startWithContext + 240 < text.length ? "..." : "";
-
- let summary = document.createElement("p");
- summary.classList.add("context");
- summary.textContent = top + text.substr(startWithContext, 240).trim() + tail;
-
- return summary;
- },
-};
-
-_ready(Search.init);
diff --git a/docs/_build/html/_static/sphinx_highlight.js b/docs/_build/html/_static/sphinx_highlight.js
deleted file mode 100644
index 8a96c69a..00000000
--- a/docs/_build/html/_static/sphinx_highlight.js
+++ /dev/null
@@ -1,154 +0,0 @@
-/* Highlighting utilities for Sphinx HTML documentation. */
-"use strict";
-
-const SPHINX_HIGHLIGHT_ENABLED = true
-
-/**
- * highlight a given string on a node by wrapping it in
- * span elements with the given class name.
- */
-const _highlight = (node, addItems, text, className) => {
- if (node.nodeType === Node.TEXT_NODE) {
- const val = node.nodeValue;
- const parent = node.parentNode;
- const pos = val.toLowerCase().indexOf(text);
- if (
- pos >= 0 &&
- !parent.classList.contains(className) &&
- !parent.classList.contains("nohighlight")
- ) {
- let span;
-
- const closestNode = parent.closest("body, svg, foreignObject");
- const isInSVG = closestNode && closestNode.matches("svg");
- if (isInSVG) {
- span = document.createElementNS("http://www.w3.org/2000/svg", "tspan");
- } else {
- span = document.createElement("span");
- span.classList.add(className);
- }
-
- span.appendChild(document.createTextNode(val.substr(pos, text.length)));
- const rest = document.createTextNode(val.substr(pos + text.length));
- parent.insertBefore(
- span,
- parent.insertBefore(
- rest,
- node.nextSibling
- )
- );
- node.nodeValue = val.substr(0, pos);
- /* There may be more occurrences of search term in this node. So call this
- * function recursively on the remaining fragment.
- */
- _highlight(rest, addItems, text, className);
-
- if (isInSVG) {
- const rect = document.createElementNS(
- "http://www.w3.org/2000/svg",
- "rect"
- );
- const bbox = parent.getBBox();
- rect.x.baseVal.value = bbox.x;
- rect.y.baseVal.value = bbox.y;
- rect.width.baseVal.value = bbox.width;
- rect.height.baseVal.value = bbox.height;
- rect.setAttribute("class", className);
- addItems.push({ parent: parent, target: rect });
- }
- }
- } else if (node.matches && !node.matches("button, select, textarea")) {
- node.childNodes.forEach((el) => _highlight(el, addItems, text, className));
- }
-};
-const _highlightText = (thisNode, text, className) => {
- let addItems = [];
- _highlight(thisNode, addItems, text, className);
- addItems.forEach((obj) =>
- obj.parent.insertAdjacentElement("beforebegin", obj.target)
- );
-};
-
-/**
- * Small JavaScript module for the documentation.
- */
-const SphinxHighlight = {
-
- /**
- * highlight the search words provided in localstorage in the text
- */
- highlightSearchWords: () => {
- if (!SPHINX_HIGHLIGHT_ENABLED) return; // bail if no highlight
-
- // get and clear terms from localstorage
- const url = new URL(window.location);
- const highlight =
- localStorage.getItem("sphinx_highlight_terms")
- || url.searchParams.get("highlight")
- || "";
- localStorage.removeItem("sphinx_highlight_terms")
- url.searchParams.delete("highlight");
- window.history.replaceState({}, "", url);
-
- // get individual terms from highlight string
- const terms = highlight.toLowerCase().split(/\s+/).filter(x => x);
- if (terms.length === 0) return; // nothing to do
-
- // There should never be more than one element matching "div.body"
- const divBody = document.querySelectorAll("div.body");
- const body = divBody.length ? divBody[0] : document.querySelector("body");
- window.setTimeout(() => {
- terms.forEach((term) => _highlightText(body, term, "highlighted"));
- }, 10);
-
- const searchBox = document.getElementById("searchbox");
- if (searchBox === null) return;
- searchBox.appendChild(
- document
- .createRange()
- .createContextualFragment(
- '
' +
- '' +
- _("Hide Search Matches") +
- "
"
- )
- );
- },
-
- /**
- * helper function to hide the search marks again
- */
- hideSearchWords: () => {
- document
- .querySelectorAll("#searchbox .highlight-link")
- .forEach((el) => el.remove());
- document
- .querySelectorAll("span.highlighted")
- .forEach((el) => el.classList.remove("highlighted"));
- localStorage.removeItem("sphinx_highlight_terms")
- },
-
- initEscapeListener: () => {
- // only install a listener if it is really needed
- if (!DOCUMENTATION_OPTIONS.ENABLE_SEARCH_SHORTCUTS) return;
-
- document.addEventListener("keydown", (event) => {
- // bail for input elements
- if (BLACKLISTED_KEY_CONTROL_ELEMENTS.has(document.activeElement.tagName)) return;
- // bail with special keys
- if (event.shiftKey || event.altKey || event.ctrlKey || event.metaKey) return;
- if (DOCUMENTATION_OPTIONS.ENABLE_SEARCH_SHORTCUTS && (event.key === "Escape")) {
- SphinxHighlight.hideSearchWords();
- event.preventDefault();
- }
- });
- },
-};
-
-_ready(() => {
- /* Do not call highlightSearchWords() when we are on the search page.
- * It will highlight words from the *previous* search query.
- */
- if (typeof Search === "undefined") SphinxHighlight.highlightSearchWords();
- SphinxHighlight.initEscapeListener();
-});
diff --git a/docs/_build/html/base_module.html b/docs/_build/html/base_module.html
deleted file mode 100644
index d6959ffc..00000000
--- a/docs/_build/html/base_module.html
+++ /dev/null
@@ -1,141 +0,0 @@
-
-
-
-
-
-
-
-
-
base_module package — NOMAD 0.0.1 documentation
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- NOMAD
-
-
-
-
-
-
-
-
-
-base_module package
-
-
-base_module.doc_test module
-
-
-base_module.setup module
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
\ No newline at end of file
diff --git a/docs/_build/html/genindex.html b/docs/_build/html/genindex.html
deleted file mode 100644
index 9f4b7ef5..00000000
--- a/docs/_build/html/genindex.html
+++ /dev/null
@@ -1,649 +0,0 @@
-
-
-
-
-
-
-
-
Index — NOMAD 0.0.1 documentation
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- NOMAD
-
-
-
-
-
-
-
-
-
-
Index
-
-
-
_
- |
A
- |
B
- |
C
- |
D
- |
F
- |
G
- |
I
- |
L
- |
M
- |
N
- |
O
- |
P
- |
Q
- |
R
- |
S
- |
T
- |
W
-
-
-
_
-
-
-
A
-
-
-
B
-
-
-
C
-
-
-
D
-
-
-
F
-
-
-
G
-
-
-
I
-
-
-
L
-
-
-
M
-
-
-
N
-
-
- nocturnal_stops() (in module nomad.home_attribution)
-
-
- nomad.agg
-
-
-
- nomad.aggregation
-
-
-
- nomad.city_gen
-
-
-
- nomad.constants
-
-
-
- nomad.contact_estimation
-
-
-
- nomad.displacement
-
-
-
- nomad.filters
-
-
-
- nomad.filters_spark
-
-
-
- nomad.generation
-
-
-
-
-
-
-
O
-
-
-
P
-
-
-
Q
-
-
-
R
-
-
-
S
-
-
-
T
-
-
-
W
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
\ No newline at end of file
diff --git a/docs/_build/html/index.html b/docs/_build/html/index.html
deleted file mode 100644
index fa2304b2..00000000
--- a/docs/_build/html/index.html
+++ /dev/null
@@ -1,225 +0,0 @@
-
-
-
-
-
-
-
-
-
NOMAD: Network for Open Mobility Analysis and Data — NOMAD 0.0.1 documentation
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- NOMAD
-
-
-
-
-
-
-
-
-
-NOMAD: Network for Open Mobility Analysis and Data
-
-
-
-
-NOMAD is an open-source Python library for end-to-end processing of large-scale GPS mobility data.
-Part of the NSF-funded NOMAD research infrastructure project, it provides production-ready tools for
-mobility data analysis with seamless scaling from local workstations to Spark clusters.
-NOMAD builds on previous software resources—like scikit-mobility, mobilkit, and trackintel—with the
-goal of providing a single, production-ready library that covers the entire processing pipeline in a
-form suitable for analysis of massive datasets and to aid in the replicability of existing research.
-All functions are implemented in Python with parallel equivalents in PySpark, enabling the same
-analysis notebook to run on a workstation or a Spark cluster without API changes.
-
-
-Installation
- pip install git+https://github.com/Watts-Lab/nomad.git
-
-
-
-
-
-
-
-
-
-
-Modules Overview
-
-
-
-
-
-
-Module
-Description
-
-
-
-Data Ingestion
-Read CSV, Parquet, GeoJSON; validate schemas; return Pandas or Spark DataFrames
-
-Filtering & Completeness
-Assess coverage; filter by completeness, geography, time; handle projections
-
-Tessellation
-Map pings to H3, S2, or custom grids for grid-based algorithms
-
-Stop Detection
-DBSCAN, HDBSCAN, grid-based, and sequential (Lachesis) algorithms
-
-Visit Attribution
-Frequency and time-window heuristics for home/workplace inference
-
-Mobility Metrics
-Radius of gyration, travel distance, entropy, and related indicators
-
-Co-location & Contact Networks
-Build proximity graphs from POI visits or spatial-temporal proximity
-
-Aggregation & Debiasing
-Differential privacy, k-anonymity, debiasing, and post-stratification
-
-Synthetic Data Generation
-EPR models and point process samplers for trajectory generation
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
\ No newline at end of file
diff --git a/docs/_build/html/modules.html b/docs/_build/html/modules.html
deleted file mode 100644
index f5253134..00000000
--- a/docs/_build/html/modules.html
+++ /dev/null
@@ -1,140 +0,0 @@
-
-
-
-
-
-
-
-
-
src — NOMAD 0.0.1 documentation
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
\ No newline at end of file
diff --git a/docs/_build/html/objects.inv b/docs/_build/html/objects.inv
deleted file mode 100644
index 29e7fc8b..00000000
Binary files a/docs/_build/html/objects.inv and /dev/null differ
diff --git a/docs/_build/html/py-modindex.html b/docs/_build/html/py-modindex.html
deleted file mode 100644
index 298133a3..00000000
--- a/docs/_build/html/py-modindex.html
+++ /dev/null
@@ -1,225 +0,0 @@
-
-
-
-
-
-
-
-
Python Module Index — NOMAD 0.0.1 documentation
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- NOMAD
-
-
-
-
-
-
-
- Python Module Index
-
-
-
-
-
-
-
-
-
-
Python Module Index
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
\ No newline at end of file
diff --git a/docs/_build/html/search.html b/docs/_build/html/search.html
deleted file mode 100644
index ea508c0a..00000000
--- a/docs/_build/html/search.html
+++ /dev/null
@@ -1,145 +0,0 @@
-
-
-
-
-
-
-
-
Search — NOMAD 0.0.1 documentation
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- NOMAD
-
-
-
-
-
-
-
-
-
-
-
- Please activate JavaScript to enable the search functionality.
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
\ No newline at end of file
diff --git a/docs/_build/html/searchindex.js b/docs/_build/html/searchindex.js
deleted file mode 100644
index 3e96b2f6..00000000
--- a/docs/_build/html/searchindex.js
+++ /dev/null
@@ -1 +0,0 @@
-Search.setIndex({"alltitles": {"API Reference": [[17, null]], "Aggregation & Debiasing": [[0, null]], "Basic Installation": [[22, "basic-installation"]], "Benchmarking Stop Detection Algorithms": [[26, null]], "Co-location & Contact Networks": [[19, null]], "Community & Support": [[23, "community-support"]], "DBSCAN": [[33, "dbscan"]], "Data Ingestion": [[20, null]], "Download Data by Bounding Box": [[30, "Download-Data-by-Bounding-Box"]], "Download Data by City Name": [[30, "Download-Data-by-City-Name"]], "Downloading places of interest (POI) Data from OSM": [[30, null]], "Examples": [[22, "examples"]], "Filtering & Completeness": [[21, null]], "Generating synthetic places of interest (POI) Data": [[31, null]], "Getting Started": [[22, null]], "Grid-Based Stop Detection": [[27, null]], "Grid-based": [[33, "grid-based"]], "HDBSCAN": [[33, "hdbscan"]], "HDBSCAN Stop Detection": [[28, null]], "Indices and tables": [[23, "indices-and-tables"]], "Installation": [[22, "installation"], [23, "installation"]], "Key Features": [[22, "key-features"]], "Lachesis": [[33, "lachesis"]], "Lachesis Stop Detection": [[29, null]], "License": [[22, "license"]], "Mobility Metrics": [[24, null]], "Module contents": [[18, "module-contents"]], "Modules": [[17, "modules"]], "Modules Overview": [[23, "modules-overview"]], "NOMAD: Network for Open Mobility Analysis and Data": [[23, null]], "Next Steps": [[22, "next-steps"]], "Overall Runtime Comparison": [[26, "Overall-Runtime-Comparison"]], "Overview of stop-detection methods": [[33, "overview-of-stop-detection-methods"]], "Parameters:": [[3, "parameters"]], "Performance Comparison": [[26, "Performance-Comparison"]], "Quick Links": [[23, "quick-links"]], "Quick Start": [[22, "quick-start"]], "Requirements": [[22, "requirements"]], "Returns:": [[3, "returns"]], "Runtime Scalability": [[26, "Runtime-Scalability"]], "Setup": [[26, "Setup"]], "Stop Detection": [[33, null]], "Submodules": [[18, "submodules"]], "Synthetic Data Generation": 
[[34, null]], "TADBSCAN Stop Detection": [[32, null]], "Tessellation": [[35, null]], "Visit Attribution": [[36, null]], "Visualization": [[26, "Visualization"]], "What is NOMAD?": [[22, "what-is-nomad"]], "With Spark Support": [[22, "with-spark-support"]], "agg": [[1, null]], "aggregation": [[2, null]], "base_module package": [[18, null]], "base_module.doc_test module": [[18, "base-module-doc-test-module"]], "base_module.setup module": [[18, "base-module-setup-module"]], "city_gen": [[3, null]], "constants": [[4, null]], "contact_estimation": [[5, null]], "displacement": [[6, null]], "filters": [[7, null]], "filters_spark": [[8, null]], "generation": [[9, null]], "home_attribution": [[10, null]], "io": [[11, null]], "map_utils": [[12, null]], "metrics": [[13, null]], "src": [[25, null]], "stop_detection": [[14, null]], "traj_gen": [[15, null]], "visit_attribution": [[16, null]]}, "docnames": ["aggregation", "api/nomad.agg", "api/nomad.aggregation", "api/nomad.city_gen", "api/nomad.constants", "api/nomad.contact_estimation", "api/nomad.displacement", "api/nomad.filters", "api/nomad.filters_spark", "api/nomad.generation", "api/nomad.home_attribution", "api/nomad.io", "api/nomad.map_utils", "api/nomad.metrics", "api/nomad.stop_detection", "api/nomad.traj_gen", "api/nomad.visit_attribution", "api_reference", "base_module", "colocation", "data_ingestion", "filtering", "getting_started", "index", "metrics", "modules", "source/benchmarking_of_stop_detection_algorithms", "source/grid_based_demo", "source/hdbscan_demo", "source/lachesis_demo", "source/poi_osm", "source/poi_synthetic", "source/tadbscan_demo", "stop_detection", "synthetic_data_generation", "tessellation", "visit_attribution"], "envversion": {"nbsphinx": 4, "sphinx": 64, "sphinx.domains.c": 3, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 9, "sphinx.domains.index": 1, "sphinx.domains.javascript": 3, "sphinx.domains.math": 2, "sphinx.domains.python": 4, "sphinx.domains.rst": 
2, "sphinx.domains.std": 2, "sphinx.ext.todo": 2, "sphinx.ext.viewcode": 1}, "filenames": ["aggregation.rst", "api\\nomad.agg.rst", "api\\nomad.aggregation.rst", "api\\nomad.city_gen.rst", "api\\nomad.constants.rst", "api\\nomad.contact_estimation.rst", "api\\nomad.displacement.rst", "api\\nomad.filters.rst", "api\\nomad.filters_spark.rst", "api\\nomad.generation.rst", "api\\nomad.home_attribution.rst", "api\\nomad.io.rst", "api\\nomad.map_utils.rst", "api\\nomad.metrics.rst", "api\\nomad.stop_detection.rst", "api\\nomad.traj_gen.rst", "api\\nomad.visit_attribution.rst", "api_reference.rst", "base_module.rst", "colocation.rst", "data_ingestion.rst", "filtering.rst", "getting_started.rst", "index.rst", "metrics.rst", "modules.rst", "source\\benchmarking_of_stop_detection_algorithms.ipynb", "source\\grid_based_demo.ipynb", "source\\hdbscan_demo.ipynb", "source\\lachesis_demo.ipynb", "source\\poi_osm.ipynb", "source\\poi_synthetic.ipynb", "source\\tadbscan_demo.ipynb", "stop_detection.rst", "synthetic_data_generation.rst", "tessellation.rst", "visit_attribution.rst"], "indexentries": {"__init__() (agent method)": [[15, "nomad.traj_gen.Agent.__init__", false]], "__init__() (city method)": [[3, "nomad.city_gen.City.__init__", false]], "__init__() (population method)": [[15, "nomad.traj_gen.Population.__init__", false]], "__init__() (randomcitygenerator method)": [[3, "nomad.city_gen.RandomCityGenerator.__init__", false]], "__init__() (street method)": [[3, "nomad.city_gen.Street.__init__", false]], "add_agent() (population method)": [[15, "nomad.traj_gen.Population.add_agent", false]], "add_building() (city method)": [[3, "id0", false], [3, "nomad.city_gen.City.add_building", false]], "add_buildings_from_gdf() (city method)": [[3, "nomad.city_gen.City.add_buildings_from_gdf", false]], "add_street() (city method)": [[3, "nomad.city_gen.City.add_street", false]], "agent (class in nomad.traj_gen)": [[15, "nomad.traj_gen.Agent", false]], "allowed_buildings() (in module 
nomad.traj_gen)": [[15, "nomad.traj_gen.allowed_buildings", false]], "blocks_to_mercator() (in module nomad.map_utils)": [[12, "nomad.map_utils.blocks_to_mercator", false]], "buildings (city attribute)": [[3, "nomad.city_gen.City.buildings", false]], "buildings_df (city property)": [[3, "nomad.city_gen.City.buildings_df", false]], "buildings_outline (city attribute)": [[3, "nomad.city_gen.City.buildings_outline", false]], "check_adjacent() (in module nomad.city_gen)": [[3, "nomad.city_gen.check_adjacent", false]], "city (class in nomad.city_gen)": [[3, "nomad.city_gen.City", false]], "city (population attribute)": [[15, "nomad.traj_gen.Population.city", false]], "city_boundary (city attribute)": [[3, "nomad.city_gen.City.city_boundary", false]], "completeness() (in module nomad.filters)": [[7, "nomad.filters.completeness", false]], "compute_candidate_homes() (in module nomad.home_attribution)": [[10, "nomad.home_attribution.compute_candidate_homes", false]], "compute_stop_detection_metrics() (in module nomad.contact_estimation)": [[5, "nomad.contact_estimation.compute_stop_detection_metrics", false]], "compute_visitation_errors() (in module nomad.contact_estimation)": [[5, "nomad.contact_estimation.compute_visitation_errors", false]], "condense_destinations() (in module nomad.traj_gen)": [[15, "nomad.traj_gen.condense_destinations", false]], "coordinates (street attribute)": [[3, "nomad.city_gen.Street.coordinates", false]], "coverage_matrix() (in module nomad.filters)": [[7, "nomad.filters.coverage_matrix", false]], "dimensions (city attribute)": [[3, "nomad.city_gen.City.dimensions", false]], "download_osm_buildings() (in module nomad.map_utils)": [[12, "nomad.map_utils.download_osm_buildings", false]], "download_osm_streets() (in module nomad.map_utils)": [[12, "nomad.map_utils.download_osm_streets", false]], "downsample() (in module nomad.filters)": [[7, "nomad.filters.downsample", false]], "dt (agent attribute)": [[15, "nomad.traj_gen.Agent.dt", false]], "dt 
(population attribute)": [[15, "nomad.traj_gen.Population.dt", false]], "fill_block() (randomcitygenerator method)": [[3, "nomad.city_gen.RandomCityGenerator.fill_block", false]], "filter_users() (in module nomad.filters_spark)": [[8, "nomad.filters_spark.filter_users", false]], "from_geodataframes() (city class method)": [[3, "nomad.city_gen.City.from_geodataframes", false]], "from_geopackage() (city class method)": [[3, "nomad.city_gen.City.from_geopackage", false]], "from_mercator() (city method)": [[3, "nomad.city_gen.City.from_mercator", false]], "generate_agents() (population method)": [[15, "nomad.traj_gen.Population.generate_agents", false]], "generate_city() (randomcitygenerator method)": [[3, "nomad.city_gen.RandomCityGenerator.generate_city", false]], "generate_streets() (randomcitygenerator method)": [[3, "nomad.city_gen.RandomCityGenerator.generate_streets", false]], "generate_trajectory() (agent method)": [[15, "nomad.traj_gen.Agent.generate_trajectory", false]], "geometry (street attribute)": [[3, "nomad.city_gen.Street.geometry", false]], "get_adjacent_street() (randomcitygenerator method)": [[3, "nomad.city_gen.RandomCityGenerator.get_adjacent_street", false]], "get_block() (city method)": [[3, "id1", false], [3, "nomad.city_gen.City.get_block", false]], "get_block_type() (randomcitygenerator method)": [[3, "nomad.city_gen.RandomCityGenerator.get_block_type", false]], "get_building() (city method)": [[3, "nomad.city_gen.City.get_building", false]], "get_building_coordinates() (city method)": [[3, "nomad.city_gen.City.get_building_coordinates", false]], "get_category_for_subtype() (in module nomad.map_utils)": [[12, "nomad.map_utils.get_category_for_subtype", false]], "get_category_summary() (in module nomad.map_utils)": [[12, "nomad.map_utils.get_category_summary", false]], "get_city_boundary_osm() (in module nomad.map_utils)": [[12, "nomad.map_utils.get_city_boundary_osm", false]], "get_prominent_streets() (in module nomad.map_utils)": [[12, 
"nomad.map_utils.get_prominent_streets", false]], "get_shortest_path() (city method)": [[3, "nomad.city_gen.City.get_shortest_path", false]], "get_street_graph() (city method)": [[3, "id2", false], [3, "nomad.city_gen.City.get_street_graph", false]], "get_subtype_summary() (in module nomad.map_utils)": [[12, "nomad.map_utils.get_subtype_summary", false]], "gravity (city attribute)": [[3, "nomad.city_gen.City.gravity", false]], "id (street attribute)": [[3, "nomad.city_gen.Street.id", false]], "id_to_door_cell() (city method)": [[3, "nomad.city_gen.City.id_to_door_cell", false]], "is_within() (in module nomad.filters)": [[7, "nomad.filters.is_within", false]], "load() (in module nomad.city_gen)": [[3, "nomad.city_gen.load", false]], "mercator_to_blocks() (in module nomad.map_utils)": [[12, "nomad.map_utils.mercator_to_blocks", false]], "module": [[1, "module-nomad.agg", false], [2, "module-nomad.aggregation", false], [3, "module-nomad.city_gen", false], [4, "module-nomad.constants", false], [5, "module-nomad.contact_estimation", false], [6, "module-nomad.displacement", false], [7, "module-nomad.filters", false], [8, "module-nomad.filters_spark", false], [9, "module-nomad.generation", false], [10, "module-nomad.home_attribution", false], [11, "module-nomad.io", false], [12, "module-nomad.map_utils", false], [13, "module-nomad.metrics", false], [14, "module-nomad.stop_detection", false], [15, "module-nomad.traj_gen", false], [16, "module-nomad.visit_attribution", false]], "nocturnal_stops() (in module nomad.home_attribution)": [[10, "nomad.home_attribution.nocturnal_stops", false]], "nomad.agg": [[1, "module-nomad.agg", false]], "nomad.aggregation": [[2, "module-nomad.aggregation", false]], "nomad.city_gen": [[3, "module-nomad.city_gen", false]], "nomad.constants": [[4, "module-nomad.constants", false]], "nomad.contact_estimation": [[5, "module-nomad.contact_estimation", false]], "nomad.displacement": [[6, "module-nomad.displacement", false]], "nomad.filters": [[7, 
"module-nomad.filters", false]], "nomad.filters_spark": [[8, "module-nomad.filters_spark", false]], "nomad.generation": [[9, "module-nomad.generation", false]], "nomad.home_attribution": [[10, "module-nomad.home_attribution", false]], "nomad.io": [[11, "module-nomad.io", false]], "nomad.map_utils": [[12, "module-nomad.map_utils", false]], "nomad.metrics": [[13, "module-nomad.metrics", false]], "nomad.stop_detection": [[14, "module-nomad.stop_detection", false]], "nomad.traj_gen": [[15, "module-nomad.traj_gen", false]], "nomad.visit_attribution": [[16, "module-nomad.visit_attribution", false]], "normalize_od() (in module nomad.displacement)": [[6, "nomad.displacement.normalize_od", false]], "normalized_moved() (in module nomad.displacement)": [[6, "nomad.displacement.normalized_moved", false]], "normalized_remained() (in module nomad.displacement)": [[6, "nomad.displacement.normalized_remained", false]], "overlapping_visits() (in module nomad.contact_estimation)": [[5, "nomad.contact_estimation.overlapping_visits", false]], "parse_agent_attr() (in module nomad.traj_gen)": [[15, "nomad.traj_gen.parse_agent_attr", false]], "place_buildings_in_blocks() (randomcitygenerator method)": [[3, "nomad.city_gen.RandomCityGenerator.place_buildings_in_blocks", false]], "plot_city() (city method)": [[3, "id4", false], [3, "nomad.city_gen.City.plot_city", false]], "plot_traj() (agent method)": [[15, "id0", false], [15, "nomad.traj_gen.Agent.plot_traj", false]], "population (class in nomad.traj_gen)": [[15, "nomad.traj_gen.Population", false]], "precision_recall_f1_from_minutes() (in module nomad.contact_estimation)": [[5, "nomad.contact_estimation.precision_recall_f1_from_minutes", false]], "q_filter() (in module nomad.filters)": [[7, "nomad.filters.q_filter", false]], "randomcitygenerator (class in nomad.city_gen)": [[3, "nomad.city_gen.RandomCityGenerator", false]], "remove_overlaps() (in module nomad.map_utils)": [[12, "nomad.map_utils.remove_overlaps", false]], 
"reproject_to_mercator() (population method)": [[15, "nomad.traj_gen.Population.reproject_to_mercator", false]], "reset_trajectory() (agent method)": [[15, "nomad.traj_gen.Agent.reset_trajectory", false]], "roster (population attribute)": [[15, "nomad.traj_gen.Population.roster", false]], "rotate() (in module nomad.map_utils)": [[12, "nomad.map_utils.rotate", false]], "rotate_streets_to_align() (in module nomad.map_utils)": [[12, "nomad.map_utils.rotate_streets_to_align", false]], "sample_hier_nhpp() (in module nomad.traj_gen)": [[15, "nomad.traj_gen.sample_hier_nhpp", false]], "sample_traj_hier_nhpp() (agent method)": [[15, "nomad.traj_gen.Agent.sample_traj_hier_nhpp", false]], "sample_trajectory() (agent method)": [[15, "nomad.traj_gen.Agent.sample_trajectory", false]], "save() (city method)": [[3, "id3", false], [3, "nomad.city_gen.City.save", false]], "save() (in module nomad.city_gen)": [[3, "nomad.city_gen.save", false]], "save_geopackage() (city method)": [[3, "nomad.city_gen.City.save_geopackage", false]], "save_pop() (population method)": [[15, "nomad.traj_gen.Population.save_pop", false]], "select_home() (in module nomad.home_attribution)": [[10, "nomad.home_attribution.select_home", false]], "set_osmnx_cache_mode() (in module nomad.map_utils)": [[12, "nomad.map_utils.set_osmnx_cache_mode", false]], "shortest_paths (city attribute)": [[3, "nomad.city_gen.City.shortest_paths", false]], "speeds (agent attribute)": [[15, "nomad.traj_gen.Agent.speeds", false]], "still_probs (agent attribute)": [[15, "nomad.traj_gen.Agent.still_probs", false]], "street (class in nomad.city_gen)": [[3, "nomad.city_gen.Street", false]], "street_adjacency_edges() (city method)": [[3, "nomad.city_gen.City.street_adjacency_edges", false]], "street_graph (city attribute)": [[3, "nomad.city_gen.City.street_graph", false]], "streets (city attribute)": [[3, "nomad.city_gen.City.streets", false]], "streets_df (city property)": [[3, "nomad.city_gen.City.streets_df", false]], "to_file() 
(city method)": [[3, "nomad.city_gen.City.to_file", false]], "to_geodataframes() (city method)": [[3, "nomad.city_gen.City.to_geodataframes", false]], "to_mercator() (city method)": [[3, "nomad.city_gen.City.to_mercator", false]], "to_projection() (in module nomad.filters)": [[7, "nomad.filters.to_projection", false]], "to_projection() (in module nomad.filters_spark)": [[8, "nomad.filters_spark.to_projection", false]], "to_tessellation() (in module nomad.filters)": [[7, "nomad.filters.to_tessellation", false]], "to_timestamp() (in module nomad.filters)": [[7, "nomad.filters.to_timestamp", false]], "to_yyyymmdd() (in module nomad.filters)": [[7, "nomad.filters.to_yyyymmdd", false]], "to_zoned_datetime() (in module nomad.filters)": [[7, "nomad.filters.to_zoned_datetime", false]], "within() (in module nomad.filters)": [[7, "nomad.filters.within", false]]}, "objects": {"nomad": [[1, 0, 0, "-", "agg"], [2, 0, 0, "-", "aggregation"], [3, 0, 0, "-", "city_gen"], [4, 0, 0, "-", "constants"], [5, 0, 0, "-", "contact_estimation"], [6, 0, 0, "-", "displacement"], [7, 0, 0, "-", "filters"], [8, 0, 0, "-", "filters_spark"], [9, 0, 0, "-", "generation"], [10, 0, 0, "-", "home_attribution"], [11, 0, 0, "-", "io"], [12, 0, 0, "-", "map_utils"], [13, 0, 0, "-", "metrics"], [14, 0, 0, "-", "stop_detection"], [15, 0, 0, "-", "traj_gen"], [16, 0, 0, "-", "visit_attribution"]], "nomad.city_gen": [[3, 1, 1, "", "City"], [3, 1, 1, "", "RandomCityGenerator"], [3, 1, 1, "", "Street"], [3, 5, 1, "", "check_adjacent"], [3, 5, 1, "", "load"], [3, 5, 1, "", "save"]], "nomad.city_gen.City": [[3, 2, 1, "", "__init__"], [3, 2, 1, "id0", "add_building"], [3, 2, 1, "", "add_buildings_from_gdf"], [3, 2, 1, "", "add_street"], [3, 3, 1, "", "buildings"], [3, 4, 1, "", "buildings_df"], [3, 3, 1, "", "buildings_outline"], [3, 3, 1, "", "city_boundary"], [3, 3, 1, "", "dimensions"], [3, 2, 1, "", "from_geodataframes"], [3, 2, 1, "", "from_geopackage"], [3, 2, 1, "", "from_mercator"], [3, 2, 1, "id1", 
"get_block"], [3, 2, 1, "", "get_building"], [3, 2, 1, "", "get_building_coordinates"], [3, 2, 1, "", "get_shortest_path"], [3, 2, 1, "id2", "get_street_graph"], [3, 3, 1, "", "gravity"], [3, 2, 1, "", "id_to_door_cell"], [3, 2, 1, "id4", "plot_city"], [3, 2, 1, "id3", "save"], [3, 2, 1, "", "save_geopackage"], [3, 3, 1, "", "shortest_paths"], [3, 2, 1, "", "street_adjacency_edges"], [3, 3, 1, "", "street_graph"], [3, 3, 1, "", "streets"], [3, 4, 1, "", "streets_df"], [3, 2, 1, "", "to_file"], [3, 2, 1, "", "to_geodataframes"], [3, 2, 1, "", "to_mercator"]], "nomad.city_gen.RandomCityGenerator": [[3, 2, 1, "", "__init__"], [3, 2, 1, "", "fill_block"], [3, 2, 1, "", "generate_city"], [3, 2, 1, "", "generate_streets"], [3, 2, 1, "", "get_adjacent_street"], [3, 2, 1, "", "get_block_type"], [3, 2, 1, "", "place_buildings_in_blocks"]], "nomad.city_gen.Street": [[3, 2, 1, "", "__init__"], [3, 3, 1, "", "coordinates"], [3, 3, 1, "", "geometry"], [3, 3, 1, "", "id"]], "nomad.contact_estimation": [[5, 5, 1, "", "compute_stop_detection_metrics"], [5, 5, 1, "", "compute_visitation_errors"], [5, 5, 1, "", "overlapping_visits"], [5, 5, 1, "", "precision_recall_f1_from_minutes"]], "nomad.displacement": [[6, 5, 1, "", "normalize_od"], [6, 5, 1, "", "normalized_moved"], [6, 5, 1, "", "normalized_remained"]], "nomad.filters": [[7, 5, 1, "", "completeness"], [7, 5, 1, "", "coverage_matrix"], [7, 5, 1, "", "downsample"], [7, 5, 1, "", "is_within"], [7, 5, 1, "", "q_filter"], [7, 5, 1, "", "to_projection"], [7, 5, 1, "", "to_tessellation"], [7, 5, 1, "", "to_timestamp"], [7, 5, 1, "", "to_yyyymmdd"], [7, 5, 1, "", "to_zoned_datetime"], [7, 5, 1, "", "within"]], "nomad.filters_spark": [[8, 5, 1, "", "filter_users"], [8, 5, 1, "", "to_projection"]], "nomad.home_attribution": [[10, 5, 1, "", "compute_candidate_homes"], [10, 5, 1, "", "nocturnal_stops"], [10, 5, 1, "", "select_home"]], "nomad.map_utils": [[12, 5, 1, "", "blocks_to_mercator"], [12, 5, 1, "", "download_osm_buildings"], [12, 
5, 1, "", "download_osm_streets"], [12, 5, 1, "", "get_category_for_subtype"], [12, 5, 1, "", "get_category_summary"], [12, 5, 1, "", "get_city_boundary_osm"], [12, 5, 1, "", "get_prominent_streets"], [12, 5, 1, "", "get_subtype_summary"], [12, 5, 1, "", "mercator_to_blocks"], [12, 5, 1, "", "remove_overlaps"], [12, 5, 1, "", "rotate"], [12, 5, 1, "", "rotate_streets_to_align"], [12, 5, 1, "", "set_osmnx_cache_mode"]], "nomad.traj_gen": [[15, 1, 1, "", "Agent"], [15, 1, 1, "", "Population"], [15, 5, 1, "", "allowed_buildings"], [15, 5, 1, "", "condense_destinations"], [15, 5, 1, "", "parse_agent_attr"], [15, 5, 1, "", "sample_hier_nhpp"]], "nomad.traj_gen.Agent": [[15, 2, 1, "", "__init__"], [15, 3, 1, "", "dt"], [15, 2, 1, "", "generate_trajectory"], [15, 2, 1, "id0", "plot_traj"], [15, 2, 1, "", "reset_trajectory"], [15, 2, 1, "", "sample_traj_hier_nhpp"], [15, 2, 1, "", "sample_trajectory"], [15, 3, 1, "", "speeds"], [15, 3, 1, "", "still_probs"]], "nomad.traj_gen.Population": [[15, 2, 1, "", "__init__"], [15, 2, 1, "", "add_agent"], [15, 3, 1, "", "city"], [15, 3, 1, "", "dt"], [15, 2, 1, "", "generate_agents"], [15, 2, 1, "", "reproject_to_mercator"], [15, 3, 1, "", "roster"], [15, 2, 1, "", "save_pop"]]}, "objnames": {"0": ["py", "module", "Python module"], "1": ["py", "class", "Python class"], "2": ["py", "method", "Python method"], "3": ["py", "attribute", "Python attribute"], "4": ["py", "property", "Python property"], "5": ["py", "function", "Python function"]}, "objtypes": {"0": "py:module", "1": "py:class", "2": "py:method", "3": "py:attribute", "4": "py:property", "5": "py:function"}, "terms": {"": [3, 6, 7, 8, 12, 15, 22, 26, 31], "0": [3, 7, 12, 15, 26, 30, 31], "01": [26, 27, 28, 29, 32], "0204081632653061": 15, "03": 32, "05": [], "060303": 26, "070307": 30, "08": 30, "08098": 30, "0e0e0": [], "1": [3, 7, 8, 12, 15, 22, 26, 27, 28, 29, 30, 31, 32], "10": [12, 26, 27, 28, 29, 30, 31, 32], "100": [6, 22], "1004": 30, "101": 30, "104": [], "106": 31, 
"11": 31, "11749631": 30, "11752856": 30, "11753099": 30, "11758546": 30, "12": [26, 31], "120": [], "13": 31, "14": [26, 30, 31], "14652246706544": 30, "15": [3, 12, 15, 26, 27, 28, 31, 32], "154": 30, "155": 30, "16": 31, "16617": 30, "16631": 30, "17": 31, "1700": 30, "17019": 30, "1704162819": 26, "17178": 30, "172": 30, "18": [30, 31], "180": 30, "18000": 7, "19": [10, 31], "191": 30, "19103": 30, "19104": 30, "19129": 30, "19130": 30, "19375": 30, "19378": 30, "19743": 30, "19745": 30, "19747721789525": 30, "2": [3, 12, 15, 22, 26, 28, 30, 31], "20": [26, 29, 31], "200": 12, "2024": [27, 28, 29, 32], "2025": [22, 23, 27], "2035": 30, "207118": 26, "21": [27, 30, 31], "22": 31, "224": 31, "23": [], "23172": 30, "240": [26, 27, 28, 32], "246": 30, "25": [], "255": [], "2c353c": [], "3": [3, 7, 22, 26, 28, 30, 31, 32], "30": 28, "300": [], "314632": 26, "31779": 30, "31782": 30, "32": 30, "323719": 30, "33003": 30, "3330": 30, "33482": 30, "34": [], "3600": 26, "36042": 30, "37962": 30, "3826530612244898": 15, "3857": [8, 12, 27, 28, 31, 32], "39": 30, "3f": [26, 31], "4": [3, 22, 26, 30, 31, 32], "4075": 30, "4096": [], "42": 3, "4265669": 12, "4265684": 12, "4265699": [3, 12], "4326": [12, 26, 32], "4392976": [3, 12], "4392991": 12, "450": [], "47604": 30, "47605": 30, "47648": 30, "47653": 30, "4766": 30, "48": 26, "5": [3, 7, 15, 26, 27, 28, 29, 30, 31, 32], "50": [], "504672": 26, "51444": 30, "515": 15, "51511": 30, "525774": 30, "5574": 30, "569": 30, "58": 30, "59249": 30, "6": [10, 26, 30, 31], "60": [26, 29], "64045": 30, "64079": 30, "64503": 30, "648": 30, "64825": 30, "656": 31, "68": 15, "6h": 26, "7": [30, 31], "72": 30, "720": [28, 32], "72495": 30, "72523": 30, "75": [15, 30], "7597": 30, "7747": 30, "7932": 30, "8": [15, 31], "80": [], "8192": [], "83": 30, "8601": [], "8928571428571429": 15, "9": [15, 22, 31], "931392279878246": 30, "947": 30, "94717": 30, "949": 30, "94942": 30, "95201": 30, "95204": 30, "953576": 30, "95649": 30, "95652": 
30, "962": 30, "96231": 30, "96336810441389": 30, "99": [], "A": [3, 7, 8, 15, 33], "AND": [], "At": [], "But": [], "For": [7, 15, 22], "If": [3, 6, 7, 8, 12, 15], "In": [], "It": [8, 10, 33], "Its": [], "NOT": [], "No": 5, "On": [], "One": 7, "The": [3, 5, 7, 12, 15, 22, 27, 28, 29, 31, 32], "There": [], "These": 15, "To": [0, 7, 19, 20, 21, 24, 33, 34, 35, 36], "__init__": [3, 15], "_add_mani": 31, "_base_cdf": [], "about": [3, 15], "abov": [], "absent": 7, "absolut": [], "abstract": 12, "accept": [7, 31], "access": 30, "accuraci": 15, "across": 31, "act": [], "actual": [7, 15], "ad": [3, 15], "adapt": [32, 33], "add": [3, 6, 15, 31], "add_ag": 15, "add_build": [3, 31], "add_buildings_from_gdf": 3, "add_point": [], "add_street": 3, "addit": [7, 8, 15], "addr": 30, "address": [3, 15, 31], "adjac": 3, "adjust_zoom": [], "admiring_brattain": [27, 28, 29], "affin": 12, "after": [7, 15], "against": [], "agent": [3, 15], "agent_hom": 15, "agent_workplac": 15, "agg_freq": 7, "aggreg": [7, 10, 23], "aid": [22, 23], "algo": 26, "algorithm": [5, 22, 23, 27, 28, 29, 32, 33], "alia": [], "alias": 7, "align": [3, 7, 12], "all": [3, 7, 10, 12, 15, 22, 23], "allow": 15, "allowai": 30, "allowed_build": 15, "along": [], "alpha": [3, 15, 26, 30], "alreadi": [5, 10, 15], "also": [3, 12, 15, 32], "alter": 3, "altern": 27, "although": [], "alwai": [], "amount": 29, "an": [3, 5, 6, 12, 15, 22, 23, 29, 32], "analysi": 22, "ani": [3, 7, 12], "anonym": 23, "anoth": [8, 12], "any_coord": 3, "apart": 30, "api": [22, 23, 30], "appear": 7, "append": [15, 26], "appli": [7, 8, 12], "applic": 3, "approach": [], "appropri": 5, "approxim": [], "ar": [3, 6, 7, 8, 10, 12, 15, 23, 27, 32], "area": 8, "arg": [], "argument": [7, 15], "around": 12, "arrai": 15, "arriv": 15, "artefact": [], "assess": [22, 23], "assign": [3, 7, 26, 28, 32], "assum": 10, "attempt": 8, "attr": 15, "attribut": [15, 22, 23], "augment": [], "author": 12, "auto": [], "autodetect": [], "automat": [15, 17, 33], "avail": 3, 
"avenu": 30, "avoid": [], "awai": 29, "awar": 33, "ax": [3, 15, 26, 27, 28, 29, 30, 31, 32], "ax1": 26, "ax2": 26, "ax_barcod": [26, 27, 28, 29, 32], "ax_map": 26, "axi": [3, 15, 26], "b": [], "back": [3, 12], "backend": [], "background": [], "background_gdf": [], "bailei": 30, "bar": 26, "barcod": [], "barh": 26, "base": [3, 15, 22, 23, 26, 28, 29, 32], "base_geom_background": [], "base_geom_color": [], "base_geometri": 26, "base_modul": 25, "basic": [], "bbox": [12, 30], "bbox_or_c": 12, "bear": 12, "becaus": [], "been": [], "befor": [], "begin": [], "behav": [], "being": [], "below": 33, "beta_dur": 15, "beta_dur_rang": [], "beta_p": 15, "beta_ping_rang": [], "beta_start": 15, "beta_start_rang": [], "between": [3, 10, 29, 32, 33], "beyond": 10, "binari": [], "black": [15, 26, 30], "blksize": [], "block": [3, 12, 15, 30, 31], "block_i": 3, "block_side_length": [3, 15], "block_siz": 12, "block_typ": 3, "block_x": 3, "blockingioerror": [], "blocks_gdf": 3, "blocks_to_merc": 12, "blue": [28, 29, 30], "bool": [3, 7, 15], "boolean": 7, "border": [], "both": [8, 26], "bound": [3, 7, 8, 12], "boundari": [3, 12], "boundary_multipolygon": 12, "box": [3, 12, 27, 28, 29, 31, 32], "bridg": 30, "broken": [], "btype": 31, "bucket": 7, "budget": 6, "buffer": [27, 28, 32], "buffer_s": [], "bufferediobas": [], "bufferedrandom": [], "bufferedread": [], "bufferedrwpair": [], "bufferedwrit": [], "bug": 23, "build": [3, 12, 15, 22, 23, 30, 31], "building_id": [3, 5, 15], "building_typ": [3, 31], "buildings_df": 3, "buildings_gdf": [3, 15, 31], "buildings_outlin": 3, "buildings_path": 3, "built": 31, "builtin": [], "burst": 15, "by_chunk": 12, "byte": [], "bytearrai": [], "bytesio": [], "c": [], "cach": 12, "cache_mod": 12, "cache_traj": 15, "calcul": [3, 10], "calendar": [], "call": [27, 31], "callabl": 15, "caller": [5, 10], "can": [3, 15, 29], "candid": [], "candidate_hom": 10, "candidate_workplac": [], "cannot": 12, "canon": 15, "carolinechen": [], "carri": [], "carrol": 30, 
"case": [7, 33], "cast": [], "categor": 12, "categori": [12, 30], "cdf": [], "cdf_function": [], "cell": [], "center": 26, "center_coordin": 12, "centroid": 12, "certain": [], "chain": [], "chang": [22, 23], "charact": [], "characterist": 33, "check_adjac": 3, "choos": [], "chosen": [], "chronolog": [], "chunk": [], "chunk_mil": 12, "circl": [], "citi": [3, 12, 15, 26, 27, 28, 31, 32], "city_boundari": 3, "city_gen": 31, "city_nam": 30, "city_properti": 3, "class": [3, 15], "classmethod": 3, "clean": 12, "client": [], "clip": [10, 12, 30], "clip_spatial_outli": 26, "clip_stays_d": [], "clip_stops_datetim": [], "clip_to_gdf": 12, "close": 31, "closefd": [], "closest": 3, "cluster": [22, 23, 27, 28, 32, 33], "cluster_hierarchi": [], "cluster_id": [], "cluster_label": [], "cluster_st": [], "cluster_stability_df": [], "cmap": 26, "co": [5, 23], "code": 8, "codec": [], "coercion": 10, "col_nam": 15, "collect": [], "color": [3, 15, 26, 30], "colormap": [], "column": [3, 5, 6, 7, 8, 10, 12, 15, 27], "column_stack": [7, 32], "colunmn": [], "com": [22, 23], "combin": [3, 5, 27], "commerci": [27, 30], "common": [7, 27], "compar": 26, "comparison": 33, "complet": [3, 7, 22, 23, 26], "complete_output": [26, 27, 28, 29, 32], "comput": [5, 6, 7, 8, 22, 31, 33], "compute_candidate_hom": 10, "compute_candidate_workplac": [], "compute_cluster_st": [], "compute_radius_of_gyr": 22, "compute_stop_detection_metr": 5, "compute_visitation_error": 5, "concaten": [], "concentr": [], "conceptu": [], "condense_destin": 15, "confident_aryabhata": 32, "conflict": [], "connect": 3, "consecut": [7, 15, 29, 32], "consid": [12, 29, 32], "construct": [3, 28], "constructor": [], "contact": 23, "contain": [3, 7, 8, 12, 15, 17, 22], "content": 25, "contigu": 29, "contribut": [6, 7, 23], "control": [15, 22], "convent": 7, "convers": 7, "convert": [3, 6, 7, 12], "cooki": [], "coord": 3, "coord_i": 3, "coord_x": 3, "coordin": [3, 7, 8, 12, 15, 27, 32], "core": 22, "core_dist": [], "correctli": [], 
"correspond": [7, 15], "count": [6, 12], "count_night": [], "counterclockwis": 12, "counterpart": [], "cove": 30, "cover": [12, 15, 22, 23], "coverag": 23, "coverage_matrix": 7, "cr": [7, 8, 12], "crash": [], "creat": 3, "creation": [], "criterion": [], "crs_to": [7, 32], "csv": [22, 23, 26], "cumul": [], "current": [], "current_end": 26, "current_idx": [], "custom": 23, "d": [7, 15], "d_graph": [], "dai": [7, 8], "daili": 7, "data": [3, 6, 7, 12, 22, 26, 27, 28, 29, 32, 33], "data_cr": [7, 26, 27, 32], "databas": [], "datafram": [3, 5, 6, 7, 8, 12, 15, 23, 26], "dataset": [22, 23, 26, 27], "date": [7, 27, 28, 29, 32], "date_rang": 26, "datetim": [7, 8, 10, 15, 26], "datetime64": [7, 27], "dawn_hour": 10, "dawn_tim": [], "day_part": [], "daytim": [], "dbscan": [22, 23, 26, 32], "dd": [], "deal": [], "debias": 23, "declar": 31, "decod": [], "dedic": [], "dedupl": 15, "def": 31, "default": [3, 7, 8, 12, 15, 27], "default_buffer_s": [], "default_schema": [], "default_spe": 15, "default_still_prob": 15, "defer": [], "defin": [7, 8, 32], "degre": 12, "delawar": 30, "delimit": [], "delta_roam": [26, 29], "demonstr": 22, "dendogram_scal": [], "densiti": [32, 33], "depend": [22, 29], "deprec": [], "deriv": [15, 22], "descend": [], "describ": [], "descript": 23, "descriptor": [], "design": 22, "desktop": [], "dest_col": 6, "destin": [6, 15], "destination_diari": 15, "detach": [], "detail": 3, "detect": [5, 22, 23], "detect_stop": 22, "determin": [3, 29, 32], "determinist": 33, "dev_i": [27, 28, 29, 32], "dev_x": [27, 28, 29, 32], "develop": 12, "devic": [], "df": [7, 12, 22], "diamet": 26, "diari": 15, "diaries_path": 15, "dict": [3, 5, 7, 8, 12, 15, 31], "dictionari": [3, 5, 7, 8, 15], "diff_privacy_ep": 6, "differ": [22, 27, 28, 32], "differenti": 23, "dimens": [3, 31, 32], "directli": [7, 22], "directori": [], "disabl": [], "disconnect": [], "discret": 15, "disk": 3, "dist_thresh": [26, 28, 32], "distanc": [3, 23, 27, 29, 32], "distance_threshold": 33, "distinct": 8, 
"distinguish": 33, "distribut": [3, 8, 15, 22], "divid": 12, "do": [3, 10], "doc_test": 25, "docstr": 17, "document": 17, "doe": [], "doesn": [], "don": [], "door": [3, 15, 31], "door_cell_i": 3, "door_cell_x": 3, "door_coord": 3, "door_point": 3, "down": 7, "download": 12, "download_osm_build": [12, 30], "download_osm_street": [12, 30], "downsampl": 7, "dp": 6, "draw": [], "drawn": 15, "drive": 30, "driver": [3, 30], "drop": 10, "dt": 15, "dt_max": [26, 29], "dtype": 7, "dual": 22, "dummi": [], "duplic": 7, "dur_min": [26, 28, 29, 32], "durat": [5, 10, 15, 29], "duration_at_night_fast": [], "dusk_hour": 10, "dusk_tim": [], "dx": 3, "dy": 3, "dynam": 3, "e": [3, 7, 8, 15, 31], "each": [3, 6, 7, 10, 15, 26, 31], "east": [3, 12], "edg": 3, "edge_alpha": [], "edge_cmap": [], "edge_color": [], "edge_onli": [], "edgecolor": 30, "edges_df": 3, "edges_path": 3, "edges_sorted_df": [], "edu": [22, 23], "educ": 30, "effect": [], "effici": 12, "egg": 22, "either": [7, 31], "elaps": 31, "element": 15, "els": 31, "empti": 15, "enabl": 23, "encod": 7, "end": [3, 7, 8, 15, 22, 23, 26], "end_": [], "end_col": [], "end_coord": 3, "end_datetim": 10, "end_tim": [8, 15], "end_timestamp": 26, "enforc": [], "engin": [], "ensur": 3, "entir": [22, 23], "entri": [7, 15, 31], "entropi": 23, "eof": [], "ep": [22, 33], "epoch": 7, "epr": [15, 23], "epr_time_r": 15, "epsg": [8, 12, 26, 27, 28, 31, 32], "epsilon": [], "equal": 30, "equival": [22, 23], "error": [12, 15], "estim": [5, 12], "etc": 5, "euclidean": 27, "evalu": 26, "even": [], "event": 15, "everi": 7, "exact": 6, "exactli": [], "exampl": [7, 12, 26], "except": [], "exclude_categori": 12, "exclus": [], "execut": 26, "exist": [3, 12, 15, 22, 23], "expect": [7, 8, 15], "explicit": 7, "explod": [12, 30], "explode_stop": [], "explor": 15, "exponenti": 15, "express": 7, "extend": [], "extra": [], "extract": 29, "f": [15, 26, 30, 31], "f1": 5, "face": [], "failur": [], "fall": [], "fallback": 3, "fals": [3, 5, 7, 12, 15, 26, 27, 28, 29, 
30, 31, 32], "false_east": 12, "false_north": 12, "fast": 33, "faster": [], "favor": [], "featur": 12, "fetch": 12, "field": 15, "fig": [26, 27, 28, 29, 30, 31, 32], "figsiz": [26, 27, 28, 29, 30, 31, 32], "figur": 3, "file": [3, 15], "fileexistserror": [], "fileio": [], "filenam": 3, "fileno": [], "filepath": [], "filepath_root": [27, 28, 29, 32], "filesystem": 15, "fill": [3, 5], "fill_block": 3, "fill_cmap": [], "fill_timestamp_gap": [], "filter": [8, 12, 22, 23, 26, 27, 28, 29, 32], "filter_us": 8, "final": [], "find": [3, 15, 33], "first": [7, 26, 29], "first_tim": [], "fix": [], "fixed_format": [], "flag": 7, "flexibl": 12, "float": [3, 6, 7, 12, 15], "flow": 6, "flush": [], "fmt": 15, "focus": 22, "folder": 22, "follow": [15, 29], "form": [23, 32], "format": [3, 22, 26, 27, 28, 29, 32], "found": [3, 7], "four": 26, "fp": [], "frac_record": [], "frac_us": [], "fraction": [5, 7], "freq": [7, 26], "frequenc": [7, 23, 33], "frequent": [], "friendli": 7, "from": [3, 5, 7, 8, 12, 15, 17, 22, 23, 26, 27, 28, 29, 31, 32], "from_df": [], "from_fil": [], "from_geodatafram": 3, "from_geopackag": 3, "from_merc": 3, "full": [15, 26], "full_path": 15, "full_traj": 15, "fulli": [3, 12], "func": 26, "function": [8, 12, 15, 22, 23], "fund": [22, 23], "fundament": 33, "further": [], "futur": [], "g": [3, 7, 8, 15], "gap": [5, 29], "garden": [3, 30], "garden_c": [12, 26, 27, 28, 30, 32], "garden_city_categori": 30, "gc_data": 26, "gc_data_long": [27, 28, 29, 32], "gc_identifi": [27, 28, 29, 32], "gdf": 12, "gdf_row": 3, "gen_params_rang": [], "gen_params_target_q": [], "gener": [3, 15, 17, 22, 23, 33], "generate_ag": 15, "generate_c": 3, "generate_dest_diari": 15, "generate_street": 3, "generate_trajectori": 15, "geodatafram": [3, 7, 12], "geograph": [27, 32], "geographi": [6, 23], "geohash": [7, 27], "geojson": [3, 22, 23, 26, 27, 28, 30, 32], "geolife_plu": 12, "geom": [3, 31], "geom1": 3, "geom2": 3, "geometri": [3, 8, 12, 27, 28, 29, 30, 31, 32], "geopackag": [3, 31], 
"geopanda": [3, 22, 26, 27, 28, 30, 32], "geospati": 12, "get": [3, 31], "get_adjacent_street": 3, "get_block": 3, "get_block_typ": 3, "get_build": 3, "get_building_coordin": 3, "get_category_for_subtyp": 12, "get_category_summari": 12, "get_city_boundary_osm": 12, "get_height": 26, "get_i": 26, "get_prominent_street": 12, "get_shortest_path": 3, "get_street_graph": [3, 31], "get_subtype_summari": 12, "get_table_head": [], "getbuff": [], "getpreferredencod": [], "getvalu": [], "git": [22, 23], "github": [22, 23], "give": [], "given": [3, 6, 7, 12, 15, 27], "global": 12, "goal": [22, 23], "good": 33, "govern": 15, "gp": [22, 23, 33], "gpd": [3, 26, 27, 28, 30, 32], "gpkg": [3, 31], "gpkg_path": 3, "granular": 15, "graph": [3, 23, 31], "graviti": 3, "greater": 29, "green": [27, 30], "grei": 30, "grid": [3, 12, 23, 26], "grid_bas": [26, 27], "grid_based_label": [], "grid_based_per_us": [], "grid_resolut": 33, "gridspec_kw": 26, "ground": [5, 15], "group": [], "grouped_data": [], "gt": 30, "guidelin": 23, "gyrat": 23, "h": [3, 7], "h3": [7, 23, 26, 27], "h3_cell": [26, 27], "h3_cell_to_polygon": [], "h3_resolut": [], "ha": [3, 12, 15, 27, 28, 29, 31, 32], "handl": [7, 12, 23], "hariharan": 29, "hash": [], "hashabl": [], "have": [8, 15], "haversin": [27, 32], "hdbscan": [23, 26], "hdbscan_label": 28, "head": 30, "heatmap": [3, 15], "heatmap_ag": 3, "height": 3, "height_ratio": 26, "help": [], "helper": [10, 31], "here": [5, 22], "heterogen": 7, "heurist": [12, 23], "hexagon": [], "hierarch": [15, 33], "hierarchi": 28, "hierarchy_df": [], "higher": 22, "highlight": [], "highwai": [12, 30], "histori": 29, "hit": [], "hold": [], "hollow": [], "home": [3, 10, 15, 22, 23, 31], "home_d": [], "home_ratio": 3, "homes_path": 15, "homogen": 15, "horizont": 15, "hospit": 30, "hour": [7, 26], "hourli": [], "housenumb": 30, "how": [22, 26], "howev": [], "http": [22, 23], "human": 33, "i": [3, 5, 6, 7, 8, 10, 12, 15, 23, 27, 29, 32, 33], "ic2s2": 27, "id": [3, 7, 15], "id_col": [], 
"id_to_door_cel": 3, "ident": 12, "identifi": [3, 5, 7, 15, 29, 33], "ignor": 7, "immedi": [], "immin": [], "immut": [], "implement": [0, 19, 20, 21, 22, 23, 24, 27, 29, 32, 33, 34, 35, 36], "impli": [], "implicit": [], "import": [12, 22, 26, 27, 28, 29, 30, 31, 32], "includ": [7, 12, 15], "include_schema": [], "include_weekend": [], "incomplet": [], "incorpor": 32, "increas": 26, "increment": 26, "index": [7, 12, 15, 23, 26, 27], "indic": [7, 15], "individu": [3, 5, 7], "inf": [], "infer": [7, 10, 15, 22, 23], "infer_building_typ": [12, 30], "inform": [3, 15, 22], "infrastructur": [22, 23], "ingest": [22, 23], "inherit": [], "initi": [3, 15, 31], "initial_byt": [], "initial_valu": [], "inlin": [27, 28, 29, 32], "inplac": 31, "input": [7, 12, 27], "input_cr": 8, "insid": [7, 8], "insipir": 29, "instead": [3, 7], "int": [3, 6, 7, 8, 15], "int0": 15, "int64": 7, "integ": [7, 27], "intend": [], "intent": [], "inter": 15, "interact": [], "interchang": [], "interfac": [], "intern": [], "interpret": [], "intersect": 10, "interv": 7, "invalid": [3, 7], "invalid_stop": [], "invers": 12, "invok": [], "io": [22, 26, 27, 28, 29, 32], "iobas": [], "is_within": 7, "isatti": [], "isn": [], "iso": [], "issu": 23, "item": 30, "iter": [], "its": 15, "jersei": 30, "join": 15, "join_styl": [27, 28, 32], "junction": 30, "jupyt": 22, "k": [12, 23], "keep": [7, 12, 15], "keep_col_nam": [26, 29], "kei": [3, 7, 12, 15], "kept": [], "keyerror": 7, "keyword": [10, 15], "kind": 3, "known": [], "kwarg": [5, 7, 8, 10, 15, 31], "lab": [22, 23], "label": 26, "label_history_df": [], "lachesi": [23, 26], "lachesis_label": [], "lachesis_per_us": [], "lack": 3, "lambda": 26, "lane": 30, "laplac": 6, "larg": [22, 23], "larger": 26, "largest": [], "last": 7, "last_dat": [], "last_p": 15, "last_tim": [], "lat": [7, 27, 32], "latent": 15, "later": [], "latitud": [8, 26, 32], "layer": 3, "layout": 15, "lead": [], "learn": 22, "least": [7, 8, 27, 29], "leav": [], "left": 5, "legal": [], "legend": 26, 
"len": [26, 30, 31], "length": [3, 12, 15, 30], "less": [], "librari": [12, 22, 23], "licens": 23, "like": [8, 22, 23, 27], "limit": [], "line": [], "line_buff": [], "linesep": [], "linestr": 30, "linewidth": [26, 30], "list": [3, 6, 15, 31], "listedcolormap": [], "live": [], "load": [3, 22, 27, 28, 29, 32], "loader": [22, 26, 27, 28, 29, 32], "local": [7, 8, 15, 23], "local_t": 15, "localize_from_offset": [], "locat": [3, 5, 10, 15, 22, 23, 27, 29, 33], "location_id": [26, 27], "logic": 7, "lon": [7, 27, 32], "long": [], "longitud": [8, 26, 32], "loop": [], "loss": [], "low": 33, "lower": 7, "lower_quantil": [], "lt": 30, "m": [7, 15], "mai": 7, "main": [], "major": [], "make": [], "manag": 3, "mani": 31, "manual": 3, "manual_street": 3, "map": [3, 5, 7, 8, 15, 23, 30], "map_util": 30, "mark": [], "marker": 26, "mask": [3, 7], "massiv": [22, 23], "match": 7, "match_loc": 5, "matplotlib": [3, 15, 26, 27, 28, 29, 30, 31, 32], "matrix": 7, "matter": [], "max": 26, "max_dist": [], "max_gap": [], "maxim": 28, "maximum": [29, 32], "maxspe": 30, "mean": 15, "meaning": 33, "measur": [7, 26], "medic": 30, "medoid": [], "meet": [], "memori": 3, "mercat": [3, 12, 15], "mercator_to_block": 12, "mere": 10, "merg": 5, "merged_fract": 5, "messag": 15, "meter": [3, 12, 27, 29, 32], "method": [3, 15, 28, 32], "metric": [5, 22, 23], "metrics_df": 22, "might": [], "min": [7, 26], "min_active_dai": 8, "min_cluster_s": [26, 28, 32, 33], "min_dai": 10, "min_dur": 33, "min_dwel": [], "min_pings_per_dai": 8, "min_pt": [26, 28, 32], "min_sampl": [22, 33], "min_week": 10, "minimum": [29, 32], "minut": [5, 7, 15, 29, 32], "mismatch": 7, "miss": [5, 8], "missed_fract": 5, "mit": [22, 23], "mitr": [27, 28, 32], "mix": [], "mixed_timezone_behavior": 15, "mm": [], "mobil": [22, 33], "mobilkit": [22, 23], "mode": 12, "model": [15, 23, 29], "modifi": 15, "modul": [12, 25], "moment": [], "monthli": 7, "more": 22, "most": [7, 12], "move": [3, 6, 29], "movement": 33, "mst": [], "much": [], "multi": 
[], "multilinestr": 12, "multipl": [3, 7], "multipolygon": [3, 7, 12], "must": [3, 5, 7, 15, 29], "n": [15, 27, 31], "n_ping": 26, "n_row": [], "na": [7, 30], "naiv": 15, "naive_datetime_from_unix_and_offset": [], "naive_dt": [], "name": [3, 5, 7, 8, 10, 12, 15, 26, 27, 33], "name_count": 15, "nan": 30, "nearest": [], "necessari": 10, "need": 8, "neg": 6, "neighbor": [3, 32], "neither": [], "network": [12, 22], "networkx": 22, "never": [], "new": [3, 8, 15, 30], "newlin": [], "next": [], "night": 10, "night_stop": [], "nightli": 10, "nj": 30, "nocturn": 10, "nocturnal_stop": 10, "node": 3, "nois": 6, "nomad": [12, 17, 26, 27, 28, 29, 30, 31, 32, 33], "non": [6, 15, 28], "none": [3, 5, 6, 7, 8, 10, 12, 15], "nor": [], "normalis": 6, "normalize_od": 6, "normalized_mov": 6, "normalized_remain": 6, "north": [3, 12], "note": [], "notebook": [22, 23, 26], "noth": [], "notic": 32, "np": [7, 27, 28, 32], "nsampl": 30, "nsf": [22, 23], "num_night": [], "num_week": [], "num_work_dai": [], "number": [7, 15, 26, 32], "numpi": [3, 15, 22, 27, 28, 32], "o": 26, "object": [3, 7, 15, 29], "observ": 7, "obtain": [], "occupi": 3, "occurr": [], "od": 6, "od_df": [], "off": 12, "offend": [], "offset": [7, 12], "offset_col": 7, "often": [], "oi": 3, "omit": 7, "onc": [], "one": [7, 8, 12, 29], "ones": [], "onewai": 30, "onli": [3, 7, 15, 27, 30], "opac": [], "open": [22, 30], "open_cod": [], "openstreetmap": 12, "oper": 3, "optim": [], "option": [3, 5, 6, 7, 8, 12, 15, 22], "oracle_map": [], "orang": 30, "order": 3, "origin": [6, 8, 12], "origin_col": 6, "oserror": [], "osm": 12, "osm_typ": [12, 30], "osmid": 30, "osmnx": 12, "other": [12, 30], "otherwis": [3, 7], "our": 23, "outer_box": [27, 28, 32], "outgo": 6, "outlier": [], "output": [7, 12, 15], "output_burst": 15, "output_cr": 8, "output_traj_col": [], "outsid": [], "over": 7, "overal": 7, "overlap": [3, 5, 12, 28, 30], "overlapping_visit": 5, "overpass": 30, "overrid": [7, 10], "overridden": [], "own": 7, "ox": 3, "p": 5, "pa": 
30, "packag": [17, 25], "pad": [], "pad_short_stop": [], "page": 23, "pair": [3, 8, 15, 27], "panda": [3, 6, 7, 12, 15, 22, 23, 26, 27, 28, 30, 32], "parallel": [22, 23], "param": 6, "paramet": [5, 7, 8, 12, 15, 29, 32, 33], "pareto": 15, "park": [3, 12, 15, 30, 31], "park_ratio": 3, "parquet": [3, 15, 22, 23, 27, 28, 29, 32], "pars": [7, 15, 29], "parse_agent_attr": 15, "parse_d": [], "parser": [], "part": [22, 23], "partit": 15, "partition_bi": [], "partition_col": 15, "pass": 7, "passthrough": [], "passthrough_col": [], "path": [3, 15], "pattern": [3, 33], "pd": [3, 5, 7, 8, 12, 15, 26, 27, 28, 30, 32], "peek": [], "pennsylvania": [22, 23], "per": [6, 7, 10, 15], "percentag": 6, "perf_count": 31, "perform": [5, 10, 22], "period": [7, 33], "permit": 29, "persist": [3, 12, 31], "persist_block": 3, "persist_city_properti": 3, "philadelphia": 30, "philadelphia_build": 30, "philadelphia_street": 30, "pick": [], "pilesgrov": 30, "ping": [7, 8, 15, 23, 26, 29, 32], "pip": [22, 23], "pipe": [], "pipelin": [22, 23], "place": [], "place_buildings_in_block": 3, "plain": 32, "platform": [], "plausibl": [], "plot": [3, 15, 26, 30, 31], "plot_circl": [], "plot_citi": [3, 31], "plot_hexagon": [], "plot_od_map": [], "plot_p": 26, "plot_polygon": [], "plot_popul": 15, "plot_sparse_clust": [], "plot_stop": 26, "plot_stops_barcod": [26, 27, 28, 29, 32], "plot_time_barcod": [26, 27, 28, 29, 32], "plot_traj": 15, "plt": [26, 27, 28, 29, 30, 31, 32], "plu": [], "po": [], "poi": 23, "poi_data": 15, "poi_map": [], "poi_tabl": [], "point": [3, 7, 8, 23, 29, 30], "point_color": [], "point_in_polygon": [], "pointer": [], "points_gdf": [], "points\u00b2": [], "poisson": 15, "polici": [], "poly_cr": 7, "polygon": [3, 7, 8, 12, 30], "popul": [3, 12, 15], "portion": 10, "posit": [7, 15], "possibl": [15, 27], "post": [23, 28, 32], "postcod": 30, "postprocess": [28, 32], "pre": [], "precis": 5, "precision_recall_f1_from_minut": 5, "predefin": 3, "predict": 5, "preferenti": 15, "prepar": [], 
"presenc": 10, "present": [7, 12], "preserv": 33, "previou": [22, 23], "print": [7, 15, 26, 30, 31], "print_stop": [], "prioriti": 12, "privaci": [6, 23, 33], "probabl": 15, "process": [12, 15, 22, 23, 28, 30, 32, 33], "produc": 5, "product": [22, 23], "profession": 12, "progress": 26, "project": [3, 7, 8, 12, 22, 23, 27, 29], "promin": 12, "propag": [], "proper": [10, 30], "properti": [3, 15], "proport": [3, 7], "protocol": [], "provid": [3, 7, 8, 10, 12, 15, 22, 23, 27], "proxim": 23, "public": [], "purpl": 30, "purpos": 33, "put": [], "py": [], "pyarrow": 15, "pydeck": [], "pyfunc": [], "pyplot": [26, 27, 28, 29, 30, 31, 32], "pyproj": [], "pyspark": [22, 23], "python": [22, 23], "q": 7, "q_filter": 7, "q_rang": [], "q_stat": 7, "qbar": 7, "qualifi": 29, "qualiti": 22, "quantil": [], "queri": 12, "r": 5, "radiu": [15, 23, 26, 28, 33], "rais": [3, 5, 7, 8], "random": 15, "randomcitygener": 3, "rang": 7, "rank": [], "rate": 15, "rather": 15, "raw": [], "rawio": [], "rawiobas": [], "rb": [], "re": [7, 26, 27], "reach": [], "read": [22, 23], "read1": [], "read_csv": [], "read_data": 22, "read_fil": [26, 27, 28, 32], "readabl": [], "readal": [], "reader": [], "readi": [22, 23], "readinto": [], "readinto1": [], "readlin": [], "real": 33, "reassign": [], "recal": 5, "recent": [], "recogn": [], "recomput": 10, "reconfigur": [], "record": 7, "recurs": [], "red": [26, 32], "reduc": 12, "ref": 30, "refer": 8, "regardless": [], "region_gdf": [], "regist": [], "reiniti": 15, "rel": [6, 7], "relat": 23, "relev": [10, 15], "reli": 32, "remain": 6, "remov": [7, 12, 30], "remove_overlap": [12, 28, 30, 32], "repeat": 15, "repeatedli": [], "replac": 15, "replace_sparse_traj": 15, "replic": [22, 23], "report": 23, "repres": [3, 15, 29], "reproduc": [6, 15], "reproject": 15, "reproject_to_merc": 15, "request": 12, "requir": [3, 7, 27, 32], "research": [22, 23], "reset": 15, "reset_trajectori": 15, "resid": 15, "residenti": [22, 30], "resolut": 7, "resolv": 10, "resourc": [22, 23, 
31], "respect": [], "result": [5, 7, 12, 26, 29, 30], "results_df": 26, "retail": [3, 15, 30, 31], "retail_ratio": 3, "retain": [7, 8], "retriev": [3, 12], "return": [5, 6, 7, 8, 12, 15, 23], "return_cor": [], "revers": 30, "rgba": [], "right": [5, 31], "riverwalk": 30, "rn": [], "road": 30, "roam": 29, "robust": 7, "rog": [], "rog_spark": [], "roster": 15, "rotat": 12, "rotate_streets_to_align": 12, "rotated_streets_gdf": 12, "rotation_deg": 12, "rotation_degre": 12, "routin": [], "row": [3, 6, 7, 10, 12], "rt": [], "rule": [], "run": [15, 22, 23, 26], "runtim": 33, "runtime_data": 26, "runtime_df": 26, "s2": [7, 23], "s3": 15, "s3f": 15, "s3filesystem": 15, "salem": 30, "salem_build": 30, "salem_street": 30, "same": [15, 22, 23, 27, 32], "sampl": [7, 15, 22, 30, 33], "sample_from_fil": [26, 27, 28, 29, 32], "sample_hier_nhpp": 15, "sample_step": 15, "sample_traj_hier_nhpp": 15, "sample_trajectori": 15, "sample_us": [], "sampler": 23, "satisfi": [], "saturdai": [], "save": [3, 15, 30], "save_geopackag": [3, 31], "save_pop": 15, "scalabl": 22, "scalar": 7, "scale": [3, 12, 15, 22, 23, 26], "scan": [], "schema": [12, 23, 30], "scikit": [22, 23], "sea": [22, 23], "seamless": 23, "search": 23, "second": [7, 26, 27, 32], "section": 17, "see": [6, 23], "seed": [3, 6, 15], "seek": [], "seek_cur": [], "seek_end": [], "seek_set": [], "seekabl": [], "segment": 33, "select": [10, 28, 33], "select_hom": 10, "select_most_stable_clust": [], "select_workplac": [], "self": 15, "semant": 7, "sep": [], "separ": [], "sequenc": 29, "sequenti": [23, 29, 33], "seri": [3, 7, 8], "serv": [], "servic": [12, 30], "session": 8, "set": [5, 6, 12, 15, 32], "set_aspect": 30, "set_axis_off": [], "set_cr": 31, "set_osmnx_cache_mod": 12, "set_titl": [26, 30], "set_xlabel": 26, "set_xlim": [26, 27, 28, 29, 32], "set_ylabel": 26, "setup": 25, "sever": [], "shape": [3, 7, 8, 12, 22, 27, 28, 29, 31, 32], "share": [6, 7], "shift": [], "short": [], "shorter": [], "shortest": 3, "shortest_path": 3, 
"shorthand": 7, "should": 29, "show": [26, 27, 28, 29, 30, 31, 32], "shown": 33, "shp_plt": [], "shuffl": [], "side": 3, "signatur": [], "significantli": [], "similar": [], "simpl": 22, "simpli": [], "simplifi": 12, "simul": 15, "simulate_traj": 15, "simultan": [], "sinc": 7, "singl": [3, 5, 6, 7, 10, 12, 15, 22, 23], "size": [3, 7, 12, 26, 29], "slice": 10, "slice_datetimes_interval_fast": [], "small": 31, "smallest": [], "snow_engin": [], "so": [7, 15], "socket": [], "softwar": [22, 23], "sole": [], "some": [], "sort": [], "sort_tim": [], "sourc": [3, 5, 6, 7, 8, 10, 12, 15, 22, 23], "spam": [], "span": 7, "spark": [8, 23], "spark_sess": 8, "sparksess": 8, "spars": [15, 33], "sparse_path": 15, "sparse_traj": 15, "sparsifi": 15, "spatial": [7, 8, 12, 22, 23, 26, 27, 32, 33], "specif": [], "specifi": [3, 7, 8, 32], "speed": 15, "spend": 29, "spent": [3, 15], "split": 5, "split_fract": 5, "spring": 30, "spurious": [], "sql": [], "sqlalchemi": [], "st_hdbscan": [26, 28], "st_hdbscan_per_us": [], "stabil": 28, "stagger": [], "stai": [15, 29], "standard": 7, "start": [3, 5, 7, 8, 15, 26], "start_": [], "start_col": [], "start_coord": 3, "start_datetim": 10, "start_tim": 8, "start_timestamp": 5, "stat": [], "state": [15, 30], "statement": [], "static": 15, "stationari": 33, "statist": [7, 10], "step": [12, 15], "step_se": 15, "still": [15, 29], "still_prob": 15, "stop": [5, 10, 22, 23], "stop_color": [27, 28, 29, 32], "stop_data": [], "stop_detect": [22, 26, 27, 28, 29, 32], "stop_tabl": [], "stops_gb": 27, "stops_hdb": 28, "stops_output": 26, "stops_subset": 26, "stops_tabl": 10, "stops_tadb": 32, "store": [3, 15], "str": [3, 5, 6, 7, 8, 15], "str_from_tim": 7, "strategi": [], "stratif": 23, "stream": [], "street": [3, 12, 30, 31], "street_adjacency_edg": 3, "street_graph": 3, "street_spac": 3, "streets_df": 3, "streets_gdf": [3, 12, 31], "streets_path": 3, "strftime": [], "strict": [], "strictli": 10, "string": [3, 7, 15], "stringio": [], "strip": [], "structur": 3, 
"style": [], "subclass": [], "submodul": 25, "subplot": [26, 27, 28, 29, 30, 31, 32], "subset": [8, 26, 30], "subtyp": [12, 30], "subtype_2": 30, "subtype_3": 30, "suit": [], "suitabl": [7, 22, 23], "sum": 6, "summar": [], "summari": [], "summarize_stop": [], "summarize_stop_grid": [], "sundai": [], "suppli": 5, "support": [8, 27], "suptitl": [28, 29], "surfac": 12, "surviv": 7, "switch": [], "symmetr": [], "syneth": 31, "synthet": [22, 23], "synthetic_poi": 31, "system": 8, "systemat": 3, "t": 26, "t0": [26, 31], "ta": [26, 32], "ta_dbscan": [26, 32], "ta_dbscan_label": 32, "ta_dbscan_per_us": [], "tabl": [5, 6, 15], "table_column": [], "table_column_filt": [], "table_column_uniqu": [], "table_nam": [], "tag": 12, "take": [8, 15], "target": 8, "tc": [27, 28, 29, 32], "team": 12, "tell": [], "temp": 12, "tempor": [22, 23, 26, 27, 32, 33], "temporari": [], "termin": [], "tessel": [23, 33], "test": [], "text": 26, "textio": [], "textiobas": [], "textiowrapp": [], "th": 15, "than": [15, 29], "thei": 8, "them": [8, 12, 32], "thi": [3, 7, 8, 10, 12, 15, 17, 26, 29, 32], "those": [8, 12, 28], "though": [], "threshold": 7, "through": [3, 10, 27], "ti": [], "tick": 15, "tight_layout": [26, 27, 29, 30, 32], "tile": 12, "time": [3, 7, 12, 15, 23, 26, 29, 31, 32, 33], "time_col": 15, "time_thresh": [26, 27, 28, 32], "time_threshold": 33, "time_valu": 7, "time_weight": [], "timedelta": 26, "timefram": 8, "timestamp": [5, 7, 8, 15, 26, 27, 28, 29, 32], "timezon": [7, 8], "timezone_offset": 7, "titl": [27, 32], "to_cr": [27, 28, 32], "to_dict": 30, "to_fil": [3, 30], "to_geodatafram": 3, "to_merc": 3, "to_project": [7, 8, 32], "to_rgba": [], "to_str": 26, "to_tessel": [7, 26, 27], "to_timestamp": 7, "to_yyyymmdd": 7, "to_zoned_datetim": 7, "todo": 8, "togeth": [], "tolist": 12, "too": [], "tool": 23, "top": [], "top_k_destin": [], "top_k_origin": [], "total": [5, 6, 7, 15, 26], "total_bound": [27, 28, 32], "total_dur": [], "total_pr": 5, "total_truth": 5, "toyama": 29, "tp": 5, 
"trace": 22, "trackintel": [22, 23], "traj": [8, 15, 26, 27, 28, 29, 32], "traj_clean": 26, "traj_col": [5, 7, 8, 10, 15, 27, 28, 29, 32], "traj_from_dest_diari": 15, "traj_subset": 26, "trajectori": [7, 8, 15, 22, 23, 26, 33], "transform": [12, 15, 22], "translat": [], "transpar": 15, "travel": [15, 23], "tree": [], "trigger": [], "trip": [6, 22, 33], "true": [3, 7, 12, 15, 26, 27, 28, 29, 30, 31, 32], "true_visit": 5, "truncat": 12, "truth": [5, 15], "try": [], "ts_seri": [], "tty": [], "tunnel": [12, 30], "tupl": [3, 12, 31], "tutori": 27, "twilight": [], "two": [3, 12, 29, 32], "two_dai": 26, "txt": [], "type": [3, 5, 6, 7, 8, 10, 12, 15, 31], "typic": [], "tz": [15, 27], "tz_offset": 7, "u": 3, "uid": [], "unchang": [], "underli": [], "underneath": [], "understand": 33, "uniform": 33, "uniqu": [3, 5, 7, 26], "unit": [7, 12], "univers": [22, 23, 30], "unix": [7, 27], "unix_t": [27, 28, 29, 32], "unknown": 12, "unless": [], "unlik": 32, "unspecifi": [], "unsupportedoper": [], "until": [3, 15], "untransl": [], "unus": [], "unweight": [], "up": [], "updat": [3, 12, 15], "upenn": [22, 23], "upon": [], "upper": [], "upper_quantil": [], "us": [3, 7, 8, 12, 15, 26, 27, 30, 32, 33], "usabl": [], "use_datetim": [], "use_offset": [], "user": [5, 7, 8, 10, 15, 26, 27, 28, 29, 32], "user_data_hdb": 28, "user_data_tadb": 32, "user_id": [5, 7, 8, 27, 28, 29, 32], "userwarn": [], "usr_polygon": [], "usual": [], "utc": 7, "utc_timestamp": 7, "util": 12, "v": [3, 26], "va": 26, "val": 26, "valid": [12, 22, 23], "valu": [3, 6, 7, 15, 28], "value_count": 30, "valueerror": [3, 5, 7, 8], "vari": [], "variabl": [15, 33], "variat": 33, "variou": [5, 22], "vector": 3, "verbos": [3, 7, 15], "version": [], "via": 7, "view": 7, "visit": [22, 23], "viz": [26, 27, 28, 29, 32], "w": 7, "w_min": [], "wa": [], "wai": [], "walnut": 30, "warn": [7, 26], "watt": [22, 23], "we": [26, 32], "web": [3, 12, 15], "web_mercator_origin": 3, "web_mercator_origin_i": 3, "web_mercator_origin_x": [3, 15], 
"websit": 23, "week": 7, "weekdai": [], "weekli": 7, "weight": [], "weight_col": 6, "weight_freq": [], "well": 27, "what": 10, "when": [6, 7, 12, 15, 27], "whenc": [], "where": [3, 5, 6, 7, 15, 31], "whether": [3, 15], "which": [3, 7, 15, 27, 29, 33], "while": [], "white": [], "who": 8, "whole": 15, "whose": [3, 6, 7], "width": 3, "window": [7, 10, 23, 26], "within": [3, 7, 8, 12, 15, 29, 32], "without": [3, 22, 23], "wkt": 7, "woodstown": 30, "work": [3, 15, 27, 31, 32], "work_dat": [], "work_end_hour": [], "work_ratio": 3, "work_start_hour": [], "workdai": [], "workday_stop": [], "workplac": [15, 22, 23, 30, 31], "workstat": [22, 23], "would": [], "wrap": [], "wrapper": 30, "writabl": [], "write": 3, "write_through": [], "writeabl": [], "writelin": [], "writer": [], "written": [], "wt": [], "x": [3, 7, 8, 12, 15, 26, 27, 28, 29, 32], "x8": 3, "x_block": 12, "x_mercat": 12, "y": [3, 7, 8, 12, 15, 27, 28, 29, 32], "y8": 3, "ye": [12, 30], "yield": [], "your": 22, "youthful_may": 26, "yyyi": [], "yyyymmdd": 7, "z": 3, "zero": [], "zip": 26, "zone": 7, "zoned_datetime_from_ts_and_offset": [], "zorder": 3, "\u03b5": [6, 15], "\u03b5_x": 15, "\u03b5_y": 15, "\u03c3": 15, "\u03c3\u00b2": 15}, "titles": ["Aggregation & Debiasing", "agg", "aggregation", "city_gen", "constants", "contact_estimation", "displacement", "filters", "filters_spark", "generation", "home_attribution", "io", "map_utils", "metrics", "stop_detection", "traj_gen", "visit_attribution", "API Reference", "base_module package", "Co-location & Contact Networks", "Data Ingestion", "Filtering & Completeness", "Getting Started", "NOMAD: Network for Open Mobility Analysis and Data", "Mobility Metrics", "src", "Benchmarking Stop Detection Algorithms", "Grid-Based Stop Detection", "HDBSCAN Stop Detection", "Lachesis Stop Detection", "Downloading places of interest (POI) Data from OSM", "Generating synthetic places of interest (POI) Data", "TADBSCAN Stop Detection", "Stop Detection", "Synthetic Data Generation", 
"Tessellation", "Visit Attribution"], "titleterms": {"With": 22, "agg": 1, "aggreg": [0, 2], "algorithm": 26, "analysi": 23, "api": 17, "attribut": 36, "base": [27, 33], "base_modul": 18, "basic": 22, "benchmark": 26, "bound": 30, "box": 30, "citi": 30, "city_gen": 3, "co": 19, "commun": 23, "comparison": 26, "complet": 21, "constant": 4, "contact": 19, "contact_estim": 5, "content": 18, "core": [], "data": [20, 23, 30, 31, 34], "dbscan": 33, "debias": 0, "detect": [26, 27, 28, 29, 32, 33], "displac": 6, "doc_test": 18, "download": 30, "exampl": 22, "featur": 22, "filter": [7, 21], "filters_spark": 8, "from": 30, "gener": [9, 31, 34], "get": 22, "grid": [27, 33], "grid_bas": [], "hdbscan": [28, 33], "home_attribut": 10, "i": 22, "indic": 23, "ingest": 20, "instal": [22, 23], "interest": [30, 31], "io": 11, "kei": 22, "lachesi": [29, 33], "licens": 22, "link": 23, "locat": 19, "map_util": 12, "method": 33, "metric": [13, 24], "mobil": [23, 24], "modul": [17, 18, 23], "name": 30, "network": [19, 23], "next": 22, "nomad": [22, 23], "od": [], "open": 23, "osm": 30, "overal": 26, "overview": [23, 33], "packag": 18, "paramet": 3, "perform": 26, "place": [30, 31], "poi": [30, 31], "postprocess": [], "preprocess": [], "privaci": [], "process": [], "quick": [22, 23], "refer": 17, "requir": 22, "return": 3, "runtim": 26, "scalabl": 26, "sequenti": [], "setup": [18, 26], "snowflak": [], "spark": 22, "sparsiti": [], "src": 25, "start": 22, "step": 22, "stop": [26, 27, 28, 29, 32, 33], "stop_detect": 14, "submodul": 18, "support": [22, 23], "synthet": [31, 34], "tabl": 23, "tadbscan": 32, "tessel": 35, "traj_gen": 15, "util": [], "visit": 36, "visit_attribut": 16, "visual": 26, "viz": [], "what": 22}})
\ No newline at end of file
diff --git a/docs/index.rst b/docs/index.rst
index 3332079c..f51bcbc7 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -56,6 +56,7 @@ Installation
:caption: Examples
:hidden:
+ source/ingesting-data
source/benchmarking_of_stop_detection_algorithms
source/lachesis_demo
source/tadbscan_demo
diff --git a/docs/source/benchmarking_of_stop_detection_algorithms.ipynb b/docs/source/benchmarking_of_stop_detection_algorithms.ipynb
index 747c1e8c..64549ab4 100644
--- a/docs/source/benchmarking_of_stop_detection_algorithms.ipynb
+++ b/docs/source/benchmarking_of_stop_detection_algorithms.ipynb
@@ -2,73 +2,191 @@
"cells": [
{
"cell_type": "markdown",
- "id": "92838936",
+ "id": "67fc810a",
"metadata": {},
"source": [
- "# Benchmarking Stop Detection Algorithms\n",
- "\n",
- "This notebook compares the performance of four stop detection algorithms: **Lachesis**, **TA-DBSCAN**, **Grid-Based**, and **HDBSCAN**. We evaluate both overall runtime on a full trajectory dataset and how runtime scales with increasing data size."
+ "# Comparing runtimes of different stop detection algorithms on toy datasets"
]
},
{
"cell_type": "markdown",
- "id": "426e43f2",
+ "id": "f152187c",
"metadata": {},
"source": [
- "## Setup"
+ "Here we compare the runtimes of four different stop detection algorithms: Lachesis, grid-based, temporal DBSCAN, and HDBSCAN."
]
},
{
"cell_type": "code",
"execution_count": 1,
- "id": "74f96664",
+ "id": "474229df",
"metadata": {
"execution": {
- "iopub.execute_input": "2025-10-17T05:59:34.956525Z",
- "iopub.status.busy": "2025-10-17T05:59:34.956274Z",
- "iopub.status.idle": "2025-10-17T05:59:38.575372Z",
- "shell.execute_reply": "2025-10-17T05:59:38.574638Z"
+ "iopub.execute_input": "2025-11-24T18:33:19.946986Z",
+ "iopub.status.busy": "2025-11-24T18:33:19.946986Z",
+ "iopub.status.idle": "2025-11-24T18:33:23.251955Z",
+ "shell.execute_reply": "2025-11-24T18:33:23.251955Z"
}
},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Lachesis execution time: 0.02721381187438965 seconds\n",
+ "TA-DBSCAN execution time: 0.012791156768798828 seconds\n",
+ "TA-DBSCAN clustering time: 0.009821414947509766 seconds\n",
+ "TA-DBSCAN post-processing time: 0.0029697418212890625 seconds\n",
+ "Grid-Based execution time: 0.022524595260620117 seconds\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "HDBSCAN execution time: 0.3206779956817627 seconds\n",
+ "HDBSCAN clustering time: 0.3206779956817627 seconds\n",
+ "HDBSCAN post-processing time: 0.0 seconds\n"
+ ]
+ }
+ ],
"source": [
- "import time\n",
- "import warnings\n",
- "import pandas as pd\n",
- "import geopandas as gpd\n",
+ "%matplotlib inline\n",
+ "import matplotlib\n",
+ "matplotlib.use('TkAgg')\n",
"import matplotlib.pyplot as plt\n",
+ "plt.ion()\n",
+ "\n",
+ "# Imports\n",
"import nomad.io.base as loader\n",
- "import nomad.filters as filters\n",
- "from nomad.stop_detection.viz import plot_pings, plot_stops, plot_time_barcode, plot_stops_barcode, clip_spatial_outliers\n",
- "import nomad.stop_detection.lachesis as LACHESIS\n",
+ "import geopandas as gpd\n",
+ "from shapely.geometry import box\n",
+ "import pandas as pd\n",
+ "import numpy as np\n",
+ "from nomad.stop_detection.viz import plot_stops_barcode, plot_pings, plot_stops, plot_time_barcode\n",
"import nomad.stop_detection.dbscan as DBSCAN\n",
+ "import nomad.stop_detection.lachesis as LACHESIS\n",
"import nomad.stop_detection.grid_based as GRID_BASED\n",
"import nomad.stop_detection.hdbscan as HDBSCAN\n",
+ "import nomad.filters as filters \n",
+ "import nomad.stop_detection.postprocessing as post\n",
+ "import time\n",
+ "from tqdm import tqdm\n",
+ "\n",
+ "# Load data\n",
+ "import nomad.data as data_folder\n",
+ "from pathlib import Path\n",
+ "data_dir = Path(data_folder.__file__).parent\n",
+ "city = gpd.read_parquet(data_dir / 'garden-city-buildings-mercator.parquet')\n",
+ "outer_box = box(*city.total_bounds).buffer(15, join_style='mitre')\n",
+ "\n",
+ "filepath_root = 'gc_data_long/'\n",
+ "tc = {\n",
+ " \"user_id\": \"gc_identifier\",\n",
+ " \"timestamp\": \"unix_ts\",\n",
+ " \"x\": \"dev_x\",\n",
+ " \"y\": \"dev_y\",\n",
+ " \"ha\":\"ha\",\n",
+ " \"date\":\"date\"}\n",
+ "\n",
+ "users = ['admiring_brattain']\n",
+ "traj = loader.sample_from_file(filepath_root, format='parquet', users=users, filters=('date','==', '2024-01-01'), traj_cols=tc)\n",
+ "\n",
+ "# Lachesis (sequential stop detection)\n",
+ "start_time = time.time()\n",
+ "stops = LACHESIS.lachesis(traj, delta_roam=20, dt_max = 60, dur_min=5, complete_output=True, keep_col_names=True, traj_cols=tc)\n",
+ "execution_time_lachesis = time.time() - start_time\n",
+ "print(f\"Lachesis execution time: {execution_time_lachesis} seconds\")\n",
+ "\n",
+ "# Density based stop detection (Temporal DBSCAN)\n",
+ "start_time = time.time()\n",
+ "user_data_tadb = traj.assign(cluster=DBSCAN.ta_dbscan_labels(traj, time_thresh=240, dist_thresh=15, min_pts=3, traj_cols=tc))\n",
+ "clustering_time_tadbscan = time.time() - start_time\n",
+ "start_time_post = time.time()\n",
+ "cluster_labels_tadb = post.remove_overlaps(user_data_tadb, time_thresh=240, method='cluster', traj_cols=tc, min_pts=3, dur_min=5, min_cluster_size=3)\n",
+ "execution_time_tadbscan = time.time() - start_time\n",
+ "post_time_tadbscan = time.time() - start_time_post\n",
+ "print(f\"TA-DBSCAN execution time: {execution_time_tadbscan} seconds\")\n",
+ "print(f\"TA-DBSCAN clustering time: {clustering_time_tadbscan} seconds\")\n",
+ "print(f\"TA-DBSCAN post-processing time: {post_time_tadbscan} seconds\")\n",
+ "\n",
+ "# Grid-based\n",
+ "start_time = time.time()\n",
+ "traj['h3_cell'] = filters.to_tessellation(traj, index=\"h3\", res=10, traj_cols=tc, data_crs='EPSG:3857')\n",
+ "stops_gb = GRID_BASED.grid_based(traj, time_thresh=240, complete_output=True, traj_cols=tc, location_id='h3_cell')\n",
+ "execution_time_grid = time.time() - start_time\n",
+ "print(f\"Grid-Based execution time: {execution_time_grid} seconds\")\n",
"\n",
- "city = gpd.read_file(\"../../examples/garden_city.geojson\")\n",
- "traj = loader.sample_from_file('../../examples/gc_data/', users=['youthful_mayer'], format='csv')"
+ "# HDBSCAN\n",
+ "start_time = time.time()\n",
+ "user_data_hdb = traj.assign(cluster=HDBSCAN.hdbscan_labels(traj, time_thresh=240, min_pts=3, min_cluster_size=2, traj_cols=tc))\n",
+ "clustering_time_hdbscan = time.time() - start_time\n",
+ "start_time_post = time.time()\n",
+ "cluster_labels_hdb = post.remove_overlaps(user_data_hdb, time_thresh=240, method='cluster', traj_cols=tc, min_pts=3, dur_min=5, min_cluster_size=3) \n",
+ "execution_time_hdbscan = time.time() - start_time\n",
+ "post_time_hdbscan = time.time() - start_time_post\n",
+ "print(f\"HDBSCAN execution time: {execution_time_hdbscan} seconds\")\n",
+ "print(f\"HDBSCAN clustering time: {clustering_time_hdbscan} seconds\")\n",
+ "print(f\"HDBSCAN post-processing time: {post_time_hdbscan} seconds\")"
]
},
{
"cell_type": "markdown",
- "id": "cb276fd9",
+ "id": "c88f426d",
"metadata": {},
"source": [
- "## Overall Runtime Comparison\n",
- "\n",
- "We first measure the total execution time for each algorithm on the complete dataset."
+ "## Summary of Single-User Performance"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "6a678431",
+ "metadata": {},
+ "source": [
+ "### Lachesis"
]
},
{
"cell_type": "code",
"execution_count": 2,
- "id": "19184dee",
+ "id": "b7480c93",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2025-11-24T18:33:23.251955Z",
+ "iopub.status.busy": "2025-11-24T18:33:23.251955Z",
+ "iopub.status.idle": "2025-11-24T18:33:23.475346Z",
+ "shell.execute_reply": "2025-11-24T18:33:23.475346Z"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "fig, (ax_map, ax_barcode) = plt.subplots(2, 1, figsize=(6,6.5),\n",
+ " gridspec_kw={'height_ratios':[10,1]})\n",
+ "\n",
+ "gpd.GeoDataFrame(geometry=[outer_box], crs='EPSG:3857').plot(ax=ax_map, color='#d3d3d3')\n",
+ "city.plot(ax=ax_map, edgecolor='white', linewidth=1, color='#8c8c8c')\n",
+ "\n",
+ "plot_stops(stops, ax=ax_map, cmap='Reds')\n",
+ "plot_pings(traj, ax=ax_map, s=6, point_color='black', cmap='twilight', traj_cols=tc)\n",
+ "ax_map.set_axis_off()\n",
+ "\n",
+ "plot_time_barcode(traj[tc['timestamp']], ax=ax_barcode, set_xlim=True)\n",
+ "plot_stops_barcode(stops, ax=ax_barcode, cmap='Reds', set_xlim=False, timestamp='unix_ts')\n",
+ "\n",
+ "plt.tight_layout(pad=0.1)\n",
+ "plt.show()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "98cdda1f",
"metadata": {
"execution": {
- "iopub.execute_input": "2025-10-17T05:59:38.579313Z",
- "iopub.status.busy": "2025-10-17T05:59:38.578920Z",
- "iopub.status.idle": "2025-10-17T05:59:58.677680Z",
- "shell.execute_reply": "2025-10-17T05:59:58.676969Z"
+ "iopub.execute_input": "2025-11-24T18:33:23.475346Z",
+ "iopub.status.busy": "2025-11-24T18:33:23.475346Z",
+ "iopub.status.idle": "2025-11-24T18:33:23.482904Z",
+ "shell.execute_reply": "2025-11-24T18:33:23.482904Z"
}
},
"outputs": [
@@ -76,189 +194,214 @@
"name": "stdout",
"output_type": "stream",
"text": [
- " Algorithm Runtime (s)\n",
- " Lachesis 1.060303\n",
- " TA-DBSCAN 6.314632\n",
- "Grid-Based 0.207118\n",
- " HDBSCAN 12.504672\n"
+ "Summary of Single-User Performance\n",
+ "Lachesis execution time: 0.02721381187438965 seconds\n",
+ "TA-DBSCAN execution time: 0.012791156768798828 seconds\n",
+ "Grid-Based execution time: 0.022524595260620117 seconds\n",
+ "HDBSCAN execution time: 0.3206779956817627 seconds\n"
]
}
],
"source": [
- "algorithms = [\n",
- " ('Lachesis', lambda t: LACHESIS.lachesis(t, delta_roam=20, dt_max=60, dur_min=5, \n",
- " complete_output=True, keep_col_names=True,\n",
- " latitude=\"latitude\", longitude=\"longitude\")),\n",
- " ('TA-DBSCAN', lambda t: DBSCAN.ta_dbscan(t, time_thresh=240, dist_thresh=15, min_pts=3, \n",
- " dur_min=5, complete_output=True,\n",
- " latitude=\"latitude\", longitude=\"longitude\")),\n",
- " ('Grid-Based', lambda t: GRID_BASED.grid_based(\n",
- " t.assign(h3_cell=filters.to_tessellation(t, index=\"h3\", res=10, \n",
- " latitude='latitude', longitude='longitude', \n",
- " data_crs='EPSG:4326')),\n",
- " time_thresh=240, complete_output=True, timestamp='timestamp', location_id='h3_cell')),\n",
- " ('HDBSCAN', lambda t: HDBSCAN.st_hdbscan(t, time_thresh=240, min_pts=3, min_cluster_size=2, \n",
- " dur_min=5, complete_output=True,\n",
- " latitude=\"latitude\", longitude=\"longitude\"))\n",
- "]\n",
- "\n",
- "results = []\n",
- "for name, func in algorithms:\n",
- " t0 = time.time()\n",
- " stops_output = func(traj)\n",
- " results.append({'Algorithm': name, 'Runtime (s)': time.time() - t0})\n",
- " if name == 'Lachesis':\n",
- " stops = stops_output\n",
- "\n",
- "results_df = pd.DataFrame(results)\n",
- "print(results_df.to_string(index=False))"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "5b150f1e",
- "metadata": {},
- "source": [
- "## Visualization\n",
- "\n",
- "Spatial and temporal visualization of detected stops using the Lachesis algorithm."
+ "print(\"Summary of Single-User Performance\")\n",
+ "print(f\"Lachesis execution time: {execution_time_lachesis} seconds\")\n",
+ "print(f\"TA-DBSCAN execution time: {execution_time_tadbscan} seconds\")\n",
+ "print(f\"Grid-Based execution time: {execution_time_grid} seconds\")\n",
+ "print(f\"HDBSCAN execution time: {execution_time_hdbscan} seconds\")"
]
},
{
"cell_type": "code",
- "execution_count": 3,
- "id": "f9c852d1",
+ "execution_count": 4,
+ "id": "41e9a154",
"metadata": {
"execution": {
- "iopub.execute_input": "2025-10-17T05:59:58.680965Z",
- "iopub.status.busy": "2025-10-17T05:59:58.680723Z",
- "iopub.status.idle": "2025-10-17T05:59:59.277513Z",
- "shell.execute_reply": "2025-10-17T05:59:59.276740Z"
+ "iopub.execute_input": "2025-11-24T18:33:23.482904Z",
+ "iopub.status.busy": "2025-11-24T18:33:23.482904Z",
+ "iopub.status.idle": "2025-11-24T18:33:23.492088Z",
+ "shell.execute_reply": "2025-11-24T18:33:23.492088Z"
}
},
"outputs": [
{
- "data": {
- "image/png": "",
- "text/plain": [
- "
"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Runtime Disaggregation\n",
+ "Lachesis clustering time: 0.02721381187438965 seconds\n",
+ "--------------------------------\n",
+ "TA-DBSCAN clustering time: 0.009821414947509766 seconds\n",
+ "TA-DBSCAN post-processing time: 0.0029697418212890625 seconds\n",
+ "--------------------------------\n",
+ "Grid-Based clustering time: 0.022524595260620117 seconds\n",
+ "--------------------------------\n",
+ "HDBSCAN clustering time: 0.3206779956817627 seconds\n",
+ "HDBSCAN post-processing time: 0.0 seconds\n"
+ ]
}
],
"source": [
- "fig, (ax_map, ax_barcode) = plt.subplots(2, 1, figsize=(6, 6.5),\n",
- " gridspec_kw={'height_ratios': [10, 1]})\n",
- "\n",
- "two_days = 1704162819 + 3600*48\n",
- "traj_subset = traj[traj['timestamp'] <= two_days]\n",
- "stops_subset = stops[stops['end_timestamp'] <= two_days]\n",
- "traj_clean = clip_spatial_outliers(traj_subset, latitude='latitude', longitude='longitude')\n",
- "\n",
- "plot_pings(traj_clean, ax=ax_map, color='black', s=1.5, alpha=0.3, \n",
- " base_geometry=city, latitude='latitude', longitude='longitude')\n",
- "plot_stops(stops_subset, ax=ax_map, cmap='Reds', base_geometry=city,\n",
- " latitude='latitude', longitude='longitude', radius=stops_subset[\"diameter\"]/2)\n",
- "plot_time_barcode(traj_subset['timestamp'], ax=ax_barcode, set_xlim=True)\n",
- "plot_stops_barcode(stops_subset, ax=ax_barcode, cmap='Reds', set_xlim=False, timestamp='timestamp')\n",
- "plt.tight_layout()\n",
- "plt.show()"
+ "print(\"Runtime Disaggregation\")\n",
+ "print(f\"Lachesis clustering time: {execution_time_lachesis} seconds\")\n",
+ "print(\"--------------------------------\")\n",
+ "print(f\"TA-DBSCAN clustering time: {clustering_time_tadbscan} seconds\")\n",
+ "print(f\"TA-DBSCAN post-processing time: {post_time_tadbscan} seconds\")\n",
+ "print(\"--------------------------------\")\n",
+ "print(f\"Grid-Based clustering time: {execution_time_grid} seconds\")\n",
+ "print(\"--------------------------------\")\n",
+ "print(f\"HDBSCAN clustering time: {clustering_time_hdbscan} seconds\")\n",
+ "print(f\"HDBSCAN post-processing time: {post_time_hdbscan} seconds\")"
]
},
{
"cell_type": "markdown",
- "id": "235e25d5",
+ "id": "5c9ee070",
"metadata": {},
"source": [
- "## Runtime Scalability\n",
- "\n",
- "We measure how runtime scales with dataset size by running each algorithm on progressively larger time windows (6-hour increments)."
+ "## Pings vs Runtime"
]
},
{
"cell_type": "code",
- "execution_count": 4,
- "id": "947ce6c3",
+ "execution_count": 5,
+ "id": "62ed6a42",
"metadata": {
"execution": {
- "iopub.execute_input": "2025-10-17T05:59:59.280183Z",
- "iopub.status.busy": "2025-10-17T05:59:59.279974Z",
- "iopub.status.idle": "2025-10-17T06:09:24.164218Z",
- "shell.execute_reply": "2025-10-17T06:09:24.162167Z"
+ "iopub.execute_input": "2025-11-24T18:33:23.492088Z",
+ "iopub.status.busy": "2025-11-24T18:33:23.492088Z",
+ "iopub.status.idle": "2025-11-24T18:33:23.520075Z",
+ "shell.execute_reply": "2025-11-24T18:33:23.520075Z"
}
},
"outputs": [],
"source": [
- "runtime_data = []\n",
- "for current_end in pd.date_range(start=traj['datetime'].min() + pd.Timedelta(hours=6),\n",
- " end=traj['datetime'].max(),\n",
- " freq='6h'):\n",
- " window = traj[traj['datetime'] <= current_end]\n",
- " n_pings = len(window)\n",
- " \n",
- " for name, func in algorithms:\n",
- " t0 = time.time()\n",
- " func(window)\n",
- " runtime_data.append({'Algorithm': name, 'n_pings': n_pings, 'runtime': time.time() - t0})\n",
+ "traj = loader.sample_from_file(filepath_root, frac_users=0.1, format='parquet', traj_cols=tc, seed=10)\n",
"\n",
- "runtime_df = pd.DataFrame(runtime_data)"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "07203969",
- "metadata": {},
- "source": [
- "## Performance Comparison"
+ "# H3 cells for grid_based stop detection method\n",
+ "traj['h3_cell'] = filters.to_tessellation(traj, index=\"h3\", res=10, traj_cols=tc, data_crs='EPSG:3857')\n",
+ "pings_per_user = traj['gc_identifier'].value_counts()"
]
},
{
"cell_type": "code",
- "execution_count": 5,
- "id": "570b6103",
+ "execution_count": 6,
+ "id": "baafc0b8",
"metadata": {
"execution": {
- "iopub.execute_input": "2025-10-17T06:09:24.247662Z",
- "iopub.status.busy": "2025-10-17T06:09:24.247376Z",
- "iopub.status.idle": "2025-10-17T06:09:24.624693Z",
- "shell.execute_reply": "2025-10-17T06:09:24.623659Z"
+ "iopub.execute_input": "2025-11-24T18:33:23.520075Z",
+ "iopub.status.busy": "2025-11-24T18:33:23.520075Z",
+ "iopub.status.idle": "2025-11-24T18:33:24.093854Z",
+ "shell.execute_reply": "2025-11-24T18:33:24.093854Z"
}
},
"outputs": [
{
- "data": {
- "image/png": "",
- "text/plain": [
- ""
- ]
- },
- "metadata": {},
- "output_type": "display_data"
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "\r",
+ " 0%| | 0/4 [00:00, ?it/s]"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "\r",
+ " 25%|████████████▊ | 1/4 [00:00<00:00, 4.68it/s]"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "\r",
+ " 50%|█████████████████████████▌ | 2/4 [00:00<00:00, 5.53it/s]"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "\r",
+ " 75%|██████████████████████████████████████▎ | 3/4 [00:00<00:00, 6.61it/s]"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "\r",
+ "100%|███████████████████████████████████████████████████| 4/4 [00:00<00:00, 7.10it/s]"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "\n"
+ ]
}
],
"source": [
- "fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 5))\n",
+ "# Approximately 5 minutes for 40 users\n",
+ "results = []\n",
+ "for user, n_pings in tqdm(pings_per_user.items(), total=len(pings_per_user)):\n",
+ " user_data = traj.query(\"gc_identifier == @user\")\n",
+ "\n",
+    "    # For grid-based\n",
+ " start_time = time.time()\n",
+ " stops_gb = GRID_BASED.grid_based(user_data, time_thresh=240, complete_output=True, traj_cols=tc, location_id='h3_cell')\n",
+ " execution_time = time.time() - start_time\n",
+ " results += [pd.Series({'user':user, 'algo':'grid_based', 'execution_time':execution_time, 'n_pings':n_pings})]\n",
+ " \n",
+ " # For Lachesis\n",
+ " start_time = time.time()\n",
+ " stops_lac = LACHESIS.lachesis(user_data, delta_roam=30, dt_max=240, complete_output=True, traj_cols=tc)\n",
+ " execution_time = time.time() - start_time\n",
+ " results += [pd.Series({'user':user, 'algo':'lachesis', 'execution_time':execution_time, 'n_pings':n_pings})]\n",
"\n",
- "bars = ax1.barh(results_df['Algorithm'], results_df['Runtime (s)'])\n",
- "ax1.set_xlabel('Execution Time (seconds)')\n",
- "ax1.set_title('Overall Runtime (Full Dataset)')\n",
- "ax1.grid(axis='x', alpha=0.3)\n",
- "for bar, val in zip(bars, results_df['Runtime (s)']):\n",
- " ax1.text(val + 0.01, bar.get_y() + bar.get_height()/2, f'{val:.3f}s', va='center')\n",
+    "    # For TA-DBSCAN\n",
+ " start_time = time.time()\n",
+ " user_data_tadb = user_data.assign(cluster=DBSCAN.ta_dbscan_labels(user_data, time_thresh=240, dist_thresh=15, min_pts=3, traj_cols=tc))\n",
+ " # - post-processing\n",
+ " stops_tadb = post.remove_overlaps(user_data_tadb, time_thresh=240, method='cluster', traj_cols=tc, min_pts=3, dur_min=5, min_cluster_size=3)\n",
+ " execution_time = time.time() - start_time\n",
+ " results += [pd.Series({'user':user, 'algo':'tadbscan', 'execution_time':execution_time, 'n_pings':n_pings})]\n",
"\n",
- "for algo in runtime_df['Algorithm'].unique():\n",
- " subset = runtime_df[runtime_df['Algorithm'] == algo]\n",
- " ax2.plot(subset['n_pings'], subset['runtime'], marker='o', label=algo, linewidth=2)\n",
+ " # For HDBSCAN\n",
+ " start_time = time.time()\n",
+ " user_data_hdb = user_data.assign(cluster=HDBSCAN.hdbscan_labels(user_data, time_thresh=240, min_pts=3, min_cluster_size=2, traj_cols=tc))\n",
+ " # - post-processing\n",
+ " stops_hdb = post.remove_overlaps(user_data_hdb, time_thresh=240, method='cluster', traj_cols=tc, min_pts=3, dur_min=5, min_cluster_size=3) \n",
+ " execution_time = time.time() - start_time\n",
+ " results += [pd.Series({'user':user, 'algo':'hdbscan', 'execution_time':execution_time, 'n_pings':n_pings})]\n",
+ "\n",
+ "results = pd.DataFrame(results)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "id": "8e8546e8",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2025-11-24T18:33:24.093854Z",
+ "iopub.status.busy": "2025-11-24T18:33:24.093854Z",
+ "iopub.status.idle": "2025-11-24T18:33:24.338240Z",
+ "shell.execute_reply": "2025-11-24T18:33:24.338240Z"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "import seaborn as sns\n",
"\n",
- "ax2.set_xlabel('Number of Pings')\n",
- "ax2.set_ylabel('Runtime (seconds)')\n",
- "ax2.set_title('Runtime vs Data Size')\n",
- "ax2.legend()\n",
- "ax2.grid(alpha=0.3)\n",
+ "algos = ['grid_based', 'lachesis', 'tadbscan', 'hdbscan']\n",
+ "palette = dict(zip(algos, sns.color_palette(n_colors=len(algos))))\n",
"\n",
- "plt.tight_layout()\n",
+ "fig, ax = plt.subplots(figsize=(5, 5))\n",
+ "sns.scatterplot(data=results, x='n_pings', y='execution_time', hue='algo', ax=ax)\n",
+ "ax.set_title('n_pings vs execution_time')\n",
"plt.show()"
]
}
@@ -268,9 +411,9 @@
"formats": "ipynb,py:percent"
},
"kernelspec": {
- "display_name": "Python 313 (nomad-venv)",
+ "display_name": "Python 3 (ipykernel)",
"language": "python",
- "name": "nomad-venv"
+ "name": "python3"
},
"language_info": {
"codemirror_mode": {
@@ -282,7 +425,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.13.6"
+ "version": "3.12.3"
}
},
"nbformat": 4,
diff --git a/docs/source/benchmarking_of_stop_detection_algorithms.py b/docs/source/benchmarking_of_stop_detection_algorithms.py
index 78db2b69..3c9f5d7b 100644
--- a/docs/source/benchmarking_of_stop_detection_algorithms.py
+++ b/docs/source/benchmarking_of_stop_detection_algorithms.py
@@ -8,135 +8,188 @@
# format_version: '1.3'
# jupytext_version: 1.17.3
# kernelspec:
-# display_name: Python 313 (nomad-venv)
+# display_name: Python 3 (ipykernel)
# language: python
-# name: nomad-venv
+# name: python3
# ---
# %% [markdown]
-# # Benchmarking Stop Detection Algorithms
-#
-# This notebook compares the performance of four stop detection algorithms: **Lachesis**, **TA-DBSCAN**, **Grid-Based**, and **HDBSCAN**. We evaluate both overall runtime on a full trajectory dataset and how runtime scales with increasing data size.
+# # Comparing runtimes of different stop detection algorithms on toy datasets
# %% [markdown]
-# ## Setup
+# Here we compare the runtimes of four different stop detection algorithms: Lachesis, grid-based, temporal DBSCAN, and HDBSCAN.
# %%
-import time
-import warnings
-import pandas as pd
+# %matplotlib inline
+
+# Imports
+import nomad.io.base as loader
import geopandas as gpd
+from shapely.geometry import box
+import pandas as pd
+import numpy as np
import matplotlib.pyplot as plt
-import nomad.io.base as loader
-import nomad.filters as filters
-from nomad.stop_detection.viz import plot_pings, plot_stops, plot_time_barcode, plot_stops_barcode, clip_spatial_outliers
-import nomad.stop_detection.lachesis as LACHESIS
+from nomad.stop_detection.viz import plot_stops_barcode, plot_pings, plot_stops, plot_time_barcode
import nomad.stop_detection.dbscan as DBSCAN
+import nomad.stop_detection.lachesis as LACHESIS
import nomad.stop_detection.grid_based as GRID_BASED
import nomad.stop_detection.hdbscan as HDBSCAN
+import nomad.filters as filters
+import nomad.stop_detection.postprocessing as post
+import time
+from tqdm import tqdm
+
+# Load data
+import nomad.data as data_folder
+from pathlib import Path
+data_dir = Path(data_folder.__file__).parent
+city = gpd.read_file(data_dir / 'garden-city-buildings.geojson')
+outer_box = box(*city.total_bounds).buffer(15, join_style='mitre')
+
+filepath_root = 'gc_data_long/'
+tc = {
+ "user_id": "gc_identifier",
+ "timestamp": "unix_ts",
+ "x": "dev_x",
+ "y": "dev_y",
+ "ha":"ha",
+ "date":"date"}
+
+users = ['admiring_brattain']
+traj = loader.sample_from_file(filepath_root, format='parquet', users=users, filters = ('date','==', '2024-01-01'), traj_cols=tc)
+
+# Lachesis (sequential stop detection)
+start_time = time.time()
+stops = LACHESIS.lachesis(traj, delta_roam=20, dt_max = 60, dur_min=5, complete_output=True, keep_col_names=True, traj_cols=tc)
+execution_time_lachesis = time.time() - start_time
+print(f"Lachesis execution time: {execution_time_lachesis} seconds")
+
+# Density based stop detection (Temporal DBSCAN)
+start_time = time.time()
+user_data_tadb = traj.assign(cluster=DBSCAN.ta_dbscan_labels(traj, time_thresh=240, dist_thresh=15, min_pts=3, traj_cols=tc))
+clustering_time_tadbscan = time.time() - start_time
+start_time_post = time.time()
+cluster_labels_tadb = post.remove_overlaps(user_data_tadb, time_thresh=240, method='cluster', traj_cols=tc, min_pts=3, dur_min=5, min_cluster_size=3)
+execution_time_tadbscan = time.time() - start_time
+post_time_tadbscan = time.time() - start_time_post
+print(f"TA-DBSCAN execution time: {execution_time_tadbscan} seconds")
+print(f"TA-DBSCAN clustering time: {clustering_time_tadbscan} seconds")
+print(f"TA-DBSCAN post-processing time: {post_time_tadbscan} seconds")
+
+# Grid-based
+start_time = time.time()
+traj['h3_cell'] = filters.to_tessellation(traj, index="h3", res=10, x='dev_x', y='dev_y', data_crs='EPSG:3857')
+stops_gb = GRID_BASED.grid_based(traj, time_thresh=240, complete_output=True, timestamp='unix_ts', location_id='h3_cell')
+execution_time_grid = time.time() - start_time
+print(f"Grid-Based execution time: {execution_time_grid} seconds")
+
+# HDBSCAN
+start_time = time.time()
+user_data_hdb = traj.assign(cluster=HDBSCAN.hdbscan_labels(traj, time_thresh=240, min_pts=3, min_cluster_size=2, traj_cols=tc))
+clustering_time_hdbscan = time.time() - start_time
+start_time_post = time.time()
+cluster_labels_hdb = post.remove_overlaps(user_data_hdb, time_thresh=240, method='cluster', traj_cols=tc, min_pts=3, dur_min=5, min_cluster_size=3)
+execution_time_hdbscan = time.time() - start_time
+post_time_hdbscan = time.time() - start_time_post
+print(f"HDBSCAN execution time: {execution_time_hdbscan} seconds")
+print(f"HDBSCAN clustering time: {clustering_time_hdbscan} seconds")
+print(f"HDBSCAN post-processing time: {post_time_hdbscan} seconds")
-city = gpd.read_file("../../examples/garden_city.geojson")
-traj = loader.sample_from_file('../../examples/gc_data/', users=['youthful_mayer'], format='csv')
+# %% [markdown]
+# ## Summary of Single-User Performance
# %% [markdown]
-# ## Overall Runtime Comparison
-#
-# We first measure the total execution time for each algorithm on the complete dataset.
+# ### Lachesis
# %%
-algorithms = [
- ('Lachesis', lambda t: LACHESIS.lachesis(t, delta_roam=20, dt_max=60, dur_min=5,
- complete_output=True, keep_col_names=True,
- latitude="latitude", longitude="longitude")),
- ('TA-DBSCAN', lambda t: DBSCAN.ta_dbscan(t, time_thresh=240, dist_thresh=15, min_pts=3,
- dur_min=5, complete_output=True,
- latitude="latitude", longitude="longitude")),
- ('Grid-Based', lambda t: GRID_BASED.grid_based(
- t.assign(h3_cell=filters.to_tessellation(t, index="h3", res=10,
- latitude='latitude', longitude='longitude',
- data_crs='EPSG:4326')),
- time_thresh=240, complete_output=True, timestamp='timestamp', location_id='h3_cell')),
- ('HDBSCAN', lambda t: HDBSCAN.st_hdbscan(t, time_thresh=240, min_pts=3, min_cluster_size=2,
- dur_min=5, complete_output=True,
- latitude="latitude", longitude="longitude"))
-]
+fig, (ax_map, ax_barcode) = plt.subplots(2, 1, figsize=(6,6.5),
+ gridspec_kw={'height_ratios':[10,1]})
-results = []
-for name, func in algorithms:
- t0 = time.time()
- stops_output = func(traj)
- results.append({'Algorithm': name, 'Runtime (s)': time.time() - t0})
- if name == 'Lachesis':
- stops = stops_output
+gpd.GeoDataFrame(geometry=[outer_box], crs='EPSG:3857').plot(ax=ax_map, color='#d3d3d3')
+city.plot(ax=ax_map, edgecolor='white', linewidth=1, color='#8c8c8c')
-results_df = pd.DataFrame(results)
-print(results_df.to_string(index=False))
+plot_stops(stops, ax=ax_map, cmap='Reds', x='x', y='y')
+plot_pings(traj, ax=ax_map, s=6, point_color='black', cmap='twilight', traj_cols=tc)
+ax_map.set_axis_off()
-# %% [markdown]
-# ## Visualization
-#
-# Spatial and temporal visualization of detected stops using the Lachesis algorithm.
+plot_time_barcode(traj[tc['timestamp']], ax=ax_barcode, set_xlim=True)
+plot_stops_barcode(stops, ax=ax_barcode, cmap='Reds', set_xlim=False, timestamp='unix_ts')
-# %%
-fig, (ax_map, ax_barcode) = plt.subplots(2, 1, figsize=(6, 6.5),
- gridspec_kw={'height_ratios': [10, 1]})
-
-two_days = 1704162819 + 3600*48
-traj_subset = traj[traj['timestamp'] <= two_days]
-stops_subset = stops[stops['end_timestamp'] <= two_days]
-traj_clean = clip_spatial_outliers(traj_subset, latitude='latitude', longitude='longitude')
-
-plot_pings(traj_clean, ax=ax_map, color='black', s=1.5, alpha=0.3,
- base_geometry=city, latitude='latitude', longitude='longitude')
-plot_stops(stops_subset, ax=ax_map, cmap='Reds', base_geometry=city,
- latitude='latitude', longitude='longitude', radius=stops_subset["diameter"]/2)
-plot_time_barcode(traj_subset['timestamp'], ax=ax_barcode, set_xlim=True)
-plot_stops_barcode(stops_subset, ax=ax_barcode, cmap='Reds', set_xlim=False, timestamp='timestamp')
-plt.tight_layout()
+plt.tight_layout(pad=0.1)
plt.show()
+# %%
+print("Summary of Single-User Performance")
+print(f"Lachesis execution time: {execution_time_lachesis} seconds")
+print(f"TA-DBSCAN execution time: {execution_time_tadbscan} seconds")
+print(f"Grid-Based execution time: {execution_time_grid} seconds")
+print(f"HDBSCAN execution time: {execution_time_hdbscan} seconds")
+
+# %%
+print("Runtime Disaggregation")
+print(f"Lachesis clustering time: {execution_time_lachesis} seconds")
+print("--------------------------------")
+print(f"TA-DBSCAN clustering time: {clustering_time_tadbscan} seconds")
+print(f"TA-DBSCAN post-processing time: {post_time_tadbscan} seconds")
+print("--------------------------------")
+print(f"Grid-Based clustering time: {execution_time_grid} seconds")
+print("--------------------------------")
+print(f"HDBSCAN clustering time: {clustering_time_hdbscan} seconds")
+print(f"HDBSCAN post-processing time: {post_time_hdbscan} seconds")
+
# %% [markdown]
-# ## Runtime Scalability
-#
-# We measure how runtime scales with dataset size by running each algorithm on progressively larger time windows (6-hour increments).
+# ## Pings vs Runtime
# %%
-runtime_data = []
-for current_end in pd.date_range(start=traj['datetime'].min() + pd.Timedelta(hours=6),
- end=traj['datetime'].max(),
- freq='6h'):
- window = traj[traj['datetime'] <= current_end]
- n_pings = len(window)
-
- for name, func in algorithms:
- t0 = time.time()
- func(window)
- runtime_data.append({'Algorithm': name, 'n_pings': n_pings, 'runtime': time.time() - t0})
+traj = loader.sample_from_file(filepath_root, frac_users=0.1, format='parquet', traj_cols=tc, seed=10) # frac_users=0.1 here; adjust to sample more or fewer users
-runtime_df = pd.DataFrame(runtime_data)
+# H3 cells for grid_based stop detection method
+traj['h3_cell'] = filters.to_tessellation(traj, index="h3", res=10, x='dev_x', y='dev_y', data_crs='EPSG:3857')
+pings_per_user = traj['gc_identifier'].value_counts()
-# %% [markdown]
-# ## Performance Comparison
+# %%
+# Approximately 5 minutes for 40 users
+results = []
+for user, n_pings in tqdm(pings_per_user.items(), total=len(pings_per_user)):
+ user_data = traj.query("gc_identifier == @user")
+
+    # For grid-based
+ start_time = time.time()
+ stops_gb = GRID_BASED.grid_based(user_data, time_thresh=240, complete_output=True, timestamp='unix_ts', location_id='h3_cell')
+ execution_time = time.time() - start_time
+ results += [pd.Series({'user':user, 'algo':'grid_based', 'execution_time':execution_time, 'n_pings':n_pings})]
+
+ # For Lachesis
+ start_time = time.time()
+ stops_lac = LACHESIS.lachesis(user_data, delta_roam=30, dt_max=240, complete_output=True, traj_cols=tc)
+ execution_time = time.time() - start_time
+ results += [pd.Series({'user':user, 'algo':'lachesis', 'execution_time':execution_time, 'n_pings':n_pings})]
+
+    # For TA-DBSCAN
+ start_time = time.time()
+ user_data_tadb = user_data.assign(cluster=DBSCAN.ta_dbscan_labels(user_data, time_thresh=240, dist_thresh=15, min_pts=3, traj_cols=tc))
+ # - post-processing
+ stops_tadb = post.remove_overlaps(user_data_tadb, time_thresh=240, method='cluster', traj_cols=tc, min_pts=3, dur_min=5, min_cluster_size=3)
+ execution_time = time.time() - start_time
+ results += [pd.Series({'user':user, 'algo':'tadbscan', 'execution_time':execution_time, 'n_pings':n_pings})]
+
+ # For HDBSCAN
+ start_time = time.time()
+ user_data_hdb = user_data.assign(cluster=HDBSCAN.hdbscan_labels(user_data, time_thresh=240, min_pts=3, min_cluster_size=2, traj_cols=tc))
+ # - post-processing
+ stops_hdb = post.remove_overlaps(user_data_hdb, time_thresh=240, method='cluster', traj_cols=tc, min_pts=3, dur_min=5, min_cluster_size=3)
+ execution_time = time.time() - start_time
+ results += [pd.Series({'user':user, 'algo':'hdbscan', 'execution_time':execution_time, 'n_pings':n_pings})]
+
+results = pd.DataFrame(results)
# %%
-fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 5))
-
-bars = ax1.barh(results_df['Algorithm'], results_df['Runtime (s)'])
-ax1.set_xlabel('Execution Time (seconds)')
-ax1.set_title('Overall Runtime (Full Dataset)')
-ax1.grid(axis='x', alpha=0.3)
-for bar, val in zip(bars, results_df['Runtime (s)']):
- ax1.text(val + 0.01, bar.get_y() + bar.get_height()/2, f'{val:.3f}s', va='center')
-
-for algo in runtime_df['Algorithm'].unique():
- subset = runtime_df[runtime_df['Algorithm'] == algo]
- ax2.plot(subset['n_pings'], subset['runtime'], marker='o', label=algo, linewidth=2)
-
-ax2.set_xlabel('Number of Pings')
-ax2.set_ylabel('Runtime (seconds)')
-ax2.set_title('Runtime vs Data Size')
-ax2.legend()
-ax2.grid(alpha=0.3)
-
-plt.tight_layout()
+import seaborn as sns
+
+algos = ['grid_based', 'lachesis', 'tadbscan', 'hdbscan']
+palette = dict(zip(algos, sns.color_palette(n_colors=len(algos))))
+
+fig, ax = plt.subplots(figsize=(5, 5))
+sns.scatterplot(data=results, x='n_pings', y='execution_time', hue='algo', ax=ax)
+ax.set_title('n_pings vs execution_time')
plt.show()
diff --git a/docs/source/grid_based_demo.ipynb b/docs/source/grid_based_demo.ipynb
index 03022d0a..90114911 100644
--- a/docs/source/grid_based_demo.ipynb
+++ b/docs/source/grid_based_demo.ipynb
@@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "markdown",
- "id": "92838936",
+ "id": "f56b531d",
"metadata": {},
"source": [
"# Grid-Based Stop Detection"
@@ -10,7 +10,7 @@
},
{
"cell_type": "markdown",
- "id": "cb276fd9",
+ "id": "9cfdb26e",
"metadata": {},
"source": [
"The stop detection algorithms implemented in `nomad` support different combinations of input formats that are common in commercial datasets, detecting default names when possible\n",
@@ -23,69 +23,78 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "id": "19184dee",
- "metadata": {},
+ "execution_count": 1,
+ "id": "24b50a14",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2025-11-24T18:32:11.959806Z",
+ "iopub.status.busy": "2025-11-24T18:32:11.959806Z",
+ "iopub.status.idle": "2025-11-24T18:32:16.169725Z",
+ "shell.execute_reply": "2025-11-24T18:32:16.169725Z"
+ }
+ },
"outputs": [],
"source": [
"%matplotlib inline\n",
+ "import matplotlib\n",
+    "matplotlib.use('TkAgg')  # Tk GUI backend; NOTE(review): replaces the inline backend set by %matplotlib inline above - confirm intended\n",
+ "import matplotlib.pyplot as plt\n",
+ "plt.ion() # Interactive mode\n",
"\n",
"# Imports\n",
"import nomad.io.base as loader\n",
"import geopandas as gpd\n",
"from shapely.geometry import box\n",
- "import pandas as pd\n",
- "import numpy as np\n",
- "import matplotlib.pyplot as plt\n",
- "from nomad.stop_detection.viz import plot_stops_barcode, plot_time_barcode\n",
+ "from nomad.stop_detection.viz import plot_stops_barcode, plot_time_barcode, plot_hexagons, plot_pings\n",
"import nomad.stop_detection.grid_based as GRID_BASED\n",
"import nomad.filters as filters \n",
"\n",
"# Load data\n",
- "city = gpd.read_file(\"garden_city.geojson\").to_crs('EPSG:3857')\n",
- "outer_box = box(*city.total_bounds).buffer(15, join_style='mitre')\n",
+ "import nomad.data as data_folder\n",
+ "from pathlib import Path\n",
+ "data_dir = Path(data_folder.__file__).parent\n",
+ "city = gpd.read_parquet(data_dir / 'garden-city-buildings-mercator.parquet')\n",
+ "outer_box = box(*city.total_bounds)\n",
"\n",
- "filepath_root = '../tutorials/IC2S2-2025/gc_data_long/'\n",
- "tc = {\n",
- " \"user_id\": \"gc_identifier\",\n",
- " \"timestamp\": \"unix_ts\",\n",
- " \"x\": \"dev_x\",\n",
- " \"y\": \"dev_y\",\n",
- " \"ha\":\"ha\",\n",
- " \"date\":\"date\"}\n",
+ "filepath_root = 'gc_data_long/'\n",
+ "tc = {\"user_id\": \"gc_identifier\", \"x\": \"dev_x\", \"y\": \"dev_y\", \"timestamp\": \"unix_ts\"}\n",
"\n",
"users = ['admiring_brattain']\n",
- "traj = loader.sample_from_file(filepath_root, format='parquet', users=users, filters = ('date','==', '2024-01-01'), traj_cols=tc)\n",
+ "traj = loader.sample_from_file(filepath_root, format='parquet', users=users, filters=('date','==', '2024-01-01'), traj_cols=tc)\n",
"\n",
- "# Grid-based\n",
- "traj['h3_cell'] = filters.to_tessellation(traj, index=\"h3\", res=10, x='dev_x', y='dev_y', data_crs='EPSG:3857')\n",
- "stops_gb = GRID_BASED.grid_based(traj, time_thresh=240, complete_output=True, timestamp='unix_ts', location_id='h3_cell')"
+ "# Grid-based - data is in Web Mercator (EPSG:3857) projected coordinates\n",
+ "traj['h3_cell'] = filters.to_tessellation(traj, index=\"h3\", res=10, traj_cols=tc, data_crs='EPSG:3857')\n",
+ "stops_gb = GRID_BASED.grid_based(traj, time_thresh=240, complete_output=True, location_id='h3_cell', traj_cols=tc)"
]
},
{
"cell_type": "code",
- "execution_count": 21,
- "id": "62555a1b",
- "metadata": {},
- "outputs": [
- {
- "data": {
- "image/png": "",
- "text/plain": [
- ""
- ]
- },
- "metadata": {},
- "output_type": "display_data"
+ "execution_count": 2,
+ "id": "4fea8a03",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2025-11-24T18:32:16.169725Z",
+ "iopub.status.busy": "2025-11-24T18:32:16.169725Z",
+ "iopub.status.idle": "2025-11-24T18:32:16.492839Z",
+ "shell.execute_reply": "2025-11-24T18:32:16.492839Z"
}
- ],
+ },
+ "outputs": [],
"source": [
- "fig, ax_barcode = plt.subplots(figsize=(10,1.5))\n",
+ "fig, (ax_map, ax_barcode) = plt.subplots(2, 1, figsize=(6,6.5),\n",
+ " gridspec_kw={'height_ratios':[10,1]})\n",
+ "\n",
+ "gpd.GeoDataFrame(geometry=[outer_box], crs='EPSG:3857').plot(ax=ax_map, color='#d3d3d3')\n",
+ "city.plot(ax=ax_map, edgecolor='white', linewidth=1, color='#8c8c8c')\n",
+ "\n",
+ "plot_hexagons(stops_gb, ax=ax_map, color='cluster', cmap='Greens', location_id='h3_cell', data_crs='EPSG:3857')\n",
+ "plot_pings(traj, ax=ax_map, s=6, color='black', alpha=0.5, traj_cols=tc)\n",
+ "ax_map.set_axis_off()\n",
"\n",
"plot_time_barcode(traj['unix_ts'], ax=ax_barcode, set_xlim=True)\n",
- "plot_stops_barcode(stops_gb, ax=ax_barcode, stop_color='green', set_xlim=False, timestamp='unix_ts')\n",
- "plt.title(\"Grid-Based stops\")\n",
- "plt.tight_layout()\n",
+ "plot_stops_barcode(stops_gb, ax=ax_barcode, cmap='Greens', set_xlim=False, timestamp='unix_ts')\n",
+ "\n",
+ "plt.tight_layout(pad=0.1)\n",
"plt.show()"
]
}
@@ -106,7 +115,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.14.0"
+ "version": "3.12.3"
}
},
"nbformat": 4,
diff --git a/docs/source/hdbscan_demo.ipynb b/docs/source/hdbscan_demo.ipynb
index 4437d09a..b7c6c4e4 100644
--- a/docs/source/hdbscan_demo.ipynb
+++ b/docs/source/hdbscan_demo.ipynb
@@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "markdown",
- "id": "92838936",
+ "id": "4f16fcbe",
"metadata": {},
"source": [
"# HDBSCAN Stop Detection"
@@ -10,7 +10,7 @@
},
{
"cell_type": "markdown",
- "id": "cb276fd9",
+ "id": "f69ed80f",
"metadata": {},
"source": [
"The HDBSCAN algorithm constructs a hierarchy of non-overlapping clusters from different radius values and selects those that maximize stability."
@@ -18,73 +18,79 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "id": "19184dee",
- "metadata": {},
+ "execution_count": 1,
+ "id": "3561532d",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2025-11-24T18:32:30.624504Z",
+ "iopub.status.busy": "2025-11-24T18:32:30.624504Z",
+ "iopub.status.idle": "2025-11-24T18:32:33.741073Z",
+ "shell.execute_reply": "2025-11-24T18:32:33.740043Z"
+ }
+ },
"outputs": [],
"source": [
"%matplotlib inline\n",
+ "import matplotlib\n",
+ "matplotlib.use('TkAgg')\n",
+ "import matplotlib.pyplot as plt\n",
+ "plt.ion()\n",
"\n",
"# Imports\n",
"import nomad.io.base as loader\n",
"import geopandas as gpd\n",
"from shapely.geometry import box\n",
- "import pandas as pd\n",
- "import numpy as np\n",
- "import matplotlib.pyplot as plt\n",
- "from nomad.stop_detection.viz import plot_stops_barcode, plot_time_barcode\n",
+ "from nomad.stop_detection.viz import plot_stops_barcode, plot_time_barcode, plot_stops, plot_pings\n",
"import nomad.stop_detection.hdbscan as HDBSCAN\n",
- "import nomad.stop_detection.postprocessing as post\n",
"\n",
"# Load data\n",
- "city = gpd.read_file(\"garden_city.geojson\").to_crs('EPSG:3857')\n",
- "outer_box = box(*city.total_bounds).buffer(15, join_style='mitre')\n",
+ "import nomad.data as data_folder\n",
+ "from pathlib import Path\n",
+ "data_dir = Path(data_folder.__file__).parent\n",
+ "city = gpd.read_parquet(data_dir / 'garden-city-buildings-mercator.parquet')\n",
+ "outer_box = box(*city.total_bounds)\n",
"\n",
"filepath_root = 'gc_data_long/'\n",
- "tc = {\n",
- " \"user_id\": \"gc_identifier\",\n",
- " \"timestamp\": \"unix_ts\",\n",
- " \"x\": \"dev_x\",\n",
- " \"y\": \"dev_y\",\n",
- " \"ha\":\"ha\",\n",
- " \"date\":\"date\"}\n",
+ "tc = {\"user_id\": \"gc_identifier\", \"x\": \"dev_x\", \"y\": \"dev_y\", \"timestamp\": \"unix_ts\"}\n",
"\n",
"users = ['admiring_brattain']\n",
- "traj = loader.sample_from_file(filepath_root, format='parquet', users=users, filters = ('date','==', '2024-01-01'), traj_cols=tc)\n",
+ "traj = loader.sample_from_file(filepath_root, format='parquet', users=users, filters=('date','==', '2024-01-01'), traj_cols=tc)\n",
"\n",
- "user_data_hdb = traj.assign(cluster=HDBSCAN.hdbscan_labels(traj, time_thresh=240, min_pts=3, min_cluster_size=2, traj_cols=tc))\n",
"stops_hdb = HDBSCAN.st_hdbscan(traj,\n",
" time_thresh=720,\n",
- " dist_thresh=15,\n",
" min_pts=3,\n",
" complete_output=True,\n",
- " traj_cols=tc)\n",
- "stops_hdb[\"cluster\"] = post.remove_overlaps(user_data_hdb, time_thresh=240, method='cluster', traj_cols=tc, min_pts=3, dur_min=5, min_cluster_size=3) "
+ " traj_cols=tc) "
]
},
{
"cell_type": "code",
- "execution_count": 30,
- "id": "fa70719e",
- "metadata": {},
- "outputs": [
- {
- "data": {
- "image/png": "",
- "text/plain": [
- ""
- ]
- },
- "metadata": {},
- "output_type": "display_data"
+ "execution_count": 2,
+ "id": "ca45c6c3",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2025-11-24T18:32:33.744689Z",
+ "iopub.status.busy": "2025-11-24T18:32:33.743614Z",
+ "iopub.status.idle": "2025-11-24T18:32:34.044704Z",
+ "shell.execute_reply": "2025-11-24T18:32:34.044704Z"
}
- ],
+ },
+ "outputs": [],
"source": [
- "fig, ax_barcode = plt.subplots(figsize=(10,1.5))\n",
+ "fig, (ax_map, ax_barcode) = plt.subplots(2, 1, figsize=(6,6.5),\n",
+ " gridspec_kw={'height_ratios':[10,1]})\n",
+ "\n",
+ "gpd.GeoDataFrame(geometry=[outer_box], crs='EPSG:3857').plot(ax=ax_map, color='#d3d3d3')\n",
+ "city.plot(ax=ax_map, edgecolor='white', linewidth=1, color='#8c8c8c')\n",
+ "\n",
+ "plot_stops(stops_hdb, ax=ax_map, cmap='Blues')\n",
+ "plot_pings(traj, ax=ax_map, s=6, color='black', alpha=0.5, traj_cols=tc)\n",
+ "ax_map.set_axis_off()\n",
"\n",
"plot_time_barcode(traj['unix_ts'], ax=ax_barcode, set_xlim=True)\n",
- "plot_stops_barcode(stops_hdb, ax=ax_barcode, stop_color='blue', set_xlim=False, timestamp='unix_ts')\n",
- "fig.suptitle(\"HDBSCAN stops with post-processing\")\n",
+ "plot_stops_barcode(stops_hdb, ax=ax_barcode, cmap='Blues', set_xlim=False, timestamp='unix_ts')\n",
+ "\n",
+ "plt.tight_layout(pad=0.1)\n",
"plt.show()"
]
}
@@ -105,7 +111,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.11.5"
+ "version": "3.12.3"
}
},
"nbformat": 4,
diff --git a/docs/source/ingesting-data.ipynb b/docs/source/ingesting-data.ipynb
new file mode 100644
index 00000000..0f7b355e
--- /dev/null
+++ b/docs/source/ingesting-data.ipynb
@@ -0,0 +1,507 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "460ff464-7812-41fb-bc5b-bc4f24e16499",
+ "metadata": {
+ "id": "460ff464-7812-41fb-bc5b-bc4f24e16499"
+ },
+ "source": [
+ "# **Loading and Sampling Trajectory Data**\n",
+ "\n",
+ "## Getting started\n",
+ "\n",
+ "Real-world mobility files vary widely in structure and formatting:\n",
+ "- **Timestamps** may be **UNIX** integers or **ISO-formatted strings**\n",
+ "- Timestamps may carry **timezone** information, e.g. -05:00, Z, (GMT+01), -3600\n",
+ "- Coordinates might be **projected** or **geographical**\n",
+ "- Files may be a flat **CSV**, or **partitioned Parquets**, local or **in S3**.\n",
+ "\n",
+ "`nomad.io` is here to help."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "id": "ca448248-3077-4e67-ad81-6d1ba1b170db",
+ "metadata": {
+ "executionInfo": {
+ "elapsed": 3404,
+ "status": "ok",
+ "timestamp": 1753083319439,
+ "user": {
+ "displayName": "Thomas Li",
+ "userId": "03526318197962168317"
+ },
+ "user_tz": -120
+ },
+ "id": "ca448248-3077-4e67-ad81-6d1ba1b170db"
+ },
+ "outputs": [],
+ "source": [
+ "from nomad.io import base as loader\n",
+ "import pandas as pd\n",
+ "import geopandas as gpd"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "c78e81f2-bcf3-4cc5-8c26-b6111484df73",
+ "metadata": {
+ "id": "c78e81f2-bcf3-4cc5-8c26-b6111484df73"
+ },
+ "source": [
+ "## Typical data ingestion (`pandas`, `geopandas`) vs `nomad.io` utilities"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "id": "904bf840-4253-41e3-a1d3-d54874072613",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 206
+ },
+ "executionInfo": {
+ "elapsed": 849,
+ "status": "ok",
+ "timestamp": 1753083322765,
+ "user": {
+ "displayName": "Thomas Li",
+ "userId": "03526318197962168317"
+ },
+ "user_tz": -120
+ },
+ "id": "904bf840-4253-41e3-a1d3-d54874072613",
+ "outputId": "9dd16f0f-7e96-4aaa-ade3-c431698bafbc"
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " identifier \n",
+ " device_lon \n",
+ " device_lat \n",
+ " unix_timestamp \n",
+ " local_datetime \n",
+ " date \n",
+ " ha \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 \n",
+ " cocky_stallman \n",
+ " -38.318802 \n",
+ " 36.669894 \n",
+ " 1704114435 \n",
+ " 2024-01-01 09:07:15-04:00 \n",
+ " 2024-01-01 \n",
+ " 8.492856 \n",
+ " \n",
+ " \n",
+ " 1 \n",
+ " cocky_stallman \n",
+ " -38.318765 \n",
+ " 36.669905 \n",
+ " 1704114753 \n",
+ " 2024-01-01 09:12:33-04:00 \n",
+ " 2024-01-01 \n",
+ " 11.336772 \n",
+ " \n",
+ " \n",
+ " 2 \n",
+ " cocky_stallman \n",
+ " -38.318627 \n",
+ " 36.669856 \n",
+ " 1704114792 \n",
+ " 2024-01-01 09:13:12-04:00 \n",
+ " 2024-01-01 \n",
+ " 18.436612 \n",
+ " \n",
+ " \n",
+ " 3 \n",
+ " cocky_stallman \n",
+ " -38.318661 \n",
+ " 36.669920 \n",
+ " 1704114989 \n",
+ " 2024-01-01 09:16:29-04:00 \n",
+ " 2024-01-01 \n",
+ " 27.370737 \n",
+ " \n",
+ " \n",
+ " 4 \n",
+ " cocky_stallman \n",
+ " -38.318602 \n",
+ " 36.669823 \n",
+ " 1704115195 \n",
+ " 2024-01-01 09:19:55-04:00 \n",
+ " 2024-01-01 \n",
+ " 12.506606 \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " identifier device_lon device_lat unix_timestamp \\\n",
+ "0 cocky_stallman -38.318802 36.669894 1704114435 \n",
+ "1 cocky_stallman -38.318765 36.669905 1704114753 \n",
+ "2 cocky_stallman -38.318627 36.669856 1704114792 \n",
+ "3 cocky_stallman -38.318661 36.669920 1704114989 \n",
+ "4 cocky_stallman -38.318602 36.669823 1704115195 \n",
+ "\n",
+ " local_datetime date ha \n",
+ "0 2024-01-01 09:07:15-04:00 2024-01-01 8.492856 \n",
+ "1 2024-01-01 09:12:33-04:00 2024-01-01 11.336772 \n",
+ "2 2024-01-01 09:13:12-04:00 2024-01-01 18.436612 \n",
+ "3 2024-01-01 09:16:29-04:00 2024-01-01 27.370737 \n",
+ "4 2024-01-01 09:19:55-04:00 2024-01-01 12.506606 "
+ ]
+ },
+ "execution_count": 9,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df = pd.read_csv(\"../../tutorials/IC2S2-2025/IC2S2-2025/gc_data.csv\")\n",
+ "city = gpd.read_file(\"../../tutorials/IC2S2-2025/IC2S2-2025/garden_city.geojson\")\n",
+ "\n",
+ "df.head()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "de10f93d",
+ "metadata": {},
+ "source": [
+ "## `nomad.io` — facilitates type casting and default names\n",
+ "\n",
+ "`nomad.io.base.from_file` is essentially a `pandas` / `pyarrow` wrapper that simplifies the formatting of canonical variables:\n",
+ "\n",
+ "- dates and datetimes in **ISO format** are cast to the pandas `datetime64[ns]` dtype\n",
+ "- **unix timestamps** are cast to integers and **reformatted to seconds**.\n",
+ "- **user identifiers** are cast to strings\n",
+ "- **partition folders** can be read as columns (Hive)\n",
+ "- **timezone handling** parses ISO datetime strings (with or without timezones)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "03b7bf33-48a1-4d75-bd95-ae05fb7f9357",
+ "metadata": {
+ "id": "03b7bf33-48a1-4d75-bd95-ae05fb7f9357"
+ },
+ "source": [
+ "Don't read partitioned data with a for loop! `nomad`'s `from_file` wraps `PyArrow`'s file readers, maintaining the same signature."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "id": "b33de9d2-ee49-46ac-96a1-56784674d40c",
+ "metadata": {
+ "id": "b33de9d2-ee49-46ac-96a1-56784674d40c"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "user_id object\n",
+ "longitude float64\n",
+ "latitude float64\n",
+ "timestamp Int64\n",
+ "datetime datetime64[ns]\n",
+ "ha float64\n",
+ "date object\n",
+ "tz_offset Int64\n",
+ "dtype: object\n"
+ ]
+ }
+ ],
+ "source": [
+ "# For the partitioned dataset\n",
+ "traj_cols = {\"user_id\": \"user_id\",\n",
+ " \"timestamp\": \"timestamp\",\n",
+ " \"latitude\": \"latitude\",\n",
+ " \"longitude\": \"longitude\",\n",
+ " \"datetime\": \"datetime\",\n",
+ " \"date\": \"date\"}\n",
+ "\n",
+ "file_path = \"../../tutorials/IC2S2-2025/IC2S2-2025/gc_data/\" # partitioned\n",
+ "\n",
+ "\n",
+ "df = loader.from_file(file_path, format=\"csv\", traj_cols=traj_cols, parse_dates=True)\n",
+ "print(df.dtypes)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "id": "8ca7f977",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Canonical column names in nomad\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "{'user_id': 'user_id',\n",
+ " 'latitude': 'latitude',\n",
+ " 'longitude': 'longitude',\n",
+ " 'datetime': 'datetime',\n",
+ " 'start_datetime': 'start_datetime',\n",
+ " 'end_datetime': 'end_datetime',\n",
+ " 'start_timestamp': 'start_timestamp',\n",
+ " 'end_timestamp': 'end_timestamp',\n",
+ " 'timestamp': 'timestamp',\n",
+ " 'date': 'date',\n",
+ " 'utc_date': 'date',\n",
+ " 'x': 'x',\n",
+ " 'y': 'y',\n",
+ " 'geohash': 'geohash',\n",
+ " 'tz_offset': 'tz_offset',\n",
+ " 'duration': 'duration',\n",
+ " 'ha': 'ha',\n",
+ " 'h3_cell': 'h3_cell',\n",
+ " 'location_id': 'location_id'}"
+ ]
+ },
+ "execution_count": 11,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "from nomad.constants import DEFAULT_SCHEMA\n",
+ "print(\"Canonical column names in nomad\")\n",
+ "DEFAULT_SCHEMA"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "cf4fbb1f",
+ "metadata": {},
+ "source": [
+ "```from_file``` automatically detects and reads Parquet files (single or partitioned directories) using ```PyArrow```'s dataset API, applying the same validation, type casting, and timezone handling as for CSV inputs."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "id": "da9e025d",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "uid object\n",
+ "timestamp Int64\n",
+ "latitude float64\n",
+ "longitude float64\n",
+ "date object\n",
+ "dtype: object\n"
+ ]
+ }
+ ],
+ "source": [
+ "traj_cols = {\"user_id\": \"uid\",\n",
+ " \"timestamp\": \"timestamp\",\n",
+ " \"latitude\": \"latitude\",\n",
+ " \"longitude\": \"longitude\",\n",
+ " \"date\": \"date\"}\n",
+ "\n",
+ "file_path = \"../../nomad/data/partitioned_parquet/\" # partitioned\n",
+ "\n",
+ "df = loader.from_file(file_path, format=\"parquet\", traj_cols=traj_cols, parse_dates=True)\n",
+ "print(df.dtypes)"
+ ]
+ }
+ ],
+ "metadata": {
+ "colab": {
+ "provenance": []
+ },
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.12.3"
+ },
+ "nbdime-conflicts": {
+ "local_diff": [
+ {
+ "diff": [
+ {
+ "diff": [
+ {
+ "key": 0,
+ "op": "addrange",
+ "valuelist": [
+ "conda_py_310_env"
+ ]
+ },
+ {
+ "key": 0,
+ "length": 1,
+ "op": "removerange"
+ }
+ ],
+ "key": "display_name",
+ "op": "patch"
+ },
+ {
+ "diff": [
+ {
+ "key": 0,
+ "op": "addrange",
+ "valuelist": [
+ "conda_py_310_env"
+ ]
+ },
+ {
+ "key": 0,
+ "length": 1,
+ "op": "removerange"
+ }
+ ],
+ "key": "name",
+ "op": "patch"
+ }
+ ],
+ "key": "kernelspec",
+ "op": "patch"
+ },
+ {
+ "diff": [
+ {
+ "diff": [
+ {
+ "diff": [
+ {
+ "key": 5,
+ "op": "addrange",
+ "valuelist": "6"
+ },
+ {
+ "key": 5,
+ "length": 2,
+ "op": "removerange"
+ }
+ ],
+ "key": 0,
+ "op": "patch"
+ }
+ ],
+ "key": "version",
+ "op": "patch"
+ }
+ ],
+ "key": "language_info",
+ "op": "patch"
+ }
+ ],
+ "remote_diff": [
+ {
+ "diff": [
+ {
+ "diff": [
+ {
+ "key": 0,
+ "op": "addrange",
+ "valuelist": [
+ "Python 3 (ipykernel)"
+ ]
+ },
+ {
+ "key": 0,
+ "length": 1,
+ "op": "removerange"
+ }
+ ],
+ "key": "display_name",
+ "op": "patch"
+ },
+ {
+ "diff": [
+ {
+ "key": 0,
+ "op": "addrange",
+ "valuelist": [
+ "python3"
+ ]
+ },
+ {
+ "key": 0,
+ "length": 1,
+ "op": "removerange"
+ }
+ ],
+ "key": "name",
+ "op": "patch"
+ }
+ ],
+ "key": "kernelspec",
+ "op": "patch"
+ },
+ {
+ "diff": [
+ {
+ "diff": [
+ {
+ "key": 0,
+ "length": 1,
+ "op": "removerange"
+ }
+ ],
+ "key": "version",
+ "op": "patch"
+ }
+ ],
+ "key": "language_info",
+ "op": "patch"
+ }
+ ]
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/docs/source/lachesis_demo.ipynb b/docs/source/lachesis_demo.ipynb
index a46d838b..cb9a8992 100644
--- a/docs/source/lachesis_demo.ipynb
+++ b/docs/source/lachesis_demo.ipynb
@@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "markdown",
- "id": "92838936",
+ "id": "05c3afed",
"metadata": {},
"source": [
"# Lachesis Stop Detection"
@@ -10,7 +10,7 @@
},
{
"cell_type": "markdown",
- "id": "cb276fd9",
+ "id": "bca8605a",
"metadata": {},
"source": [
"The first stop detection algorithm implemented in ```nomad``` is a sequential algorithm insipired by the one in _Project Lachesis: Parsing and Modeling Location Histories_ (Hariharan & Toyama). This algorithm for extracting stays is dependent on two parameters: the roaming distance and the stay duration. \n",
@@ -28,32 +28,43 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "id": "19184dee",
- "metadata": {},
+ "execution_count": 1,
+ "id": "7f0b2bb1",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2025-11-24T18:32:46.921799Z",
+ "iopub.status.busy": "2025-11-24T18:32:46.921799Z",
+ "iopub.status.idle": "2025-11-24T18:32:51.000857Z",
+ "shell.execute_reply": "2025-11-24T18:32:51.000857Z"
+ }
+ },
"outputs": [],
"source": [
"%matplotlib inline\n",
+ "import matplotlib\n",
+ "matplotlib.use('TkAgg')\n",
+ "import matplotlib.pyplot as plt\n",
+ "plt.ion()\n",
"\n",
"# Imports\n",
"import nomad.io.base as loader\n",
+ "import geopandas as gpd\n",
"from shapely.geometry import box\n",
- "import matplotlib.pyplot as plt\n",
- "from nomad.stop_detection.viz import plot_stops_barcode, plot_time_barcode\n",
+ "from nomad.stop_detection.viz import plot_stops_barcode, plot_time_barcode, plot_stops, plot_pings\n",
"import nomad.stop_detection.lachesis as LACHESIS\n",
+ "import nomad.data as data_folder\n",
+ "from pathlib import Path\n",
"\n",
"# Load data\n",
+ "data_dir = Path(data_folder.__file__).parent\n",
+ "city = gpd.read_parquet(data_dir / 'garden-city-buildings-mercator.parquet')\n",
+ "outer_box = box(*city.total_bounds)\n",
+ "\n",
"filepath_root = 'gc_data_long/'\n",
- "tc = {\n",
- " \"user_id\": \"gc_identifier\",\n",
- " \"timestamp\": \"unix_ts\",\n",
- " \"x\": \"dev_x\",\n",
- " \"y\": \"dev_y\",\n",
- " \"ha\":\"ha\",\n",
- " \"date\":\"date\"}\n",
+ "tc = {\"user_id\": \"gc_identifier\", \"x\": \"dev_x\", \"y\": \"dev_y\", \"timestamp\": \"unix_ts\"}\n",
"\n",
"users = ['admiring_brattain']\n",
- "traj = loader.sample_from_file(filepath_root, format='parquet', users=users, filters = ('date','==', '2024-01-01'), traj_cols=tc)\n",
+ "traj = loader.sample_from_file(filepath_root, format='parquet', users=users, filters=('date','==', '2024-01-01'), traj_cols=tc)\n",
"\n",
"# Lachesis (sequential stop detection)\n",
"stops = LACHESIS.lachesis(traj, delta_roam=20, dt_max = 60, dur_min=5, complete_output=True, keep_col_names=True, traj_cols=tc)"
@@ -61,28 +72,32 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "id": "570b6103",
- "metadata": {},
- "outputs": [
- {
- "data": {
- "image/png": "iVBORw0KGgoAAAANSUhEUgAAA90AAACYCAYAAAD5s4rEAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8pXeV/AAAACXBIWXMAAA9hAAAPYQGoP6dpAAAYdklEQVR4nO3de1AV5x3G8WchcolcDIgXIiDeQqVWq8ZrbctEjaYk07FqNCMmaFNjqk6qTsWa1Puk1aR1pg46maDUmhrMFBs0qTMab2k1qRZJUm2iSURgFFFTBS8IwvYPh9OccDnnwHm5ne9nhlHefffd9+zub5fHPRwt27ZtAQAAAAAAr/Nr6QkAAAAAANBeEboBAAAAADCE0A0AAAAAgCGEbgAAAAAADCF0AwAAAABgCKEbAAAAAABDCN0AAAAAABhC6AYAAAAAwBBCNwAAAAAAhhC6AQBtXmZmpizL0okTJ5ptmz179lRycnKzbU+SDh06JMuydOjQoWbZXnp6ujIzM5tlWwAAtFeEbgAA2ojBgwfr2LFjGjx4cLNsj9ANAEDT3dfSEwAAAO4JCwvTiBEjWnoaAADAAzzpBgD4hPLyci1atEiDBg1SeHi4IiIiNHLkSL399tu1+lZXV+sPf/iDBg0apODgYHXq1EkjRoxQTk5Orb579+7V4MGDFRwcrISEBG3ZsqVWn+LiYs2ZM0c9evRQQECA4uPjtXLlSt29e9ep36ZNmzRw4ECFhIQoNDRUCQkJ+tWvfuVYXtfby7/88ktNmzZN0dHRCgwMVNeuXfXII48oLy+vwf3har2ePXvq1KlTOnz4sCzLkmVZ6tmzp2P9goICzZgxQ126dFFgYKC+9a1v6dVXX1V1dbWjT35+vizL0rp167R27VrFxsYqKChIQ4cO1Xvvvec0n8uXL+tnP/uZYmJiFBgYqKioKI0ePVr79+9v8HUAANDa8aQbAOAT7ty5o6+++kqLFy/Wgw8+qIqKCu3fv1+TJk3S1q1bNXPmTEffZ555Rtu3b9fs2bO1atUqBQQEKDc3V/n5+U5jfvTRR1q0aJHS0tLUtWtXvf7665o9e7b69Omj73//+5LuBe5hw4bJz89Pv/71r9W7d28dO3ZMa9asUX5+vrZu3SpJevPNN/X8889r/vz5euWVV+Tn56fPP/9cp0+fbvB1PfbYY6qqqtK6desUGxurK1eu6OjRo7p27VqT1tu1a5cmT56s8PBwpaenS5ICAwMl3QvIo0aNUkVFhVavXq2ePXtqz549Wrx4sb744gtH/xobN25UXFycNmzYoOrqaq1bt04TJ07U4cOHNXLkSElSSkqKcnNztXbtWvXr10/Xrl1Tbm6url692uDrAACg1bMBAGjjtm7dakuyjx8/7vY6d+/etSsrK+3Zs2fb3/3udx3tR44csSXZy5Yta3D9uLg4OygoyD5//ryj7fbt23ZERIQ9Z84cR9ucOXPskJAQp362bduvvPKKLck+deqUbdu2PW/ePLtTp04NbvPgwYO2JPvgwYO2bdv2lStXbEn2hg0b3HrNNdxdLzEx0f7BD35Qqz0tLc2WZH/44YdO7XPnzrUty7I/++wz27Zt+9y5c7YkOzo62r59+7ajX2lpqR0REWGPHTvW0RYSEmK/8MILHr0OAADaAt5eDgDwGW+99ZZGjx6tkJAQ3XffferQoYMyMjL0n//8x9Hnb3/7myTp5z//ucvxBg0apNjYWMf3QUFB6tevn86fP+9o27Nnj5KSkhQdHa27d+86viZOnChJOnz4sCRp2LBhunbtmqZPn663335bV65ccbn9iIgI9e7dW+vXr9fvfvc7nTx50unt3d5er8aBAwfUv39/DRs2zKn9mWeekW3bOnDggFP7pEmTFBQU5Pg+NDRUjz/+uI4cOaKqqipJ915/Zmam1qxZow8++ECVlZVuzwcAgNaM0A0A8AnZ2dmaOnWqHnzwQW3fvl3Hjh3T8ePHNWvWLJWX
lzv6Xb58Wf7+/urWrZvLMSMjI2u1BQYG6vbt247vL126pN27d6tDhw5OX4mJiZLkCNcpKSnasmWLzp8/r5/85Cfq0qWLhg8frn379tW7fcuy9N577+nRRx/VunXrNHjwYEVFRWnBggUqKyvz+no1rl69qu7du9dqj46Odiz/urr2Zbdu3VRRUaEbN25IkrKysvT000/r9ddf18iRIxUREaGZM2equLjY5XwAAGjN+J1uAIBP2L59u+Lj45WVlSXLshztd+7cceoXFRWlqqoqFRcX1xksPdW5c2d95zvf0dq1a+tcXhNUJSk1NVWpqam6efOmjhw5ouXLlys5OVlnzpxRXFxcnevHxcUpIyNDknTmzBnt3LlTK1asUEVFhTZv3lzvvBq7nnTvHxsuXrxYq/3ChQuO1/x1dQXn4uJiBQQEKCQkxLHOhg0btGHDBhUUFCgnJ0dpaWkqKSnR3r17G5wPAACtGU+6AQA+wbIsBQQEOAXu4uLiWp9eXvO2702bNnllu8nJyfr3v/+t3r17a+jQobW+vh66a3Ts2FETJ07UsmXLVFFRoVOnTrm1rX79+unFF1/UgAEDlJub6/Yc61vvm0/tazzyyCM6ffp0rW1s27ZNlmUpKSnJqT07O9vp3QRlZWXavXu3xowZI39//1rjx8bGat68eRo3bpxHrwMAgNaIJ90AgHbjwIEDtT5hXLr3Sd3JycnKzs7W888/r8mTJ6uwsFCrV69W9+7ddfbsWUffMWPGKCUlRWvWrNGlS5eUnJyswMBAnTx5Uvfff7/mz5/v0ZxWrVqlffv2adSoUVqwYIEeeughlZeXKz8/X++++642b96sHj166Nlnn1VwcLBGjx6t7t27q7i4WC+//LLCw8P18MMP1zn2xx9/rHnz5mnKlCnq27evAgICdODAAX388cdKS0urd07urjdgwAC9+eabysrKUq9evRQUFKQBAwboF7/4hbZt26Yf/ehHWrVqleLi4vTOO+8oPT1dc+fOVb9+/Zy25+/vr3HjxmnhwoWqrq7Wb3/7W5WWlmrlypWSpOvXryspKUlPPfWUEhISFBoaquPHj2vv3r2aNGmSR/sbAIDWhtANAGg3lixZUmf7uXPnlJqaqpKSEm3evFlbtmxRr169lJaWpqKiIkf4q5GZmanBgwcrIyNDmZmZCg4OVv/+/Z3+z2x3de/eXSdOnNDq1au1fv16FRUVKTQ0VPHx8ZowYYIeeOABSffCfmZmpnbu3Kn//ve/6ty5s773ve9p27ZtioqKqnPsbt26qXfv3kpPT1dhYaEsy1KvXr306quvNviPA+6ut3LlSl28eFHPPvusysrKFBcXp/z8fEVFReno0aNaunSpli5dqtLSUvXq1Uvr1q3TwoULa21v3rx5Ki8v14IFC1RSUqLExES98847Gj16tKR7H0A3fPhw/elPf1J+fr4qKysVGxurJUuW6Je//KXH+xwAgNbEsm3bbulJAACA9ic/P1/x8fFav369Fi9e3NLTAQCgRfA73QAAAAAAGELoBgAAAADAEN5eDgAAAACAITzpBgAAAADAEEI3AAAAAACGELoBAAAAADCE0A0AAAAAgCGEbgAAAAAADCF0AwAAAABgCKEbAAAAAABDCN0AAAAAABhC6AYAAAAAwBBCNwAAAAAAhhC6AQAAAAAwhNANAAAAAIAhhG4AAAAAAAwhdAMAAAAAYAihGwAAAAAAQwjdAAAAAAAYQugGAAAAAMAQQjcAAAAAAIYQugEAAAAAMITQDQAAAACAIYRuAAAAAAAMIXQDAAAAAGAIoRsAAAAAAEMI3QAAAAAAGELoBgAAAADAkPvc6VRdXa0LFy4oNDRUlmWZnhMAAAAAAK2abdsqKytTdHS0/Pzqf57tVui+cOGCYmJivDY5AAAAAADag8LCQvXo0aPe5W6F7tDQUMdgYWFh3pkZWqUvvpCefFIKCJCCglp6Ni2nvFyqqJCysqTevVt6NoD3tMYa96TevDF/
b9e3yX3qq9ei1nieNjdfPfatTUudi754/Ftz3fvi8YB7SktLFRMT48jL9XErdNe8pTwsLIzQ3c6FhEj+/lLHjtL997f0bFqOv79UVXVvf3DKoz1pjTXuSb15Y/7erm+T+9RXr0Wt8Txtbr567FubljoXffH4t+a698XjAc+4+hVsPkgNAAAAAABDCN0AAAAAABhC6AYAAAAAwBBCNwAAAAAAhhC6AQAAAAAwhNANAAAAAIAhhG4AAAAAAAwhdAMAAAAAYAihGwAAAAAAQwjdAAAAAAAYQuhuoh07dri9rKG+7o7tbltTlJZ6dzxTLl0yO8+m7IcdO3Z4dG64aveGmrG/+ac75s+f3yyv5+v96/t7Y8bypL+7+8fd+vZ0HE+XN0Zp6Q5dvereuK7qrDF1WNc6X683V/uyrtq8dGmHR3Np6nXum3M0dd28enWHY+yGzk1P70WNPa++WZcNzaWuGvbkvlhcPN/lfLx9H6hvPNP3m/q248555Y1j6ardm+eQNzT15ylP12/oOLh7bjSmnzevK005Xs15rN2pe095q35N/nzsjePjjePk6mevlqz7to7Q3USE7pZTUkLo9kRTQvdbb71F6HYxfnsN3a7qrDF1WNc6TQ3dJSU7PJoLobv1h+6ysrdczsfb94H6xjN9v6lvO4TuurWm0O3uudGYfr4Yut2pe095q34J3YTupiB0AwAAAABgCKEbAAAAAABDCN0AAAAAABhC6AYAAAAAwBBCNwAAAAAAhhC6AQAAAAAwhNANAAAAAIAhhG4AAAAAAAwhdAMAAAAAYAihGwAAAAAAQwjdAAAAAAAYQugGAAAAAMAQQjcAAAAAAIYQugEAAAAAMITQDQAAAACAIYRuAAAAAAAMIXQDAAAAAGAIoRsAAAAAAEMI3QAAAAAAGELoBgAAAADAEEI3AAAAAACGELoBAAAAADCE0A0AAAAAgCGEbgAAAAAADCF0AwAAAABgCKEbAAAAAABDCN0AAAAAABhC6AYAAAAAwBBCNwAAAAAAhhC6AQAAAAAwhNANAAAAAIAhhG4AAAAAAAwhdAMAAAAAYAihGwAAAAAAQwjdAAAAAAAYQugGAAAAAMAQQjcAAAAAAIYQugEAAAAAMOS+lp5AWzd9+nS3lzXU192x3W1rirAw745nSpcuZufZlP3g6pjUt9zbx7Kusb/5pzumTJmiUaNGuRzb3XZ3xqnv740Zy5P+7u4fd+u7seeCu8sbIyxsuoKD3evrqs4aU4d1rfP1enO1L+uqTU/n0dTr3DfnaOq6GRk5XbdvO2/T0/tAU+9F9a3r6jjV1deTuYSGTnE5H2/fB+obz/T9pr7tuHNeNfZ4enLN8uY55A1N/XnK0/UbOg7unhuN6efN64q36t40d+reU96qX5M/H3vj+HjjOLm6xrd07bdllm3btqtOpaWlCg8P1/Xr1xUWFtYc80ILOXtWeuIJKSxMuv/+lp5Ny7l1SyotlXJypL59W3o2gPe0xhr3pN68MX9v17fJfeqr16LWeJ42N1899q1NS52Lvnj8W3Pd++LxgHvczcm8vRwAAAAAAEMI3QAAAAAAGELoBgAAAADAEEI3AAAAAACGELoBAAAAADCE0A0AAAAAgCGEbgAAAAAADCF0AwAAAABgCKEbAAAAAABDCN0AAAAAABhynzudbNuWJJWWlhqdDFrejRtSVZV08+a9P31Vefm913/jhsRpj/akNda4J/Xmjfl7u75N7lNfvRa1xvO0ufnqsW9tWupc9MXj35rr3hePB9xTk49r8nJ9LNtVD0lFRUWKiYnxzswAAAAAAGgnCgsL1aNHj3qXuxW6q6urdeHCBYWGhsqyLK9OsC0pLS1VTEyMCgsLFRYW1tLTAdok6gjwDmoJ8A5qCfAOX6wl27ZVVlam6Oho+fnV/5vbbr293M/Pr8Hk7mvCwsJ85kQCTKGOAO+glgDvoJYA7/C1WgoPD3fZhw9SAwAAAADAEEI3AAAAAACGELo9EBgY
qOXLlyswMLClpwK0WdQR4B3UEuAd1BLgHdRS/dz6IDUAAAAAAOA5nnQDAAAAAGAIoRsAAAAAAEMI3QAAAAAAGELoBgAAAADAkHYduo8cOaLHH39c0dHRsixLf/3rX2v1yc7O1qOPPqrOnTvLsizl5eW5PX5RUZECAgKUkJBQ53LLsmRZlj744AOn9jt37igyMlKWZenQoUMevCKgZbiqpcrKSi1ZskQDBgxQx44dFR0drZkzZ+rChQsNjrtixQpHnfj7+ysmJkY//elPdfnyZUcf6gjtTXp6uuLj4xUUFKQhQ4bo/fffd1remPsStQRf1FAtcV8C3OfqvrRixQolJCSoY8eOeuCBBzR27Fh9+OGHDY5JLTlr16H75s2bGjhwoDZu3Nhgn9GjR+s3v/mNx+NnZmZq6tSpunXrlv7xj3/U2ScmJkZbt251atu1a5dCQkI83h7QUlzV0q1bt5Sbm6uXXnpJubm5ys7O1pkzZ/TEE0+4HDsxMVEXL15UQUGBNm3apN27d2vmzJlOfagjtBdZWVl64YUXtGzZMp08eVJjxozRxIkTVVBQ4OjT2PsStQRf4qqWuC8B7nHnvtSvXz9t3LhRn3zyif7+97+rZ8+eGj9+vFOArgu19DW2j5Bk79q1q97l586dsyXZJ0+edGu86upqu1evXvbevXvtJUuW2KmpqXVu88UXX7TDwsLsW7duOdrHjRtnv/TSS7Yk++DBgx6+EqBluaqlGv/85z9tSfb58+fr7bN8+XJ74MCBTm1r1qyx/fz8HDVDHaE9GTZsmP3cc885tSUkJNhpaWm1+npyX6KW4Gs8qaUa3JeA2hpTS9evX7cl2fv376+3D7XkrF0/6Tbp4MGDunXrlsaOHauUlBTt3LlTZWVltfoNGTJE8fHx+stf/iJJKiws1JEjR5SSktLcUwaa1fXr12VZljp16uTResHBwaqurtbdu3cdbdQR2oOKigr961//0vjx453ax48fr6NHj3p9e9QS2qvG1hL3JcBZY2qpoqJCr732msLDwzVw4ECPtufLtUTobqSMjAxNmzZN/v7+SkxMVJ8+fZSVlVVn39TUVG3ZskWStHXrVj322GOKiopqzukCzaq8vFxpaWl66qmnFBYW5vZ6n376qTZt2qRhw4YpNDTUaRl1hLbuypUrqqqqUteuXZ3au3btquLiYq9ui1pCe9aYWuK+BNTmSS3t2bNHISEhCgoK0u9//3vt27dPnTt3dntbvl5LhO5GuHbtmrKzszVjxgxH24wZMxwnyzfNmDFDx44d05dffqnMzEzNmjWruaYKNLvKykpNmzZN1dXVSk9Pd9n/k08+UUhIiIKDg9W/f3/FxMTojTfeqNWPOkJ7YVmW0/e2bddqawxqCb7G3VrivgQ0zJ1aSkpKUl5eno4ePaoJEyZo6tSpKikpaXBcaun/7mvpCbRFf/7zn1VeXq7hw4c72mzbVnV1tU6fPq3+/fs79Y+MjFRycrJmz56t8vJyTZw4sc63ogNtXWVlpaZOnapz587pwIEDbj1NeOihh5STkyN/f39FR0crMDCwzn7UEdq6zp07y9/fv9bTg5KSklpPGRqDWoKv8KSWuC8B9fOkljp27Kg+ffqoT58+GjFihPr27auMjAwtXbq03vGppf/jSXcjZGRkaNGiRcrLy3N8ffTRR0pKSqr3afesWbN06NAhzZw5U/7+/s08Y8C8mh9szp49q/379ysyMtKt9QICAtSnTx/Fx8fXezGuQR2hLQsICNCQIUO0b98+p/Z9+/Zp1KhRXhmfWoIvcLeWuC8BDWvKfcm2bd25c8fl+NTSPe36SfeNGzf0+eefO74/d+6c8vLyFBERodjYWEnSV199pYKCAsf/2/jZZ59Jkrp166Zu3brVGjMvL0+5ubl64403av3/3NOnT9eyZcv08ssvq0OHDk7LJkyYoMuXL3v0e0RAa+Gqlu7evavJkycrNzdXe/bsUVVVleNfTSMiIhQQEOCVeVBH
aOsWLlyolJQUDR06VCNHjtRrr72mgoICPffcc44+nt6XGoNaQlvnqpa4LwHucVVLN2/e1Nq1a/XEE0+oe/fuunr1qtLT01VUVKQpU6Z4bR7tvZbadeg+ceKEkpKSHN8vXLhQkvT0008rMzNTkpSTk6PU1FRHn2nTpkmSli9frhUrVtQaMyMjQ/37968VuCXpxz/+sebOnavdu3dr0qRJTsssy/LowwaA1sRVLRUVFSknJ0eSNGjQIKd1Dx48qB/+8IdemQd1hLbuySef1NWrV7Vq1SpdvHhR3/72t/Xuu+8qLi7O0cfT+1JjUEto61zVEvclwD2uasnf31+ffvqp/vjHP+rKlSuKjIzUww8/rPfff1+JiYlem0d7ryXLtm27pScBAAAAAEB7xO90AwAAAABgCKEbAAAAAABDCN0AAAAAABhC6AYAAAAAwBBCNwAAAAAAhhC6AQAAAAAwhNANAAAAAIAhhG4AAAAAAAwhdAMAAAAAYAihGwAAAAAAQwjdAAAAAAAYQugGAAAAAMCQ/wEqxlazKR18KgAAAABJRU5ErkJggg==",
- "text/plain": [
- ""
- ]
- },
- "metadata": {},
- "output_type": "display_data"
+ "execution_count": 2,
+ "id": "86273598",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2025-11-24T18:32:51.005525Z",
+ "iopub.status.busy": "2025-11-24T18:32:51.000857Z",
+ "iopub.status.idle": "2025-11-24T18:32:51.355886Z",
+ "shell.execute_reply": "2025-11-24T18:32:51.355274Z"
}
- ],
+ },
+ "outputs": [],
"source": [
- "fig, ax_barcode = plt.subplots(figsize=(10,1.5))\n",
+ "fig, (ax_map, ax_barcode) = plt.subplots(2, 1, figsize=(6,6.5),\n",
+ " gridspec_kw={'height_ratios':[10,1]})\n",
+ "\n",
+ "gpd.GeoDataFrame(geometry=[outer_box], crs='EPSG:3857').plot(ax=ax_map, color='#d3d3d3')\n",
+ "city.plot(ax=ax_map, edgecolor='white', linewidth=1, color='#8c8c8c')\n",
+ "\n",
+ "plot_stops(stops, ax=ax_map, cmap='Blues')\n",
+ "plot_pings(traj, ax=ax_map, s=6, color='black', alpha=0.5, traj_cols=tc)\n",
+ "ax_map.set_axis_off()\n",
+ "\n",
+ "plot_time_barcode(traj['unix_ts'], ax=ax_barcode, set_xlim=True)\n",
+ "plot_stops_barcode(stops, ax=ax_barcode, cmap='Blues', set_xlim=False, timestamp='unix_ts')\n",
"\n",
- "plot_time_barcode(traj[tc['timestamp']], ax=ax_barcode, set_xlim=True)\n",
- "plot_stops_barcode(stops, ax=ax_barcode, stop_color='blue', set_xlim=False, timestamp='unix_ts')\n",
- "fig.suptitle(\"Lachesis stops\")\n",
- "plt.tight_layout()\n",
+ "plt.tight_layout(pad=0.1)\n",
"plt.show()"
]
}
@@ -103,7 +118,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.11.5"
+ "version": "3.12.3"
}
},
"nbformat": 4,
diff --git a/docs/source/tadbscan_demo.ipynb b/docs/source/tadbscan_demo.ipynb
index 6e28d6b4..c4f5747b 100644
--- a/docs/source/tadbscan_demo.ipynb
+++ b/docs/source/tadbscan_demo.ipynb
@@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "markdown",
- "id": "92838936",
+ "id": "7c76d1d8",
"metadata": {},
"source": [
"# TADBSCAN Stop Detection"
@@ -10,7 +10,7 @@
},
{
"cell_type": "markdown",
- "id": "cb276fd9",
+ "id": "f29a96ce",
"metadata": {},
"source": [
"The second stop detection algorithm implemented in ```nomad``` is an adaptation of DBSCAN. Unlike in plain DBSCAN, we also incorporate the time dimension to determine if two pings are \"neighbors\". This implementation relies on 3 parameters\n",
@@ -24,79 +24,81 @@
},
{
"cell_type": "code",
- "execution_count": 4,
- "id": "19184dee",
- "metadata": {},
+ "execution_count": 1,
+ "id": "1e62c25a",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2025-11-24T18:33:03.240035Z",
+ "iopub.status.busy": "2025-11-24T18:33:03.240035Z",
+ "iopub.status.idle": "2025-11-24T18:33:05.816985Z",
+ "shell.execute_reply": "2025-11-24T18:33:05.816985Z"
+ }
+ },
"outputs": [],
"source": [
"%matplotlib inline\n",
+ "import matplotlib\n",
+ "matplotlib.use('TkAgg')\n",
+ "import matplotlib.pyplot as plt\n",
+ "plt.ion()\n",
"\n",
"# Imports\n",
"import nomad.io.base as loader\n",
"import geopandas as gpd\n",
"from shapely.geometry import box\n",
- "import pandas as pd\n",
- "import numpy as np\n",
- "import matplotlib.pyplot as plt\n",
- "from nomad.stop_detection.viz import plot_stops_barcode, plot_time_barcode\n",
+ "from nomad.stop_detection.viz import plot_stops_barcode, plot_time_barcode, plot_stops, plot_pings\n",
"import nomad.stop_detection.dbscan as DBSCAN\n",
- "import nomad.filters as filters \n",
- "import nomad.stop_detection.postprocessing as post\n",
"\n",
"# Load data\n",
- "city = gpd.read_file(\"garden_city.geojson\").to_crs('EPSG:3857')\n",
- "outer_box = box(*city.total_bounds).buffer(15, join_style='mitre')\n",
+ "import nomad.data as data_folder\n",
+ "from pathlib import Path\n",
+ "data_dir = Path(data_folder.__file__).parent\n",
+ "city = gpd.read_parquet(data_dir / 'garden-city-buildings-mercator.parquet')\n",
+ "outer_box = box(*city.total_bounds)\n",
"\n",
"filepath_root = 'gc_data_long/'\n",
- "tc = {\n",
- " \"user_id\": \"gc_identifier\",\n",
- " \"timestamp\": \"unix_ts\",\n",
- " \"x\": \"dev_x\",\n",
- " \"y\": \"dev_y\",\n",
- " \"ha\":\"ha\",\n",
- " \"date\":\"date\"}\n",
+ "tc = {\"user_id\": \"gc_identifier\", \"x\": \"dev_x\", \"y\": \"dev_y\", \"timestamp\": \"unix_ts\"}\n",
"\n",
"# Density based stop detection (Temporal DBSCAN)\n",
- "users = ['confident_aryabhata']\n",
- "traj = loader.sample_from_file(filepath_root, format='parquet', users=users, filters = ('date','<=', '2024-01-03'), traj_cols=tc)\n",
- "traj[['longitude','latitude']] = np.column_stack(\n",
- " filters.to_projection(traj, x='dev_x', y='dev_y', data_crs='EPSG:3857', crs_to='EPSG:4326')\n",
- ")\n",
+ "users = ['admiring_brattain']\n",
+ "traj = loader.sample_from_file(filepath_root, format='parquet', users=users, filters=('date','==', '2024-01-01'), traj_cols=tc)\n",
"\n",
- "user_data_tadb = traj.assign(cluster=DBSCAN.ta_dbscan_labels(traj, time_thresh=240, dist_thresh=15, min_pts=3, traj_cols=tc))\n",
"stops_tadb = DBSCAN.ta_dbscan(traj,\n",
" time_thresh=720,\n",
" dist_thresh=15,\n",
" min_pts=3,\n",
" complete_output=True,\n",
- " traj_cols=tc)\n",
- "stops_tadb[\"cluster\"] = post.remove_overlaps(user_data_tadb, time_thresh=240, method='cluster', traj_cols=tc, min_pts=3, dur_min=5, min_cluster_size=3) "
+ " traj_cols=tc) "
]
},
{
"cell_type": "code",
- "execution_count": 5,
- "id": "2159107b",
- "metadata": {},
- "outputs": [
- {
- "data": {
- "image/png": "",
- "text/plain": [
- ""
- ]
- },
- "metadata": {},
- "output_type": "display_data"
+ "execution_count": 2,
+ "id": "df942a2c",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2025-11-24T18:33:05.816985Z",
+ "iopub.status.busy": "2025-11-24T18:33:05.816985Z",
+ "iopub.status.idle": "2025-11-24T18:33:06.001663Z",
+ "shell.execute_reply": "2025-11-24T18:33:06.001511Z"
}
- ],
+ },
+ "outputs": [],
"source": [
- "fig, ax_barcode = plt.subplots(figsize=(10,1.5))\n",
+ "fig, (ax_map, ax_barcode) = plt.subplots(2, 1, figsize=(6,6.5),\n",
+ " gridspec_kw={'height_ratios':[10,1]})\n",
+ "\n",
+ "gpd.GeoDataFrame(geometry=[outer_box], crs='EPSG:3857').plot(ax=ax_map, color='#d3d3d3')\n",
+ "city.plot(ax=ax_map, edgecolor='white', linewidth=1, color='#8c8c8c')\n",
+ "\n",
+ "plot_stops(stops_tadb, ax=ax_map, cmap='Reds')\n",
+ "plot_pings(traj, ax=ax_map, s=6, color='black', alpha=0.5, traj_cols=tc)\n",
+ "ax_map.set_axis_off()\n",
"\n",
"plot_time_barcode(traj['unix_ts'], ax=ax_barcode, set_xlim=True)\n",
- "plot_stops_barcode(stops_tadb, ax=ax_barcode, stop_color='red', set_xlim=False, timestamp='unix_ts')\n",
- "plt.title(\"TA-DBSCAN stops with post-processing\")\n",
- "plt.tight_layout()\n",
+ "plot_stops_barcode(stops_tadb, ax=ax_barcode, cmap='Reds', set_xlim=False, timestamp='unix_ts')\n",
+ "\n",
+ "plt.tight_layout(pad=0.1)\n",
"plt.show()"
]
}
@@ -117,7 +119,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.11.5"
+ "version": "3.12.3"
}
},
"nbformat": 4,
diff --git a/examples/benchmarking_of_stop_detection_algorithms.ipynb b/examples/benchmarking_of_stop_detection_algorithms.ipynb
index 6097165d..64549ab4 100644
--- a/examples/benchmarking_of_stop_detection_algorithms.ipynb
+++ b/examples/benchmarking_of_stop_detection_algorithms.ipynb
@@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "markdown",
- "id": "92838936",
+ "id": "67fc810a",
"metadata": {},
"source": [
"# Comparing runtimes of different stop detection algorithms on toy datasets"
@@ -10,7 +10,7 @@
},
{
"cell_type": "markdown",
- "id": "cb276fd9",
+ "id": "f152187c",
"metadata": {},
"source": [
"Here we compare the runtimes of four different stop detection algorithms: Lachesis, grid-based, temporal DBSCAN, and HDBSCAN."
@@ -18,28 +18,44 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "id": "19184dee",
- "metadata": {},
+ "execution_count": 1,
+ "id": "474229df",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2025-11-24T18:33:19.946986Z",
+ "iopub.status.busy": "2025-11-24T18:33:19.946986Z",
+ "iopub.status.idle": "2025-11-24T18:33:23.251955Z",
+ "shell.execute_reply": "2025-11-24T18:33:23.251955Z"
+ }
+ },
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
- "Lachesis execution time: 0.06618499755859375 seconds\n",
- "TA-DBSCAN execution time: 0.13709473609924316 seconds\n",
- "TA-DBSCAN pre-processing time: 0.06432580947875977 seconds\n",
- "TA-DBSCAN clustering time: 0.11364507675170898 seconds\n",
- "TA-DBSCAN post-processing time: 0.02340412139892578 seconds\n",
- "Grid-Based execution time: 0.16486215591430664 seconds\n",
- "HDBSCAN execution time: 2.95216703414917 seconds\n",
- "HDBSCAN clustering time: 2.930790901184082 seconds\n",
- "HDBSCAN post-processing time: 0.02130913734436035 seconds\n"
+ "Lachesis execution time: 0.02721381187438965 seconds\n",
+ "TA-DBSCAN execution time: 0.012791156768798828 seconds\n",
+ "TA-DBSCAN clustering time: 0.009821414947509766 seconds\n",
+ "TA-DBSCAN post-processing time: 0.0029697418212890625 seconds\n",
+ "Grid-Based execution time: 0.022524595260620117 seconds\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "HDBSCAN execution time: 0.3206779956817627 seconds\n",
+ "HDBSCAN clustering time: 0.3206779956817627 seconds\n",
+ "HDBSCAN post-processing time: 0.0 seconds\n"
]
}
],
"source": [
"%matplotlib inline\n",
+ "import matplotlib\n",
+ "matplotlib.use('TkAgg')\n",
+ "import matplotlib.pyplot as plt\n",
+ "plt.ion()\n",
"\n",
"# Imports\n",
"import nomad.io.base as loader\n",
@@ -47,9 +63,7 @@
"from shapely.geometry import box\n",
"import pandas as pd\n",
"import numpy as np\n",
- "import matplotlib.pyplot as plt\n",
- "import shapely.plotting as shp_plt\n",
- "from nomad.stop_detection.viz import adjust_zoom, plot_stops_barcode, plot_pings, plot_stops, plot_time_barcode\n",
+ "from nomad.stop_detection.viz import plot_stops_barcode, plot_pings, plot_stops, plot_time_barcode\n",
"import nomad.stop_detection.dbscan as DBSCAN\n",
"import nomad.stop_detection.lachesis as LACHESIS\n",
"import nomad.stop_detection.grid_based as GRID_BASED\n",
@@ -60,10 +74,13 @@
"from tqdm import tqdm\n",
"\n",
"# Load data\n",
- "city = gpd.read_file(\"garden_city.geojson\").to_crs('EPSG:3857')\n",
+ "import nomad.data as data_folder\n",
+ "from pathlib import Path\n",
+ "data_dir = Path(data_folder.__file__).parent\n",
+ "city = gpd.read_parquet(data_dir / 'garden-city-buildings-mercator.parquet')\n",
"outer_box = box(*city.total_bounds).buffer(15, join_style='mitre')\n",
"\n",
- "filepath_root = '../tutorials/IC2S2-2025/gc_data_long/'\n",
+ "filepath_root = 'gc_data_long/'\n",
"tc = {\n",
" \"user_id\": \"gc_identifier\",\n",
" \"timestamp\": \"unix_ts\",\n",
@@ -73,7 +90,7 @@
" \"date\":\"date\"}\n",
"\n",
"users = ['admiring_brattain']\n",
- "traj = loader.sample_from_file(filepath_root, format='parquet', users=users, filters = ('date','==', '2024-01-01'), traj_cols=tc)\n",
+ "traj = loader.sample_from_file(filepath_root, format='parquet', users=users, filters=('date','==', '2024-01-01'), traj_cols=tc)\n",
"\n",
"# Lachesis (sequential stop detection)\n",
"start_time = time.time()\n",
@@ -82,30 +99,21 @@
"print(f\"Lachesis execution time: {execution_time_lachesis} seconds\")\n",
"\n",
"# Density based stop detection (Temporal DBSCAN)\n",
- "start_time_pre_tadbscan = time.time()\n",
- "users = ['confident_aryabhata']\n",
- "traj = loader.sample_from_file(filepath_root, format='parquet', users=users, filters = ('date','<=', '2024-01-03'), traj_cols=tc)\n",
- "traj[['longitude','latitude']] = np.column_stack(\n",
- " filters.to_projection(traj, x='dev_x', y='dev_y', data_crs='EPSG:3857', crs_to='EPSG:4326')\n",
- ")\n",
- "time_pre_tadbscan = time.time() - start_time_pre_tadbscan\n",
- "\n",
"start_time = time.time()\n",
"user_data_tadb = traj.assign(cluster=DBSCAN.ta_dbscan_labels(traj, time_thresh=240, dist_thresh=15, min_pts=3, traj_cols=tc))\n",
"clustering_time_tadbscan = time.time() - start_time\n",
"start_time_post = time.time()\n",
- "stops_tadb = post.remove_overlaps(user_data_tadb, time_thresh=240, method='cluster', traj_cols=tc, min_pts=3, dur_min=5, min_cluster_size=3)\n",
+ "cluster_labels_tadb = post.remove_overlaps(user_data_tadb, time_thresh=240, method='cluster', traj_cols=tc, min_pts=3, dur_min=5, min_cluster_size=3)\n",
"execution_time_tadbscan = time.time() - start_time\n",
"post_time_tadbscan = time.time() - start_time_post\n",
"print(f\"TA-DBSCAN execution time: {execution_time_tadbscan} seconds\")\n",
- "print(f\"TA-DBSCAN pre-processing time: {time_pre_tadbscan} seconds\")\n",
"print(f\"TA-DBSCAN clustering time: {clustering_time_tadbscan} seconds\")\n",
"print(f\"TA-DBSCAN post-processing time: {post_time_tadbscan} seconds\")\n",
"\n",
"# Grid-based\n",
"start_time = time.time()\n",
- "traj['h3_cell'] = filters.to_tessellation(traj, index=\"h3\", res=10, x='dev_x', y='dev_y', data_crs='EPSG:3857')\n",
- "stops_gb = GRID_BASED.grid_based(traj, time_thresh=240, complete_output=True, timestamp='unix_ts', location_id='h3_cell')\n",
+ "traj['h3_cell'] = filters.to_tessellation(traj, index=\"h3\", res=10, traj_cols=tc, data_crs='EPSG:3857')\n",
+ "stops_gb = GRID_BASED.grid_based(traj, time_thresh=240, complete_output=True, traj_cols=tc, location_id='h3_cell')\n",
"execution_time_grid = time.time() - start_time\n",
"print(f\"Grid-Based execution time: {execution_time_grid} seconds\")\n",
"\n",
@@ -114,7 +122,7 @@
"user_data_hdb = traj.assign(cluster=HDBSCAN.hdbscan_labels(traj, time_thresh=240, min_pts=3, min_cluster_size=2, traj_cols=tc))\n",
"clustering_time_hdbscan = time.time() - start_time\n",
"start_time_post = time.time()\n",
- "stops_hdb = post.remove_overlaps(user_data_hdb, time_thresh=240, method='cluster', traj_cols=tc, min_pts=3, dur_min=5, min_cluster_size=3) \n",
+ "cluster_labels_hdb = post.remove_overlaps(user_data_hdb, time_thresh=240, method='cluster', traj_cols=tc, min_pts=3, dur_min=5, min_cluster_size=3) \n",
"execution_time_hdbscan = time.time() - start_time\n",
"post_time_hdbscan = time.time() - start_time_post\n",
"print(f\"HDBSCAN execution time: {execution_time_hdbscan} seconds\")\n",
@@ -124,7 +132,7 @@
},
{
"cell_type": "markdown",
- "id": "5b150f1e",
+ "id": "c88f426d",
"metadata": {},
"source": [
"## Summary of Single-User Performance"
@@ -132,7 +140,7 @@
},
{
"cell_type": "markdown",
- "id": "07203969",
+ "id": "6a678431",
"metadata": {},
"source": [
"### Lachesis"
@@ -140,177 +148,57 @@
},
{
"cell_type": "code",
- "execution_count": 7,
- "id": "570b6103",
- "metadata": {},
- "outputs": [
- {
- "data": {
- "image/png": "",
- "text/plain": [
- ""
- ]
- },
- "metadata": {},
- "output_type": "display_data"
+ "execution_count": 2,
+ "id": "b7480c93",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2025-11-24T18:33:23.251955Z",
+ "iopub.status.busy": "2025-11-24T18:33:23.251955Z",
+ "iopub.status.idle": "2025-11-24T18:33:23.475346Z",
+ "shell.execute_reply": "2025-11-24T18:33:23.475346Z"
}
- ],
+ },
+ "outputs": [],
"source": [
"fig, (ax_map, ax_barcode) = plt.subplots(2, 1, figsize=(6,6.5),\n",
" gridspec_kw={'height_ratios':[10,1]})\n",
"\n",
- "shp_plt.plot_polygon(outer_box, ax=ax_map, add_points=False, color='#0e0e0e')\n",
- "city.plot(ax=ax_map, edgecolor='white', linewidth=1, color='#2c353c')\n",
+ "gpd.GeoDataFrame(geometry=[outer_box], crs='EPSG:3857').plot(ax=ax_map, color='#d3d3d3')\n",
+ "city.plot(ax=ax_map, edgecolor='white', linewidth=1, color='#8c8c8c')\n",
"\n",
- "plot_stops(stops, ax=ax_map, cmap='Reds', x='x', y='y')\n",
+ "plot_stops(stops, ax=ax_map, cmap='Reds')\n",
"plot_pings(traj, ax=ax_map, s=6, point_color='black', cmap='twilight', traj_cols=tc)\n",
- "\n",
- "adjust_zoom(stops['x'], stops['y'], buffer=1.4, ax=ax_map)\n",
"ax_map.set_axis_off()\n",
"\n",
"plot_time_barcode(traj[tc['timestamp']], ax=ax_barcode, set_xlim=True)\n",
- "plot_stops_barcode(stops, ax=ax_barcode, cmap='Reds', set_xlim=False, x='x', y='y', timestamp='unix_ts')\n",
+ "plot_stops_barcode(stops, ax=ax_barcode, cmap='Reds', set_xlim=False, timestamp='unix_ts')\n",
"\n",
"plt.tight_layout(pad=0.1)\n",
"plt.show()"
]
},
- {
- "cell_type": "markdown",
- "id": "d286bd02",
- "metadata": {},
- "source": [
- "### TADBSCAN"
- ]
- },
{
"cell_type": "code",
- "execution_count": 20,
- "id": "2159107b",
- "metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "/Users/carolinechen/Desktop/cs/nomad/nomad/io/base.py:104: UserWarning: Trajectory column 'unix_ts' specified for 'timestamp' not found in DataFrame.\n",
- " warnings.warn(f\"Trajectory column '{value}' specified for '{key}' not found in DataFrame.\")\n"
- ]
- },
- {
- "data": {
- "image/png": "",
- "text/plain": [
- ""
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- }
- ],
- "source": [
- "fig, ax_barcode = plt.subplots(figsize=(10,1.5))\n",
- "\n",
- "plot_time_barcode(traj['unix_ts'], ax=ax_barcode, set_xlim=True)\n",
- "plot_stops_barcode(stops_tadb, ax=ax_barcode, stop_color='red', set_xlim=False, timestamp='unix_ts')\n",
- "plt.title(\"TA-DBSCAN stops with post-processing\")\n",
- "plt.tight_layout()\n",
- "plt.show()"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "9c2b278a",
- "metadata": {},
- "source": [
- "### Grid-Based"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 21,
- "id": "62555a1b",
- "metadata": {},
- "outputs": [
- {
- "data": {
- "image/png": "",
- "text/plain": [
- ""
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- }
- ],
- "source": [
- "fig, ax_barcode = plt.subplots(figsize=(10,1.5))\n",
- "\n",
- "plot_time_barcode(traj['unix_ts'], ax=ax_barcode, set_xlim=True)\n",
- "plot_stops_barcode(stops_gb, ax=ax_barcode, stop_color='green', set_xlim=False, timestamp='unix_ts')\n",
- "plt.title(\"Grid-Based stops\")\n",
- "plt.tight_layout()\n",
- "plt.show()"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "779c39db",
- "metadata": {},
- "source": [
- "### HDBSCAN"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 22,
- "id": "fa70719e",
- "metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "/Users/carolinechen/Desktop/cs/nomad/nomad/io/base.py:104: UserWarning: Trajectory column 'unix_ts' specified for 'timestamp' not found in DataFrame.\n",
- " warnings.warn(f\"Trajectory column '{value}' specified for '{key}' not found in DataFrame.\")\n"
- ]
- },
- {
- "data": {
- "image/png": "",
- "text/plain": [
- ""
- ]
- },
- "metadata": {},
- "output_type": "display_data"
+ "execution_count": 3,
+ "id": "98cdda1f",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2025-11-24T18:33:23.475346Z",
+ "iopub.status.busy": "2025-11-24T18:33:23.475346Z",
+ "iopub.status.idle": "2025-11-24T18:33:23.482904Z",
+ "shell.execute_reply": "2025-11-24T18:33:23.482904Z"
}
- ],
- "source": [
- "fig, ax_barcode = plt.subplots(figsize=(10,1.5))\n",
- "\n",
- "plot_time_barcode(traj['unix_ts'], ax=ax_barcode, set_xlim=True)\n",
- "plot_stops_barcode(stops_hdb, ax=ax_barcode, stop_color='blue', set_xlim=False, timestamp='unix_ts')\n",
- "fig.suptitle(\"HDBSCAN stops with post-processing\")\n",
- "plt.tight_layout()\n",
- "plt.show()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 8,
- "id": "c6351f6e",
- "metadata": {},
+ },
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Summary of Single-User Performance\n",
- "Lachesis execution time: 0.06618499755859375 seconds\n",
- "TA-DBSCAN execution time: 0.13709473609924316 seconds\n",
- "Grid-Based execution time: 0.16486215591430664 seconds\n",
- "HDBSCAN execution time: 2.95216703414917 seconds\n"
+ "Lachesis execution time: 0.02721381187438965 seconds\n",
+ "TA-DBSCAN execution time: 0.012791156768798828 seconds\n",
+ "Grid-Based execution time: 0.022524595260620117 seconds\n",
+ "HDBSCAN execution time: 0.3206779956817627 seconds\n"
]
}
],
@@ -324,25 +212,31 @@
},
{
"cell_type": "code",
- "execution_count": 9,
- "id": "d0431a3c",
- "metadata": {},
+ "execution_count": 4,
+ "id": "41e9a154",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2025-11-24T18:33:23.482904Z",
+ "iopub.status.busy": "2025-11-24T18:33:23.482904Z",
+ "iopub.status.idle": "2025-11-24T18:33:23.492088Z",
+ "shell.execute_reply": "2025-11-24T18:33:23.492088Z"
+ }
+ },
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Runtime Disaggregation\n",
- "Lachesis clustering time: 0.06618499755859375 seconds\n",
+ "Lachesis clustering time: 0.02721381187438965 seconds\n",
"--------------------------------\n",
- "TA-DBSCAN pre-processing time: 0.06432580947875977 seconds\n",
- "TA-DBSCAN clustering time: 0.11364507675170898 seconds\n",
- "TA-DBSCAN post-processing time: 0.02340412139892578 seconds\n",
+ "TA-DBSCAN clustering time: 0.009821414947509766 seconds\n",
+ "TA-DBSCAN post-processing time: 0.0029697418212890625 seconds\n",
"--------------------------------\n",
- "Grid-Based clustering time: 0.16486215591430664 seconds\n",
+ "Grid-Based clustering time: 0.022524595260620117 seconds\n",
"--------------------------------\n",
- "HDBSCAN clustering time: 2.930790901184082 seconds\n",
- "HDBSCAN post-processing time: 0.02130913734436035 seconds\n"
+ "HDBSCAN clustering time: 0.3206779956817627 seconds\n",
+ "HDBSCAN post-processing time: 0.0 seconds\n"
]
}
],
@@ -350,7 +244,6 @@
"print(\"Runtime Disaggregation\")\n",
"print(f\"Lachesis clustering time: {execution_time_lachesis} seconds\")\n",
"print(\"--------------------------------\")\n",
- "print(f\"TA-DBSCAN pre-processing time: {time_pre_tadbscan} seconds\")\n",
"print(f\"TA-DBSCAN clustering time: {clustering_time_tadbscan} seconds\")\n",
"print(f\"TA-DBSCAN post-processing time: {post_time_tadbscan} seconds\")\n",
"print(\"--------------------------------\")\n",
@@ -362,7 +255,7 @@
},
{
"cell_type": "markdown",
- "id": "2de448bd",
+ "id": "5c9ee070",
"metadata": {},
"source": [
"## Pings vs Runtime"
@@ -370,36 +263,83 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "id": "dc99ecd5",
- "metadata": {},
+ "execution_count": 5,
+ "id": "62ed6a42",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2025-11-24T18:33:23.492088Z",
+ "iopub.status.busy": "2025-11-24T18:33:23.492088Z",
+ "iopub.status.idle": "2025-11-24T18:33:23.520075Z",
+ "shell.execute_reply": "2025-11-24T18:33:23.520075Z"
+ }
+ },
"outputs": [],
"source": [
- "traj = loader.sample_from_file(filepath_root, frac_users=0.1, format='parquet', traj_cols=tc, seed=10) # try frac_users = 0.1\n",
+ "traj = loader.sample_from_file(filepath_root, frac_users=0.1, format='parquet', traj_cols=tc, seed=10)\n",
"\n",
"# H3 cells for grid_based stop detection method\n",
- "traj['h3_cell'] = filters.to_tessellation(traj, index=\"h3\", res=10, x='dev_x', y='dev_y', data_crs='EPSG:3857')\n",
+ "traj['h3_cell'] = filters.to_tessellation(traj, index=\"h3\", res=10, traj_cols=tc, data_crs='EPSG:3857')\n",
"pings_per_user = traj['gc_identifier'].value_counts()"
]
},
{
"cell_type": "code",
- "execution_count": 12,
- "id": "4609ebe7",
- "metadata": {},
+ "execution_count": 6,
+ "id": "baafc0b8",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2025-11-24T18:33:23.520075Z",
+ "iopub.status.busy": "2025-11-24T18:33:23.520075Z",
+ "iopub.status.idle": "2025-11-24T18:33:24.093854Z",
+ "shell.execute_reply": "2025-11-24T18:33:24.093854Z"
+ }
+ },
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
- " 0%| | 0/35 [00:00, ?it/s]"
+ "\r",
+ " 0%| | 0/4 [00:00, ?it/s]"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "\r",
+ " 25%|████████████▊ | 1/4 [00:00<00:00, 4.68it/s]"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
- "100%|██████████| 35/35 [06:59<00:00, 11.98s/it]\n"
+ "\r",
+ " 50%|█████████████████████████▌ | 2/4 [00:00<00:00, 5.53it/s]"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "\r",
+ " 75%|██████████████████████████████████████▎ | 3/4 [00:00<00:00, 6.61it/s]"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "\r",
+ "100%|███████████████████████████████████████████████████| 4/4 [00:00<00:00, 7.10it/s]"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "\n"
]
}
],
@@ -411,7 +351,7 @@
"\n",
" # For location based\n",
" start_time = time.time()\n",
- " stops_gb = GRID_BASED.grid_based(user_data, time_thresh=240, complete_output=True, timestamp='unix_ts', location_id='h3_cell')\n",
+ " stops_gb = GRID_BASED.grid_based(user_data, time_thresh=240, complete_output=True, traj_cols=tc, location_id='h3_cell')\n",
" execution_time = time.time() - start_time\n",
" results += [pd.Series({'user':user, 'algo':'grid_based', 'execution_time':execution_time, 'n_pings':n_pings})]\n",
" \n",
@@ -440,46 +380,19 @@
"results = pd.DataFrame(results)"
]
},
- {
- "cell_type": "markdown",
- "id": "b329c036-1a08-44ef-8a18-688240087a29",
- "metadata": {},
- "source": [
- "### Use **completeness to normalize** ('hrs with data' / 'total hrs')"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 42,
- "id": "da1a8b6a",
- "metadata": {},
- "outputs": [],
- "source": [
- "completeness_per_user = filters.completeness(traj, timestamp='unix_ts', user_id='gc_identifier')\n",
- "dwell_scaling = 1/completeness_per_user\n",
- "dwell_scaling.name = 'dwell_scaling'\n",
- "\n",
- "metrics = pd.merge(results, dwell_scaling, left_on='user', right_index=True)\n",
- "metrics['rescaled_total_dwell'] = (metrics['total_dwell']/60)*metrics['dwell_scaling'] # in hours"
- ]
- },
{
"cell_type": "code",
- "execution_count": 49,
- "id": "22979688",
- "metadata": {},
- "outputs": [
- {
- "data": {
- "image/png": "",
- "text/plain": [
- ""
- ]
- },
- "metadata": {},
- "output_type": "display_data"
+ "execution_count": 7,
+ "id": "8e8546e8",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2025-11-24T18:33:24.093854Z",
+ "iopub.status.busy": "2025-11-24T18:33:24.093854Z",
+ "iopub.status.idle": "2025-11-24T18:33:24.338240Z",
+ "shell.execute_reply": "2025-11-24T18:33:24.338240Z"
}
- ],
+ },
+ "outputs": [],
"source": [
"import seaborn as sns\n",
"\n",
@@ -487,13 +400,16 @@
"palette = dict(zip(algos, sns.color_palette(n_colors=len(algos))))\n",
"\n",
"fig, ax = plt.subplots(figsize=(5, 5))\n",
- "sns.scatterplot(data=metrics, x='n_pings', y='execution_time', hue='algo', ax=ax)\n",
+ "sns.scatterplot(data=results, x='n_pings', y='execution_time', hue='algo', ax=ax)\n",
"ax.set_title('n_pings vs execution_time')\n",
"plt.show()"
]
}
],
"metadata": {
+ "jupytext": {
+ "formats": "ipynb,py:percent"
+ },
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
@@ -509,7 +425,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.10.0"
+ "version": "3.12.3"
}
},
"nbformat": 4,
diff --git a/examples/benchmarking_of_stop_detection_algorithms.py b/examples/benchmarking_of_stop_detection_algorithms.py
new file mode 100644
index 00000000..5972b6a1
--- /dev/null
+++ b/examples/benchmarking_of_stop_detection_algorithms.py
@@ -0,0 +1,198 @@
+# ---
+# jupyter:
+# jupytext:
+# formats: ipynb,py:percent
+# text_representation:
+# extension: .py
+# format_name: percent
+# format_version: '1.3'
+# jupytext_version: 1.17.3
+# kernelspec:
+# display_name: Python 3 (ipykernel)
+# language: python
+# name: python3
+# ---
+
+# %% [markdown]
+# # Comparing runtimes of different stop detection algorithms on toy datasets
+
+# %% [markdown]
+# Here we compare the runtimes of four different stop detection algorithms: Lachesis, grid-based, temporal DBSCAN, and HDBSCAN.
+
+# %%
+# %matplotlib inline
+import matplotlib
+matplotlib.use('TkAgg')
+import matplotlib.pyplot as plt
+plt.ion()
+
+# Imports
+import nomad.io.base as loader
+import geopandas as gpd
+from shapely.geometry import box
+import pandas as pd
+import numpy as np
+from nomad.stop_detection.viz import plot_stops_barcode, plot_pings, plot_stops, plot_time_barcode
+import nomad.stop_detection.dbscan as DBSCAN
+import nomad.stop_detection.lachesis as LACHESIS
+import nomad.stop_detection.grid_based as GRID_BASED
+import nomad.stop_detection.hdbscan as HDBSCAN
+import nomad.filters as filters
+import nomad.stop_detection.postprocessing as post
+import time
+from tqdm import tqdm
+
+# Load data
+import nomad.data as data_folder
+from pathlib import Path
+data_dir = Path(data_folder.__file__).parent
+city = gpd.read_parquet(data_dir / 'garden-city-buildings-mercator.parquet')
+outer_box = box(*city.total_bounds).buffer(15, join_style='mitre')
+
+filepath_root = 'gc_data_long/'
+tc = {
+ "user_id": "gc_identifier",
+ "timestamp": "unix_ts",
+ "x": "dev_x",
+ "y": "dev_y",
+ "ha":"ha",
+ "date":"date"}
+
+users = ['admiring_brattain']
+traj = loader.sample_from_file(filepath_root, format='parquet', users=users, filters=('date','==', '2024-01-01'), traj_cols=tc)
+
+# Lachesis (sequential stop detection)
+start_time = time.time()
+stops = LACHESIS.lachesis(traj, delta_roam=20, dt_max = 60, dur_min=5, complete_output=True, keep_col_names=True, traj_cols=tc)
+execution_time_lachesis = time.time() - start_time
+print(f"Lachesis execution time: {execution_time_lachesis} seconds")
+
+# Density based stop detection (Temporal DBSCAN)
+start_time = time.time()
+user_data_tadb = traj.assign(cluster=DBSCAN.ta_dbscan_labels(traj, time_thresh=240, dist_thresh=15, min_pts=3, traj_cols=tc))
+clustering_time_tadbscan = time.time() - start_time
+start_time_post = time.time()
+cluster_labels_tadb = post.remove_overlaps(user_data_tadb, time_thresh=240, method='cluster', traj_cols=tc, min_pts=3, dur_min=5, min_cluster_size=3)
+execution_time_tadbscan = time.time() - start_time
+post_time_tadbscan = time.time() - start_time_post
+print(f"TA-DBSCAN execution time: {execution_time_tadbscan} seconds")
+print(f"TA-DBSCAN clustering time: {clustering_time_tadbscan} seconds")
+print(f"TA-DBSCAN post-processing time: {post_time_tadbscan} seconds")
+
+# Grid-based
+start_time = time.time()
+traj['h3_cell'] = filters.to_tessellation(traj, index="h3", res=10, traj_cols=tc, data_crs='EPSG:3857')
+stops_gb = GRID_BASED.grid_based(traj, time_thresh=240, complete_output=True, traj_cols=tc, location_id='h3_cell')
+execution_time_grid = time.time() - start_time
+print(f"Grid-Based execution time: {execution_time_grid} seconds")
+
+# HDBSCAN
+start_time = time.time()
+user_data_hdb = traj.assign(cluster=HDBSCAN.hdbscan_labels(traj, time_thresh=240, min_pts=3, min_cluster_size=2, traj_cols=tc))
+clustering_time_hdbscan = time.time() - start_time
+start_time_post = time.time()
+cluster_labels_hdb = post.remove_overlaps(user_data_hdb, time_thresh=240, method='cluster', traj_cols=tc, min_pts=3, dur_min=5, min_cluster_size=3)
+execution_time_hdbscan = time.time() - start_time
+post_time_hdbscan = time.time() - start_time_post
+print(f"HDBSCAN execution time: {execution_time_hdbscan} seconds")
+print(f"HDBSCAN clustering time: {clustering_time_hdbscan} seconds")
+print(f"HDBSCAN post-processing time: {post_time_hdbscan} seconds")
+
+# %% [markdown]
+# ## Summary of Single-User Performance
+
+# %% [markdown]
+# ### Lachesis
+
+# %%
+fig, (ax_map, ax_barcode) = plt.subplots(2, 1, figsize=(6,6.5),
+ gridspec_kw={'height_ratios':[10,1]})
+
+gpd.GeoDataFrame(geometry=[outer_box], crs='EPSG:3857').plot(ax=ax_map, color='#d3d3d3')
+city.plot(ax=ax_map, edgecolor='white', linewidth=1, color='#8c8c8c')
+
+plot_stops(stops, ax=ax_map, cmap='Reds')
+plot_pings(traj, ax=ax_map, s=6, point_color='black', cmap='twilight', traj_cols=tc)
+ax_map.set_axis_off()
+
+plot_time_barcode(traj[tc['timestamp']], ax=ax_barcode, set_xlim=True)
+plot_stops_barcode(stops, ax=ax_barcode, cmap='Reds', set_xlim=False, timestamp='unix_ts')
+
+plt.tight_layout(pad=0.1)
+plt.show()
+
+# %%
+print("Summary of Single-User Performance")
+print(f"Lachesis execution time: {execution_time_lachesis} seconds")
+print(f"TA-DBSCAN execution time: {execution_time_tadbscan} seconds")
+print(f"Grid-Based execution time: {execution_time_grid} seconds")
+print(f"HDBSCAN execution time: {execution_time_hdbscan} seconds")
+
+# %%
+print("Runtime Disaggregation")
+print(f"Lachesis clustering time: {execution_time_lachesis} seconds")
+print("--------------------------------")
+print(f"TA-DBSCAN clustering time: {clustering_time_tadbscan} seconds")
+print(f"TA-DBSCAN post-processing time: {post_time_tadbscan} seconds")
+print("--------------------------------")
+print(f"Grid-Based clustering time: {execution_time_grid} seconds")
+print("--------------------------------")
+print(f"HDBSCAN clustering time: {clustering_time_hdbscan} seconds")
+print(f"HDBSCAN post-processing time: {post_time_hdbscan} seconds")
+
+# %% [markdown]
+# ## Pings vs Runtime
+
+# %%
+traj = loader.sample_from_file(filepath_root, frac_users=0.1, format='parquet', traj_cols=tc, seed=10)
+
+# H3 cells for grid_based stop detection method
+traj['h3_cell'] = filters.to_tessellation(traj, index="h3", res=10, traj_cols=tc, data_crs='EPSG:3857')
+pings_per_user = traj['gc_identifier'].value_counts()
+
+# %%
+# Approximately 5 minutes for 40 users
+results = []
+for user, n_pings in tqdm(pings_per_user.items(), total=len(pings_per_user)):
+ user_data = traj.query("gc_identifier == @user")
+
+ # For location based
+ start_time = time.time()
+ stops_gb = GRID_BASED.grid_based(user_data, time_thresh=240, complete_output=True, traj_cols=tc, location_id='h3_cell')
+ execution_time = time.time() - start_time
+ results += [pd.Series({'user':user, 'algo':'grid_based', 'execution_time':execution_time, 'n_pings':n_pings})]
+
+ # For Lachesis
+ start_time = time.time()
+ stops_lac = LACHESIS.lachesis(user_data, delta_roam=30, dt_max=240, complete_output=True, traj_cols=tc)
+ execution_time = time.time() - start_time
+ results += [pd.Series({'user':user, 'algo':'lachesis', 'execution_time':execution_time, 'n_pings':n_pings})]
+
+ # For TADbscan
+ start_time = time.time()
+ user_data_tadb = user_data.assign(cluster=DBSCAN.ta_dbscan_labels(user_data, time_thresh=240, dist_thresh=15, min_pts=3, traj_cols=tc))
+ # - post-processing
+ stops_tadb = post.remove_overlaps(user_data_tadb, time_thresh=240, method='cluster', traj_cols=tc, min_pts=3, dur_min=5, min_cluster_size=3)
+ execution_time = time.time() - start_time
+ results += [pd.Series({'user':user, 'algo':'tadbscan', 'execution_time':execution_time, 'n_pings':n_pings})]
+
+ # For HDBSCAN
+ start_time = time.time()
+ user_data_hdb = user_data.assign(cluster=HDBSCAN.hdbscan_labels(user_data, time_thresh=240, min_pts=3, min_cluster_size=2, traj_cols=tc))
+ # - post-processing
+ stops_hdb = post.remove_overlaps(user_data_hdb, time_thresh=240, method='cluster', traj_cols=tc, min_pts=3, dur_min=5, min_cluster_size=3)
+ execution_time = time.time() - start_time
+ results += [pd.Series({'user':user, 'algo':'hdbscan', 'execution_time':execution_time, 'n_pings':n_pings})]
+
+results = pd.DataFrame(results)
+
+# %%
+import seaborn as sns
+
+algos = ['grid_based', 'lachesis', 'tadbscan', 'hdbscan']
+palette = dict(zip(algos, sns.color_palette(n_colors=len(algos))))
+
+fig, ax = plt.subplots(figsize=(5, 5))
+sns.scatterplot(data=results, x='n_pings', y='execution_time', hue='algo', ax=ax)
+ax.set_title('n_pings vs execution_time')
+plt.show()
diff --git a/examples/generate_synthetic_pois.ipynb b/examples/generate_synthetic_pois.ipynb
index c9e8d88c..1fec4f56 100644
--- a/examples/generate_synthetic_pois.ipynb
+++ b/examples/generate_synthetic_pois.ipynb
@@ -2,8 +2,8 @@
"cells": [
{
"cell_type": "code",
- "execution_count": 2,
- "id": "0eb89ec7-0bed-48fe-9005-d54c14695ff7",
+ "execution_count": null,
+ "id": "e7c495ca",
"metadata": {},
"outputs": [],
"source": [
@@ -15,36 +15,10 @@
},
{
"cell_type": "code",
- "execution_count": 3,
- "id": "71e396a0-5d62-4183-9ae0-5b15c8ca9853",
+ "execution_count": null,
+ "id": "e96465b0",
"metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "C:\\Users\\pacob\\Desktop\\Brain\\Code Development\\nomad\\nomad\\city_gen.py:271: FutureWarning: The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.\n",
- " self.buildings_gdf = pd.concat([self.buildings_gdf, new_row], axis=0)\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "City built and street graph computed in 0.952s; buildings=106 streets=224\n"
- ]
- },
- {
- "data": {
- "image/png": "",
- "text/plain": [
- ""
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- }
- ],
+ "outputs": [],
"source": [
"# Initialize city and time the build\n",
"t0 = time.perf_counter()\n",
@@ -73,37 +47,37 @@
"city.add_building('home', (8, 10), blocks=[(7, 10), (7, 9)])\n",
"\n",
"# add workplaces\n",
- "city.add_building('work', (3, 4), blocks=[(4, 4), (4, 5)])\n",
- "city.add_building('work', (5, 3), blocks=[(5, 4), (5, 5)])\n",
- "city.add_building('work', (6, 6), geom=box(6, 4, 8, 6))\n",
- "city.add_building('work', (8, 6), geom=box(8, 4, 10, 6))\n",
- "city.add_building('work', (12, 6), geom=box(11, 5, 14, 6))\n",
- "city.add_building('work', (12, 3), geom=box(11, 4, 14, 5))\n",
- "city.add_building('work', (15, 3), geom=box(14, 4, 17, 6))\n",
- "city.add_building('work', (18, 4), geom=box(17, 4, 18, 6))\n",
- "city.add_building('work', (18, 6), geom=box(16, 6, 18, 8))\n",
- "city.add_building('work', (15, 9), geom=box(16, 8, 17, 10))\n",
- "city.add_building('work', (18, 8), geom=box(17, 8, 18, 10))\n",
- "city.add_building('work', (18, 10), geom=box(16, 10, 18, 12))\n",
- "city.add_building('work', (18, 13), geom=box(16, 13, 18, 15))\n",
- "city.add_building('work', (18, 15), geom=box(16, 15, 18, 16))\n",
- "city.add_building('work', (15, 15), geom=box(15, 16, 18, 17))\n",
- "city.add_building('work', (14, 15), blocks=[(14, 16)])\n",
- "city.add_building('work', (16, 18), geom=box(16, 17, 18, 18))\n",
- "city.add_building('work', (15, 18), geom=box(14, 17, 16, 18))\n",
- "city.add_building('work', (13, 18), geom=box(12, 16, 14, 18))\n",
- "city.add_building('work', (11, 18), geom=box(10, 17, 12, 18))\n",
- "city.add_building('work', (11, 15), geom=box(10, 16, 12, 17))\n",
- "city.add_building('work', (8, 18), geom=box(7, 16, 9, 18))\n",
- "city.add_building('work', (6, 18), geom=box(5, 17, 7, 18))\n",
- "city.add_building('work', (6, 15), geom=box(5, 16, 7, 17))\n",
- "city.add_building('work', (3, 16), blocks=[(4, 16), (4, 17)])\n",
- "city.add_building('work', (3, 13), geom=box(4, 13, 6, 16))\n",
- "city.add_building('work', (6, 12), geom=box(4, 12, 6, 13))\n",
- "city.add_building('work', (3, 10), blocks=[(4, 9), (4, 10)])\n",
- "city.add_building('work', (6, 9), blocks=[(5, 9), (5, 10)])\n",
- "city.add_building('work', (6, 8), blocks=[(4, 8), (5, 8)])\n",
- "city.add_building('work', (3, 6), geom=box(4, 6, 6, 8))\n",
+ "city.add_building('workplace', (3, 4), blocks=[(4, 4), (4, 5)])\n",
+ "city.add_building('workplace', (5, 3), blocks=[(5, 4), (5, 5)])\n",
+ "city.add_building('workplace', (6, 6), geom=box(6, 4, 8, 6))\n",
+ "city.add_building('workplace', (8, 6), geom=box(8, 4, 10, 6))\n",
+ "city.add_building('workplace', (12, 6), geom=box(11, 5, 14, 6))\n",
+ "city.add_building('workplace', (12, 3), geom=box(11, 4, 14, 5))\n",
+ "city.add_building('workplace', (15, 3), geom=box(14, 4, 17, 6))\n",
+ "city.add_building('workplace', (18, 4), geom=box(17, 4, 18, 6))\n",
+ "city.add_building('workplace', (18, 6), geom=box(16, 6, 18, 8))\n",
+ "city.add_building('workplace', (15, 9), geom=box(16, 8, 17, 10))\n",
+ "city.add_building('workplace', (18, 8), geom=box(17, 8, 18, 10))\n",
+ "city.add_building('workplace', (18, 10), geom=box(16, 10, 18, 12))\n",
+ "city.add_building('workplace', (18, 13), geom=box(16, 13, 18, 15))\n",
+ "city.add_building('workplace', (18, 15), geom=box(16, 15, 18, 16))\n",
+ "city.add_building('workplace', (15, 15), geom=box(15, 16, 18, 17))\n",
+ "city.add_building('workplace', (14, 15), blocks=[(14, 16)])\n",
+ "city.add_building('workplace', (16, 18), geom=box(16, 17, 18, 18))\n",
+ "city.add_building('workplace', (15, 18), geom=box(14, 17, 16, 18))\n",
+ "city.add_building('workplace', (13, 18), geom=box(12, 16, 14, 18))\n",
+ "city.add_building('workplace', (11, 18), geom=box(10, 17, 12, 18))\n",
+ "city.add_building('workplace', (11, 15), geom=box(10, 16, 12, 17))\n",
+ "city.add_building('workplace', (8, 18), geom=box(7, 16, 9, 18))\n",
+ "city.add_building('workplace', (6, 18), geom=box(5, 17, 7, 18))\n",
+ "city.add_building('workplace', (6, 15), geom=box(5, 16, 7, 17))\n",
+ "city.add_building('workplace', (3, 16), blocks=[(4, 16), (4, 17)])\n",
+ "city.add_building('workplace', (3, 13), geom=box(4, 13, 6, 16))\n",
+ "city.add_building('workplace', (6, 12), geom=box(4, 12, 6, 13))\n",
+ "city.add_building('workplace', (3, 10), blocks=[(4, 9), (4, 10)])\n",
+ "city.add_building('workplace', (6, 9), blocks=[(5, 9), (5, 10)])\n",
+ "city.add_building('workplace', (6, 8), blocks=[(4, 8), (5, 8)])\n",
+ "city.add_building('workplace', (3, 6), geom=box(4, 6, 6, 8))\n",
"\n",
"# add retail places\n",
"city.add_building('retail', (0, 1), geom=box(1, 1, 3, 3))\n",
@@ -167,7 +141,7 @@
"print(f\"City built and street graph computed in {elapsed:.3f}s; buildings={len(city.buildings_gdf)} streets={len(city.streets_gdf)}\")\n",
"\n",
"# Persist as GeoPackage\n",
- "city.save_geopackage('synthetic_pois.gpkg')\n",
+ "city.save_geopackage('garden-city.gpkg')\n",
"\n",
"# Plot a city\n",
"fig, ax = plt.subplots(figsize=(6, 6))\n",
@@ -179,7 +153,7 @@
},
{
"cell_type": "markdown",
- "id": "4b65abc5-4e5f-4376-88ab-998caca51d57",
+ "id": "12f6be66",
"metadata": {},
"source": [
"## Plotting a shortest path"
@@ -187,8 +161,8 @@
},
{
"cell_type": "code",
- "execution_count": 30,
- "id": "c599d0f0-5b20-4627-a015-daa7cc3e35d3",
+ "execution_count": null,
+ "id": "c8f01501",
"metadata": {},
"outputs": [],
"source": [
@@ -207,18 +181,6 @@
"display_name": "Python (nomad repo venv)",
"language": "python",
"name": "nomad-repo-venv"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.12.3"
}
},
"nbformat": 4,
diff --git a/examples/generate_synthetic_trajectories.ipynb b/examples/generate_synthetic_trajectories.ipynb
index 1ee4a1cc..2fd681c3 100644
--- a/examples/generate_synthetic_trajectories.ipynb
+++ b/examples/generate_synthetic_trajectories.ipynb
@@ -1,856 +1,247 @@
{
"cells": [
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "2ccfc0c4",
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "import pandas as pd\n",
- "import numpy as np\n",
- "from datetime import datetime, timedelta\n",
- "from zoneinfo import ZoneInfo\n",
- "import matplotlib.pyplot as plt\n",
- "plt.style.use('seaborn-v0_8-muted')\n",
- "from matplotlib import cm\n",
- "import geopandas as gpd\n",
- "\n",
- "from pyproj import Transformer\n",
- "from concurrent.futures import ProcessPoolExecutor\n",
- "import concurrent.futures\n",
- "import multiprocessing\n",
- "from multiprocessing import Pool\n",
- "from functools import partial\n",
- "import numpy.random as npr\n",
- "import matplotlib.dates as mdates\n",
- "from itertools import product\n",
- "import copy\n",
- "import pickle\n",
- "from tqdm import tqdm\n",
- "\n",
- "import nomad.io.base as loader\n",
- "import nomad.city_gen as cg\n",
- "from nomad.city_gen import City, Building\n",
- "import nomad.traj_gen as tg\n",
- "from nomad.traj_gen import Agent, Population\n",
- "import nomad.stop_detection.ta_dbscan as DBSCAN\n",
- "import nomad.stop_detection.lachesis as Lachesis\n",
- "from nomad.generation.sparsity import gen_params_target_q"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "3fd6c945",
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "# garden city\n",
- "\n",
- "city_geojson = gpd.read_file('garden_city.geojson')\n",
- "\n",
- "city = cg.load('garden-city.pkl')"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 2,
- "id": "3e5ee980",
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "# synthetic philly\n",
- "\n",
- "city_geojson = gpd.read_file('philly.geojson')\n",
- "\n",
- "s3 = boto3.client('s3', region_name=\"us-east-2\")\n",
- "pickle_buffer = io.BytesIO()\n",
- "s3.download_fileobj(\"synthetic-philly\", \"philadelphia-city.pkl\", pickle_buffer)\n",
- "pickle_buffer.seek(0)\n",
- "city = pickle.load(pickle_buffer)"
- ]
- },
{
"cell_type": "markdown",
- "id": "eb7675c1",
+ "id": "10e2517a",
"metadata": {},
"source": [
- "### Generate N agents"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "b76a575b",
- "metadata": {},
- "source": [
- "The following code maps our Garden City coordinates to a location in the Atlantic Ocean (Atlantis?)."
+ "# Synthetic Trajectory Generation with Nomad\n",
+ "\n",
+ "This notebook demonstrates how to generate realistic synthetic human mobility trajectories."
]
},
{
"cell_type": "code",
"execution_count": null,
- "id": "e21366a2",
- "metadata": {
- "tags": []
- },
+ "id": "58d68e64",
+ "metadata": {},
"outputs": [],
"source": [
- "def garden_city_to_lat_long(agent, sparse_traj=True, full_traj=False, diaries=True):\n",
- " def project_city_blocks_to_web_mercator(df):\n",
- " \"\"\"Convert (x, y) from 15m block units to Web Mercator meters via affine shift and projection.\"\"\"\n",
- " transformer = Transformer.from_crs(\"EPSG:3857\", \"EPSG:4326\", always_xy=True)\n",
- " df['x'] = 15 * df['x'] - 4265699\n",
- " df['y'] = 15 * df['y'] + 4392976\n",
- " if 'ha' in df:\n",
- " df['ha'] = 15 * df['ha']\n",
- " df['longitude'], df['latitude'] = transformer.transform(df['x'].values, df['y'].values)\n",
- " df['date'] = df['datetime'].dt.date\n",
- " return df\n",
- "\n",
- " def finalize(df):\n",
- " front = ['user_id', 'timestamp', 'longitude', 'latitude', 'x', 'y', 'date']\n",
- " cols = [col for col in front if col in df] + [col for col in df.columns if col not in front]\n",
- " return df[cols].rename(columns={'user_id': 'user_id', 'timestamp': 'timestamp'}).reset_index(drop=True)\n",
+ "import pandas as pd\n",
+ "import numpy as np\n",
+ "import matplotlib.pyplot as plt\n",
+ "plt.style.use('default')\n",
+ "import time\n",
+ "import os\n",
+ "from joblib import Parallel, delayed\n",
"\n",
- " if sparse_traj:\n",
- " agent.sparse_traj = finalize(project_city_blocks_to_web_mercator(agent.sparse_traj))\n",
- " if full_traj:\n",
- " agent.trajectory = finalize(project_city_blocks_to_web_mercator(agent.trajectory))\n",
- " \n",
- " if diaries:\n",
- " diary = agent.diary.copy()\n",
- " xs = []\n",
- " ys = []\n",
- " for loc in diary[\"location\"]:\n",
- " if loc is None:\n",
- " xs.append(None)\n",
- " ys.append(None)\n",
- " else:\n",
- " pt = agent.city.buildings[loc].geometry.centroid\n",
- " xs.append(pt.x)\n",
- " ys.append(pt.y)\n",
- " diary[\"x\"] = xs\n",
- " diary[\"y\"] = ys\n",
- " agent.diary = finalize(project_city_blocks_to_web_mercator(diary))"
+ "from nomad.city_gen import City\n",
+ "from nomad.traj_gen import Agent, Population\n",
+ "from nomad.stop_detection.viz import plot_pings, plot_time_barcode"
]
},
{
"cell_type": "code",
- "execution_count": 22,
- "id": "3d0fe233",
- "metadata": {
- "tags": []
- },
+ "execution_count": null,
+ "id": "9860e901",
+ "metadata": {},
"outputs": [],
"source": [
- "def philly_to_lat_long(agent, sparse_traj=True, full_traj=False, diaries=True):\n",
- " def project_point_to_web_mercator(x, y):\n",
- " \"\"\"\n",
- " Project a fractional (x, y) block coord to Web Mercator using affine interpolation.\n",
- " philly_grid_map is the grid_map produced by RealCityGenerator in virtual_philly.ipynb\n",
- " import it into this notebook through pkl\n",
- " \"\"\"\n",
- " i, j = int(math.floor(x)), int(math.floor(y))\n",
- " poly = philly_grid_map.get((i, j))\n",
- "\n",
- " if poly is None:\n",
- " raise ValueError(f\"No polygon found at grid cell ({i}, {j})\")\n",
- "\n",
- " # Bounds of the 1x1 block polygon in EPSG:3857\n",
- " minx, miny, maxx, maxy = poly.bounds\n",
+ "city = City.from_geopackage('garden-city.gpkg', edges_path='garden-city-edges.parquet')\n",
+ "city._build_hub_network(hub_size=16)\n",
+ "city.compute_gravity(exponent=2.0)\n",
"\n",
- " dx = x - i\n",
- " dy = y - j\n",
- "\n",
- " X = minx + dx * (maxx - minx)\n",
- " Y = miny + dy * (maxy - miny)\n",
- "\n",
- " return X, Y\n",
- "\n",
- " def apply_projection_to_df(df):\n",
- " \"\"\"Apply Web Mercator projection to a DataFrame with 'x' and 'y' columns.\"\"\"\n",
- " def safe_project(row):\n",
- " try:\n",
- " return project_point_to_web_mercator(row['x'], row['y'])\n",
- " except Exception:\n",
- " return (None, None)\n",
- "\n",
- " projected = df.apply(safe_project, axis=1)\n",
- " df[['x', 'y']] = pd.DataFrame(projected.tolist(), index=df.index)\n",
- "\n",
- " transformer = Transformer.from_crs(\"EPSG:3857\", \"EPSG:4326\", always_xy=True)\n",
- " if 'ha' in df:\n",
- " df['ha'] = 10 * df['ha'] # 10 because thats the sidelength of a block\n",
- " df['longitude'], df['latitude'] = transformer.transform(df['x'].values, df['y'].values)\n",
- " df['date'] = df['datetime'].dt.date\n",
- " return df\n",
- "\n",
- " def finalize(df):\n",
- " front = ['identifier', 'timestamp', 'longitude', 'latitude', 'x', 'y', 'date']\n",
- " cols = [col for col in front if col in df] + [col for col in df.columns if col not in front]\n",
- " return df[cols].rename(columns={'identifier': 'uid', 'timestamp': 'timestamp'}).reset_index(drop=True)\n",
- "\n",
- " if sparse_traj:\n",
- " agent.sparse_traj = finalize(apply_projection_to_df(agent.sparse_traj))\n",
- " if full_traj:\n",
- " agent.trajectory = finalize(apply_projection_to_df(agent.trajectory))\n",
- "\n",
- " if diaries:\n",
- " diary = agent.diary.copy()\n",
- " xs = []\n",
- " ys = []\n",
- " for loc in diary[\"location\"]:\n",
- " if loc is None:\n",
- " xs.append(None)\n",
- " ys.append(None)\n",
- " else:\n",
- " pt = agent.city.buildings[loc].geometry.centroid\n",
- " xs.append(pt.x)\n",
- " ys.append(pt.y)\n",
- " diary[\"x\"] = xs\n",
- " diary[\"y\"] = ys\n",
- " agent.diary = finalize(apply_projection_to_df(diary))"
+ "print(f\"City: {city.name}\")\n",
+ "print(f\"Dimensions: {city.dimensions}\")\n",
+ "print(f\"Buildings: {len(city.buildings_gdf)}\")"
]
},
{
"cell_type": "markdown",
- "id": "eb8c0db6",
+ "id": "9ff8ddec",
"metadata": {},
"source": [
- "## Simple trajectory generation\n",
+ "## Part 1: Effect of Sampling Parameters on Sparsity\n",
"\n",
- "For simple trajectory generation tasks that don't require too much computation power and can be done on a personal laptop, the following code generates a trajectory for each agent and saves it to a csv."
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 26,
- "id": "82f2112f",
- "metadata": {
- "tags": []
- },
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "Generating trajectories: 100%|██████████| 1/1 [00:28<00:00, 28.22s/it]\n"
- ]
- }
- ],
- "source": [
- "def generate_trajectory_data(agent, seed_trajectory=None, seed_sparsity=None):\n",
- " beta_params = gen_params_target_q(q_range=(0.3, 0.9), seed=seed_sparsity)\n",
- " rng = npr.default_rng(seed_sparsity)\n",
- " ha_sample = rng.uniform(11.5/15, 1)\n",
- "\n",
- " agent.reset_trajectory()\n",
- " agent.generate_trajectory(\n",
- " datetime = \"2024-01-01T07:00 -04:00\",\n",
- " end_time = pd.Timestamp('2024-01-31T09:00 -04:00'),\n",
- " seed=seed_trajectory,\n",
- " dt=1)\n",
- "\n",
- " agent.sample_trajectory(\n",
- " **beta_params,\n",
- " seed=seed_sparsity,\n",
- " ha=ha_sample,\n",
- " replace_sparse_traj=True)\n",
- "\n",
- " philly_to_lat_long(agent, sparse_traj=True, full_traj=False)\n",
- " agent.reset_trajectory(trajectory = False, sparse = False, diary = False)\n",
- " return None\n",
- "\n",
- "# Generate trajectories with progress bar\n",
- "N = 1\n",
- "population = Population(city)\n",
- "population.generate_agents(N=N, seed=250, name_count=2)\n",
- "\n",
- "for i, agent in enumerate(tqdm(population.roster.values(), desc=\"Generating trajectories\")):\n",
- " generate_trajectory_data(agent, seed_trajectory=i, seed_sparsity=i)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 27,
- "id": "cda6fba3",
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "agent.sparse_traj.to_csv(\"philly_sparse_traj.csv\", index=False)\n",
- "agent.trajectory.to_csv(\"philly_full_traj.csv\", index=False)"
+ "Generate 3 agents with 2-day trajectories, varying `beta_durations` and `beta_start` \n",
+ "to show their effect on sparsity (q = observed points / ground truth points)."
]
},
{
"cell_type": "code",
"execution_count": null,
- "id": "8f47ea1c",
+ "id": "b6336cc1",
"metadata": {},
"outputs": [],
"source": [
- "def generate_agent_trajectory(agent_id, agent, seed):\n",
+ "np.random.seed(42)\n",
+ "population = Population(city)\n",
+ "population.generate_agents(N=3, seed=42, name_count=2)\n",
"\n",
- " beta_params = gen_params_target_q(q_range=(0.4, 0.85), seed=seed)\n",
+ "# Vary beta_durations and beta_start to target different sparsity levels\n",
+ "sampling_params = [\n",
+ " {'beta_ping': 5, 'beta_start': 100, 'beta_durations': 60}, \n",
+ " {'beta_ping': 5, 'beta_start': 250, 'beta_durations': 150}, \n",
+ " {'beta_ping': 5, 'beta_start': 400, 'beta_durations': 240} \n",
+ "]\n",
"\n",
+ "# Generate 2-day trajectories for quick visualization\n",
+ "for i, (agent_id, agent) in enumerate(population.roster.items()):\n",
" agent.generate_trajectory(\n",
- " datetime = \"2024-01-01T08:00 -04:00\",\n",
- " end_time = pd.Timestamp('2024-01-21T08:30:00 -04:00'),\n",
- " seed=1,\n",
- " dt=0.25)\n",
- " print('finished generating trajectory')\n",
+ " datetime=pd.Timestamp(\"2024-01-01T07:00-04:00\"),\n",
+ " end_time=pd.Timestamp(\"2024-01-03T07:00-04:00\"),\n",
+ " seed=i\n",
+ " )\n",
+ "\n",
" agent.sample_trajectory(\n",
- " **beta_params,\n",
- " seed=seed,\n",
- " ha=13/15, # <<<<<<\n",
- " replace_sparse_traj=True)\n",
+ " **sampling_params[i],\n",
+ " replace_sparse_traj=True,\n",
+ " seed=i\n",
+ " )\n",
" \n",
- " garden_city_to_lat_long(agent,\n",
- " sparse_traj=True,\n",
- " full_traj=False)\n",
- " agent.reset_trajectory(trajectory = True, sparse = False, diary = False)\n",
- " \n",
- " return agent_id, copy.deepcopy(agent)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "d0f8a946",
- "metadata": {},
- "outputs": [],
- "source": [
- "population = Population(city)\n",
- "population.generate_agents(N=2, seed=2, name_count=2)"
+ " q = len(agent.sparse_traj) / len(agent.trajectory)\n",
+ " print(f\"Agent {i}: q={q:.3f}, beta_start={sampling_params[i]['beta_start']}, \"\n",
+ " f\"beta_dur={sampling_params[i]['beta_durations']}\")"
]
},
{
"cell_type": "code",
"execution_count": null,
- "id": "4946528f",
+ "id": "0e4b20fa",
"metadata": {
- "scrolled": true
+ "lines_to_next_cell": 1
},
"outputs": [],
"source": [
- "%%time \n",
- "# time for 2 weeks of data at dt = 0.25\n",
- "agent_1 = list(population.roster.values())[1]\n",
- "generate_agent_trajectory(agent_1, 10, 6)\n",
- "agent_1.sparse_traj.head()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "fbb51dfd",
- "metadata": {},
- "outputs": [],
- "source": [
- "def generate_trajectory_data(agent, seed_trajectory=None, seed_sparsity=None, use_datetime=None, use_lon_lat=True):\n",
- " beta_params = gen_params_ranges(seed=seed_sparsity)\n",
- " rng = npr.default_rng(seed_sparsity)\n",
- " ha_sample = rng.uniform(11.5/15, 16.5/15)\n",
- "\n",
- " agent.reset_trajectory()\n",
- " agent.generate_trajectory(\n",
- " datetime = \"2024-01-01T07:00 -04:00\",\n",
- " end_time = pd.Timestamp('2024-01-15T09:00:00 -04:00'),\n",
- " seed=seed_trajectory,\n",
- " dt=1)\n",
- "\n",
- " agent.sample_trajectory(\n",
- " **beta_params,\n",
- " seed=seed_sparsity,\n",
- " ha=ha_sample,\n",
- " replace_sparse_traj=True)\n",
- "\n",
- " garden_city_to_lat_long(agent, sparse_traj=True, full_traj=False, use_datetime=use_datetime, use_lon_lat=use_lon_lat)\n",
- " agent.reset_trajectory(trajectory = True, sparse = False, diary = False)\n",
- " return None\n",
- "\n",
- "# Generate trajectories with progress bar\n",
- "N = 100 \n",
- "population = Population(city)\n",
- "population.generate_agents(N=N, seed=314, name_count=2)\n",
+ "fig, axes = plt.subplots(2, 3, figsize=(15, 10), \n",
+ " gridspec_kw={'height_ratios': [10, 1]})\n",
"\n",
- "for i, agent in enumerate(tqdm(population.roster.values(), desc=\"Generating trajectories\")):\n",
- " generate_trajectory_data(agent, seed_trajectory=i, seed_sparsity=i, use_datetime=None, use_lon_lat=True)\n",
- " #agent.sparse_traj.rename(columns={'uid': 'identifier', 'timestamp': 'unix_timestamp', 'latitude':'device_lat', 'longitude':'device_lon', 'datetime':'local_datetime'}, inplace=True)\n",
- " agent.sparse_traj.rename(columns={'uid': 'user_id'}, inplace=True)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "ed6116df",
- "metadata": {},
- "outputs": [],
- "source": [
- "agent_id, agent = [(agent_id, agent) for agent_id, agent in population.roster.items()][0]\n",
- "agent_id"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "9724ff27",
- "metadata": {},
- "outputs": [],
- "source": [
- "def generate_trajectory_data(agent_id, agent, seed):\n",
- " agent.reset_trajectory()\n",
+ "for i, (agent_id, agent) in enumerate(population.roster.items()):\n",
+ " ax_map = axes[0, i]\n",
+ " ax_barcode = axes[1, i]\n",
" \n",
- " agent.generate_trajectory(\n",
- " local_timestamp=\"2024-01-01T06:00:00 +02:00\",\n",
- " end_time=pd.Timestamp(\"2024-01-20T12:00:00 +02:00\"),\n",
- " seed=105,\n",
- " dt=1)\n",
- "\n",
- " beta_duration = npr.uniform(25, 170)\n",
- " beta_start = max(npr.uniform(25, 520), beta_duration)\n",
- " beta_ping = min(npr.uniform(3, 15), beta_duration//2)\n",
- "\n",
- " agent.sample_trajectory(\n",
- " beta_start=beta_start,\n",
- " beta_durations=beta_duration,\n",
- " beta_ping=beta_ping,\n",
- " seed=seed,\n",
- " replace_sparse_traj=True)\n",
- "\n",
- " garden_city_to_lat_long(agent, sparse_traj=True, full_traj=False)\n",
- " return None\n",
+ " city.plot_city(ax=ax_map, doors=False, address=False)\n",
+ " \n",
+ " traj = agent.sparse_traj\n",
+ " plot_pings(traj, ax=ax_map, s=15, point_color='red', \n",
+ " x='x', y='y', timestamp='timestamp')\n",
+ " \n",
+ " plot_time_barcode(traj['timestamp'], ax=ax_barcode, set_xlim=True)\n",
+ " \n",
+ " q = len(traj) / len(agent.trajectory)\n",
+ " ax_map.set_title(f\"Agent {i}: {len(traj)} obs (q={q:.2f})\\n\"\n",
+ " f\"beta_start={sampling_params[i]['beta_start']}, \"\n",
+ " f\"beta_dur={sampling_params[i]['beta_durations']}\")\n",
+ " ax_map.set_axis_off()\n",
"\n",
- "# Generate trajectories with progress bar\n",
- "for agent_id, agent in tqdm(population.roster.items(), desc=\"Generating trajectories\"):\n",
- " generate_trajectory_data(agent_id, agent, seed=150)"
+ "plt.tight_layout()\n",
+ "plt.savefig('data/trajectories_visualization.png', dpi=150, bbox_inches='tight')\n",
+ "plt.show()"
]
},
{
- "cell_type": "code",
- "execution_count": null,
- "id": "957367ef",
+ "cell_type": "markdown",
+ "id": "dc7b266a",
"metadata": {},
- "outputs": [],
"source": [
- "# dataset no 1\n",
- "traj_cols = {\n",
- " \"user_id\": \"identifier\",\n",
- " \"timestamp\": \"unix_timestamp\",\n",
- " \"latitude\": \"device_lat\",\n",
- " \"longitude\": \"device_lon\",\n",
- " \"datetime\": \"local_datetime\"}\n",
- "# Save only sparse trajectories and diaries\n",
- "population.save_pop(\n",
- " sparse_path=\"output/gc_data.csv\",\n",
- " diaries_path=None,\n",
- " partition_cols=None,\n",
- " traj_cols=traj_cols,\n",
- " fmt=\"csv\"\n",
- ")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "8b8b2f21",
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "# dataset no 2\n",
- "traj_cols = {\n",
- " \"user_id\": \"user_id\",\n",
- " \"timestamp\": \"timestamp\",\n",
- " \"latitude\": \"latitude\",\n",
- " \"longitude\": \"longitude\",\n",
- " \"datetime\": \"datetime\"}\n",
- "# Save only sparse trajectories and diaries\n",
- "population.save_pop(\n",
- " sparse_path=\"output/gc_data/\",\n",
- " diaries_path=None,\n",
- " partition_cols=['date'],\n",
- " traj_cols=traj_cols,\n",
- " fmt=\"csv\"\n",
- ")"
+ "## Part 2: Parallel Generation at Scale\n",
+ "\n",
+ "Generate one-week trajectories for 15 agents (users) in parallel with joblib."
]
},
{
"cell_type": "code",
"execution_count": null,
- "id": "3c870944",
+ "id": "5238c745",
"metadata": {
- "scrolled": true
+ "lines_to_next_cell": 1
},
"outputs": [],
"source": [
- "sparse_df = loader.from_file(\"output/gc_data/\", format=\"csv\", traj_cols=traj_cols,\n",
- " parse_dates=True)"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "340dab62",
- "metadata": {},
- "source": [
- "## Generate dataset 3 for tutorial"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "1b22e370",
- "metadata": {},
- "outputs": [],
- "source": [
- "def generate_trajectory_data(agent, seed_trajectory=None, seed_sparsity=None, use_datetime=None, use_lon_lat=True):\n",
- " beta_params = gen_params_target_q(q_range=(0.2, 0.8), beta_dur_range=(25, 180), beta_ping_range=(1.5, 6), seed=seed_sparsity)\n",
- " rng = npr.default_rng(seed_sparsity)\n",
- " ha_sample = rng.uniform(11.5/15, 16.5/15)\n",
- "\n",
- " agent.reset_trajectory()\n",
+ "def generate_agent_trajectory(args):\n",
+ " \"\"\"Worker function for parallel generation.\"\"\"\n",
+ " identifier, home, work, seed = args\n",
+ " \n",
+ " city = City.from_geopackage('garden-city.gpkg', edges_path='garden-city-edges.parquet')\n",
+ " city._build_hub_network(hub_size=16)\n",
+ " city.compute_gravity(exponent=2.0)\n",
+ " agent = Agent(identifier=identifier, city=city, home=home, workplace=work)\n",
+ " \n",
" agent.generate_trajectory(\n",
- " datetime = \"2024-01-01T07:00 -04:00\",\n",
- " end_time = pd.Timestamp('2024-01-21T09:00:00 -04:00'),\n",
- " seed=seed_trajectory,\n",
- " dt=0.15)\n",
+ " datetime=pd.Timestamp(\"2024-01-01T07:00-04:00\"),\n",
+ " end_time=pd.Timestamp(\"2024-01-08T07:00-04:00\"),\n",
+ " seed=seed\n",
+ " )\n",
"\n",
" agent.sample_trajectory(\n",
- " **beta_params,\n",
- " seed=seed_sparsity,\n",
- " ha=ha_sample,\n",
- " replace_sparse_traj=True)\n",
- "\n",
- " garden_city_to_lat_long(agent, sparse_traj=True, full_traj=False, use_datetime=use_datetime, use_lon_lat=use_lon_lat)\n",
- " agent.reset_trajectory(trajectory = True, sparse = False, diary = False)\n",
- " return None\n",
- "\n",
- "# Generate trajectories with progress bar\n",
- "N = 350 \n",
- "population = Population(city)\n",
- "population.generate_agents(N=N, seed=5, name_count=2)\n",
- "\n",
- "for i, agent in enumerate(tqdm(population.roster.values(), desc=\"Generating trajectories\")):\n",
- " if i == 0:\n",
- " continue\n",
- " generate_trajectory_data(agent, seed_trajectory=i, seed_sparsity=i, use_datetime=False, use_lon_lat=False)\n",
- " agent.sparse_traj.rename(columns={'uid': 'gc_identifier', 'timestamp': 'unix_ts', 'x':'dev_x', 'y':'dev_y'}, inplace=True)"
+ " beta_ping=5,\n",
+ " replace_sparse_traj=True,\n",
+ " seed=seed\n",
+ " )\n",
+ " \n",
+ " sparse_df = agent.sparse_traj.copy()\n",
+ " sparse_df['user_id'] = identifier\n",
+ " return sparse_df"
]
},
{
"cell_type": "code",
"execution_count": null,
- "id": "f71d0544",
+ "id": "aa1b6112",
"metadata": {},
"outputs": [],
"source": [
- "# Make data for agent 0\n",
- "start_time = pd.date_range(start='2024-01-01T07:00 -04:00', periods=4, freq='60min')\n",
- "tz_offset = loader._offset_seconds_from_ts(start_time[0])\n",
- "unix_timestamp = [int(t.timestamp()) for t in start_time]\n",
- "duration = [60]*4 # in minutes\n",
- "location = ['h-x13-y11'] * 1 + ['h-x13-y9'] * 1 + ['w-x18-y10'] * 1 + ['w-x18-y8'] * 1\n",
- "\n",
- "destinations = pd.DataFrame(\n",
- " {\"datetime\":start_time,\n",
- " \"timestamp\":unix_timestamp,\n",
- " \"duration\":duration,\n",
- " \"location\":location}\n",
- " )\n",
- "destinations = condense_destinations(destinations)\n",
- "\n",
- "agent_0 = list(population.roster.values())[0]\n",
- "\n",
- "rng = npr.default_rng(0)\n",
- "ha_sample = rng.uniform(11.5/15, 16.5/15)\n",
- "\n",
- "agent_0.reset_trajectory()\n",
- "agent_0.generate_trajectory(destination_diary=destinations, seed=0, dt=0.15)\n",
+ "np.random.seed(100)\n",
+ "n_agents = 15\n",
+ "homes = city.buildings_gdf[city.buildings_gdf['building_type'] == 'home']['id'].tolist()\n",
+ "workplaces = city.buildings_gdf[city.buildings_gdf['building_type'] == 'workplace']['id'].tolist()\n",
"\n",
- "agent_0.sample_trajectory(\n",
- " beta_ping=2,\n",
- " beta_start=None,\n",
- " beta_durations=None,\n",
- " seed=0,\n",
- " ha=ha_sample,\n",
- " replace_sparse_traj=True)\n",
- "\n",
- "garden_city_to_lat_long(agent_0, sparse_traj=True, full_traj=False, use_datetime=False, use_lon_lat=False)\n",
- "agent_0.reset_trajectory(trajectory = True, sparse = False, diary = False)\n",
- "agent_0.sparse_traj.rename(columns={'uid': 'gc_identifier', 'timestamp': 'unix_ts', 'x':'dev_x', 'y':'dev_y'}, inplace=True)"
+ "agent_params = [\n",
+ " (f'agent_{i:04d}', \n",
+ " np.random.choice(homes),\n",
+ " np.random.choice(workplaces),\n",
+ " i)\n",
+ " for i in range(n_agents)\n",
+ "]"
]
},
{
"cell_type": "code",
"execution_count": null,
- "id": "aff38e9c",
+ "id": "a0364bc9",
"metadata": {},
"outputs": [],
"source": [
- "# dataset no 3\n",
- "traj_cols = {\n",
- " \"user_id\": \"gc_identifier\",\n",
- " \"timestamp\": \"unix_ts\",\n",
- " \"x\": \"dev_x\",\n",
- " \"y\": \"dev_y\"}\n",
- "# Save only sparse trajectories and diaries\n",
- "population.save_pop(\n",
- " sparse_path=\"output/gc_data_long/\",\n",
- " diaries_path=None,\n",
- " partition_cols=['date'],\n",
- " traj_cols=traj_cols,\n",
- " fmt=\"parquet\"\n",
- ")"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "5c2af855",
- "metadata": {},
- "source": [
- "For larger trajectory generation tasks that require a lot of compute power, we can parallelize the trajectory generation using the following code. We generate ground-truth trajectories in agent-month \"chunks\", sparsify each chunk, then reset the ground-truth trajectory field to lessen the memory usage. "
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "41b6d757",
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "# Using parallel processing (e.g., using a cluster)\n",
- "%%time\n",
+ "print(f\"Generating {n_agents} agents in parallel...\")\n",
+ "start_time = time.time()\n",
"\n",
- "def generate_agent_trajectory(agent_id, agent, seed):\n",
- " \n",
- " beta_duration = npr.uniform(15, 180)\n",
- " beta_start = max(npr.uniform(60, 1200), beta_duration*3)\n",
- " beta_ping = npr.uniform(1.5, 30)\n",
- " \n",
- " param = (beta_start, beta_duration, beta_ping)\n",
- " \n",
- " for month in range(1,13):\n",
- " days = calendar.monthrange(2024, month)[1]\n",
- " population_n.generate_trajectory(agent, \n",
- " T=datetime(2024, month, days, hour=23, minute=59), \n",
- " seed=seed)\n",
- " \n",
- " agent.sample_traj_hier_nhpp(*param, \n",
- " seed=seed,\n",
- " reset_traj=True)\n",
- " \n",
- " garden_city_to_lat_long(agent,\n",
- " sparse_traj=True,\n",
- " full_traj=False)\n",
- " \n",
- " return agent_id, copy.deepcopy(agent)\n",
- "\n",
- "manager = multiprocessing.Manager()\n",
- "shared_roster = manager.dict(population_n.roster)\n",
- "\n",
- "start = 6001 # 12001 # can modify\n",
- "end = 12001 # 18001 # can modify\n",
- "roster = dict(population_n.roster)\n",
- "batch = islice(roster.items(), start, end)\n",
- "\n",
- "with ProcessPoolExecutor() as executor:\n",
- " with tqdm(total=(end-start), desc=\"Processing agents\") as pbar:\n",
- " futures = [\n",
- " executor.submit(generate_agent_trajectory, agent_id, agent, i+15000)\n",
- " for i, (agent_id, agent) in enumerate(batch, start=start)\n",
- " ]\n",
- " results = []\n",
- " for future in futures:\n",
- " results.append(future.result())\n",
- " pbar.update(1)\n",
+ "results = Parallel(n_jobs=-1, verbose=10)(\n",
+ " delayed(generate_agent_trajectory)(params) for params in agent_params\n",
+ ")\n",
"\n",
- "for agent_id, agent in results:\n",
- " population_n.roster[agent_id] = agent"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "68ea90d9",
- "metadata": {},
- "source": [
- "This code saves the generated trajectories in a parquet file, using the date as the partition column. "
+ "generation_time = time.time() - start_time\n",
+ "print(f\"Generated {n_agents} agents in {generation_time:.2f}s ({generation_time/n_agents:.2f}s per agent)\")"
]
},
{
"cell_type": "code",
"execution_count": null,
- "id": "a8d9885f",
- "metadata": {
- "tags": []
- },
+ "id": "e8a48448",
+ "metadata": {},
"outputs": [],
"source": [
- "partition_cols = {\n",
- " 'sparse_traj': ['date'],\n",
- " 'diaries': ['identifier']\n",
- "}\n",
+ "all_trajectories = pd.concat(results, ignore_index=True)\n",
+ "all_trajectories = city.to_mercator(all_trajectories)\n",
+ "all_trajectories['date'] = pd.to_datetime(all_trajectories['datetime']).dt.date\n",
"\n",
- "roster = dict(islice(population_n.roster.items(), start, end))\n",
+ "output_path = 'data/trajectories_15_users'\n",
+ "for date, group in all_trajectories.groupby('date'):\n",
+ " os.makedirs(f'{output_path}/date={str(date)}', exist_ok=True)\n",
+ " group.to_parquet(f'{output_path}/date={str(date)}/data.parquet', index=False)\n",
"\n",
- "population.save_pop(bucket=\"synthetic-raw-data\",\n",
- " prefix=f\"agents-{start+15000}-{end+15000-1}/\",\n",
- " save_full_traj=False,\n",
- " save_sparse_traj=True,\n",
- " save_homes=True,\n",
- " save_diaries=True,\n",
- " partition_cols=partition_cols,\n",
- " roster=roster)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "bee94160",
- "metadata": {
- "tags": []
- },
- "outputs": [],
- "source": [
- "# Load the Parquet files\n",
- "s3_path = \"s3://synthetic-raw-data/agents-1-1001/sparse_trajectories.parquet/\"\n",
- "df1 = pd.read_parquet(s3_path)\n",
- "s3_path = \"s3://synthetic-raw-data/agents-1001-2000/sparse_trajectories.parquet/\"\n",
- "df2 = pd.read_parquet(s3_path)"
+ "print(f\"Saved {len(all_trajectories):,} records to {output_path}/\")"
]
}
],
"metadata": {
"jupytext": {
- "formats": "ipynb,auto:percent"
+ "formats": "ipynb,py:percent"
},
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.10.0"
- },
- "nbdime-conflicts": {
- "local_diff": [
- {
- "diff": [
- {
- "diff": [
- {
- "key": 0,
- "op": "addrange",
- "valuelist": [
- "Python 3 (ipykernel)"
- ]
- },
- {
- "key": 0,
- "length": 1,
- "op": "removerange"
- }
- ],
- "key": "display_name",
- "op": "patch"
- }
- ],
- "key": "kernelspec",
- "op": "patch"
- },
- {
- "diff": [
- {
- "diff": [
- {
- "key": 0,
- "length": 1,
- "op": "removerange"
- }
- ],
- "key": "version",
- "op": "patch"
- }
- ],
- "key": "language_info",
- "op": "patch"
- }
- ],
- "remote_diff": [
- {
- "diff": [
- {
- "diff": [
- {
- "key": 0,
- "op": "addrange",
- "valuelist": [
- "Python 3.10 (daphme)"
- ]
- },
- {
- "key": 0,
- "length": 1,
- "op": "removerange"
- }
- ],
- "key": "display_name",
- "op": "patch"
- }
- ],
- "key": "kernelspec",
- "op": "patch"
- },
- {
- "diff": [
- {
- "diff": [
- {
- "diff": [
- {
- "key": 3,
- "op": "addrange",
- "valuelist": "0"
- },
- {
- "key": 3,
- "length": 1,
- "op": "removerange"
- }
- ],
- "key": 0,
- "op": "patch"
- }
- ],
- "key": "version",
- "op": "patch"
- }
- ],
- "key": "language_info",
- "op": "patch"
- }
- ]
- },
- "toc-autonumbering": false
+ }
},
"nbformat": 4,
"nbformat_minor": 5
diff --git a/examples/grid_based_demo.ipynb b/examples/grid_based_demo.ipynb
index 03022d0a..90114911 100644
--- a/examples/grid_based_demo.ipynb
+++ b/examples/grid_based_demo.ipynb
@@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "markdown",
- "id": "92838936",
+ "id": "f56b531d",
"metadata": {},
"source": [
"# Grid-Based Stop Detection"
@@ -10,7 +10,7 @@
},
{
"cell_type": "markdown",
- "id": "cb276fd9",
+ "id": "9cfdb26e",
"metadata": {},
"source": [
"The stop detection algorithms implemented in `nomad` support different combinations of input formats that are common in commercial datasets, detecting default names when possible\n",
@@ -23,69 +23,78 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "id": "19184dee",
- "metadata": {},
+ "execution_count": 1,
+ "id": "24b50a14",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2025-11-24T18:32:11.959806Z",
+ "iopub.status.busy": "2025-11-24T18:32:11.959806Z",
+ "iopub.status.idle": "2025-11-24T18:32:16.169725Z",
+ "shell.execute_reply": "2025-11-24T18:32:16.169725Z"
+ }
+ },
"outputs": [],
"source": [
"%matplotlib inline\n",
+ "import matplotlib\n",
+ "matplotlib.use('TkAgg') # Non-blocking backend\n",
+ "import matplotlib.pyplot as plt\n",
+ "plt.ion() # Interactive mode\n",
"\n",
"# Imports\n",
"import nomad.io.base as loader\n",
"import geopandas as gpd\n",
"from shapely.geometry import box\n",
- "import pandas as pd\n",
- "import numpy as np\n",
- "import matplotlib.pyplot as plt\n",
- "from nomad.stop_detection.viz import plot_stops_barcode, plot_time_barcode\n",
+ "from nomad.stop_detection.viz import plot_stops_barcode, plot_time_barcode, plot_hexagons, plot_pings\n",
"import nomad.stop_detection.grid_based as GRID_BASED\n",
"import nomad.filters as filters \n",
"\n",
"# Load data\n",
- "city = gpd.read_file(\"garden_city.geojson\").to_crs('EPSG:3857')\n",
- "outer_box = box(*city.total_bounds).buffer(15, join_style='mitre')\n",
+ "import nomad.data as data_folder\n",
+ "from pathlib import Path\n",
+ "data_dir = Path(data_folder.__file__).parent\n",
+ "city = gpd.read_parquet(data_dir / 'garden-city-buildings-mercator.parquet')\n",
+ "outer_box = box(*city.total_bounds)\n",
"\n",
- "filepath_root = '../tutorials/IC2S2-2025/gc_data_long/'\n",
- "tc = {\n",
- " \"user_id\": \"gc_identifier\",\n",
- " \"timestamp\": \"unix_ts\",\n",
- " \"x\": \"dev_x\",\n",
- " \"y\": \"dev_y\",\n",
- " \"ha\":\"ha\",\n",
- " \"date\":\"date\"}\n",
+ "filepath_root = 'gc_data_long/'\n",
+ "tc = {\"user_id\": \"gc_identifier\", \"x\": \"dev_x\", \"y\": \"dev_y\", \"timestamp\": \"unix_ts\"}\n",
"\n",
"users = ['admiring_brattain']\n",
- "traj = loader.sample_from_file(filepath_root, format='parquet', users=users, filters = ('date','==', '2024-01-01'), traj_cols=tc)\n",
+ "traj = loader.sample_from_file(filepath_root, format='parquet', users=users, filters=('date','==', '2024-01-01'), traj_cols=tc)\n",
"\n",
- "# Grid-based\n",
- "traj['h3_cell'] = filters.to_tessellation(traj, index=\"h3\", res=10, x='dev_x', y='dev_y', data_crs='EPSG:3857')\n",
- "stops_gb = GRID_BASED.grid_based(traj, time_thresh=240, complete_output=True, timestamp='unix_ts', location_id='h3_cell')"
+ "# Grid-based - data is in Web Mercator (EPSG:3857) projected coordinates\n",
+ "traj['h3_cell'] = filters.to_tessellation(traj, index=\"h3\", res=10, traj_cols=tc, data_crs='EPSG:3857')\n",
+ "stops_gb = GRID_BASED.grid_based(traj, time_thresh=240, complete_output=True, location_id='h3_cell', traj_cols=tc)"
]
},
{
"cell_type": "code",
- "execution_count": 21,
- "id": "62555a1b",
- "metadata": {},
- "outputs": [
- {
- "data": {
- "image/png": "iVBORw0KGgoAAAANSUhEUgAAA90AAACMCAYAAABh9MpJAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8ekN5oAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAf0UlEQVR4nO3deXRU9f3/8dckk8m+sCSWyFYwrCL8vojSooKagobFVhFMwUKpHkQE7TmtonXDys+D39qjglCkgFplEUQPxpZzsA0uHJUlRI9VItqEqoAQCFkJ2T6/P/jd68xkJpkslwzx+ThnDsy9n/v5vO/nfu7n3ndmcxljjAAAAAAAQLuL6OgAAAAAAADorEi6AQAAAABwCEk3AAAAAAAOIekGAAAAAMAhJN0AAAAAADiEpBsAAAAAAIeQdAMAAAAA4BCSbgAAAAAAHELSDQAAAACAQ0i6AQA/GLNnz1bfvn2bLVdUVCSXy6UXXnjB8Zg6St++fTV79uyODgMAgE6PpBsAEPYKCwt11113acCAAYqLi1NcXJyGDBmi+fPn65NPPumwuHbu3CmXy+Xz6Nq1q0aPHq1XXnmlw+I6lz777DM9+uijKioq6uhQAAAIS+6ODgAAgKbk5ORo+vTpcrvdmjFjhoYPH66IiAgdOHBAW7du1cqVK1VYWKg+ffo0W9fq1avV0NDQ7jEuXLhQo0aNkiSdOHFCmzZt0syZM3Xq1CnNnz+/3dsLJ5999pkWL16scePGhfQuAgAAfmhIugEAYeurr77SLbfcoj59+uif//ynevTo4bN+6dKlWrFihSIimn7jVmVlpeLj4xUVFeVInFdeeaWmTp1qP583b5769eun9evXd/qkGwAANI23lwMAwtaTTz6pyspKrVu3rlHCLUlut1sLFy5Ur1697GWzZ89WQkKCvvrqK2VlZSkxMVEzZsyw1/m/Gnvq1CnNnj1bycnJSklJ0axZs3Tq1Kk2xe3xeNSlSxe53b5/2163bp2uueYapaWlKTo6WkOGDNHKlSsbbb93715NmDBB3bt3V2xsrH784x9rzpw5PmUaGhr09NNPa+jQoYqJidEFF1yguXPnqqSkxKecMUaPP/64evbsqbi4OF199dX697//HfK+bNy4USNHjlRiYqKSkpI0bNgwPfPMM5KkF154QTfffLMk6eqrr7bfYr9z5057+xUrVmjo0KGKjo5Wenq65s+f36h/x40bp4svvlj79u3TT3/6U3uf//KXvzSKZ9myZRo6dKji4uLUpUsXXXrppVq/fn3I+wMAwLnGK90AgLCVk5Ojiy66SJdffnmLtqurq9OECRN0xRVX6E9/+pPi4uICljPG6IYbbtD777+vO+64Q4MHD9brr7+uWbNmtai98vJyFRcXS5JOnjyp9evX69NPP9WaNWt8yq1cuVJDhw7VlClT5Ha79eabb+rOO+9UQ0OD/Yr4sWPHNH78eKWmpmrRokVKSUlRUVGRtm7d6lPX3Llz9cILL+jXv/61Fi5cqMLCQi1fvlz79+/Xrl277Ff1H374YT3++OPKyspSVlaW8vLyNH78eNXU1DS7Xzt27FB2drauvfZaLV26VJL0+eefa9euXbr77rt11VVXaeHChXr22Wf1wAMPaPDgwZJk//voo49q8eLFyszM1Lx581RQUKCVK1dqz549PjFKUklJibKysjRt2jRlZ2fr1Vdf1bx58+TxeOw/OKxevVoLFy7U1KlTdffdd6u6ulqffPKJPvroI/3yl79s0TEDAOCcMQAAhKHS0lIjyfz85z9vtK6kpMQcP37cflRVVdnrZs2aZSSZRYsWNdpu1qxZpk+fPvbzN954w0gyTz75pL2srq7OXHnllUaSWbduXZMx5ubmGkmNHhEREWbJkiWNynvHaZkwYYLp16+f/fz11183ksyePXuCtvvee+8ZSeaVV17xWb59+3af5ceOHTMej8dMnDjRNDQ02OUeeOABI8nMmjWr
yf27++67TVJSkqmrqwtaZvPmzUaSyc3N9VlutT1+/HhTX19vL1++fLmRZNauXWsvGzt2rJFknnrqKXvZmTNnzIgRI0xaWpqpqakxxhhzww03mKFDhzYZMwAA4Ya3lwMAwlJZWZkkKSEhodG6cePGKTU11X4899xzjcrMmzev2Tb+/ve/y+12+5SNjIzUggULWhTrww8/rB07dmjHjh3atGmTsrOz9Yc//MF+G7YlNjbW/n9paamKi4s1duxY/ec//1FpaakkKSUlRdLZV/lra2sDtrd582YlJyfrZz/7mYqLi+3HyJEjlZCQoNzcXEnS22+/rZqaGi1YsEAul8ve/p577glpv1JSUlRZWakdO3aE2hU2q+177rnH5zP3t99+u5KSkvTWW2/5lHe73Zo7d6793OPxaO7cuTp27Jj27dtnx/PNN99oz549LY4HAICOQtINAAhLiYmJkqSKiopG61atWqUdO3bo5ZdfDrit2+1Wz549m23j0KFD6tGjR6PEfuDAgT7PT58+raNHj/o8vA0bNkyZmZnKzMzUtGnT9PLLL2vSpElatGiRjh8/bpfbtWuXMjMzFR8fr5SUFKWmpuqBBx6QJDvpHjt2rG666SYtXrxY3bt31w033KB169bpzJkzdj0HDx5UaWmp0tLSfP74kJqaqoqKCh07dszeP0nKyMjwiTc1NVVdunRptn/uvPNODRgwQNdff7169uypOXPmaPv27c1u5922f196PB7169fPXm9JT09XfHy8z7IBAwZIkv1zZPfdd58SEhJ02WWXKSMjQ/Pnz9euXbtCigcAgI5C0g0ACEvJycnq0aOHPv3000brLr/8cmVmZmrMmDEBt42Ojm72G81bYtOmTerRo4fPoznXXnutqqurtXv3bklnv4n92muvVXFxsf785z/rrbfe0o4dO/Tb3/5WkuyfMnO5XNqyZYs++OAD3XXXXfr22281Z84cjRw50v4DRENDg9LS0uxX1/0fjz32WLvsd1pamvLz87Vt2zZNmTJFubm5uv7661v8mff2MnjwYBUUFGjjxo264oor9Nprr+mKK67QI4880iHxAAAQCr5IDQAQtiZOnKi//vWv2r17ty677LJ2r9/6KbKKigqfV7sLCgp8yk2YMKHFb7Guq6uT9P0r9W+++abOnDmjbdu2qXfv3nY5663g/kaPHq3Ro0dryZIlWr9+vWbMmKGNGzfqtttuU//+/fX2229rzJgxPm9ZD7R/0tlXxvv162cvP378eKNvOQ/G4/Fo8uTJmjx5shoaGnTnnXdq1apVeuihh3TRRRf5vG09UNsFBQU+bdfU1KiwsFCZmZk+5Q8fPmz/tJvliy++kCSfb5yPj4/X9OnTNX36dNXU1OjGG2/UkiVLdP/99ysmJiakfQIA4FzilW4AQNi69957FRcXpzlz5ui7775rtN4Y06b6s7KyVFdX5/OzXfX19Vq2bJlPuR49ethvH7cezcnJyZEkDR8+XNLZz4r7x1xaWqp169b5bFdSUtJov0aMGCFJ9lvMp02bpvr6ev3xj39s1G5dXZ39k1yZmZmKiorSsmXLfOp8+umnm41fkk6cOOHzPCIiQpdccolPLFaS7P8zYJmZmfJ4PHr22Wd92l6zZo1KS0s1ceLERnGvWrXKfl5TU6NVq1YpNTVVI0eODBiPx+PRkCFDZIwJ+vl3AAA6Gq90AwDCVkZGhtavX6/s7GwNHDhQM2bM0PDhw2WMUWFhodavX6+IiIiQPr8dyOTJkzVmzBgtWrRIRUVFGjJkiLZu3Wp/vjpU7733nqqrqyWd/cmwbdu26Z133tEtt9yiQYMGSZLGjx9vv2o8d+5cVVRUaPXq1UpLS9ORI0fsul588UWtWLFCv/jFL9S/f3+Vl5dr9erVSkpKUlZWlqSzn/ueO3eunnjiCeXn52v8+PGKiorSwYMHtXnzZj3zzDOaOnWqUlNT9bvf/U5PPPGEJk2apKysLO3fv1//+Mc/1L1792b3
67bbbtPJkyd1zTXXqGfPnjp06JCWLVumESNG2D8LNmLECEVGRmrp0qUqLS1VdHS0/Vvk999/vxYvXqzrrrtOU6ZMUUFBgVasWKFRo0Zp5syZPm2lp6dr6dKlKioq0oABA7Rp0ybl5+fr+eeft39abPz48frRj36kMWPG6IILLtDnn3+u5cuXa+LEifZ3AAAAEHY68JvTAQAIyZdffmnmzZtnLrroIhMTE2NiY2PNoEGDzB133GHy8/N9ys6aNcvEx8cHrMf/J8OMMebEiRPm1ltvNUlJSSY5OdnceuutZv/+/a3+yTCPx2MGDRpklixZYv/UlWXbtm3mkksuMTExMaZv375m6dKlZu3atUaSKSwsNMYYk5eXZ7Kzs03v3r1NdHS0SUtLM5MmTTJ79+5t1P7zzz9vRo4caWJjY01iYqIZNmyYuffee83hw4ftMvX19Wbx4sWmR48eJjY21owbN858+umnpk+fPs3+ZNiWLVvM+PHjTVpamvF4PKZ3795m7ty55siRIz7lVq9ebfr162ciIyMb/XzY8uXLzaBBg0xUVJS54IILzLx580xJSYnP9mPHjjVDhw41e/fuNT/5yU9MTEyM6dOnj1m+fLlPuVWrVpmrrrrKdOvWzURHR5v+/fub3//+96a0tLTJ/QAAoCO5jGnje/MAAADaYNy4cSouLg74pXkAAJzv+Ew3AAAAAAAOIekGAAAAAMAhJN0AAAAAADiEz3QDAAAAAOAQXukGAAAAAMAhJN0AAAAAADjEHUqhhoYGHT58WImJiXK5XE7HBAAAAABAWDPGqLy8XOnp6YqICP56dkhJ9+HDh9WrV692Cw4AAAAAgM7g66+/Vs+ePYOuDynpTkxMtCtLSkqSJH118itN3zJdxhi5XC5tmrpJkjR9y3R5Ij2Kccc0WWd1XbVq6mu0aeom9e/aP2g5q51Q6mytUGNprVD2wekYmuMfY6B4WnMsrHqezHxS9759b8Btq+uqVVlbKRnJHeG2x1N794N3/GfqzuhQ6SH1Te4rj9tjtx/viZekZmMOlfe+xXviFeOOUWl1qd12UkxSwLIt6YemjkuwcRXo/G3LcQ6lzWAxS7LLS83PIf596r19/6797boltes4ChZzoPrPxbzllEBjNlCZjpyvLMH6Odzj8xcu8Z7POuKcC+VcCTf+Yy3QvNYe1z50Pp11njrX83RT+ZO1rCPPwbbMa6Hc85/vAo2DsrIy9erVy86Xgwkp6bbeUp6UlGQn3Qm1CYqMjVRURJRqG2qVkJggSYqMjVR8dLziouKarDOyNlL1Z+qVkJhg1xmI1U4odbZWqLG0Vij74HQMzfGPMVA8rTkWVj3xifFBt42sjdTpqtOSkWI9sfZ4au9+8I4/si5SrjMuxSXGKdodbbcfH382iWsu5lB571t8/Nl6aqNq7bYTYxMDlm1JPzR1XIKNq0Dnb1uOcyhtBotZkl1ean4O8e9T7+2TkpLsuiW16zgKFnOg+s/FvOWUQGM2UJmOnK8swfo53OPzFy7xns864pwL5VwJN/5jLdC81h7XPnQ+nXWeOtfzdFP5k7WsI8/Btsxrodzzn++aGgfNfQSbL1IDAAAAAMAhJN0AAAAAADiEpBsAAAAAAIeQdAMAAAAA4BCSbgAAAAAAHELSDQAAAACAQ0i6AQAAAABwCEk3AAAAAAAOIekGAAAAAMAhJN0AAAAAADiEpBsAAAAAAIeQdAMAAAAA4JA2Jd1le8t0as8p/Xf5f5XzWo697MRHJyRJ3334nc9Dkr742xeSpBMfnVDZ3rJm28h5Lceu84u/fWHXI0n5/5vv007+/+b7rPfnHYNV13cffhdyLG1h7cN3H35n94G31sawYcMGbdiwoT1C1NFXj9r9bD33bsc6FkXrixodV0mNnku+++Vdn3+5in0VqthXYY8np3j3cc2yGpXsKdGJj06oYl+Fjqw8Yq+zYrX6JNhx898P67n38uItxXbd1vLanNqA8RVvKVbxlmK7
7ZzXcrRhwwYtWLDAp5x1zDds2KBJV07y2S//mPzH1YQJEyRJ/13+X53ac0ple8v02H2P+dTrfR63lP9YDjY+rb71Lx/K8a/YV2H304mPTujoq0f12H2P2eP06KtHW3w+hXIuWeMilPPV6TmlJfznTklBz92i9UV2/1pjwJpbvc8B7/P5XPM+Tt5zq7f2mEesdvzPv5aw4rOuVxbvuaJofVHI/dle831nZJ1z3uPUf+x7j3VrTFv/et8XWHU0dU8hfT9nW2PQ/ziHyjumYNcbf977Fuj6G4g1d1n3bN7Xden788b72ic1vt8K1F5T/RxOgh3jlhx3//qs7YKNN//7U/9lTfVrS+MItP3uB3f7PPceXy1pL5yua/7aMjc2tV/efXXwwYOtbsPbf/7vf+z7L4v3fafU+mtYsLnDf16UGs+BVk5UvKVYFfsq7DLNsba35pfcN3Pt/bDGeVvHdzCB9veLv32h3Q/uDmmessr7Xwus5f51tCVnbHvSvfeUqg5WKWerV9K95+yN2rEPj/k8JOn4nuNng94TYtK9Nceu8/ie43Y9klRaUOrTTmlBqc96f94xWHUd+/BYyLG0hbUPxz48ZveBt9bG0J5Jd/n+crufrefe7VjH4uS+k42Oq6RGzyXf/fKuz79cRV6FKvIq7PHkFJ8+Piad2nNKJ/acUEVehU5/edpeZcVq9Umw4+a/H9Zz7+WV+ZV23dZy86kJGF9lfqUqP660287Zejbp3rx5s08576T74IGDPvvlH5P/uNq5c6ckqepglU7tPTvpb9+23ade7/O4pfzHcrDxafWtf/lQjn9FXoXdTyf2nFD5/nJt37bdHqfl+8sdSbqtcRHK+RpONyf+c6ekoOfuyX0n7f61xoA1t3qfA97n87nWKOn+/+eot/aYR6x2/M+/lrDis65XFu+54uS+kyH3J0l3cNY55z1O/ce+91i3xrT1r/d9gVVHU/cU0vdztjUG/Y9zqLxjCna98ee9b4Guv4FYc5d1z+Z9XZe+P2+8r31S4/utQO011c/hJNgxbslx96/P2i7YePO/P/Vf1lS/tjSOQNtXfes7H3qPr5a0F07XNX9OJd3efVV/qr7VbXirOVxj339ZvO87pdZfw4LNHf7zotR4DrRyosr8SlXkVdhlmmNtb80vuW/m2vthjfO2ju9gAu3v8T3HVfVtVUjzlFXe/1pgLfevoy05I28vBwAAAADAISTdAAAAAAA4hKQbAAAAAACHkHQDAAAAAOAQkm4AAAAAABxC0g0AAAAAgENIugEAAAAAcAhJNwAAAAAADiHpBgAAAADAISTdAAAAAAA4hKQbAAAAAACHkHQDAAAAAOAQkm4AAAAAABxC0g0AAAAAgENIugEAAAAAcAhJNwAAAAAADiHpBgAAAADAISTdAAAAAAA4hKQbAAAAAACHkHQDAAAAAOAQkm4AAAAAABxC0g0AAAAAgENIugEAAAAAcAhJNwAAAAAADiHpBgAAAADAISTdAAAAAAA4hKQbAAAAAACHkHQDAAAAAOAQkm4AAAAAABxC0g0AAAAAgENIugEAAAAAcAhJNwAAAAAADiHpBgAAAADAISTdAAAAAAA4hKQbAAAAAACHkHQDAAAAAOAQkm4AAAAAABxC0g0AAAAAgENIugEAAAAAcAhJNwAAAAAADiHpBgAAAADAISTdAAAAAAA4xN2WjZMuTVKkK1L1tfWadOMke1msO1aSlDY6rdE2qaNSJUndRnXT6brTzbYx6cZJOvDBAcW6YxXXPU7JGcn2uuSByT7t1NfWB2zTYq2zYrDqqqmrCSmWtrD6xeP2KLpbdKP1ofaHv+zs7PYIT5KU+H8S1W1AN8V1j7Ofe7dztPyoDnxwQHGpceo2sFuj7QP1vfd+edfnXzbhfxIkI0W7o1VfW98u+xNI0qVJ3z9Jk1JGpSgqIkrlNeUytcZeZcVq9Umw4+a/H9Zz7+XxI+JVe6zWZ7nrYlfA+OJHxEvm+xgm3ThJP0r8
kfr06eNTzjru2dnZKviyQBWXVgSNyWefJY0bN06SFJcRp5RLU1Rv6nVN5DU+9Xqfxy3lP5aDjVGrbyX5lI/LiGu2jYT/SZA7xW23p2TpuozrlHVtlo6WH1Xe6TzF9Wu+Hm+hnEvWuAjlfPXv946UOirVZ+6UfMeJ9/+7juwq9ZLcKW51G3T2+CQPTFba6DSfc8D7fD7XvI+V99zqLZRxFGo7N998c6vrsM+lOt/l3nNFZJdI1dTXtCgmNGadc9Y13vq/99j3H/fR3aKVnJFsj22rrPW8qXsK6fs5u9ugs9eJ1l6//K8dga43/rz3rbk4LdbcNeknZ+/ZrHusuNSz54t13nhf+6TG91tN7UNT/w8H/n3s3fehHnf/+qztgo03//vTtNFpPsvao4+8x7O/uAt950Pvc6QlbYfTdc1fW+bGpvbLu68iUyJb3YY3T7rHvv+yeN93Sq2/hnnHG2y59X//OTA5I1k1dTVqSGpQTN8Yu0xzrHpifxyr03WndfXwq3Wg7oCk7+8fnBJof1NHpepUwamA7Qaam7zzQu+5N9C9U2vzNUlyGWNMc4XKysqUnJys0tJSJSWdHZgHTxzUlI1TFBURpdqGWm27ZZskacrGKUqKTlJcVNODpaq2SmVnyrTtlm3K6JYRtJzVTih1tlaosbRWKPvgdAzN8Y8xUDytORZWPc9e96wWbl8YcNuq2ioVVxVLRor3xNvjqb37wTv+6rpqHSg+oMHdByvaHW233z2+uyQ1G3OovPete3x3xUXF6eTpk3bbXWK7BCzbkn5o6rgEG1eBzt+2HOdQ2gwWsyS7vNT8HOLfp97bZ3TLsOuW1K7jKFjMgeo/F/OWUwKN2UBlOnK+sgTr53CPz1+4xHs+64hzLpRzJdz4j7VA81p7XPvQ+XTWeepcz9NN5U/Wso48B9syr4Vyz3++CzQOAuXJgfD2cgAAAAAAHELSDQAAAACAQ0i6AQAAAABwCEk3AAAAAAAOIekGAAAAAMAhJN0AAAAAADiEpBsAAAAAAIeQdAMAAAAA4BCSbgAAAAAAHELSDQAAAACAQ0i6AQAAAABwCEk3AAAAAAAOcYdSyBgjSSorK7OXVZRXqP50vepMnVwulyrKKyRJ9afrVVlTqXp3fZN1VtdVq76+XhXlFSqLKgtazmonlDpbK9RYWiuUfXA6hub4xxgontYcC6ueyvLKoNtW11WrobZBMtLpmtP2eGrvfvCO/0zdGZlqo6ryKtW56+z2KxsqJanZmEPlvW+VDWfrqaqustt217oDlm1JPzR1XIKNq0Dnb1uOcyhtBotZkl1ean4O8e9T7+3Losrsuq122mscBYs5UP3nYt5ySqAxG6hMR85XlmD9HO7x+QuXeM9nHXHOhXKuhBv/sRZoXmuPax86n846T53rebqp/Mla1pHnYFvmtVDu+c93gcaBlR9b+XIwLtNcCUnffPONevXq1Q6hAgAAAADQeXz99dfq2bNn0PUhJd0NDQ06fPiwEhMT5XK52jVAoCOVlZWpV69e+vrrr5WUlNTR4QAA/DBPA0B4+yHP08YYlZeXKz09XRERwT+5HdLbyyMiIprM3IHzXVJS0g9ukgCA8wnzNACEtx/qPJ2cnNxsGb5IDQAAAAAAh5B0AwAAAADgEJJu/KBFR0frkUceUXR0dEeHAgAIgHkaAMIb83TzQvoiNQAAAAAA0HK80g0AAAAAgENIugEAAAAAcAhJNwAAAAAADiHpBgAAAADAISTdOK+8++67mjx5stLT0+VyufTGG2/4rK+trdV9992nYcOGKT4+Xunp6frVr36lw4cPh1T/Bx98oMjISE2cOLHRuqKiIrlcLkVGRurbb7/1WXfkyBG53W65XC4VFRW1dvcA4LzHPA0A4Y15+twj6cZ5pbKyUsOHD9dzzz0XcH1VVZXy8vL00EMPKS8vT1u3blVBQYGmTJkSUv1r1qzRggUL9O677wadWC688EK99NJLPstefPFFXXjhhS3bGQDo
hJinASC8MU93AAOcpySZ119/vdlyu3fvNpLMoUOHmixXXl5uEhISzIEDB8z06dPNkiVLfNYXFhYaSebBBx80GRkZPusGDBhgHnroISPJFBYWtnRXAKBTYp4GgPDGPH1u8Eo3Or3S0lK5XC6lpKQ0We7VV1/VoEGDNHDgQM2cOVNr166VCfAz9lOmTFFJSYnef/99SdL777+vkpISTZ482YnwAaDTY54GgPDGPN02JN3o1Kqrq3XfffcpOztbSUlJTZZds2aNZs6cKUm67rrrVFpaqnfeeadRuaioKHsSkaS1a9dq5syZioqKav8dAIBOjnkaAMIb83TbkXSj06qtrdW0adNkjNHKlSubLFtQUKDdu3crOztbkuR2uzV9+nStWbMmYPk5c+Zo8+bNOnr0qDZv3qw5c+a0e/wA0NkxTwNAeGOebh/ujg4AcII1QRw6dEj/+te/QvqrXF1dndLT0+1lxhhFR0dr+fLlSk5O9ik/bNgwDRo0SNnZ2Ro8eLAuvvhi5efnO7ErANApMU8DQHhjnm4/vNKNTseaIA4ePKi3335b3bp1a7J8XV2dXnrpJT311FPKz8+3Hx9//LHS09O1YcOGgNvNmTNHO3fu7NR/lQMAJzBPA0B4Y55uX7zSjfNKRUWFvvzyS/t5YWGh8vPz1bVrV/Xu3Vu1tbWaOnWq8vLylJOTo/r6eh09elSS1LVrV3k8nkZ15uTkqKSkRL/5zW8a/QXupptu0po1a3THHXc02u7222/XzTff3OwXSgDADwnzNACEN+bpDtBh35sOtEJubq6R1Ogxa9YsY8z3P0MQ6JGbmxuwzkmTJpmsrKyA6z766CMjyXz88cd23fv37w9Ydv/+/Z3yJw4AoCWYpwEgvDFPn3suYwJ8hzsAAAAAAGgzPtMNAAAAAIBDSLoBAAAAAHAISTcAAAAAAA4h6QYAAAAAwCEk3QAAAAAAOISkGwAAAAAAh5B0AwAAAADgEJJuAAAAAAAcQtINAAAAAIBDSLoBAAAAAHAISTcAAAAAAA4h6QYAAAAAwCH/D/ekkhADmiG8AAAAAElFTkSuQmCC",
- "text/plain": [
- ""
- ]
- },
- "metadata": {},
- "output_type": "display_data"
+ "execution_count": 2,
+ "id": "4fea8a03",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2025-11-24T18:32:16.169725Z",
+ "iopub.status.busy": "2025-11-24T18:32:16.169725Z",
+ "iopub.status.idle": "2025-11-24T18:32:16.492839Z",
+ "shell.execute_reply": "2025-11-24T18:32:16.492839Z"
}
- ],
+ },
+ "outputs": [],
"source": [
- "fig, ax_barcode = plt.subplots(figsize=(10,1.5))\n",
+ "fig, (ax_map, ax_barcode) = plt.subplots(2, 1, figsize=(6,6.5),\n",
+ " gridspec_kw={'height_ratios':[10,1]})\n",
+ "\n",
+ "gpd.GeoDataFrame(geometry=[outer_box], crs='EPSG:3857').plot(ax=ax_map, color='#d3d3d3')\n",
+ "city.plot(ax=ax_map, edgecolor='white', linewidth=1, color='#8c8c8c')\n",
+ "\n",
+ "plot_hexagons(stops_gb, ax=ax_map, color='cluster', cmap='Greens', location_id='h3_cell', data_crs='EPSG:3857')\n",
+ "plot_pings(traj, ax=ax_map, s=6, color='black', alpha=0.5, traj_cols=tc)\n",
+ "ax_map.set_axis_off()\n",
"\n",
"plot_time_barcode(traj['unix_ts'], ax=ax_barcode, set_xlim=True)\n",
- "plot_stops_barcode(stops_gb, ax=ax_barcode, stop_color='green', set_xlim=False, timestamp='unix_ts')\n",
- "plt.title(\"Grid-Based stops\")\n",
- "plt.tight_layout()\n",
+ "plot_stops_barcode(stops_gb, ax=ax_barcode, cmap='Greens', set_xlim=False, timestamp='unix_ts')\n",
+ "\n",
+ "plt.tight_layout(pad=0.1)\n",
"plt.show()"
]
}
@@ -106,7 +115,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.14.0"
+ "version": "3.12.3"
}
},
"nbformat": 4,
diff --git a/examples/grid_based_demo.py b/examples/grid_based_demo.py
index 7ec92360..4d667bbd 100644
--- a/examples/grid_based_demo.py
+++ b/examples/grid_based_demo.py
@@ -25,38 +25,49 @@
# %%
# %matplotlib inline
+import matplotlib
+matplotlib.use('TkAgg') # Non-blocking backend
+import matplotlib.pyplot as plt
+plt.ion() # Interactive mode
# Imports
import nomad.io.base as loader
import geopandas as gpd
from shapely.geometry import box
-import pandas as pd
-import numpy as np
-import matplotlib.pyplot as plt
-from nomad.stop_detection.viz import plot_stops_barcode, plot_time_barcode
+from nomad.stop_detection.viz import plot_stops_barcode, plot_time_barcode, plot_hexagons, plot_pings
import nomad.stop_detection.grid_based as GRID_BASED
import nomad.filters as filters
# Load data
-from nomad.city_gen import City
-city_obj = City.from_geopackage("garden-city.gpkg")
-outer_box = box(0, 0, city_obj.dimensions[0], city_obj.dimensions[1])
+import nomad.data as data_folder
+from pathlib import Path
+data_dir = Path(data_folder.__file__).parent
+city = gpd.read_parquet(data_dir / 'garden-city-buildings-mercator.parquet')
+outer_box = box(*city.total_bounds)
filepath_root = 'gc_data_long/'
tc = {"user_id": "gc_identifier", "x": "dev_x", "y": "dev_y", "timestamp": "unix_ts"}
users = ['admiring_brattain']
-traj = loader.sample_from_file(filepath_root, format='parquet', users=users, filters = ('date','==', '2024-01-01'), traj_cols=tc)
+traj = loader.sample_from_file(filepath_root, format='parquet', users=users, filters=('date','==', '2024-01-01'), traj_cols=tc)
# Grid-based - data is in Web Mercator (EPSG:3857) projected coordinates
traj['h3_cell'] = filters.to_tessellation(traj, index="h3", res=10, traj_cols=tc, data_crs='EPSG:3857')
stops_gb = GRID_BASED.grid_based(traj, time_thresh=240, complete_output=True, location_id='h3_cell', traj_cols=tc)
# %%
-fig, ax_barcode = plt.subplots(figsize=(10,1.5))
+fig, (ax_map, ax_barcode) = plt.subplots(2, 1, figsize=(6,6.5),
+ gridspec_kw={'height_ratios':[10,1]})
+
+gpd.GeoDataFrame(geometry=[outer_box], crs='EPSG:3857').plot(ax=ax_map, color='#d3d3d3')
+city.plot(ax=ax_map, edgecolor='white', linewidth=1, color='#8c8c8c')
+
+plot_hexagons(stops_gb, ax=ax_map, color='cluster', cmap='Greens', location_id='h3_cell', data_crs='EPSG:3857')
+plot_pings(traj, ax=ax_map, s=6, color='black', alpha=0.5, traj_cols=tc)
+ax_map.set_axis_off()
plot_time_barcode(traj['unix_ts'], ax=ax_barcode, set_xlim=True)
-plot_stops_barcode(stops_gb, ax=ax_barcode, stop_color='green', set_xlim=False, timestamp='unix_ts')
-plt.title("Grid-Based stops")
-plt.tight_layout()
+plot_stops_barcode(stops_gb, ax=ax_barcode, cmap='Greens', set_xlim=False, timestamp='unix_ts')
+
+plt.tight_layout(pad=0.1)
plt.show()
diff --git a/examples/hdbscan_demo.ipynb b/examples/hdbscan_demo.ipynb
index 4437d09a..b7c6c4e4 100644
--- a/examples/hdbscan_demo.ipynb
+++ b/examples/hdbscan_demo.ipynb
@@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "markdown",
- "id": "92838936",
+ "id": "4f16fcbe",
"metadata": {},
"source": [
"# HDBSCAN Stop Detection"
@@ -10,7 +10,7 @@
},
{
"cell_type": "markdown",
- "id": "cb276fd9",
+ "id": "f69ed80f",
"metadata": {},
"source": [
"The HDBSCAN algorithm constructs a hierarchy of non-overlapping clusters from different radius values and selects those that maximize stability."
@@ -18,73 +18,79 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "id": "19184dee",
- "metadata": {},
+ "execution_count": 1,
+ "id": "3561532d",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2025-11-24T18:32:30.624504Z",
+ "iopub.status.busy": "2025-11-24T18:32:30.624504Z",
+ "iopub.status.idle": "2025-11-24T18:32:33.741073Z",
+ "shell.execute_reply": "2025-11-24T18:32:33.740043Z"
+ }
+ },
"outputs": [],
"source": [
"%matplotlib inline\n",
+ "import matplotlib\n",
+ "matplotlib.use('TkAgg')\n",
+ "import matplotlib.pyplot as plt\n",
+ "plt.ion()\n",
"\n",
"# Imports\n",
"import nomad.io.base as loader\n",
"import geopandas as gpd\n",
"from shapely.geometry import box\n",
- "import pandas as pd\n",
- "import numpy as np\n",
- "import matplotlib.pyplot as plt\n",
- "from nomad.stop_detection.viz import plot_stops_barcode, plot_time_barcode\n",
+ "from nomad.stop_detection.viz import plot_stops_barcode, plot_time_barcode, plot_stops, plot_pings\n",
"import nomad.stop_detection.hdbscan as HDBSCAN\n",
- "import nomad.stop_detection.postprocessing as post\n",
"\n",
"# Load data\n",
- "city = gpd.read_file(\"garden_city.geojson\").to_crs('EPSG:3857')\n",
- "outer_box = box(*city.total_bounds).buffer(15, join_style='mitre')\n",
+ "import nomad.data as data_folder\n",
+ "from pathlib import Path\n",
+ "data_dir = Path(data_folder.__file__).parent\n",
+ "city = gpd.read_parquet(data_dir / 'garden-city-buildings-mercator.parquet')\n",
+ "outer_box = box(*city.total_bounds)\n",
"\n",
"filepath_root = 'gc_data_long/'\n",
- "tc = {\n",
- " \"user_id\": \"gc_identifier\",\n",
- " \"timestamp\": \"unix_ts\",\n",
- " \"x\": \"dev_x\",\n",
- " \"y\": \"dev_y\",\n",
- " \"ha\":\"ha\",\n",
- " \"date\":\"date\"}\n",
+ "tc = {\"user_id\": \"gc_identifier\", \"x\": \"dev_x\", \"y\": \"dev_y\", \"timestamp\": \"unix_ts\"}\n",
"\n",
"users = ['admiring_brattain']\n",
- "traj = loader.sample_from_file(filepath_root, format='parquet', users=users, filters = ('date','==', '2024-01-01'), traj_cols=tc)\n",
+ "traj = loader.sample_from_file(filepath_root, format='parquet', users=users, filters=('date','==', '2024-01-01'), traj_cols=tc)\n",
"\n",
- "user_data_hdb = traj.assign(cluster=HDBSCAN.hdbscan_labels(traj, time_thresh=240, min_pts=3, min_cluster_size=2, traj_cols=tc))\n",
"stops_hdb = HDBSCAN.st_hdbscan(traj,\n",
" time_thresh=720,\n",
- " dist_thresh=15,\n",
" min_pts=3,\n",
" complete_output=True,\n",
- " traj_cols=tc)\n",
- "stops_hdb[\"cluster\"] = post.remove_overlaps(user_data_hdb, time_thresh=240, method='cluster', traj_cols=tc, min_pts=3, dur_min=5, min_cluster_size=3) "
+ " traj_cols=tc) "
]
},
{
"cell_type": "code",
- "execution_count": 30,
- "id": "fa70719e",
- "metadata": {},
- "outputs": [
- {
- "data": {
- "image/png": "",
- "text/plain": [
- ""
- ]
- },
- "metadata": {},
- "output_type": "display_data"
+ "execution_count": 2,
+ "id": "ca45c6c3",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2025-11-24T18:32:33.744689Z",
+ "iopub.status.busy": "2025-11-24T18:32:33.743614Z",
+ "iopub.status.idle": "2025-11-24T18:32:34.044704Z",
+ "shell.execute_reply": "2025-11-24T18:32:34.044704Z"
}
- ],
+ },
+ "outputs": [],
"source": [
- "fig, ax_barcode = plt.subplots(figsize=(10,1.5))\n",
+ "fig, (ax_map, ax_barcode) = plt.subplots(2, 1, figsize=(6,6.5),\n",
+ " gridspec_kw={'height_ratios':[10,1]})\n",
+ "\n",
+ "gpd.GeoDataFrame(geometry=[outer_box], crs='EPSG:3857').plot(ax=ax_map, color='#d3d3d3')\n",
+ "city.plot(ax=ax_map, edgecolor='white', linewidth=1, color='#8c8c8c')\n",
+ "\n",
+ "plot_stops(stops_hdb, ax=ax_map, cmap='Blues')\n",
+ "plot_pings(traj, ax=ax_map, s=6, color='black', alpha=0.5, traj_cols=tc)\n",
+ "ax_map.set_axis_off()\n",
"\n",
"plot_time_barcode(traj['unix_ts'], ax=ax_barcode, set_xlim=True)\n",
- "plot_stops_barcode(stops_hdb, ax=ax_barcode, stop_color='blue', set_xlim=False, timestamp='unix_ts')\n",
- "fig.suptitle(\"HDBSCAN stops with post-processing\")\n",
+ "plot_stops_barcode(stops_hdb, ax=ax_barcode, cmap='Blues', set_xlim=False, timestamp='unix_ts')\n",
+ "\n",
+ "plt.tight_layout(pad=0.1)\n",
"plt.show()"
]
}
@@ -105,7 +111,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.11.5"
+ "version": "3.12.3"
}
},
"nbformat": 4,
diff --git a/examples/hdbscan_demo.py b/examples/hdbscan_demo.py
index 6eb922a1..fcef411f 100644
--- a/examples/hdbscan_demo.py
+++ b/examples/hdbscan_demo.py
@@ -20,41 +20,50 @@
# %%
# %matplotlib inline
+import matplotlib
+matplotlib.use('TkAgg')
+import matplotlib.pyplot as plt
+plt.ion()
# Imports
import nomad.io.base as loader
import geopandas as gpd
from shapely.geometry import box
-import pandas as pd
-import numpy as np
-import matplotlib.pyplot as plt
-from nomad.stop_detection.viz import plot_stops_barcode, plot_time_barcode
+from nomad.stop_detection.viz import plot_stops_barcode, plot_time_barcode, plot_stops, plot_pings
import nomad.stop_detection.hdbscan as HDBSCAN
-import nomad.stop_detection.postprocessing as post
# Load data
-from nomad.city_gen import City
-city_obj = City.from_geopackage("garden-city.gpkg")
-outer_box = box(0, 0, city_obj.dimensions[0], city_obj.dimensions[1])
+import nomad.data as data_folder
+from pathlib import Path
+data_dir = Path(data_folder.__file__).parent
+city = gpd.read_parquet(data_dir / 'garden-city-buildings-mercator.parquet')
+outer_box = box(*city.total_bounds)
filepath_root = 'gc_data_long/'
tc = {"user_id": "gc_identifier", "x": "dev_x", "y": "dev_y", "timestamp": "unix_ts"}
users = ['admiring_brattain']
-traj = loader.sample_from_file(filepath_root, format='parquet', users=users, filters = ('date','==', '2024-01-01'), traj_cols=tc)
+traj = loader.sample_from_file(filepath_root, format='parquet', users=users, filters=('date','==', '2024-01-01'), traj_cols=tc)
-user_data_hdb = traj.assign(cluster=HDBSCAN.hdbscan_labels(traj, time_thresh=240, min_pts=3, min_cluster_size=2, traj_cols=tc))
stops_hdb = HDBSCAN.st_hdbscan(traj,
time_thresh=720,
min_pts=3,
complete_output=True,
- traj_cols=tc)
-stops_hdb["cluster"] = post.remove_overlaps(user_data_hdb, time_thresh=240, method='cluster', traj_cols=tc, min_pts=3, dur_min=5, min_cluster_size=3)
+ traj_cols=tc)
# %%
-fig, ax_barcode = plt.subplots(figsize=(10,1.5))
+fig, (ax_map, ax_barcode) = plt.subplots(2, 1, figsize=(6,6.5),
+ gridspec_kw={'height_ratios':[10,1]})
+
+gpd.GeoDataFrame(geometry=[outer_box], crs='EPSG:3857').plot(ax=ax_map, color='#d3d3d3')
+city.plot(ax=ax_map, edgecolor='white', linewidth=1, color='#8c8c8c')
+
+plot_stops(stops_hdb, ax=ax_map, cmap='Blues')
+plot_pings(traj, ax=ax_map, s=6, color='black', alpha=0.5, traj_cols=tc)
+ax_map.set_axis_off()
plot_time_barcode(traj['unix_ts'], ax=ax_barcode, set_xlim=True)
-plot_stops_barcode(stops_hdb, ax=ax_barcode, stop_color='blue', set_xlim=False, timestamp='unix_ts')
-fig.suptitle("HDBSCAN stops with post-processing")
+plot_stops_barcode(stops_hdb, ax=ax_barcode, cmap='Blues', set_xlim=False, timestamp='unix_ts')
+
+plt.tight_layout(pad=0.1)
plt.show()
diff --git a/examples/ingesting-data.ipynb b/examples/ingesting-data.ipynb
new file mode 100644
index 00000000..ecee86c5
--- /dev/null
+++ b/examples/ingesting-data.ipynb
@@ -0,0 +1,139 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "18b18c53",
+ "metadata": {
+ "id": "460ff464-7812-41fb-bc5b-bc4f24e16499"
+ },
+ "source": [
+ "# Loading Trajectory Data\n",
+ "\n",
+ "Mobility data comes in many formats: timestamps as unix integers or ISO strings (with timezones), \n",
+ "coordinates in lat/lon or projected, files as single CSVs or partitioned directories.\n",
+ "\n",
+ "`nomad.io.from_file` handles these cases with a single function call."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "e3245095",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import glob\n",
+ "import pandas as pd\n",
+ "import nomad.io.base as loader\n",
+ "import nomad.data as data_folder\n",
+ "from pathlib import Path\n",
+ "\n",
+ "data_dir = Path(data_folder.__file__).parent"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "a48b37e7",
+ "metadata": {},
+ "source": [
+ "## Pandas vs nomad.io for partitioned data\n",
+ "\n",
+ "Partitioned directories (e.g., `date=2024-01-01/`, `date=2024-01-02/`, ...) require a loop with pandas:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "0a3a2a1b",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "csv_files = glob.glob(str(data_dir / \"partitioned_csv\" / \"*\" / \"*.csv\"))\n",
+ "df_list = []\n",
+ "for f in csv_files:\n",
+ " df_list.append(pd.read_csv(f))\n",
+ "df_pandas = pd.concat(df_list, ignore_index=True)\n",
+ "\n",
+ "print(f\"Pandas: {len(df_pandas)} rows\")\n",
+ "print(df_pandas.dtypes)\n",
+ "print(\"\\nFirst few rows:\")\n",
+ "print(df_pandas.head(3))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "71eb4656",
+ "metadata": {},
+ "source": [
+ "`nomad.io.from_file` handles partitioned directories in one line, plus automatic type casting and column mapping:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "d48bf128",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "traj_cols = {\"user_id\": \"user_id\",\n",
+ " \"latitude\": \"dev_lat\",\n",
+ " \"longitude\": \"dev_lon\",\n",
+ " \"datetime\": \"local_datetime\"}\n",
+ "\n",
+ "df = loader.from_file(data_dir / \"partitioned_csv\", format=\"csv\", traj_cols=traj_cols, parse_dates=True)\n",
+ "print(f\"nomad.io: {len(df)} rows\")\n",
+ "print(df.dtypes)\n",
+ "print(\"\\nFirst few rows:\")\n",
+ "print(df.head(3))\n",
+ "print(\"\\nNote: 'local_datetime' is now datetime64[ns], not object!\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "20c187a2",
+ "metadata": {},
+ "source": [
+ "The same pattern works for Parquet files. Type casting and processing rely on `traj_cols`, which tells the functions which columns in the data correspond to the default \"typical\" spatio-temporal column names."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "4ca035ae",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "traj_cols = {\"user_id\": \"uid\", \"timestamp\": \"timestamp\", \n",
+ " \"latitude\": \"latitude\", \"longitude\": \"longitude\", \"date\": \"date\"}\n",
+ "\n",
+ "df = loader.from_file(data_dir / \"partitioned_parquet\", format=\"parquet\", traj_cols=traj_cols, parse_dates=True)\n",
+ "print(f\"Loaded {len(df)} rows\")\n",
+ "print(df.dtypes)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "1e2f4479",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# These are the default canonical column names\n",
+ "from nomad.constants import DEFAULT_SCHEMA\n",
+ "print(DEFAULT_SCHEMA.keys())"
+ ]
+ }
+ ],
+ "metadata": {
+ "jupytext": {
+ "formats": "ipynb,py:percent"
+ },
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/examples/ingesting-data.py b/examples/ingesting-data.py
new file mode 100644
index 00000000..4cc46fd9
--- /dev/null
+++ b/examples/ingesting-data.py
@@ -0,0 +1,80 @@
+# ---
+# jupyter:
+# jupytext:
+# formats: ipynb,py:percent
+# text_representation:
+# extension: .py
+# format_name: percent
+# format_version: '1.3'
+# jupytext_version: 1.17.3
+# kernelspec:
+# display_name: Python 3 (ipykernel)
+# language: python
+# name: python3
+# ---
+
+# %% [markdown] id="460ff464-7812-41fb-bc5b-bc4f24e16499"
+# # Loading Trajectory Data
+#
+# Mobility data comes in many formats: timestamps as unix integers or ISO strings (with timezones),
+# coordinates in lat/lon or projected, files as single CSVs or partitioned directories.
+#
+# `nomad.io.from_file` handles these cases with a single function call.
+
+# %%
+import glob
+import pandas as pd
+import nomad.io.base as loader
+import nomad.data as data_folder
+from pathlib import Path
+
+data_dir = Path(data_folder.__file__).parent
+
+# %% [markdown]
+# ## Pandas vs nomad.io for partitioned data
+#
+# Partitioned directories (e.g., `date=2024-01-01/`, `date=2024-01-02/`, ...) require a loop with pandas:
+
+# %%
+csv_files = glob.glob(str(data_dir / "partitioned_csv" / "*" / "*.csv"))
+df_list = []
+for f in csv_files:
+ df_list.append(pd.read_csv(f))
+df_pandas = pd.concat(df_list, ignore_index=True)
+
+print(f"Pandas: {len(df_pandas)} rows")
+print(df_pandas.dtypes)
+print("\nFirst few rows:")
+print(df_pandas.head(3))
+
+# %% [markdown]
+# `nomad.io.from_file` handles partitioned directories in one line, plus automatic type casting and column mapping:
+
+# %%
+traj_cols = {"user_id": "user_id",
+ "latitude": "dev_lat",
+ "longitude": "dev_lon",
+ "datetime": "local_datetime"}
+
+df = loader.from_file(data_dir / "partitioned_csv", format="csv", traj_cols=traj_cols, parse_dates=True)
+print(f"nomad.io: {len(df)} rows")
+print(df.dtypes)
+print("\nFirst few rows:")
+print(df.head(3))
+print("\nNote: 'local_datetime' is now datetime64[ns], not object!")
+
+# %% [markdown]
+# The same pattern works for Parquet files. Type casting and processing rely on `traj_cols`, which tells the functions which columns in the data correspond to the default "typical" spatio-temporal column names.
+
+# %%
+traj_cols = {"user_id": "uid", "timestamp": "timestamp",
+ "latitude": "latitude", "longitude": "longitude", "date": "date"}
+
+df = loader.from_file(data_dir / "partitioned_parquet", format="parquet", traj_cols=traj_cols, parse_dates=True)
+print(f"Loaded {len(df)} rows")
+print(df.dtypes)
+
+# %%
+# These are the default canonical column names
+from nomad.constants import DEFAULT_SCHEMA
+print(DEFAULT_SCHEMA.keys())
diff --git a/examples/lachesis_demo.ipynb b/examples/lachesis_demo.ipynb
index a46d838b..cb9a8992 100644
--- a/examples/lachesis_demo.ipynb
+++ b/examples/lachesis_demo.ipynb
@@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "markdown",
- "id": "92838936",
+ "id": "05c3afed",
"metadata": {},
"source": [
"# Lachesis Stop Detection"
@@ -10,7 +10,7 @@
},
{
"cell_type": "markdown",
- "id": "cb276fd9",
+ "id": "bca8605a",
"metadata": {},
"source": [
"The first stop detection algorithm implemented in ```nomad``` is a sequential algorithm insipired by the one in _Project Lachesis: Parsing and Modeling Location Histories_ (Hariharan & Toyama). This algorithm for extracting stays is dependent on two parameters: the roaming distance and the stay duration. \n",
@@ -28,32 +28,43 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "id": "19184dee",
- "metadata": {},
+ "execution_count": 1,
+ "id": "7f0b2bb1",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2025-11-24T18:32:46.921799Z",
+ "iopub.status.busy": "2025-11-24T18:32:46.921799Z",
+ "iopub.status.idle": "2025-11-24T18:32:51.000857Z",
+ "shell.execute_reply": "2025-11-24T18:32:51.000857Z"
+ }
+ },
"outputs": [],
"source": [
"%matplotlib inline\n",
+ "import matplotlib\n",
+ "matplotlib.use('TkAgg')\n",
+ "import matplotlib.pyplot as plt\n",
+ "plt.ion()\n",
"\n",
"# Imports\n",
"import nomad.io.base as loader\n",
+ "import geopandas as gpd\n",
"from shapely.geometry import box\n",
- "import matplotlib.pyplot as plt\n",
- "from nomad.stop_detection.viz import plot_stops_barcode, plot_time_barcode\n",
+ "from nomad.stop_detection.viz import plot_stops_barcode, plot_time_barcode, plot_stops, plot_pings\n",
"import nomad.stop_detection.lachesis as LACHESIS\n",
+ "import nomad.data as data_folder\n",
+ "from pathlib import Path\n",
"\n",
"# Load data\n",
+ "data_dir = Path(data_folder.__file__).parent\n",
+ "city = gpd.read_parquet(data_dir / 'garden-city-buildings-mercator.parquet')\n",
+ "outer_box = box(*city.total_bounds)\n",
+ "\n",
"filepath_root = 'gc_data_long/'\n",
- "tc = {\n",
- " \"user_id\": \"gc_identifier\",\n",
- " \"timestamp\": \"unix_ts\",\n",
- " \"x\": \"dev_x\",\n",
- " \"y\": \"dev_y\",\n",
- " \"ha\":\"ha\",\n",
- " \"date\":\"date\"}\n",
+ "tc = {\"user_id\": \"gc_identifier\", \"x\": \"dev_x\", \"y\": \"dev_y\", \"timestamp\": \"unix_ts\"}\n",
"\n",
"users = ['admiring_brattain']\n",
- "traj = loader.sample_from_file(filepath_root, format='parquet', users=users, filters = ('date','==', '2024-01-01'), traj_cols=tc)\n",
+ "traj = loader.sample_from_file(filepath_root, format='parquet', users=users, filters=('date','==', '2024-01-01'), traj_cols=tc)\n",
"\n",
"# Lachesis (sequential stop detection)\n",
"stops = LACHESIS.lachesis(traj, delta_roam=20, dt_max = 60, dur_min=5, complete_output=True, keep_col_names=True, traj_cols=tc)"
@@ -61,28 +72,32 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "id": "570b6103",
- "metadata": {},
- "outputs": [
- {
- "data": {
- "image/png": "",
- "text/plain": [
- ""
- ]
- },
- "metadata": {},
- "output_type": "display_data"
+ "execution_count": 2,
+ "id": "86273598",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2025-11-24T18:32:51.005525Z",
+ "iopub.status.busy": "2025-11-24T18:32:51.000857Z",
+ "iopub.status.idle": "2025-11-24T18:32:51.355886Z",
+ "shell.execute_reply": "2025-11-24T18:32:51.355274Z"
}
- ],
+ },
+ "outputs": [],
"source": [
- "fig, ax_barcode = plt.subplots(figsize=(10,1.5))\n",
+ "fig, (ax_map, ax_barcode) = plt.subplots(2, 1, figsize=(6,6.5),\n",
+ " gridspec_kw={'height_ratios':[10,1]})\n",
+ "\n",
+ "gpd.GeoDataFrame(geometry=[outer_box], crs='EPSG:3857').plot(ax=ax_map, color='#d3d3d3')\n",
+ "city.plot(ax=ax_map, edgecolor='white', linewidth=1, color='#8c8c8c')\n",
+ "\n",
+ "plot_stops(stops, ax=ax_map, cmap='Blues')\n",
+ "plot_pings(traj, ax=ax_map, s=6, color='black', alpha=0.5, traj_cols=tc)\n",
+ "ax_map.set_axis_off()\n",
+ "\n",
+ "plot_time_barcode(traj['unix_ts'], ax=ax_barcode, set_xlim=True)\n",
+ "plot_stops_barcode(stops, ax=ax_barcode, cmap='Blues', set_xlim=False, timestamp='unix_ts')\n",
"\n",
- "plot_time_barcode(traj[tc['timestamp']], ax=ax_barcode, set_xlim=True)\n",
- "plot_stops_barcode(stops, ax=ax_barcode, stop_color='blue', set_xlim=False, timestamp='unix_ts')\n",
- "fig.suptitle(\"Lachesis stops\")\n",
- "plt.tight_layout()\n",
+ "plt.tight_layout(pad=0.1)\n",
"plt.show()"
]
}
@@ -103,7 +118,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.11.5"
+ "version": "3.12.3"
}
},
"nbformat": 4,
diff --git a/examples/lachesis_demo.py b/examples/lachesis_demo.py
index 6598fb9e..fc916225 100644
--- a/examples/lachesis_demo.py
+++ b/examples/lachesis_demo.py
@@ -30,29 +30,47 @@
# %%
# %matplotlib inline
+import matplotlib
+matplotlib.use('TkAgg')
+import matplotlib.pyplot as plt
+plt.ion()
# Imports
import nomad.io.base as loader
+import geopandas as gpd
from shapely.geometry import box
-import matplotlib.pyplot as plt
-from nomad.stop_detection.viz import plot_stops_barcode, plot_time_barcode
+from nomad.stop_detection.viz import plot_stops_barcode, plot_time_barcode, plot_stops, plot_pings
import nomad.stop_detection.lachesis as LACHESIS
+import nomad.data as data_folder
+from pathlib import Path
# Load data
+data_dir = Path(data_folder.__file__).parent
+city = gpd.read_parquet(data_dir / 'garden-city-buildings-mercator.parquet')
+outer_box = box(*city.total_bounds)
+
filepath_root = 'gc_data_long/'
tc = {"user_id": "gc_identifier", "x": "dev_x", "y": "dev_y", "timestamp": "unix_ts"}
users = ['admiring_brattain']
-traj = loader.sample_from_file(filepath_root, format='parquet', users=users, filters = ('date','==', '2024-01-01'), traj_cols=tc)
+traj = loader.sample_from_file(filepath_root, format='parquet', users=users, filters=('date','==', '2024-01-01'), traj_cols=tc)
# Lachesis (sequential stop detection)
stops = LACHESIS.lachesis(traj, delta_roam=20, dt_max = 60, dur_min=5, complete_output=True, keep_col_names=True, traj_cols=tc)
# %%
-fig, ax_barcode = plt.subplots(figsize=(10,1.5))
+fig, (ax_map, ax_barcode) = plt.subplots(2, 1, figsize=(6,6.5),
+ gridspec_kw={'height_ratios':[10,1]})
+
+gpd.GeoDataFrame(geometry=[outer_box], crs='EPSG:3857').plot(ax=ax_map, color='#d3d3d3')
+city.plot(ax=ax_map, edgecolor='white', linewidth=1, color='#8c8c8c')
+
+plot_stops(stops, ax=ax_map, cmap='Blues')
+plot_pings(traj, ax=ax_map, s=6, color='black', alpha=0.5, traj_cols=tc)
+ax_map.set_axis_off()
plot_time_barcode(traj['unix_ts'], ax=ax_barcode, set_xlim=True)
-plot_stops_barcode(stops, ax=ax_barcode, stop_color='blue', set_xlim=False, timestamp='unix_ts')
-fig.suptitle("Lachesis stops")
-plt.tight_layout()
+plot_stops_barcode(stops, ax=ax_barcode, cmap='Blues', set_xlim=False, timestamp='unix_ts')
+
+plt.tight_layout(pad=0.1)
plt.show()
diff --git a/examples/tadbscan_demo.ipynb b/examples/tadbscan_demo.ipynb
index 6e28d6b4..c4f5747b 100644
--- a/examples/tadbscan_demo.ipynb
+++ b/examples/tadbscan_demo.ipynb
@@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "markdown",
- "id": "92838936",
+ "id": "7c76d1d8",
"metadata": {},
"source": [
"# TADBSCAN Stop Detection"
@@ -10,7 +10,7 @@
},
{
"cell_type": "markdown",
- "id": "cb276fd9",
+ "id": "f29a96ce",
"metadata": {},
"source": [
"The second stop detection algorithm implemented in ```nomad``` is an adaptation of DBSCAN. Unlike in plain DBSCAN, we also incorporate the time dimension to determine if two pings are \"neighbors\". This implementation relies on 3 parameters\n",
@@ -24,79 +24,81 @@
},
{
"cell_type": "code",
- "execution_count": 4,
- "id": "19184dee",
- "metadata": {},
+ "execution_count": 1,
+ "id": "1e62c25a",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2025-11-24T18:33:03.240035Z",
+ "iopub.status.busy": "2025-11-24T18:33:03.240035Z",
+ "iopub.status.idle": "2025-11-24T18:33:05.816985Z",
+ "shell.execute_reply": "2025-11-24T18:33:05.816985Z"
+ }
+ },
"outputs": [],
"source": [
"%matplotlib inline\n",
+ "import matplotlib\n",
+ "matplotlib.use('TkAgg')\n",
+ "import matplotlib.pyplot as plt\n",
+ "plt.ion()\n",
"\n",
"# Imports\n",
"import nomad.io.base as loader\n",
"import geopandas as gpd\n",
"from shapely.geometry import box\n",
- "import pandas as pd\n",
- "import numpy as np\n",
- "import matplotlib.pyplot as plt\n",
- "from nomad.stop_detection.viz import plot_stops_barcode, plot_time_barcode\n",
+ "from nomad.stop_detection.viz import plot_stops_barcode, plot_time_barcode, plot_stops, plot_pings\n",
"import nomad.stop_detection.dbscan as DBSCAN\n",
- "import nomad.filters as filters \n",
- "import nomad.stop_detection.postprocessing as post\n",
"\n",
"# Load data\n",
- "city = gpd.read_file(\"garden_city.geojson\").to_crs('EPSG:3857')\n",
- "outer_box = box(*city.total_bounds).buffer(15, join_style='mitre')\n",
+ "import nomad.data as data_folder\n",
+ "from pathlib import Path\n",
+ "data_dir = Path(data_folder.__file__).parent\n",
+ "city = gpd.read_parquet(data_dir / 'garden-city-buildings-mercator.parquet')\n",
+ "outer_box = box(*city.total_bounds)\n",
"\n",
"filepath_root = 'gc_data_long/'\n",
- "tc = {\n",
- " \"user_id\": \"gc_identifier\",\n",
- " \"timestamp\": \"unix_ts\",\n",
- " \"x\": \"dev_x\",\n",
- " \"y\": \"dev_y\",\n",
- " \"ha\":\"ha\",\n",
- " \"date\":\"date\"}\n",
+ "tc = {\"user_id\": \"gc_identifier\", \"x\": \"dev_x\", \"y\": \"dev_y\", \"timestamp\": \"unix_ts\"}\n",
"\n",
"# Density based stop detection (Temporal DBSCAN)\n",
- "users = ['confident_aryabhata']\n",
- "traj = loader.sample_from_file(filepath_root, format='parquet', users=users, filters = ('date','<=', '2024-01-03'), traj_cols=tc)\n",
- "traj[['longitude','latitude']] = np.column_stack(\n",
- " filters.to_projection(traj, x='dev_x', y='dev_y', data_crs='EPSG:3857', crs_to='EPSG:4326')\n",
- ")\n",
+ "users = ['admiring_brattain']\n",
+ "traj = loader.sample_from_file(filepath_root, format='parquet', users=users, filters=('date','==', '2024-01-01'), traj_cols=tc)\n",
"\n",
- "user_data_tadb = traj.assign(cluster=DBSCAN.ta_dbscan_labels(traj, time_thresh=240, dist_thresh=15, min_pts=3, traj_cols=tc))\n",
"stops_tadb = DBSCAN.ta_dbscan(traj,\n",
" time_thresh=720,\n",
" dist_thresh=15,\n",
" min_pts=3,\n",
" complete_output=True,\n",
- " traj_cols=tc)\n",
- "stops_tadb[\"cluster\"] = post.remove_overlaps(user_data_tadb, time_thresh=240, method='cluster', traj_cols=tc, min_pts=3, dur_min=5, min_cluster_size=3) "
+ " traj_cols=tc) "
]
},
{
"cell_type": "code",
- "execution_count": 5,
- "id": "2159107b",
- "metadata": {},
- "outputs": [
- {
- "data": {
- "image/png": "",
- "text/plain": [
- ""
- ]
- },
- "metadata": {},
- "output_type": "display_data"
+ "execution_count": 2,
+ "id": "df942a2c",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2025-11-24T18:33:05.816985Z",
+ "iopub.status.busy": "2025-11-24T18:33:05.816985Z",
+ "iopub.status.idle": "2025-11-24T18:33:06.001663Z",
+ "shell.execute_reply": "2025-11-24T18:33:06.001511Z"
}
- ],
+ },
+ "outputs": [],
"source": [
- "fig, ax_barcode = plt.subplots(figsize=(10,1.5))\n",
+ "fig, (ax_map, ax_barcode) = plt.subplots(2, 1, figsize=(6,6.5),\n",
+ " gridspec_kw={'height_ratios':[10,1]})\n",
+ "\n",
+ "gpd.GeoDataFrame(geometry=[outer_box], crs='EPSG:3857').plot(ax=ax_map, color='#d3d3d3')\n",
+ "city.plot(ax=ax_map, edgecolor='white', linewidth=1, color='#8c8c8c')\n",
+ "\n",
+ "plot_stops(stops_tadb, ax=ax_map, cmap='Reds')\n",
+ "plot_pings(traj, ax=ax_map, s=6, color='black', alpha=0.5, traj_cols=tc)\n",
+ "ax_map.set_axis_off()\n",
"\n",
"plot_time_barcode(traj['unix_ts'], ax=ax_barcode, set_xlim=True)\n",
- "plot_stops_barcode(stops_tadb, ax=ax_barcode, stop_color='red', set_xlim=False, timestamp='unix_ts')\n",
- "plt.title(\"TA-DBSCAN stops with post-processing\")\n",
- "plt.tight_layout()\n",
+ "plot_stops_barcode(stops_tadb, ax=ax_barcode, cmap='Reds', set_xlim=False, timestamp='unix_ts')\n",
+ "\n",
+ "plt.tight_layout(pad=0.1)\n",
"plt.show()"
]
}
@@ -117,7 +119,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.11.5"
+ "version": "3.12.3"
}
},
"nbformat": 4,
diff --git a/examples/tadbscan_demo.py b/examples/tadbscan_demo.py
index 9e63e671..7b6eb0d3 100644
--- a/examples/tadbscan_demo.py
+++ b/examples/tadbscan_demo.py
@@ -26,46 +26,52 @@
# %%
# %matplotlib inline
+import matplotlib
+matplotlib.use('TkAgg')
+import matplotlib.pyplot as plt
+plt.ion()
# Imports
import nomad.io.base as loader
import geopandas as gpd
from shapely.geometry import box
-import pandas as pd
-import numpy as np
-import matplotlib.pyplot as plt
-from nomad.stop_detection.viz import plot_stops_barcode, plot_time_barcode
+from nomad.stop_detection.viz import plot_stops_barcode, plot_time_barcode, plot_stops, plot_pings
import nomad.stop_detection.dbscan as DBSCAN
-import nomad.filters as filters
-import nomad.stop_detection.postprocessing as post
# Load data
-from nomad.city_gen import City
-city_obj = City.from_geopackage("garden-city.gpkg")
-# Create a simple bounds box for visualization
-outer_box = box(0, 0, city_obj.dimensions[0], city_obj.dimensions[1])
+import nomad.data as data_folder
+from pathlib import Path
+data_dir = Path(data_folder.__file__).parent
+city = gpd.read_parquet(data_dir / 'garden-city-buildings-mercator.parquet')
+outer_box = box(*city.total_bounds)
filepath_root = 'gc_data_long/'
tc = {"user_id": "gc_identifier", "x": "dev_x", "y": "dev_y", "timestamp": "unix_ts"}
# Density based stop detection (Temporal DBSCAN)
-users = ['confident_aryabhata']
-traj = loader.sample_from_file(filepath_root, format='parquet', users=users, filters = ('date','<=', '2024-01-03'), traj_cols=tc)
+users = ['admiring_brattain']
+traj = loader.sample_from_file(filepath_root, format='parquet', users=users, filters=('date','==', '2024-01-01'), traj_cols=tc)
-user_data_tadb = traj.assign(cluster=DBSCAN.ta_dbscan_labels(traj, time_thresh=240, dist_thresh=15, min_pts=3, traj_cols=tc))
stops_tadb = DBSCAN.ta_dbscan(traj,
time_thresh=720,
dist_thresh=15,
min_pts=3,
complete_output=True,
- traj_cols=tc)
-stops_tadb["cluster"] = post.remove_overlaps(user_data_tadb, time_thresh=240, method='cluster', traj_cols=tc, min_pts=3, dur_min=5, min_cluster_size=3)
+ traj_cols=tc)
# %%
-fig, ax_barcode = plt.subplots(figsize=(10,1.5))
+fig, (ax_map, ax_barcode) = plt.subplots(2, 1, figsize=(6,6.5),
+ gridspec_kw={'height_ratios':[10,1]})
+
+gpd.GeoDataFrame(geometry=[outer_box], crs='EPSG:3857').plot(ax=ax_map, color='#d3d3d3')
+city.plot(ax=ax_map, edgecolor='white', linewidth=1, color='#8c8c8c')
+
+plot_stops(stops_tadb, ax=ax_map, cmap='Reds')
+plot_pings(traj, ax=ax_map, s=6, color='black', alpha=0.5, traj_cols=tc)
+ax_map.set_axis_off()
plot_time_barcode(traj['unix_ts'], ax=ax_barcode, set_xlim=True)
-plot_stops_barcode(stops_tadb, ax=ax_barcode, stop_color='red', set_xlim=False, timestamp='unix_ts')
-plt.title("TA-DBSCAN stops with post-processing")
-plt.tight_layout()
+plot_stops_barcode(stops_tadb, ax=ax_barcode, cmap='Reds', set_xlim=False, timestamp='unix_ts')
+
+plt.tight_layout(pad=0.1)
plt.show()
diff --git a/nomad/data/garden-city-buildings-mercator.parquet b/nomad/data/garden-city-buildings-mercator.parquet
new file mode 100644
index 00000000..aceac951
Binary files /dev/null and b/nomad/data/garden-city-buildings-mercator.parquet differ
diff --git a/nomad/data/garden-city-buildings.geojson b/nomad/data/garden-city-buildings.geojson
new file mode 100644
index 00000000..41979fd3
--- /dev/null
+++ b/nomad/data/garden-city-buildings.geojson
@@ -0,0 +1,113 @@
+{
+"type": "FeatureCollection",
+"name": "garden-city-buildings",
+"crs": { "type": "name", "properties": { "name": "urn:ogc:def:crs:OGC:1.3:CRS84" } },
+"features": [
+{ "type": "Feature", "properties": { "index": "p-x12-y11", "id": "p-x12-y11", "building_type": "park", "door_cell_x": 13, "door_cell_y": 11, "size": 16, "door_point_x": 13.0, "door_point_y": 11.5 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -38.317674376729549, 36.669298771960719 ], [ -38.317674376729549, 36.669731091674187 ], [ -38.318213365900021, 36.669731091674187 ], [ -38.318213365900021, 36.669298771960719 ], [ -38.317674376729549, 36.669298771960719 ] ] ] } },
+{ "type": "Feature", "properties": { "index": "h-x7-y8", "id": "h-x7-y8", "building_type": "home", "door_cell_x": 8, "door_cell_y": 8, "size": 2, "door_point_x": 8.0, "door_point_y": 8.5 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -38.318482860485254, 36.669082611193211 ], [ -38.318348113192641, 36.669082611193211 ], [ -38.318348113192641, 36.669298771960719 ], [ -38.318482860485254, 36.669298771960719 ], [ -38.318482860485254, 36.669082611193211 ] ] ] } },
+{ "type": "Feature", "properties": { "index": "h-x9-y7", "id": "h-x9-y7", "building_type": "home", "door_cell_x": 9, "door_cell_y": 8, "size": 2, "door_point_x": 9.5, "door_point_y": 8.0 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -38.318348113192641, 36.669082611193211 ], [ -38.318078618607409, 36.669082611193211 ], [ -38.318078618607409, 36.669190691652865 ], [ -38.318348113192641, 36.669190691652865 ], [ -38.318348113192641, 36.669082611193211 ] ] ] } },
+{ "type": "Feature", "properties": { "index": "h-x10-y7", "id": "h-x10-y7", "building_type": "home", "door_cell_x": 10, "door_cell_y": 8, "size": 1, "door_point_x": 10.5, "door_point_y": 8.0 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -38.317943871314782, 36.669082611193211 ], [ -38.317943871314782, 36.669190691652865 ], [ -38.318078618607409, 36.669190691652865 ], [ -38.318078618607409, 36.669082611193211 ], [ -38.317943871314782, 36.669082611193211 ] ] ] } },
+{ "type": "Feature", "properties": { "index": "h-x11-y7", "id": "h-x11-y7", "building_type": "home", "door_cell_x": 11, "door_cell_y": 8, "size": 1, "door_point_x": 11.5, "door_point_y": 8.0 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -38.317809124022169, 36.669082611193211 ], [ -38.317809124022169, 36.669190691652865 ], [ -38.317943871314782, 36.669190691652865 ], [ -38.317943871314782, 36.669082611193211 ], [ -38.317809124022169, 36.669082611193211 ] ] ] } },
+{ "type": "Feature", "properties": { "index": "h-x13-y7", "id": "h-x13-y7", "building_type": "home", "door_cell_x": 13, "door_cell_y": 6, "size": 1, "door_point_x": 13.5, "door_point_y": 7.0 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -38.317539629436936, 36.669082611193211 ], [ -38.317539629436936, 36.669190691652865 ], [ -38.317674376729549, 36.669190691652865 ], [ -38.317674376729549, 36.669082611193211 ], [ -38.317539629436936, 36.669082611193211 ] ] ] } },
+{ "type": "Feature", "properties": { "index": "h-x14-y7", "id": "h-x14-y7", "building_type": "home", "door_cell_x": 14, "door_cell_y": 6, "size": 1, "door_point_x": 14.5, "door_point_y": 7.0 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -38.317404882144309, 36.669082611193211 ], [ -38.317404882144309, 36.669190691652865 ], [ -38.317539629436936, 36.669190691652865 ], [ -38.317539629436936, 36.669082611193211 ], [ -38.317404882144309, 36.669082611193211 ] ] ] } },
+{ "type": "Feature", "properties": { "index": "h-x14-y8", "id": "h-x14-y8", "building_type": "home", "door_cell_x": 13, "door_cell_y": 8, "size": 1, "door_point_x": 14.0, "door_point_y": 8.5 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -38.317404882144309, 36.669190691652865 ], [ -38.317404882144309, 36.669298771960719 ], [ -38.317539629436936, 36.669298771960719 ], [ -38.317539629436936, 36.669190691652865 ], [ -38.317404882144309, 36.669190691652865 ] ] ] } },
+{ "type": "Feature", "properties": { "index": "h-x14-y9", "id": "h-x14-y9", "building_type": "home", "door_cell_x": 13, "door_cell_y": 9, "size": 1, "door_point_x": 14.0, "door_point_y": 9.5 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -38.317404882144309, 36.669298771960719 ], [ -38.317404882144309, 36.669406852116779 ], [ -38.317539629436936, 36.669406852116779 ], [ -38.317539629436936, 36.669298771960719 ], [ -38.317404882144309, 36.669298771960719 ] ] ] } },
+{ "type": "Feature", "properties": { "index": "h-x14-y11", "id": "h-x14-y11", "building_type": "home", "door_cell_x": 13, "door_cell_y": 11, "size": 1, "door_point_x": 14.0, "door_point_y": 11.5 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -38.317404882144309, 36.669514932121047 ], [ -38.317404882144309, 36.669623011973513 ], [ -38.317539629436936, 36.669623011973513 ], [ -38.317539629436936, 36.669514932121047 ], [ -38.317404882144309, 36.669514932121047 ] ] ] } },
+{ "type": "Feature", "properties": { "index": "h-x14-y12", "id": "h-x14-y12", "building_type": "home", "door_cell_x": 13, "door_cell_y": 12, "size": 1, "door_point_x": 14.0, "door_point_y": 12.5 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -38.317404882144309, 36.669623011973513 ], [ -38.317404882144309, 36.669731091674187 ], [ -38.317539629436936, 36.669731091674187 ], [ -38.317539629436936, 36.669623011973513 ], [ -38.317404882144309, 36.669623011973513 ] ] ] } },
+{ "type": "Feature", "properties": { "index": "h-x14-y13", "id": "h-x14-y13", "building_type": "home", "door_cell_x": 15, "door_cell_y": 13, "size": 1, "door_point_x": 15.0, "door_point_y": 13.5 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -38.317404882144309, 36.669731091674187 ], [ -38.317404882144309, 36.669839171223067 ], [ -38.317539629436936, 36.669839171223067 ], [ -38.317539629436936, 36.669731091674187 ], [ -38.317404882144309, 36.669731091674187 ] ] ] } },
+{ "type": "Feature", "properties": { "index": "h-x13-y14", "id": "h-x13-y14", "building_type": "home", "door_cell_x": 13, "door_cell_y": 13, "size": 2, "door_point_x": 13.5, "door_point_y": 14.0 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -38.317674376729549, 36.669839171223067 ], [ -38.317404882144309, 36.669839171223067 ], [ -38.317404882144309, 36.66994725062014 ], [ -38.317674376729549, 36.66994725062014 ], [ -38.317674376729549, 36.669839171223067 ] ] ] } },
+{ "type": "Feature", "properties": { "index": "h-x12-y14", "id": "h-x12-y14", "building_type": "home", "door_cell_x": 12, "door_cell_y": 13, "size": 1, "door_point_x": 12.5, "door_point_y": 14.0 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -38.317674376729549, 36.669839171223067 ], [ -38.317674376729549, 36.66994725062014 ], [ -38.317809124022169, 36.66994725062014 ], [ -38.317809124022169, 36.669839171223067 ], [ -38.317674376729549, 36.669839171223067 ] ] ] } },
+{ "type": "Feature", "properties": { "index": "h-x11-y14", "id": "h-x11-y14", "building_type": "home", "door_cell_x": 11, "door_cell_y": 13, "size": 1, "door_point_x": 11.5, "door_point_y": 14.0 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -38.317809124022169, 36.669839171223067 ], [ -38.317809124022169, 36.66994725062014 ], [ -38.317943871314782, 36.66994725062014 ], [ -38.317943871314782, 36.669839171223067 ], [ -38.317809124022169, 36.669839171223067 ] ] ] } },
+{ "type": "Feature", "properties": { "index": "h-x9-y14", "id": "h-x9-y14", "building_type": "home", "door_cell_x": 9, "door_cell_y": 13, "size": 1, "door_point_x": 9.5, "door_point_y": 14.0 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -38.318078618607409, 36.669839171223067 ], [ -38.318078618607409, 36.66994725062014 ], [ -38.318213365900021, 36.66994725062014 ], [ -38.318213365900021, 36.669839171223067 ], [ -38.318078618607409, 36.669839171223067 ] ] ] } },
+{ "type": "Feature", "properties": { "index": "h-x8-y14", "id": "h-x8-y14", "building_type": "home", "door_cell_x": 8, "door_cell_y": 13, "size": 1, "door_point_x": 8.5, "door_point_y": 14.0 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -38.318213365900021, 36.669839171223067 ], [ -38.318213365900021, 36.66994725062014 ], [ -38.318348113192641, 36.66994725062014 ], [ -38.318348113192641, 36.669839171223067 ], [ -38.318213365900021, 36.669839171223067 ] ] ] } },
+{ "type": "Feature", "properties": { "index": "h-x7-y14", "id": "h-x7-y14", "building_type": "home", "door_cell_x": 7, "door_cell_y": 15, "size": 1, "door_point_x": 7.5, "door_point_y": 15.0 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -38.318348113192641, 36.669839171223067 ], [ -38.318348113192641, 36.66994725062014 ], [ -38.318482860485254, 36.66994725062014 ], [ -38.318482860485254, 36.669839171223067 ], [ -38.318348113192641, 36.669839171223067 ] ] ] } },
+{ "type": "Feature", "properties": { "index": "h-x7-y13", "id": "h-x7-y13", "building_type": "home", "door_cell_x": 6, "door_cell_y": 13, "size": 1, "door_point_x": 7.0, "door_point_y": 13.5 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -38.318348113192641, 36.669731091674187 ], [ -38.318348113192641, 36.669839171223067 ], [ -38.318482860485254, 36.669839171223067 ], [ -38.318482860485254, 36.669731091674187 ], [ -38.318348113192641, 36.669731091674187 ] ] ] } },
+{ "type": "Feature", "properties": { "index": "h-x7-y12", "id": "h-x7-y12", "building_type": "home", "door_cell_x": 8, "door_cell_y": 12, "size": 1, "door_point_x": 8.0, "door_point_y": 12.5 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -38.318348113192641, 36.669623011973513 ], [ -38.318348113192641, 36.669731091674187 ], [ -38.318482860485254, 36.669731091674187 ], [ -38.318482860485254, 36.669623011973513 ], [ -38.318348113192641, 36.669623011973513 ] ] ] } },
+{ "type": "Feature", "properties": { "index": "h-x7-y10", "id": "h-x7-y10", "building_type": "home", "door_cell_x": 8, "door_cell_y": 10, "size": 2, "door_point_x": 8.0, "door_point_y": 10.5 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -38.318482860485254, 36.669298771960719 ], [ -38.318348113192641, 36.669298771960719 ], [ -38.318348113192641, 36.669514932121047 ], [ -38.318482860485254, 36.669514932121047 ], [ -38.318482860485254, 36.669298771960719 ] ] ] } },
+{ "type": "Feature", "properties": { "index": "w-x4-y4", "id": "w-x4-y4", "building_type": "workplace", "door_cell_x": 3, "door_cell_y": 4, "size": 2, "door_point_x": 4.0, "door_point_y": 4.5 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -38.318887102363114, 36.668758368903482 ], [ -38.318752355070494, 36.668758368903482 ], [ -38.318752355070494, 36.668974530581764 ], [ -38.318887102363114, 36.668974530581764 ], [ -38.318887102363114, 36.668758368903482 ] ] ] } },
+{ "type": "Feature", "properties": { "index": "w-x5-y4", "id": "w-x5-y4", "building_type": "workplace", "door_cell_x": 5, "door_cell_y": 3, "size": 2, "door_point_x": 5.5, "door_point_y": 4.0 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -38.318752355070494, 36.668758368903482 ], [ -38.318617607777881, 36.668758368903482 ], [ -38.318617607777881, 36.668974530581764 ], [ -38.318752355070494, 36.668974530581764 ], [ -38.318752355070494, 36.668758368903482 ] ] ] } },
+{ "type": "Feature", "properties": { "index": "w-x6-y5", "id": "w-x6-y5", "building_type": "workplace", "door_cell_x": 6, "door_cell_y": 6, "size": 4, "door_point_x": 6.5, "door_point_y": 6.0 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -38.318348113192641, 36.668758368903482 ], [ -38.318348113192641, 36.668974530581764 ], [ -38.318617607777881, 36.668974530581764 ], [ -38.318617607777881, 36.668758368903482 ], [ -38.318348113192641, 36.668758368903482 ] ] ] } },
+{ "type": "Feature", "properties": { "index": "w-x8-y5", "id": "w-x8-y5", "building_type": "workplace", "door_cell_x": 8, "door_cell_y": 6, "size": 4, "door_point_x": 8.5, "door_point_y": 6.0 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -38.318078618607409, 36.668758368903482 ], [ -38.318078618607409, 36.668974530581764 ], [ -38.318348113192641, 36.668974530581764 ], [ -38.318348113192641, 36.668758368903482 ], [ -38.318078618607409, 36.668758368903482 ] ] ] } },
+{ "type": "Feature", "properties": { "index": "w-x12-y5", "id": "w-x12-y5", "building_type": "workplace", "door_cell_x": 12, "door_cell_y": 6, "size": 3, "door_point_x": 12.5, "door_point_y": 6.0 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -38.317539629436936, 36.668866449818516 ], [ -38.317539629436936, 36.668974530581764 ], [ -38.317943871314782, 36.668974530581764 ], [ -38.317943871314782, 36.668866449818516 ], [ -38.317539629436936, 36.668866449818516 ] ] ] } },
+{ "type": "Feature", "properties": { "index": "w-x12-y4", "id": "w-x12-y4", "building_type": "workplace", "door_cell_x": 12, "door_cell_y": 3, "size": 3, "door_point_x": 12.5, "door_point_y": 4.0 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -38.317539629436936, 36.668758368903482 ], [ -38.317539629436936, 36.668866449818516 ], [ -38.317943871314782, 36.668866449818516 ], [ -38.317943871314782, 36.668758368903482 ], [ -38.317539629436936, 36.668758368903482 ] ] ] } },
+{ "type": "Feature", "properties": { "index": "w-x15-y4", "id": "w-x15-y4", "building_type": "workplace", "door_cell_x": 15, "door_cell_y": 3, "size": 6, "door_point_x": 15.5, "door_point_y": 4.0 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -38.317135387559077, 36.668758368903482 ], [ -38.317135387559077, 36.668974530581764 ], [ -38.317539629436936, 36.668974530581764 ], [ -38.317539629436936, 36.668758368903482 ], [ -38.317135387559077, 36.668758368903482 ] ] ] } },
+{ "type": "Feature", "properties": { "index": "w-x17-y4", "id": "w-x17-y4", "building_type": "workplace", "door_cell_x": 18, "door_cell_y": 4, "size": 2, "door_point_x": 18.0, "door_point_y": 4.5 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -38.317000640266464, 36.668758368903482 ], [ -38.317000640266464, 36.668974530581764 ], [ -38.317135387559077, 36.668974530581764 ], [ -38.317135387559077, 36.668758368903482 ], [ -38.317000640266464, 36.668758368903482 ] ] ] } },
+{ "type": "Feature", "properties": { "index": "w-x17-y6", "id": "w-x17-y6", "building_type": "workplace", "door_cell_x": 18, "door_cell_y": 6, "size": 4, "door_point_x": 18.0, "door_point_y": 6.5 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -38.317000640266464, 36.668974530581764 ], [ -38.317000640266464, 36.669190691652865 ], [ -38.317270134851697, 36.669190691652865 ], [ -38.317270134851697, 36.668974530581764 ], [ -38.317000640266464, 36.668974530581764 ] ] ] } },
+{ "type": "Feature", "properties": { "index": "w-x16-y9", "id": "w-x16-y9", "building_type": "workplace", "door_cell_x": 15, "door_cell_y": 9, "size": 2, "door_point_x": 16.0, "door_point_y": 9.5 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -38.317135387559077, 36.669190691652865 ], [ -38.317135387559077, 36.669406852116779 ], [ -38.317270134851697, 36.669406852116779 ], [ -38.317270134851697, 36.669190691652865 ], [ -38.317135387559077, 36.669190691652865 ] ] ] } },
+{ "type": "Feature", "properties": { "index": "w-x17-y8", "id": "w-x17-y8", "building_type": "workplace", "door_cell_x": 18, "door_cell_y": 8, "size": 2, "door_point_x": 18.0, "door_point_y": 8.5 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -38.317000640266464, 36.669190691652865 ], [ -38.317000640266464, 36.669406852116779 ], [ -38.317135387559077, 36.669406852116779 ], [ -38.317135387559077, 36.669190691652865 ], [ -38.317000640266464, 36.669190691652865 ] ] ] } },
+{ "type": "Feature", "properties": { "index": "w-x17-y10", "id": "w-x17-y10", "building_type": "workplace", "door_cell_x": 18, "door_cell_y": 10, "size": 4, "door_point_x": 18.0, "door_point_y": 10.5 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -38.317000640266464, 36.669406852116779 ], [ -38.317000640266464, 36.669623011973513 ], [ -38.317270134851697, 36.669623011973513 ], [ -38.317270134851697, 36.669406852116779 ], [ -38.317000640266464, 36.669406852116779 ] ] ] } },
+{ "type": "Feature", "properties": { "index": "w-x17-y13", "id": "w-x17-y13", "building_type": "workplace", "door_cell_x": 18, "door_cell_y": 13, "size": 4, "door_point_x": 18.0, "door_point_y": 13.5 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -38.317000640266464, 36.669731091674187 ], [ -38.317000640266464, 36.66994725062014 ], [ -38.317270134851697, 36.66994725062014 ], [ -38.317270134851697, 36.669731091674187 ], [ -38.317000640266464, 36.669731091674187 ] ] ] } },
+{ "type": "Feature", "properties": { "index": "w-x17-y15", "id": "w-x17-y15", "building_type": "workplace", "door_cell_x": 18, "door_cell_y": 15, "size": 2, "door_point_x": 18.0, "door_point_y": 15.5 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -38.317000640266464, 36.66994725062014 ], [ -38.317000640266464, 36.670055329865427 ], [ -38.317270134851697, 36.670055329865427 ], [ -38.317270134851697, 36.66994725062014 ], [ -38.317000640266464, 36.66994725062014 ] ] ] } },
+{ "type": "Feature", "properties": { "index": "w-x15-y16", "id": "w-x15-y16", "building_type": "workplace", "door_cell_x": 15, "door_cell_y": 15, "size": 3, "door_point_x": 15.5, "door_point_y": 16.0 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -38.317000640266464, 36.670055329865427 ], [ -38.317000640266464, 36.670163408958906 ], [ -38.317404882144309, 36.670163408958906 ], [ -38.317404882144309, 36.670055329865427 ], [ -38.317000640266464, 36.670055329865427 ] ] ] } },
+{ "type": "Feature", "properties": { "index": "w-x14-y16", "id": "w-x14-y16", "building_type": "workplace", "door_cell_x": 14, "door_cell_y": 15, "size": 1, "door_point_x": 14.5, "door_point_y": 16.0 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -38.317404882144309, 36.670055329865427 ], [ -38.317404882144309, 36.670163408958906 ], [ -38.317539629436936, 36.670163408958906 ], [ -38.317539629436936, 36.670055329865427 ], [ -38.317404882144309, 36.670055329865427 ] ] ] } },
+{ "type": "Feature", "properties": { "index": "w-x16-y17", "id": "w-x16-y17", "building_type": "workplace", "door_cell_x": 16, "door_cell_y": 18, "size": 2, "door_point_x": 16.5, "door_point_y": 18.0 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -38.317000640266464, 36.670163408958906 ], [ -38.317000640266464, 36.670271487900592 ], [ -38.317270134851697, 36.670271487900592 ], [ -38.317270134851697, 36.670163408958906 ], [ -38.317000640266464, 36.670163408958906 ] ] ] } },
+{ "type": "Feature", "properties": { "index": "w-x15-y17", "id": "w-x15-y17", "building_type": "workplace", "door_cell_x": 15, "door_cell_y": 18, "size": 2, "door_point_x": 15.5, "door_point_y": 18.0 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -38.317270134851697, 36.670163408958906 ], [ -38.317270134851697, 36.670271487900592 ], [ -38.317539629436936, 36.670271487900592 ], [ -38.317539629436936, 36.670163408958906 ], [ -38.317270134851697, 36.670163408958906 ] ] ] } },
+{ "type": "Feature", "properties": { "index": "w-x13-y17", "id": "w-x13-y17", "building_type": "workplace", "door_cell_x": 13, "door_cell_y": 18, "size": 4, "door_point_x": 13.5, "door_point_y": 18.0 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -38.317539629436936, 36.670055329865427 ], [ -38.317539629436936, 36.670271487900592 ], [ -38.317809124022169, 36.670271487900592 ], [ -38.317809124022169, 36.670055329865427 ], [ -38.317539629436936, 36.670055329865427 ] ] ] } },
+{ "type": "Feature", "properties": { "index": "w-x11-y17", "id": "w-x11-y17", "building_type": "workplace", "door_cell_x": 11, "door_cell_y": 18, "size": 2, "door_point_x": 11.5, "door_point_y": 18.0 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -38.317809124022169, 36.670163408958906 ], [ -38.317809124022169, 36.670271487900592 ], [ -38.318078618607409, 36.670271487900592 ], [ -38.318078618607409, 36.670163408958906 ], [ -38.317809124022169, 36.670163408958906 ] ] ] } },
+{ "type": "Feature", "properties": { "index": "w-x11-y16", "id": "w-x11-y16", "building_type": "workplace", "door_cell_x": 11, "door_cell_y": 15, "size": 2, "door_point_x": 11.5, "door_point_y": 16.0 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -38.317809124022169, 36.670055329865427 ], [ -38.317809124022169, 36.670163408958906 ], [ -38.318078618607409, 36.670163408958906 ], [ -38.318078618607409, 36.670055329865427 ], [ -38.317809124022169, 36.670055329865427 ] ] ] } },
+{ "type": "Feature", "properties": { "index": "w-x8-y17", "id": "w-x8-y17", "building_type": "workplace", "door_cell_x": 8, "door_cell_y": 18, "size": 4, "door_point_x": 8.5, "door_point_y": 18.0 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -38.318213365900021, 36.670055329865427 ], [ -38.318213365900021, 36.670271487900592 ], [ -38.318482860485254, 36.670271487900592 ], [ -38.318482860485254, 36.670055329865427 ], [ -38.318213365900021, 36.670055329865427 ] ] ] } },
+{ "type": "Feature", "properties": { "index": "w-x6-y17", "id": "w-x6-y17", "building_type": "workplace", "door_cell_x": 6, "door_cell_y": 18, "size": 2, "door_point_x": 6.5, "door_point_y": 18.0 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -38.318482860485254, 36.670163408958906 ], [ -38.318482860485254, 36.670271487900592 ], [ -38.318752355070494, 36.670271487900592 ], [ -38.318752355070494, 36.670163408958906 ], [ -38.318482860485254, 36.670163408958906 ] ] ] } },
+{ "type": "Feature", "properties": { "index": "w-x6-y16", "id": "w-x6-y16", "building_type": "workplace", "door_cell_x": 6, "door_cell_y": 15, "size": 2, "door_point_x": 6.5, "door_point_y": 16.0 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -38.318482860485254, 36.670055329865427 ], [ -38.318482860485254, 36.670163408958906 ], [ -38.318752355070494, 36.670163408958906 ], [ -38.318752355070494, 36.670055329865427 ], [ -38.318482860485254, 36.670055329865427 ] ] ] } },
+{ "type": "Feature", "properties": { "index": "w-x4-y16", "id": "w-x4-y16", "building_type": "workplace", "door_cell_x": 3, "door_cell_y": 16, "size": 2, "door_point_x": 4.0, "door_point_y": 16.5 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -38.318887102363114, 36.670055329865427 ], [ -38.318752355070494, 36.670055329865427 ], [ -38.318752355070494, 36.670271487900592 ], [ -38.318887102363114, 36.670271487900592 ], [ -38.318887102363114, 36.670055329865427 ] ] ] } },
+{ "type": "Feature", "properties": { "index": "w-x4-y13", "id": "w-x4-y13", "building_type": "workplace", "door_cell_x": 3, "door_cell_y": 13, "size": 6, "door_point_x": 4.0, "door_point_y": 13.5 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -38.318617607777881, 36.669731091674187 ], [ -38.318617607777881, 36.670055329865427 ], [ -38.318887102363114, 36.670055329865427 ], [ -38.318887102363114, 36.669731091674187 ], [ -38.318617607777881, 36.669731091674187 ] ] ] } },
+{ "type": "Feature", "properties": { "index": "w-x5-y12", "id": "w-x5-y12", "building_type": "workplace", "door_cell_x": 6, "door_cell_y": 12, "size": 2, "door_point_x": 6.0, "door_point_y": 12.5 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -38.318617607777881, 36.669623011973513 ], [ -38.318617607777881, 36.669731091674187 ], [ -38.318887102363114, 36.669731091674187 ], [ -38.318887102363114, 36.669623011973513 ], [ -38.318617607777881, 36.669623011973513 ] ] ] } },
+{ "type": "Feature", "properties": { "index": "w-x4-y10", "id": "w-x4-y10", "building_type": "workplace", "door_cell_x": 3, "door_cell_y": 10, "size": 2, "door_point_x": 4.0, "door_point_y": 10.5 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -38.318887102363114, 36.669298771960719 ], [ -38.318752355070494, 36.669298771960719 ], [ -38.318752355070494, 36.669514932121047 ], [ -38.318887102363114, 36.669514932121047 ], [ -38.318887102363114, 36.669298771960719 ] ] ] } },
+{ "type": "Feature", "properties": { "index": "w-x5-y9", "id": "w-x5-y9", "building_type": "workplace", "door_cell_x": 6, "door_cell_y": 9, "size": 2, "door_point_x": 6.0, "door_point_y": 9.5 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -38.318752355070494, 36.669298771960719 ], [ -38.318617607777881, 36.669298771960719 ], [ -38.318617607777881, 36.669514932121047 ], [ -38.318752355070494, 36.669514932121047 ], [ -38.318752355070494, 36.669298771960719 ] ] ] } },
+{ "type": "Feature", "properties": { "index": "w-x5-y8", "id": "w-x5-y8", "building_type": "workplace", "door_cell_x": 6, "door_cell_y": 8, "size": 2, "door_point_x": 6.0, "door_point_y": 8.5 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -38.318887102363114, 36.669190691652865 ], [ -38.318617607777881, 36.669190691652865 ], [ -38.318617607777881, 36.669298771960719 ], [ -38.318887102363114, 36.669298771960719 ], [ -38.318887102363114, 36.669190691652865 ] ] ] } },
+{ "type": "Feature", "properties": { "index": "w-x4-y6", "id": "w-x4-y6", "building_type": "workplace", "door_cell_x": 3, "door_cell_y": 6, "size": 4, "door_point_x": 4.0, "door_point_y": 6.5 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -38.318617607777881, 36.668974530581764 ], [ -38.318617607777881, 36.669190691652865 ], [ -38.318887102363114, 36.669190691652865 ], [ -38.318887102363114, 36.668974530581764 ], [ -38.318617607777881, 36.668974530581764 ] ] ] } },
+{ "type": "Feature", "properties": { "index": "r-x1-y1", "id": "r-x1-y1", "building_type": "retail", "door_cell_x": 0, "door_cell_y": 1, "size": 4, "door_point_x": 1.0, "door_point_y": 1.5 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -38.319021849655726, 36.6684341252476 ], [ -38.319021849655726, 36.668650287836641 ], [ -38.319291344240966, 36.668650287836641 ], [ -38.319291344240966, 36.6684341252476 ], [ -38.319021849655726, 36.6684341252476 ] ] ] } },
+{ "type": "Feature", "properties": { "index": "r-x3-y1", "id": "r-x3-y1", "building_type": "retail", "door_cell_x": 3, "door_cell_y": 0, "size": 4, "door_point_x": 3.5, "door_point_y": 1.0 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -38.318752355070494, 36.6684341252476 ], [ -38.318752355070494, 36.668650287836641 ], [ -38.319021849655726, 36.668650287836641 ], [ -38.319021849655726, 36.6684341252476 ], [ -38.318752355070494, 36.6684341252476 ] ] ] } },
+{ "type": "Feature", "properties": { "index": "r-x5-y1", "id": "r-x5-y1", "building_type": "retail", "door_cell_x": 5, "door_cell_y": 0, "size": 1, "door_point_x": 5.5, "door_point_y": 1.0 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -38.318617607777881, 36.6684341252476 ], [ -38.318617607777881, 36.668542206618028 ], [ -38.318752355070494, 36.668542206618028 ], [ -38.318752355070494, 36.6684341252476 ], [ -38.318617607777881, 36.6684341252476 ] ] ] } },
+{ "type": "Feature", "properties": { "index": "r-x5-y2", "id": "r-x5-y2", "building_type": "retail", "door_cell_x": 5, "door_cell_y": 3, "size": 1, "door_point_x": 5.5, "door_point_y": 3.0 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -38.318617607777881, 36.668542206618028 ], [ -38.318617607777881, 36.668650287836641 ], [ -38.318752355070494, 36.668650287836641 ], [ -38.318752355070494, 36.668542206618028 ], [ -38.318617607777881, 36.668542206618028 ] ] ] } },
+{ "type": "Feature", "properties": { "index": "r-x6-y1", "id": "r-x6-y1", "building_type": "retail", "door_cell_x": 6, "door_cell_y": 0, "size": 2, "door_point_x": 6.5, "door_point_y": 1.0 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -38.318348113192641, 36.6684341252476 ], [ -38.318348113192641, 36.668542206618028 ], [ -38.318617607777881, 36.668542206618028 ], [ -38.318617607777881, 36.6684341252476 ], [ -38.318348113192641, 36.6684341252476 ] ] ] } },
+{ "type": "Feature", "properties": { "index": "r-x6-y2", "id": "r-x6-y2", "building_type": "retail", "door_cell_x": 6, "door_cell_y": 3, "size": 2, "door_point_x": 6.5, "door_point_y": 3.0 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -38.318348113192641, 36.668542206618028 ], [ -38.318348113192641, 36.668650287836641 ], [ -38.318617607777881, 36.668650287836641 ], [ -38.318617607777881, 36.668542206618028 ], [ -38.318348113192641, 36.668542206618028 ] ] ] } },
+{ "type": "Feature", "properties": { "index": "r-x9-y2", "id": "r-x9-y2", "building_type": "retail", "door_cell_x": 9, "door_cell_y": 3, "size": 2, "door_point_x": 9.5, "door_point_y": 3.0 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -38.318078618607409, 36.6684341252476 ], [ -38.318078618607409, 36.668650287836641 ], [ -38.318213365900021, 36.668650287836641 ], [ -38.318213365900021, 36.6684341252476 ], [ -38.318078618607409, 36.6684341252476 ] ] ] } },
+{ "type": "Feature", "properties": { "index": "r-x12-y2", "id": "r-x12-y2", "building_type": "retail", "door_cell_x": 12, "door_cell_y": 3, "size": 6, "door_point_x": 12.5, "door_point_y": 3.0 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -38.317674376729549, 36.6684341252476 ], [ -38.317674376729549, 36.668650287836641 ], [ -38.318078618607409, 36.668650287836641 ], [ -38.318078618607409, 36.6684341252476 ], [ -38.317674376729549, 36.6684341252476 ] ] ] } },
+{ "type": "Feature", "properties": { "index": "r-x14-y2", "id": "r-x14-y2", "building_type": "retail", "door_cell_x": 14, "door_cell_y": 3, "size": 4, "door_point_x": 14.5, "door_point_y": 3.0 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -38.317404882144309, 36.6684341252476 ], [ -38.317404882144309, 36.668650287836641 ], [ -38.317674376729549, 36.668650287836641 ], [ -38.317674376729549, 36.6684341252476 ], [ -38.317404882144309, 36.6684341252476 ] ] ] } },
+{ "type": "Feature", "properties": { "index": "r-x15-y2", "id": "r-x15-y2", "building_type": "retail", "door_cell_x": 15, "door_cell_y": 3, "size": 1, "door_point_x": 15.5, "door_point_y": 3.0 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -38.317270134851697, 36.668542206618028 ], [ -38.317270134851697, 36.668650287836641 ], [ -38.317404882144309, 36.668650287836641 ], [ -38.317404882144309, 36.668542206618028 ], [ -38.317270134851697, 36.668542206618028 ] ] ] } },
+{ "type": "Feature", "properties": { "index": "r-x16-y2", "id": "r-x16-y2", "building_type": "retail", "door_cell_x": 16, "door_cell_y": 3, "size": 1, "door_point_x": 16.5, "door_point_y": 3.0 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -38.317135387559077, 36.668542206618028 ], [ -38.317135387559077, 36.668650287836641 ], [ -38.317270134851697, 36.668650287836641 ], [ -38.317270134851697, 36.668542206618028 ], [ -38.317135387559077, 36.668542206618028 ] ] ] } },
+{ "type": "Feature", "properties": { "index": "r-x15-y1", "id": "r-x15-y1", "building_type": "retail", "door_cell_x": 15, "door_cell_y": 0, "size": 1, "door_point_x": 15.5, "door_point_y": 1.0 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -38.317270134851697, 36.6684341252476 ], [ -38.317270134851697, 36.668542206618028 ], [ -38.317404882144309, 36.668542206618028 ], [ -38.317404882144309, 36.6684341252476 ], [ -38.317270134851697, 36.6684341252476 ] ] ] } },
+{ "type": "Feature", "properties": { "index": "r-x16-y1", "id": "r-x16-y1", "building_type": "retail", "door_cell_x": 16, "door_cell_y": 0, "size": 1, "door_point_x": 16.5, "door_point_y": 1.0 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -38.317135387559077, 36.6684341252476 ], [ -38.317135387559077, 36.668542206618028 ], [ -38.317270134851697, 36.668542206618028 ], [ -38.317270134851697, 36.6684341252476 ], [ -38.317135387559077, 36.6684341252476 ] ] ] } },
+{ "type": "Feature", "properties": { "index": "r-x17-y2", "id": "r-x17-y2", "building_type": "retail", "door_cell_x": 17, "door_cell_y": 3, "size": 2, "door_point_x": 17.5, "door_point_y": 3.0 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -38.316865892973837, 36.668542206618028 ], [ -38.316865892973837, 36.668650287836641 ], [ -38.317135387559077, 36.668650287836641 ], [ -38.317135387559077, 36.668542206618028 ], [ -38.316865892973837, 36.668542206618028 ] ] ] } },
+{ "type": "Feature", "properties": { "index": "r-x18-y1", "id": "r-x18-y1", "building_type": "retail", "door_cell_x": 18, "door_cell_y": 0, "size": 2, "door_point_x": 18.5, "door_point_y": 1.0 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -38.316865892973837, 36.6684341252476 ], [ -38.316865892973837, 36.668542206618028 ], [ -38.317135387559077, 36.668542206618028 ], [ -38.317135387559077, 36.6684341252476 ], [ -38.316865892973837, 36.6684341252476 ] ] ] } },
+{ "type": "Feature", "properties": { "index": "r-x19-y1", "id": "r-x19-y1", "building_type": "retail", "door_cell_x": 19, "door_cell_y": 0, "size": 2, "door_point_x": 19.5, "door_point_y": 1.0 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -38.316596398388604, 36.6684341252476 ], [ -38.316596398388604, 36.668542206618028 ], [ -38.316865892973837, 36.668542206618028 ], [ -38.316865892973837, 36.6684341252476 ], [ -38.316596398388604, 36.6684341252476 ] ] ] } },
+{ "type": "Feature", "properties": { "index": "r-x19-y3", "id": "r-x19-y3", "building_type": "retail", "door_cell_x": 18, "door_cell_y": 3, "size": 4, "door_point_x": 19.0, "door_point_y": 3.5 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -38.316596398388604, 36.668542206618028 ], [ -38.316596398388604, 36.668758368903482 ], [ -38.316865892973837, 36.668758368903482 ], [ -38.316865892973837, 36.668542206618028 ], [ -38.316596398388604, 36.668542206618028 ] ] ] } },
+{ "type": "Feature", "properties": { "index": "r-x19-y5", "id": "r-x19-y5", "building_type": "retail", "door_cell_x": 18, "door_cell_y": 5, "size": 4, "door_point_x": 19.0, "door_point_y": 5.5 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -38.316596398388604, 36.668758368903482 ], [ -38.316596398388604, 36.668974530581764 ], [ -38.316865892973837, 36.668974530581764 ], [ -38.316865892973837, 36.668758368903482 ], [ -38.316596398388604, 36.668758368903482 ] ] ] } },
+{ "type": "Feature", "properties": { "index": "r-x19-y7", "id": "r-x19-y7", "building_type": "retail", "door_cell_x": 18, "door_cell_y": 7, "size": 2, "door_point_x": 19.0, "door_point_y": 7.5 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -38.316731145681224, 36.668974530581764 ], [ -38.316731145681224, 36.669190691652865 ], [ -38.316865892973837, 36.669190691652865 ], [ -38.316865892973837, 36.668974530581764 ], [ -38.316731145681224, 36.668974530581764 ] ] ] } },
+{ "type": "Feature", "properties": { "index": "r-x20-y7", "id": "r-x20-y7", "building_type": "retail", "door_cell_x": 21, "door_cell_y": 7, "size": 2, "door_point_x": 21.0, "door_point_y": 7.5 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -38.316596398388604, 36.668974530581764 ], [ -38.316596398388604, 36.669190691652865 ], [ -38.316731145681224, 36.669190691652865 ], [ -38.316731145681224, 36.668974530581764 ], [ -38.316596398388604, 36.668974530581764 ] ] ] } },
+{ "type": "Feature", "properties": { "index": "r-x19-y10", "id": "r-x19-y10", "building_type": "retail", "door_cell_x": 18, "door_cell_y": 10, "size": 4, "door_point_x": 19.0, "door_point_y": 10.5 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -38.316596398388604, 36.669298771960719 ], [ -38.316596398388604, 36.669514932121047 ], [ -38.316865892973837, 36.669514932121047 ], [ -38.316865892973837, 36.669298771960719 ], [ -38.316596398388604, 36.669298771960719 ] ] ] } },
+{ "type": "Feature", "properties": { "index": "r-x19-y11", "id": "r-x19-y11", "building_type": "retail", "door_cell_x": 18, "door_cell_y": 11, "size": 4, "door_point_x": 19.0, "door_point_y": 11.5 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -38.316596398388604, 36.669514932121047 ], [ -38.316596398388604, 36.669731091674187 ], [ -38.316865892973837, 36.669731091674187 ], [ -38.316865892973837, 36.669514932121047 ], [ -38.316596398388604, 36.669514932121047 ] ] ] } },
+{ "type": "Feature", "properties": { "index": "r-x19-y13", "id": "r-x19-y13", "building_type": "retail", "door_cell_x": 18, "door_cell_y": 13, "size": 2, "door_point_x": 19.0, "door_point_y": 13.5 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -38.316731145681224, 36.669731091674187 ], [ -38.316731145681224, 36.66994725062014 ], [ -38.316865892973837, 36.66994725062014 ], [ -38.316865892973837, 36.669731091674187 ], [ -38.316731145681224, 36.669731091674187 ] ] ] } },
+{ "type": "Feature", "properties": { "index": "r-x20-y13", "id": "r-x20-y13", "building_type": "retail", "door_cell_x": 21, "door_cell_y": 13, "size": 2, "door_point_x": 21.0, "door_point_y": 13.5 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -38.316596398388604, 36.669731091674187 ], [ -38.316596398388604, 36.66994725062014 ], [ -38.316731145681224, 36.66994725062014 ], [ -38.316731145681224, 36.669731091674187 ], [ -38.316596398388604, 36.669731091674187 ] ] ] } },
+{ "type": "Feature", "properties": { "index": "r-x20-y16", "id": "r-x20-y16", "building_type": "retail", "door_cell_x": 21, "door_cell_y": 16, "size": 4, "door_point_x": 21.0, "door_point_y": 16.5 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -38.316596398388604, 36.66994725062014 ], [ -38.316596398388604, 36.670163408958906 ], [ -38.316865892973837, 36.670163408958906 ], [ -38.316865892973837, 36.66994725062014 ], [ -38.316596398388604, 36.66994725062014 ] ] ] } },
+{ "type": "Feature", "properties": { "index": "r-x20-y18", "id": "r-x20-y18", "building_type": "retail", "door_cell_x": 21, "door_cell_y": 18, "size": 4, "door_point_x": 21.0, "door_point_y": 18.5 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -38.316596398388604, 36.670163408958906 ], [ -38.316596398388604, 36.670379566690485 ], [ -38.316865892973837, 36.670379566690485 ], [ -38.316865892973837, 36.670163408958906 ], [ -38.316596398388604, 36.670163408958906 ] ] ] } },
+{ "type": "Feature", "properties": { "index": "r-x20-y19", "id": "r-x20-y19", "building_type": "retail", "door_cell_x": 21, "door_cell_y": 19, "size": 2, "door_point_x": 21.0, "door_point_y": 19.5 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -38.316596398388604, 36.670379566690485 ], [ -38.316596398388604, 36.670487645328578 ], [ -38.316865892973837, 36.670487645328578 ], [ -38.316865892973837, 36.670379566690485 ], [ -38.316596398388604, 36.670379566690485 ] ] ] } },
+{ "type": "Feature", "properties": { "index": "r-x20-y20", "id": "r-x20-y20", "building_type": "retail", "door_cell_x": 20, "door_cell_y": 21, "size": 2, "door_point_x": 20.5, "door_point_y": 21.0 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -38.316596398388604, 36.670487645328578 ], [ -38.316596398388604, 36.67059572381487 ], [ -38.316865892973837, 36.67059572381487 ], [ -38.316865892973837, 36.670487645328578 ], [ -38.316596398388604, 36.670487645328578 ] ] ] } },
+{ "type": "Feature", "properties": { "index": "r-x17-y19", "id": "r-x17-y19", "building_type": "retail", "door_cell_x": 17, "door_cell_y": 18, "size": 2, "door_point_x": 17.5, "door_point_y": 19.0 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -38.317000640266464, 36.670379566690485 ], [ -38.317000640266464, 36.67059572381487 ], [ -38.317135387559077, 36.67059572381487 ], [ -38.317135387559077, 36.670379566690485 ], [ -38.317000640266464, 36.670379566690485 ] ] ] } },
+{ "type": "Feature", "properties": { "index": "r-x16-y19", "id": "r-x16-y19", "building_type": "retail", "door_cell_x": 16, "door_cell_y": 18, "size": 2, "door_point_x": 16.5, "door_point_y": 19.0 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -38.317135387559077, 36.670379566690485 ], [ -38.317135387559077, 36.67059572381487 ], [ -38.317270134851697, 36.67059572381487 ], [ -38.317270134851697, 36.670379566690485 ], [ -38.317135387559077, 36.670379566690485 ] ] ] } },
+{ "type": "Feature", "properties": { "index": "r-x14-y19", "id": "r-x14-y19", "building_type": "retail", "door_cell_x": 14, "door_cell_y": 18, "size": 3, "door_point_x": 14.5, "door_point_y": 19.0 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -38.317270134851697, 36.670379566690485 ], [ -38.317270134851697, 36.670487645328578 ], [ -38.317674376729549, 36.670487645328578 ], [ -38.317674376729549, 36.670379566690485 ], [ -38.317270134851697, 36.670379566690485 ] ] ] } },
+{ "type": "Feature", "properties": { "index": "r-x15-y20", "id": "r-x15-y20", "building_type": "retail", "door_cell_x": 15, "door_cell_y": 21, "size": 2, "door_point_x": 15.5, "door_point_y": 21.0 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -38.317270134851697, 36.670487645328578 ], [ -38.317270134851697, 36.67059572381487 ], [ -38.317539629436936, 36.67059572381487 ], [ -38.317539629436936, 36.670487645328578 ], [ -38.317270134851697, 36.670487645328578 ] ] ] } },
+{ "type": "Feature", "properties": { "index": "r-x13-y20", "id": "r-x13-y20", "building_type": "retail", "door_cell_x": 13, "door_cell_y": 21, "size": 2, "door_point_x": 13.5, "door_point_y": 21.0 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -38.317539629436936, 36.670487645328578 ], [ -38.317539629436936, 36.67059572381487 ], [ -38.317809124022169, 36.67059572381487 ], [ -38.317809124022169, 36.670487645328578 ], [ -38.317539629436936, 36.670487645328578 ] ] ] } },
+{ "type": "Feature", "properties": { "index": "r-x12-y19", "id": "r-x12-y19", "building_type": "retail", "door_cell_x": 12, "door_cell_y": 18, "size": 1, "door_point_x": 12.5, "door_point_y": 19.0 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -38.317674376729549, 36.670379566690485 ], [ -38.317674376729549, 36.670487645328578 ], [ -38.317809124022169, 36.670487645328578 ], [ -38.317809124022169, 36.670379566690485 ], [ -38.317674376729549, 36.670379566690485 ] ] ] } },
+{ "type": "Feature", "properties": { "index": "r-x11-y19", "id": "r-x11-y19", "building_type": "retail", "door_cell_x": 11, "door_cell_y": 18, "size": 4, "door_point_x": 11.5, "door_point_y": 19.0 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -38.317809124022169, 36.670379566690485 ], [ -38.317809124022169, 36.67059572381487 ], [ -38.318078618607409, 36.67059572381487 ], [ -38.318078618607409, 36.670379566690485 ], [ -38.317809124022169, 36.670379566690485 ] ] ] } },
+{ "type": "Feature", "properties": { "index": "r-x9-y19", "id": "r-x9-y19", "building_type": "retail", "door_cell_x": 9, "door_cell_y": 18, "size": 2, "door_point_x": 9.5, "door_point_y": 19.0 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -38.318078618607409, 36.670379566690485 ], [ -38.318078618607409, 36.670487645328578 ], [ -38.318348113192641, 36.670487645328578 ], [ -38.318348113192641, 36.670379566690485 ], [ -38.318078618607409, 36.670379566690485 ] ] ] } },
+{ "type": "Feature", "properties": { "index": "r-x9-y20", "id": "r-x9-y20", "building_type": "retail", "door_cell_x": 9, "door_cell_y": 21, "size": 2, "door_point_x": 9.5, "door_point_y": 21.0 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -38.318078618607409, 36.670487645328578 ], [ -38.318078618607409, 36.67059572381487 ], [ -38.318348113192641, 36.67059572381487 ], [ -38.318348113192641, 36.670487645328578 ], [ -38.318078618607409, 36.670487645328578 ] ] ] } },
+{ "type": "Feature", "properties": { "index": "r-x6-y20", "id": "r-x6-y20", "building_type": "retail", "door_cell_x": 6, "door_cell_y": 21, "size": 4, "door_point_x": 6.5, "door_point_y": 21.0 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -38.318482860485254, 36.670379566690485 ], [ -38.318482860485254, 36.67059572381487 ], [ -38.318752355070494, 36.67059572381487 ], [ -38.318752355070494, 36.670379566690485 ], [ -38.318482860485254, 36.670379566690485 ] ] ] } },
+{ "type": "Feature", "properties": { "index": "r-x4-y20", "id": "r-x4-y20", "building_type": "retail", "door_cell_x": 4, "door_cell_y": 21, "size": 2, "door_point_x": 4.5, "door_point_y": 21.0 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -38.318752355070494, 36.670487645328578 ], [ -38.318752355070494, 36.67059572381487 ], [ -38.319021849655726, 36.67059572381487 ], [ -38.319021849655726, 36.670487645328578 ], [ -38.318752355070494, 36.670487645328578 ] ] ] } },
+{ "type": "Feature", "properties": { "index": "r-x4-y19", "id": "r-x4-y19", "building_type": "retail", "door_cell_x": 4, "door_cell_y": 18, "size": 2, "door_point_x": 4.5, "door_point_y": 19.0 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -38.318752355070494, 36.670379566690485 ], [ -38.318752355070494, 36.670487645328578 ], [ -38.319021849655726, 36.670487645328578 ], [ -38.319021849655726, 36.670379566690485 ], [ -38.318752355070494, 36.670379566690485 ] ] ] } },
+{ "type": "Feature", "properties": { "index": "r-x2-y19", "id": "r-x2-y19", "building_type": "retail", "door_cell_x": 2, "door_cell_y": 18, "size": 2, "door_point_x": 2.5, "door_point_y": 19.0 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -38.319021849655726, 36.670379566690485 ], [ -38.319021849655726, 36.67059572381487 ], [ -38.319156596948346, 36.67059572381487 ], [ -38.319156596948346, 36.670379566690485 ], [ -38.319021849655726, 36.670379566690485 ] ] ] } },
+{ "type": "Feature", "properties": { "index": "r-x1-y19", "id": "r-x1-y19", "building_type": "retail", "door_cell_x": 1, "door_cell_y": 18, "size": 2, "door_point_x": 1.5, "door_point_y": 19.0 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -38.319156596948346, 36.670379566690485 ], [ -38.319156596948346, 36.67059572381487 ], [ -38.319291344240966, 36.67059572381487 ], [ -38.319291344240966, 36.670379566690485 ], [ -38.319156596948346, 36.670379566690485 ] ] ] } },
+{ "type": "Feature", "properties": { "index": "r-x2-y17", "id": "r-x2-y17", "building_type": "retail", "door_cell_x": 3, "door_cell_y": 17, "size": 4, "door_point_x": 3.0, "door_point_y": 17.5 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -38.319021849655726, 36.670055329865427 ], [ -38.319021849655726, 36.670271487900592 ], [ -38.319291344240966, 36.670271487900592 ], [ -38.319291344240966, 36.670055329865427 ], [ -38.319021849655726, 36.670055329865427 ] ] ] } },
+{ "type": "Feature", "properties": { "index": "r-x2-y15", "id": "r-x2-y15", "building_type": "retail", "door_cell_x": 3, "door_cell_y": 15, "size": 2, "door_point_x": 3.0, "door_point_y": 15.5 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -38.319021849655726, 36.66994725062014 ], [ -38.319021849655726, 36.670055329865427 ], [ -38.319291344240966, 36.670055329865427 ], [ -38.319291344240966, 36.66994725062014 ], [ -38.319021849655726, 36.66994725062014 ] ] ] } },
+{ "type": "Feature", "properties": { "index": "r-x2-y14", "id": "r-x2-y14", "building_type": "retail", "door_cell_x": 3, "door_cell_y": 14, "size": 2, "door_point_x": 3.0, "door_point_y": 14.5 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -38.319021849655726, 36.669839171223067 ], [ -38.319021849655726, 36.66994725062014 ], [ -38.319291344240966, 36.66994725062014 ], [ -38.319291344240966, 36.669839171223067 ], [ -38.319021849655726, 36.669839171223067 ] ] ] } },
+{ "type": "Feature", "properties": { "index": "r-x2-y12", "id": "r-x2-y12", "building_type": "retail", "door_cell_x": 3, "door_cell_y": 12, "size": 4, "door_point_x": 3.0, "door_point_y": 12.5 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -38.319021849655726, 36.669623011973513 ], [ -38.319021849655726, 36.669839171223067 ], [ -38.319291344240966, 36.669839171223067 ], [ -38.319291344240966, 36.669623011973513 ], [ -38.319021849655726, 36.669623011973513 ] ] ] } },
+{ "type": "Feature", "properties": { "index": "r-x2-y11", "id": "r-x2-y11", "building_type": "retail", "door_cell_x": 3, "door_cell_y": 11, "size": 2, "door_point_x": 3.0, "door_point_y": 11.5 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -38.319021849655726, 36.669514932121047 ], [ -38.319021849655726, 36.669623011973513 ], [ -38.319291344240966, 36.669623011973513 ], [ -38.319291344240966, 36.669514932121047 ], [ -38.319021849655726, 36.669514932121047 ] ] ] } },
+{ "type": "Feature", "properties": { "index": "r-x2-y10", "id": "r-x2-y10", "building_type": "retail", "door_cell_x": 3, "door_cell_y": 10, "size": 2, "door_point_x": 3.0, "door_point_y": 10.5 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -38.319021849655726, 36.669406852116779 ], [ -38.319021849655726, 36.669514932121047 ], [ -38.319291344240966, 36.669514932121047 ], [ -38.319291344240966, 36.669406852116779 ], [ -38.319021849655726, 36.669406852116779 ] ] ] } },
+{ "type": "Feature", "properties": { "index": "r-x2-y8", "id": "r-x2-y8", "building_type": "retail", "door_cell_x": 3, "door_cell_y": 8, "size": 4, "door_point_x": 3.0, "door_point_y": 8.5 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -38.319021849655726, 36.669190691652865 ], [ -38.319021849655726, 36.669406852116779 ], [ -38.319291344240966, 36.669406852116779 ], [ -38.319291344240966, 36.669190691652865 ], [ -38.319021849655726, 36.669190691652865 ] ] ] } },
+{ "type": "Feature", "properties": { "index": "r-x2-y7", "id": "r-x2-y7", "building_type": "retail", "door_cell_x": 3, "door_cell_y": 7, "size": 2, "door_point_x": 3.0, "door_point_y": 7.5 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -38.319021849655726, 36.669082611193211 ], [ -38.319021849655726, 36.669190691652865 ], [ -38.319291344240966, 36.669190691652865 ], [ -38.319291344240966, 36.669082611193211 ], [ -38.319021849655726, 36.669082611193211 ] ] ] } },
+{ "type": "Feature", "properties": { "index": "r-x1-y5", "id": "r-x1-y5", "building_type": "retail", "door_cell_x": 0, "door_cell_y": 5, "size": 3, "door_point_x": 1.0, "door_point_y": 5.5 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -38.319156596948346, 36.668758368903482 ], [ -38.319156596948346, 36.669082611193211 ], [ -38.319291344240966, 36.669082611193211 ], [ -38.319291344240966, 36.668758368903482 ], [ -38.319156596948346, 36.668758368903482 ] ] ] } },
+{ "type": "Feature", "properties": { "index": "r-x2-y6", "id": "r-x2-y6", "building_type": "retail", "door_cell_x": 3, "door_cell_y": 6, "size": 1, "door_point_x": 3.0, "door_point_y": 6.5 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -38.319021849655726, 36.668974530581764 ], [ -38.319021849655726, 36.669082611193211 ], [ -38.319156596948346, 36.669082611193211 ], [ -38.319156596948346, 36.668974530581764 ], [ -38.319021849655726, 36.668974530581764 ] ] ] } },
+{ "type": "Feature", "properties": { "index": "r-x2-y5", "id": "r-x2-y5", "building_type": "retail", "door_cell_x": 3, "door_cell_y": 5, "size": 1, "door_point_x": 3.0, "door_point_y": 5.5 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -38.319021849655726, 36.668866449818516 ], [ -38.319021849655726, 36.668974530581764 ], [ -38.319156596948346, 36.668974530581764 ], [ -38.319156596948346, 36.668866449818516 ], [ -38.319021849655726, 36.668866449818516 ] ] ] } },
+{ "type": "Feature", "properties": { "index": "r-x2-y4", "id": "r-x2-y4", "building_type": "retail", "door_cell_x": 3, "door_cell_y": 4, "size": 1, "door_point_x": 3.0, "door_point_y": 4.5 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ -38.319021849655726, 36.668758368903482 ], [ -38.319021849655726, 36.668866449818516 ], [ -38.319156596948346, 36.668866449818516 ], [ -38.319156596948346, 36.668758368903482 ], [ -38.319021849655726, 36.668758368903482 ] ] ] } }
+]
+}
diff --git a/nomad/home_attribution.py b/nomad/home_attribution.py
index 90534184..074502d9 100644
--- a/nomad/home_attribution.py
+++ b/nomad/home_attribution.py
@@ -1,156 +1,156 @@
-import pandas as pd
-import nomad.io.base as loader
-from datetime import datetime, time, timedelta
-
-
-def nocturnal_stops(
- stops_table,
- dusk_hour=19,
- dawn_hour=6,
- start_datetime="start_datetime",
- end_datetime="end_datetime",
-):
- """Clip each stop to the nocturnal window between *dusk_hour* and *dawn_hour*.
-
- This helper assumes the caller already provides proper datetime columns. It
- merely slices the stop to the relevant night portion and recomputes the
- duration, dropping rows that do not intersect the night at all.
- """
-
- df = stops_table.copy()
-
- # Build candidate night windows for every stop
- df["_night_start"] = df.apply(
- lambda r: [
- pd.Timestamp(datetime.combine(d, time(dusk_hour)), tz=r[start_datetime].tzinfo)
- for d in pd.date_range(
- (r[start_datetime] - timedelta(days=1)).date(),
- r[end_datetime].date(),
- freq="D",
- )
- ],
- axis=1,
- )
-
- df = df.explode("_night_start", ignore_index=True)
- df["_night_end"] = df["_night_start"] + timedelta(hours=(24 - dusk_hour + dawn_hour))
-
- # Clip the stop to the nightly interval
- df[start_datetime] = df[[start_datetime, "_night_start"]].max(axis=1)
- df[end_datetime] = df[[end_datetime, "_night_end"]].min(axis=1)
-
- df["duration"] = (
- (df[end_datetime] - df[start_datetime]).dt.total_seconds() // 60
- ).astype(int)
-
- return df[df["duration"] > 0].drop(columns=["_night_start", "_night_end"])
-
-
-def compute_candidate_homes(
- stops_table,
- dusk_hour=19,
- dawn_hour=6,
- traj_cols=None,
- **kwargs,
-):
- """Aggregate nightly presence statistics for home inference.
-
- Column names are resolved through *traj_cols* or keyword overrides and no
- type coercion beyond what is strictly necessary for the calculation is
- performed.
- """
-
- stops = stops_table.copy()
-
- # Resolve column names
- traj_cols = loader._parse_traj_cols(stops.columns, traj_cols, kwargs)
- loader._has_time_cols(stops.columns, traj_cols)
-
- t_key, use_datetime = loader._fallback_time_cols_dt(stops.columns, traj_cols, kwargs)
- end_t_key = "end_datetime" if use_datetime else "end_timestamp"
-
- # Ensure we can compute an end time
- end_col_present = loader._has_end_cols(stops.columns, traj_cols)
- duration_col_present = loader._has_duration_cols(stops.columns, traj_cols)
- if not (end_col_present or duration_col_present):
- raise ValueError("stops_table must provide either an end time or a duration.")
-
- if not end_col_present:
- dur_col = traj_cols["duration"]
- if use_datetime:
- stops[end_t_key] = stops[traj_cols[t_key]] + pd.to_timedelta(stops[dur_col], unit="m")
- else:
- stops[end_t_key] = stops[traj_cols[t_key]] + stops[dur_col] * 60
-
- # Nocturnal clipping
- stops_night = nocturnal_stops(
- stops,
- dusk_hour=dusk_hour,
- dawn_hour=dawn_hour,
- start_datetime=traj_cols[t_key],
- end_datetime=end_t_key,
- )
-
- # Dates and ISO weeks (convert timestamps if needed)
- if use_datetime:
- dt = stops_night[traj_cols[t_key]]
- else:
- dt = pd.to_datetime(stops_night[traj_cols[t_key]], unit="s", utc=True)
-
- stops_night["_date"] = dt.dt.date
- stops_night["_iso_week"] = dt.dt.isocalendar().week
-
- out = (
- stops_night.groupby([traj_cols["user_id"], traj_cols["location_id"]], as_index=False)
- .agg(
- num_nights=("_date", "nunique"),
- num_weeks=("_iso_week", "nunique"),
- total_duration=(traj_cols["duration"], "sum"),
- )
- )
-
- return out
-
-
-
-def select_home(
- candidate_homes,
- stops_table,
- min_days,
- min_weeks,
- traj_cols=None,
- **kwargs,
-):
- """Select a single home location per user."""
-
- traj_cols = loader._parse_traj_cols(candidate_homes.columns, traj_cols, kwargs)
-
- # Last observation date
- t_key, use_datetime = loader._fallback_time_cols_dt(stops_table.columns, traj_cols, kwargs)
- dt_series = (
- stops_table[traj_cols[t_key]]
- if use_datetime
- else pd.to_datetime(stops_table[traj_cols[t_key]], unit="s", utc=True)
- )
- last_date = dt_series.dt.date.max()
-
- # Filter and rank
- filtered = (
- candidate_homes.loc[
- (candidate_homes["num_nights"] >= min_days)
- & (candidate_homes["num_weeks"] >= min_weeks)
- ]
- .sort_values(
- [traj_cols["user_id"], "num_nights", "total_duration"],
- ascending=[True, False, False],
- )
- )
-
- best = (
- filtered.drop_duplicates(traj_cols["user_id"], keep="first")
- .assign(home_date=last_date)
- .reset_index(drop=True)
- )
-
- return best[[traj_cols["user_id"], traj_cols["location_id"], "home_date"]]
-
+import pandas as pd
+import nomad.io.base as loader
+from datetime import datetime, time, timedelta
+
+
+def nocturnal_stops(
+ stops_table,
+ dusk_hour=19,
+ dawn_hour=6,
+ start_datetime="start_datetime",
+ end_datetime="end_datetime",
+):
+ """Clip each stop to the nocturnal window between *dusk_hour* and *dawn_hour*.
+
+ This helper assumes the caller already provides proper datetime columns. It
+ merely slices the stop to the relevant night portion and recomputes the
+ duration, dropping rows that do not intersect the night at all.
+ """
+
+ df = stops_table.copy()
+
+ # Build candidate night windows for every stop
+ df["_night_start"] = df.apply(
+ lambda r: [
+ pd.Timestamp(datetime.combine(d, time(dusk_hour)), tz=r[start_datetime].tzinfo)
+ for d in pd.date_range(
+ (r[start_datetime] - timedelta(days=1)).date(),
+ r[end_datetime].date(),
+ freq="D",
+ )
+ ],
+ axis=1,
+ )
+
+ df = df.explode("_night_start", ignore_index=True)
+ df["_night_end"] = df["_night_start"] + timedelta(hours=(24 - dusk_hour + dawn_hour))
+
+ # Clip the stop to the nightly interval
+ df[start_datetime] = df[[start_datetime, "_night_start"]].max(axis=1)
+ df[end_datetime] = df[[end_datetime, "_night_end"]].min(axis=1)
+
+ df["duration"] = (
+ (df[end_datetime] - df[start_datetime]).dt.total_seconds() // 60
+ ).astype(int)
+
+ return df[df["duration"] > 0].drop(columns=["_night_start", "_night_end"])
+
+
+def compute_candidate_homes(
+ stops_table,
+ dusk_hour=19,
+ dawn_hour=6,
+ traj_cols=None,
+ **kwargs,
+):
+ """Aggregate nightly presence statistics for home inference.
+
+ Column names are resolved through *traj_cols* or keyword overrides and no
+ type coercion beyond what is strictly necessary for the calculation is
+ performed.
+ """
+
+ stops = stops_table.copy()
+
+ # Resolve column names
+ traj_cols = loader._parse_traj_cols(stops.columns, traj_cols, kwargs)
+ loader._has_time_cols(stops.columns, traj_cols)
+
+ t_key, use_datetime = loader._fallback_time_cols_dt(stops.columns, traj_cols, kwargs)
+ end_t_key = "end_datetime" if use_datetime else "end_timestamp"
+
+ # Ensure we can compute an end time
+ end_col_present = loader._has_end_cols(stops.columns, traj_cols)
+ duration_col_present = loader._has_duration_cols(stops.columns, traj_cols)
+ if not (end_col_present or duration_col_present):
+ raise ValueError("stops_table must provide either an end time or a duration.")
+
+ if not end_col_present:
+ dur_col = traj_cols["duration"]
+ if use_datetime:
+ stops[end_t_key] = stops[traj_cols[t_key]] + pd.to_timedelta(stops[dur_col], unit="m")
+ else:
+ stops[end_t_key] = stops[traj_cols[t_key]] + stops[dur_col] * 60
+
+ # Nocturnal clipping
+ stops_night = nocturnal_stops(
+ stops,
+ dusk_hour=dusk_hour,
+ dawn_hour=dawn_hour,
+ start_datetime=traj_cols[t_key],
+ end_datetime=end_t_key,
+ )
+
+ # Dates and ISO weeks (convert timestamps if needed)
+ if use_datetime:
+ dt = stops_night[traj_cols[t_key]]
+ else:
+ dt = pd.to_datetime(stops_night[traj_cols[t_key]], unit="s", utc=True)
+
+ stops_night["_date"] = dt.dt.date
+ stops_night["_iso_week"] = dt.dt.isocalendar().week
+
+ out = (
+ stops_night.groupby([traj_cols["user_id"], traj_cols["location_id"]], as_index=False)
+ .agg(
+ num_nights=("_date", "nunique"),
+ num_weeks=("_iso_week", "nunique"),
+ total_duration=(traj_cols["duration"], "sum"),
+ )
+ )
+
+ return out
+
+
+
+def select_home(
+ candidate_homes,
+ stops_table,
+ min_days,
+ min_weeks,
+ traj_cols=None,
+ **kwargs,
+):
+ """Select a single home location per user."""
+
+ traj_cols = loader._parse_traj_cols(candidate_homes.columns, traj_cols, kwargs)
+
+ # Last observation date
+ t_key, use_datetime = loader._fallback_time_cols_dt(stops_table.columns, traj_cols, kwargs)
+ dt_series = (
+ stops_table[traj_cols[t_key]]
+ if use_datetime
+ else pd.to_datetime(stops_table[traj_cols[t_key]], unit="s", utc=True)
+ )
+ last_date = dt_series.dt.date.max()
+
+ # Filter and rank
+ filtered = (
+ candidate_homes.loc[
+ (candidate_homes["num_nights"] >= min_days)
+ & (candidate_homes["num_weeks"] >= min_weeks)
+ ]
+ .sort_values(
+ [traj_cols["user_id"], "num_nights", "total_duration"],
+ ascending=[True, False, False],
+ )
+ )
+
+ best = (
+ filtered.drop_duplicates(traj_cols["user_id"], keep="first")
+ .assign(home_date=last_date)
+ .reset_index(drop=True)
+ )
+
+ return best[[traj_cols["user_id"], traj_cols["location_id"], "home_date"]]
+
diff --git a/nomad/visit_attribution/visit_attribution.py b/nomad/visit_attribution/visit_attribution.py
index 0daf845f..26c1ba5a 100644
--- a/nomad/visit_attribution/visit_attribution.py
+++ b/nomad/visit_attribution/visit_attribution.py
@@ -1,441 +1,441 @@
-import geopandas as gpd
-import nomad.io.base as loader
-import nomad.constants as constants
-import warnings
-import pandas as pd
-import nomad.io.base as loader
-import pyproj
-import pdb
-
-# TO DO: change to stops_to_poi
-def point_in_polygon(data, poi_table, method='centroid', data_crs=None, max_distance=0,
- cluster_label=None, location_id=None, traj_cols=None, **kwargs):
- """
- Assign each stop or cluster of pings in `data` to a polygon in `poi_table`,
- either by the cluster’s centroid location or by the most frequent polygon hit.
-
- Parameters
- ----------
- data : pd.DataFrame or gpd.GeoDataFrame
- A table of pings (with optional stop/duration columns) or stops,
- indexed by observation or cluster.
- poi_table : gpd.GeoDataFrame
- Polygons to match against, with CRS set and optional ID column.
- method : {'centroid', 'majority'}, default 'centroid'
- ‘centroid’ uses each cluster’s mean point; ‘majority’ picks the polygon
- most often visited within each cluster (only for ping data).
- data_crs : str or pyproj.CRS, optional
- CRS for `data` when it is a plain DataFrame; ignored if `data` is a GeoDataFrame.
- max_distance : float, default 0
- Search radius for nearest‐neighbor fall-back; zero triggers strict
- point-in-polygon matching.
- cluster_label : str, optional
- Column name holding cluster IDs in ping data; inferred from `data` if absent.
- location_id : str, optional
- Column in `poi_table` containing the output ID; uses the GeoDataFrame index if None.
- traj_cols : list of str, optional
- Names of the coordinate columns in `data` when it is a DataFrame.
- **kwargs
- Passed through to `poi_map` or the trajectory-column parser.
-
- Returns
- -------
- pd.Series
- Indexed like `data`, giving the matched polygon ID for each stop or ping.
- Points or clusters that fall outside every polygon or beyond `max_distance`
- are set to NaN.
- """
- # check if it is stop table
- traj_cols_w_deflts = loader._parse_traj_cols(data.columns, traj_cols, kwargs)
- end_col_present = loader._has_end_cols(data.columns, traj_cols_w_deflts)
- duration_col_present = loader._has_duration_cols(data.columns, traj_cols_w_deflts)
- is_stop_table = (end_col_present or duration_col_present)
-
- if is_stop_table:
- # is stop table
- if method=='majority':
- raise TypeError("Method `majority' requires ping data with cluster labels,\
- but a stop table was provided")
- elif method=='centroid':
- stop_table = data.copy()
- location = poi_map(
- data=stop_table,
- poi_table=poi_table,
- max_distance=max_distance,
- data_crs=data_crs,
- location_id=location_id,
- traj_cols=traj_cols,
- **kwargs)
-
- return location
-
- else:
- raise ValueError(f"Method {method} not among implemented methods: `centroid' and `majority'")
-
- else:
- # is labeled pings
- if not cluster_label: #try defaults and raise
- if 'cluster_label' in data.columns:
- cluster_label = 'cluster_label'
- elif 'cluster' in data.columns:
- cluster_label = 'cluster'
- else:
- raise ValueError(f"Argument `cluster_label` is required for visit attribution of labeled pings.")
-
- clustered_pings = data.loc[data[cluster_label] != -1].copy()
- if method=='majority':
- location = poi_map(
- data=clustered_pings,
- poi_table=poi_table,
- max_distance=max_distance,
- data_crs=data_crs,
- location_id=location_id,
- traj_cols=traj_cols,
- **kwargs
- )
- loc_col = location.name
- clustered_pings = clustered_pings.join(location)
-
- location = clustered_pings.groupby(cluster_label)[loc_col].agg(
- lambda x: x.mode().iloc[0] if not x.mode().empty else None)
-
- return data[[cluster_label]].join(location, on=cluster_label)[loc_col]
-
- elif method=='centroid': # should be medoid?
- loader._has_spatial_cols(data.columns, traj_cols, exclusive=True)
- use_lon_lat = ('latitude' in traj_cols and 'longitude' in traj_cols)
- if use_lon_lat:
- warnings.warn("Spherical ('longitude', 'latitude') coordinates were passed. Centroids will not agree with geodetic distances")
- centr_data = clustered_pings.groupby(cluster_label).agg({traj_cols['longitude']:'mean', traj_cols['latitude']:'mean'})
- else:
- centr_data = clustered_pings.groupby(cluster_label).agg({traj_cols['x']:'mean', traj_cols['y']:'mean'})
-
- location = poi_map(
- data=centr_data,
- poi_table=poi_table,
- max_distance=max_distance,
- data_crs=data_crs,
- location_id=location_id,
- traj_cols=traj_cols,
- **kwargs)
- loc_col = location.name
-
- return data[[cluster_label]].join(location, on=cluster_label)[loc_col]
-
- else:
- raise ValueError(f"Method {method} not among implemented methods: `centroid' and `majority'")
-
- return None
-
-# change to point_in_polygon, move to filters.py
-def poi_map(data, poi_table, max_distance=0, data_crs=None, location_id=None, traj_cols=None, **kwargs):
- """
- Assign each point in `data` to a polygon in `poi_table`, using containment when
- `max_distance==0` or the nearest neighbor within `max_distance` otherwise.
-
- Parameters
- ----------
- data : pd.DataFrame or gpd.GeoDataFrame
- Input points, either as a DataFrame with coordinate columns or a GeoDataFrame.
- poi_table : gpd.GeoDataFrame
- Polygons to match against, indexed or with `location_id` column.
- traj_cols : list of str, optional
- Names of the coordinate columns in `data` when it is a DataFrame.
- max_distance : float, default 0
- Maximum search radius for nearest‐neighbor matching; zero invokes a point‐in‐polygon test.
- data_crs : str or pyproj.CRS, optional
- CRS for `data` if it is a DataFrame; ignored for GeoDataFrames.
- location_id : str, optional
- Name of the geometry ID column in `poi_table`; uses the GeoDataFrame index if not provided.
- **kwargs
- Passed to trajectory‐column parsing helper.
-
- Returns
- -------
- pd.Series
- Indexed like `data`, with each entry set to the matching polygon’s ID (from
- `location_id` or `poi_table.index`). Points not contained or beyond `max_distance`
- yield NaN. When multiple polygons overlap a point, only the first match is kept.
- """
- # column name handling
- traj_cols = loader._parse_traj_cols(data.columns, traj_cols, kwargs, defaults={})
-
- if poi_table.crs is None:
- raise ValueError(f"poi_table must have crs attribute for spatial join.")
-
- # Determine which geometry to use
- if isinstance(data, gpd.GeoDataFrame):
- pings_gdf = data.geometry
- # if geodataframe, data_crs is ignored but we Raise if conflicting crs because it is suspect
- if data_crs and not pyproj.CRS(pings_gdf.crs).equals(pyproj.CRS(data_crs)):
- raise ValueError(f"Provided CRS {data_crs} conflicts with traj CRS {data.crs}.")
-
- if isinstance(data, pd.DataFrame):
- # Parse traj_cols with kwargs to get spatial column mappings (using empty defaults to avoid conflicts)
- traj_cols_w_deflts = loader._parse_traj_cols(data.columns, traj_cols, kwargs, defaults={}, warn=False)
- # check that user specified x,y or lat, lon but not both
- loader._has_spatial_cols(data.columns, traj_cols_w_deflts, exclusive=True)
-
- use_lon_lat = ('latitude' in traj_cols_w_deflts and 'longitude' in traj_cols_w_deflts)
-
- if use_lon_lat:
- if data_crs:
- data_crs = pyproj.CRS(data_crs)
- if data_crs.is_projected:
- warnings.warn(f"Provided CRS {data_crs.name} is a projected coordinate system, but "
- "spherical ('longitude', 'latitude') coordinates were passed. Did you mean to pass data_crs='EPSG:4326'?"
- )
- else: # we assume EPSG:4326
- warnings.warn("Argument `data_crs` not provided, assuming EPSG:4326 for ('longitude', 'latitude') coordinates")
- data_crs = pyproj.CRS("EPSG:4326")
-
- pings_gdf= gpd.points_from_xy(
- data[traj_cols_w_deflts['longitude']],
- data[traj_cols_w_deflts['latitude']],
- crs=data_crs) # order matters: lon first
- else:
- if not data_crs:
- raise ValueError(f"data_crs must be provided when using projected coordinates.")
- data_crs = pyproj.CRS(data_crs)
- if data_crs.is_geographic:
- warnings.warn(f"Provided CRS {data_crs.name} is a geographic coordinate system. "
- "This will lead to errors if passed coordinates ('x', 'y') are projected."
- f"Did you mean to use {poi_table.crs}?"
- )
- pings_gdf= gpd.points_from_xy(
- data[traj_cols_w_deflts['x']],
- data[traj_cols_w_deflts['y']],
- crs=data_crs)
- else:
- raise TypeError("`data` must be a pandas DataFrame or a GeoDataFrame.")
-
- if not data_crs.equals(pyproj.CRS(poi_table.crs)):
- poi_table = poi_table.to_crs(data_crs)
- warnings.warn("CRS for `poi_table` does not match crs for `data`. Reprojecting...")
-
- use_poi_idx = True
- if location_id is not None:
- loc_col = location_id
- if location_id in poi_table:
- use_poi_idx=False
- else:
- warnings.warn(f"{location_id} column not found in {poi_table.columns}, defaulting to poi_table.index for spatial join.")
- else:
- loc_col = 'location_id'
- warnings.warn(f"location_id column not provided, defaulting to poi_table.index for spatial join.")
-
-
- if max_distance>0:
- if data_crs.is_geographic:
- warnings.warn(f"Provided CRS {data_crs.name} is a geographic coordinate system. "
- "This will lead to errors when computing euclidean distances."
- f"Did you mean to use `max_distance=0'?"
- )
-
- p_idx, idx = poi_table.sindex.nearest(pings_gdf, max_distance=max_distance, return_all=False)
- if use_poi_idx:
- s = pd.Series(poi_table.iloc[idx].index, index=data.index[p_idx])
- s.name = loc_col
- else:
- s = pd.Series(poi_table.iloc[idx][loc_col].values, index=data.index[p_idx])
- s.name = loc_col
-
- return s.reindex(data.index)
-
- else: # default max_distance = 0
- p_idx, idx = poi_table.sindex.query(pings_gdf, predicate="within") # boundary counts; use "contains" to exclude it
- if use_poi_idx:
- s = pd.Series(poi_table.iloc[idx].index, index=data.index[p_idx]) # might have duplicates
- s = s.loc[~s.index.duplicated()]
- s.name = loc_col
- else:
- s = pd.Series(poi_table.iloc[idx][loc_col].values, index=data.index[p_idx])
- s = s.loc[~s.index.duplicated()]
- s.name = loc_col
- return s.reindex(data.index)
-
-def oracle_map(data, true_visits, traj_cols=None, **kwargs):
- """
- Map elements in traj to ground truth location based solely on time.
-
- Parameters
- ----------
- data : pd.DataFrame
- The trajectory DataFrame containing x and y coordinates.
- true_visits : pd.DataFrame
- A visitation table containing location IDs, start times, and durations/end times.
- traj_cols : list
- The columns in the trajectory DataFrame to be used for mapping.
- **kwargs : dict
- Additional keyword arguments.
-
- Returns
- -------
- pd.Series
- A Series containing the location IDs corresponding to the pings in the trajectory.
- """
- true_visits = true_visits.copy()
- data = data.copy()
-
- # determine temporal columns to use
- t_key_l, use_datetime_l = loader._fallback_time_cols_dt(data.columns, traj_cols, kwargs)
- t_key_r, use_datetime_r = loader._fallback_time_cols_dt(true_visits.columns, traj_cols, kwargs)
-
-
- traj_cols = loader._parse_traj_cols(true_visits.columns, traj_cols, kwargs) #load defaults
- if use_datetime_l != use_datetime_r:
- raise ValueError(f"Mismatch in temporal columns {traj_cols[t_key_l]} vs {traj_cols[t_key_r]}.")
-
- # check is diary table
- end_col_present = loader._has_end_cols(true_visits.columns, traj_cols)
- duration_col_present = loader._has_duration_cols(true_visits.columns, traj_cols)
- if not (end_col_present or duration_col_present):
- raise ValueError("Missing required (end or duration) temporal columns for true_visits dataframe.")
-
- if traj_cols['location_id'] not in true_visits.columns:
- raise ValueError(f"Missing {traj_cols[location_id]} column in {true_visits.columns}."
- "pass `location_id` as keyword argument or in traj_cols."
- )
-
- end_t_key = 'end_datetime' if use_datetime_r else 'end_timestamp'
- if not end_col_present:
- if use_datetime_r:
- true_visits[end_t_key] = true_visits[traj_cols[t_key_r]] + pd.to_timedelta(true_visits[traj_cols['duration']]*60, unit='s')
- else:
- true_visits[end_t_key] = true_visits[traj_cols[t_key_r]] + true_visits[traj_cols['duration']]*60
-
-
- # t_key_l and t_key_r match in type, and end_t_key exists
- data[traj_cols['location_id']] = pd.NA
- for idx, row in true_visits.loc[~true_visits[traj_cols['location_id']].isna()].iterrows():
- start, end, loc = row[traj_cols[t_key_r]], row[traj_cols[end_t_key]], row[traj_cols['location_id']]
- data.loc[(data[traj_cols[t_key_l]]>=start)&(data[traj_cols[t_key_l]]= 0:
- day_parts = [(start.time(), time.max), (time.min, end.time())]
- else:
- full_days = 0
- day_parts = [(start.time(), end.time()), (start.time(), start.time())]
- return full_days, day_parts
-
-def duration_at_night_fast(start, end, dawn_hour = 6, dusk_hour = 19):
- full_days, (part1, part2) = slice_datetimes_interval_fast(start, end)
- total_dawn_time = dawn_time(part1, dawn_hour)+dawn_time(part2, dawn_hour)
- total_dusk_time = dusk_time(part1, dusk_hour)+dusk_time(part2, dusk_hour)
- return int(total_dawn_time + total_dusk_time + full_days*(dawn_hour + (24-dusk_hour))*60)
-
-def clip_stays_date(traj, dates, dawn_hour = 6, dusk_hour = 19):
- start = pd.to_datetime(traj['start_datetime'])
- duration = traj['duration']
-
- # Ensure timezone-aware clipping bounds
- tz = start.dt.tz
- date_0 = pd.Timestamp(parse(dates[0]), tz=tz)
- date_1 = pd.Timestamp(parse(dates[1]), tz=tz)
-
- end = start + pd.to_timedelta(duration, unit='m')
-
- # Clip to date range
- start_clipped = start.clip(lower=date_0, upper=date_1)
- end_clipped = end.clip(lower=date_0, upper=date_1)
-
- # Recompute durations
- duration_clipped = ((end_clipped - start_clipped).dt.total_seconds() // 60).astype(int)
- duration_night = [duration_at_night_fast(s, e, dawn_hour, dusk_hour) for s, e in zip(start_clipped, end_clipped)]
-
- return pd.DataFrame({
- 'id': traj['id'].values,
- 'start': start_clipped,
- 'duration': duration_clipped,
- 'duration_night': duration_night,
- 'location': traj['location']
- })
-
-def count_nights(usr_polygon, dawn_hour = 6, dusk_hour = 19, min_dwell = 10):
- nights = set()
- weeks = set()
-
- for _, row in usr_polygon.iterrows():
- d = row['start']
- d = pd.to_datetime(d)
- full_days, (part1, part2) = slice_datetimes_interval_fast(d, d + pd.to_timedelta(row['duration'], unit='m'))
-
- dawn1 = dawn_time(part1, dawn_hour)
- dusk1 = dusk_time(part1, dusk_hour)
- dawn2 = dawn_time(part2, dawn_hour)
- dusk2 = dusk_time(part2, dusk_hour)
-
- if full_days == 0:
- if dawn1 >= min_dwell:
- night = d - timedelta(days=1)
- nights.add(night.date())
- weeks.add((night - timedelta(days=night.weekday())).date())
-
- if (dusk1 + dawn2) >= min_dwell:
- night = d
- nights.add(night.date())
- weeks.add((night - timedelta(days=night.weekday())).date())
-
- if dusk2 >= min_dwell:
- night = d + timedelta(days=1)
- nights.add(night.date())
- weeks.add((night - timedelta(days=night.weekday())).date())
- else:
- if dawn1 >= min_dwell:
- night = d - timedelta(days=1)
- nights.add(night.date())
- weeks.add((night - timedelta(days=night.weekday())).date())
-
- for t in range(full_days + 1):
- night = d + timedelta(days=t)
- nights.add(night.date())
- weeks.add((night - timedelta(days=night.weekday())).date())
-
- if dusk2 >= min_dwell:
- night = d + timedelta(days=full_days + 1)
- nights.add(night.date())
- weeks.add((night - timedelta(days=night.weekday())).date())
-
- identifier = usr_polygon['id'].iloc[0]
- location = usr_polygon['location'].iloc[0]
-
- return pd.DataFrame([{
- 'id': identifier,
- 'location': location,
- 'night_count': len(nights),
- 'week_count': len(weeks)
- }])
-
-
-def night_stops(stop_table, user='user', dawn_hour = 6, dusk_hour = 19, min_dwell = 10):
- # Date range
- start_date = str(stop_table['start_datetime'].min().date())
- weeks = stop_table['start_datetime'].dt.strftime('%Y-%U')
- num_weeks = weeks.nunique()
-
- # turn dates to datetime
- stop_table['start_datetime'] = pd.to_datetime(stop_table['start_datetime'])
-
- if 'id' not in stop_table.columns:
- stop_table['id'] = user
-
- end_date = (parse(start_date) + timedelta(weeks=num_weeks)).date().isoformat()
- dates = (start_date, end_date)
- df_clipped = clip_stays_date(stop_table, dates, dawn_hour, dusk_hour)
- df_clipped = df_clipped[(df_clipped['duration'] > 0) & (df_clipped['duration_night'] >= 15)]
-
- return df_clipped.groupby(['id', 'location'], group_keys=False).apply(count_nights(dawn_hour, dusk_hour, min_dwell)).reset_index(drop=True)
-
+import geopandas as gpd
+import nomad.io.base as loader
+import nomad.constants as constants
+import warnings
+import pandas as pd
+import nomad.io.base as loader
+import pyproj
+import pdb
+
+# TO DO: change to stops_to_poi
+def point_in_polygon(data, poi_table, method='centroid', data_crs=None, max_distance=0,
+                     cluster_label=None, location_id=None, traj_cols=None, **kwargs):
+    """
+    Assign each stop or cluster of pings in `data` to a polygon in `poi_table`,
+    either by the cluster's centroid location or by the most frequent polygon hit.
+
+    Parameters
+    ----------
+    data : pd.DataFrame or gpd.GeoDataFrame
+        Either a stop table (it has end-time or duration columns) or a table
+        of pings carrying a cluster label column.
+    poi_table : gpd.GeoDataFrame
+        Polygons to match against, with CRS set and optional ID column.
+    method : {'centroid', 'majority'}, default 'centroid'
+        'centroid' uses each cluster's mean point; 'majority' picks the polygon
+        most often hit by the pings of each cluster (only for ping data).
+    data_crs : str or pyproj.CRS, optional
+        CRS for `data` when it is a plain DataFrame; forwarded to `poi_map`.
+    max_distance : float, default 0
+        Search radius for nearest-neighbor fall-back; zero triggers strict
+        point-in-polygon matching.
+    cluster_label : str, optional
+        Column holding cluster IDs in ping data. When omitted, 'cluster_label'
+        and then 'cluster' are tried. Pings labeled -1 are left unattributed.
+    location_id : str, optional
+        Column in `poi_table` containing the output ID; uses the GeoDataFrame index if None.
+    traj_cols : dict, optional
+        Mapping from canonical column keys (e.g. 'x', 'y', 'latitude',
+        'longitude') to the column names used in `data`.
+    **kwargs
+        Passed through to `poi_map` and the trajectory-column parser.
+
+    Returns
+    -------
+    pd.Series
+        Indexed like `data`, giving the matched polygon ID for each stop or ping.
+        Points or clusters that fall outside every polygon or beyond `max_distance`
+        are set to NaN.
+
+    Raises
+    ------
+    TypeError
+        If `method='majority'` is requested for a stop table.
+    ValueError
+        If `method` is unknown, or no cluster label column can be resolved
+        for ping data.
+    """
+    # A table with end-time or duration columns is treated as a stop table.
+    traj_cols_w_deflts = loader._parse_traj_cols(data.columns, traj_cols, kwargs)
+    end_col_present = loader._has_end_cols(data.columns, traj_cols_w_deflts)
+    duration_col_present = loader._has_duration_cols(data.columns, traj_cols_w_deflts)
+    is_stop_table = (end_col_present or duration_col_present)
+
+    if is_stop_table:
+        if method == 'majority':
+            raise TypeError("Method `majority' requires ping data with cluster labels,\
+ but a stop table was provided")
+        if method != 'centroid':
+            raise ValueError(f"Method {method} not among implemented methods: `centroid' and `majority'")
+
+        # Each stop already is a single point, so it is matched directly.
+        stop_table = data.copy()
+        return poi_map(
+            data=stop_table,
+            poi_table=poi_table,
+            max_distance=max_distance,
+            data_crs=data_crs,
+            location_id=location_id,
+            traj_cols=traj_cols,
+            **kwargs)
+
+    # Labeled pings: resolve the cluster label column, trying the defaults.
+    if not cluster_label:
+        if 'cluster_label' in data.columns:
+            cluster_label = 'cluster_label'
+        elif 'cluster' in data.columns:
+            cluster_label = 'cluster'
+        else:
+            raise ValueError(f"Argument `cluster_label` is required for visit attribution of labeled pings.")
+
+    # Label -1 is excluded from attribution; those rows end up as NaN.
+    clustered_pings = data.loc[data[cluster_label] != -1].copy()
+
+    if method == 'majority':
+        location = poi_map(
+            data=clustered_pings,
+            poi_table=poi_table,
+            max_distance=max_distance,
+            data_crs=data_crs,
+            location_id=location_id,
+            traj_cols=traj_cols,
+            **kwargs
+        )
+        loc_col = location.name
+        clustered_pings = clustered_pings.join(location)
+
+        def _most_common(values):
+            # First modal value, or None when every value in the cluster is missing.
+            modes = values.mode()
+            return modes.iloc[0] if not modes.empty else None
+
+        location = clustered_pings.groupby(cluster_label)[loc_col].agg(_most_common)
+
+        return data[[cluster_label]].join(location, on=cluster_label)[loc_col]
+
+    if method == 'centroid':  # should be medoid?
+        # Resolve the spatial columns from `traj_cols` and `kwargs` with empty
+        # defaults (the same call poi_map uses), so a `traj_cols` of None and
+        # keyword-supplied column names both work.
+        spatial_cols = loader._parse_traj_cols(data.columns, traj_cols, kwargs, defaults={}, warn=False)
+        loader._has_spatial_cols(data.columns, spatial_cols, exclusive=True)
+        use_lon_lat = ('latitude' in spatial_cols and 'longitude' in spatial_cols)
+        if use_lon_lat:
+            warnings.warn("Spherical ('longitude', 'latitude') coordinates were passed. Centroids will not agree with geodetic distances")
+            centr_data = clustered_pings.groupby(cluster_label).agg(
+                {spatial_cols['longitude']: 'mean', spatial_cols['latitude']: 'mean'})
+        else:
+            centr_data = clustered_pings.groupby(cluster_label).agg(
+                {spatial_cols['x']: 'mean', spatial_cols['y']: 'mean'})
+
+        location = poi_map(
+            data=centr_data,
+            poi_table=poi_table,
+            max_distance=max_distance,
+            data_crs=data_crs,
+            location_id=location_id,
+            traj_cols=traj_cols,
+            **kwargs)
+        loc_col = location.name
+
+        # Broadcast the per-cluster result back onto every ping of that cluster.
+        return data[[cluster_label]].join(location, on=cluster_label)[loc_col]
+
+    raise ValueError(f"Method {method} not among implemented methods: `centroid' and `majority'")
+
+
+# change to point_in_polygon, move to filters.py
+def poi_map(data, poi_table, max_distance=0, data_crs=None, location_id=None, traj_cols=None, **kwargs):
+    """
+    Assign each point in `data` to a polygon in `poi_table`, using containment when
+    `max_distance<=0` or the nearest neighbor within `max_distance` otherwise.
+
+    Parameters
+    ----------
+    data : pd.DataFrame or gpd.GeoDataFrame
+        Input points. A GeoDataFrame contributes its geometry column; a plain
+        DataFrame contributes its ('longitude', 'latitude') or ('x', 'y') columns.
+    poi_table : gpd.GeoDataFrame
+        Polygons to match against, indexed or with `location_id` column. Must have a CRS.
+    max_distance : float, default 0
+        Maximum search radius for nearest-neighbor matching; zero invokes a point-in-polygon test.
+    data_crs : str or pyproj.CRS, optional
+        CRS for `data` if it is a plain DataFrame. Required for ('x', 'y')
+        coordinates; EPSG:4326 is assumed (with a warning) for
+        ('longitude', 'latitude'). For a GeoDataFrame the geometry CRS is used,
+        and a conflicting `data_crs` raises.
+    location_id : str, optional
+        Name of the geometry ID column in `poi_table`; uses the GeoDataFrame index if not provided
+        or not found.
+    traj_cols : optional
+        Column-name mapping forwarded to the trajectory-column parser.
+    **kwargs
+        Passed to trajectory-column parsing helper.
+
+    Returns
+    -------
+    pd.Series
+        Indexed like `data`, with each entry set to the matching polygon's ID (from
+        `location_id` or `poi_table.index`). Points not contained or beyond `max_distance`
+        yield NaN. When multiple polygons overlap a point, only the first match is kept.
+
+    Raises
+    ------
+    ValueError
+        If `poi_table` has no CRS, if no CRS can be determined for `data`, or
+        if `data_crs` conflicts with the CRS of a GeoDataFrame.
+    TypeError
+        If `data` is neither a DataFrame nor a GeoDataFrame.
+    """
+    # column name handling
+    traj_cols = loader._parse_traj_cols(data.columns, traj_cols, kwargs, defaults={})
+
+    if poi_table.crs is None:
+        raise ValueError(f"poi_table must have crs attribute for spatial join.")
+
+    # Determine which geometry to use. GeoDataFrame is tested first and the
+    # plain-DataFrame case is an `elif`, because a GeoDataFrame is also a
+    # pd.DataFrame and would otherwise have its geometry overridden.
+    if isinstance(data, gpd.GeoDataFrame):
+        pings_gdf = data.geometry
+        if data.crs is None:
+            if not data_crs:
+                raise ValueError("`data` has no CRS; set one on the GeoDataFrame or pass `data_crs`.")
+            data_crs = pyproj.CRS(data_crs)
+        else:
+            geometry_crs = pyproj.CRS(data.crs)
+            # data_crs is redundant for a GeoDataFrame; a conflicting value is suspect, so raise.
+            if data_crs and not geometry_crs.equals(pyproj.CRS(data_crs)):
+                raise ValueError(f"Provided CRS {data_crs} conflicts with traj CRS {data.crs}.")
+            # From here on data_crs is always a pyproj.CRS (it is used below for reprojection).
+            data_crs = geometry_crs
+
+    elif isinstance(data, pd.DataFrame):
+        # Parse traj_cols with kwargs to get spatial column mappings (using empty defaults to avoid conflicts)
+        traj_cols_w_deflts = loader._parse_traj_cols(data.columns, traj_cols, kwargs, defaults={}, warn=False)
+        # check that user specified x,y or lat, lon but not both
+        loader._has_spatial_cols(data.columns, traj_cols_w_deflts, exclusive=True)
+
+        use_lon_lat = ('latitude' in traj_cols_w_deflts and 'longitude' in traj_cols_w_deflts)
+
+        if use_lon_lat:
+            if data_crs:
+                data_crs = pyproj.CRS(data_crs)
+                if data_crs.is_projected:
+                    warnings.warn(f"Provided CRS {data_crs.name} is a projected coordinate system, but "
+                                  "spherical ('longitude', 'latitude') coordinates were passed. Did you mean to pass data_crs='EPSG:4326'?"
+                                  )
+            else:  # we assume EPSG:4326
+                warnings.warn("Argument `data_crs` not provided, assuming EPSG:4326 for ('longitude', 'latitude') coordinates")
+                data_crs = pyproj.CRS("EPSG:4326")
+
+            pings_gdf = gpd.points_from_xy(
+                data[traj_cols_w_deflts['longitude']],
+                data[traj_cols_w_deflts['latitude']],
+                crs=data_crs)  # order matters: lon first
+        else:
+            if not data_crs:
+                raise ValueError(f"data_crs must be provided when using projected coordinates.")
+            data_crs = pyproj.CRS(data_crs)
+            if data_crs.is_geographic:
+                warnings.warn(f"Provided CRS {data_crs.name} is a geographic coordinate system. "
+                              "This will lead to errors if passed coordinates ('x', 'y') are projected."
+                              f"Did you mean to use {poi_table.crs}?"
+                              )
+            pings_gdf = gpd.points_from_xy(
+                data[traj_cols_w_deflts['x']],
+                data[traj_cols_w_deflts['y']],
+                crs=data_crs)
+    else:
+        raise TypeError("`data` must be a pandas DataFrame or a GeoDataFrame.")
+
+    # Bring the polygons into the CRS of the points before matching.
+    if not data_crs.equals(pyproj.CRS(poi_table.crs)):
+        poi_table = poi_table.to_crs(data_crs)
+        warnings.warn("CRS for `poi_table` does not match crs for `data`. Reprojecting...")
+
+    # Decide whether the output IDs come from a column or from poi_table.index.
+    use_poi_idx = True
+    if location_id is not None:
+        loc_col = location_id
+        if location_id in poi_table:
+            use_poi_idx = False
+        else:
+            warnings.warn(f"{location_id} column not found in {poi_table.columns}, defaulting to poi_table.index for spatial join.")
+    else:
+        loc_col = 'location_id'
+        warnings.warn(f"location_id column not provided, defaulting to poi_table.index for spatial join.")
+
+    nearest_mode = max_distance > 0
+    if nearest_mode:
+        if data_crs.is_geographic:
+            warnings.warn(f"Provided CRS {data_crs.name} is a geographic coordinate system. "
+                          "This will lead to errors when computing euclidean distances."
+                          f"Did you mean to use `max_distance=0'?"
+                          )
+        p_idx, idx = poi_table.sindex.nearest(pings_gdf, max_distance=max_distance, return_all=False)
+    else:  # default max_distance = 0
+        p_idx, idx = poi_table.sindex.query(pings_gdf, predicate="within")
+
+    # p_idx holds positions in `data`, idx the matching positions in `poi_table`.
+    matched = poi_table.iloc[idx]
+    ids = matched.index if use_poi_idx else matched[loc_col].values
+    s = pd.Series(ids, index=data.index[p_idx])
+    if not nearest_mode:
+        # A point inside overlapping polygons is matched several times; keep the first.
+        s = s.loc[~s.index.duplicated()]
+    s.name = loc_col
+
+    # Unmatched points are reintroduced as NaN.
+    return s.reindex(data.index)
+
+
+def oracle_map(data, true_visits, traj_cols=None, **kwargs):
+ """
+ Map each row of `data` to a ground truth location based solely on time.
+
+ Both tables must use the same kind of time column (datetime or timestamp).
+ When `true_visits` has no end column, one is derived from the start time
+ plus the duration column, which is multiplied by 60 and added as seconds.
+
+ Parameters
+ ----------
+ data : pd.DataFrame
+ The trajectory DataFrame; only its time column is read here.
+ true_visits : pd.DataFrame
+ A visitation table containing location IDs, start times, and durations/end times.
+ Rows with a missing location ID are skipped.
+ traj_cols : optional
+ Column-name mapping forwarded to the loader helpers.
+ **kwargs : dict
+ Additional keyword arguments forwarded to the loader helpers.
+
+ Returns
+ -------
+ pd.Series
+ A Series containing the location IDs corresponding to the pings in the trajectory.
+
+ Raises
+ ------
+ ValueError
+ If the two tables use different kinds of time column, or `true_visits`
+ lacks both end and duration columns, or lacks the location ID column.
+ """
+ # Work on copies: both tables get columns assigned below.
+ true_visits = true_visits.copy()
+ data = data.copy()
+
+ # determine temporal columns to use
+ t_key_l, use_datetime_l = loader._fallback_time_cols_dt(data.columns, traj_cols, kwargs)
+ t_key_r, use_datetime_r = loader._fallback_time_cols_dt(true_visits.columns, traj_cols, kwargs)
+
+
+ traj_cols = loader._parse_traj_cols(true_visits.columns, traj_cols, kwargs) #load defaults
+ if use_datetime_l != use_datetime_r:
+ raise ValueError(f"Mismatch in temporal columns {traj_cols[t_key_l]} vs {traj_cols[t_key_r]}.")
+
+ # check is diary table
+ end_col_present = loader._has_end_cols(true_visits.columns, traj_cols)
+ duration_col_present = loader._has_duration_cols(true_visits.columns, traj_cols)
+ if not (end_col_present or duration_col_present):
+ raise ValueError("Missing required (end or duration) temporal columns for true_visits dataframe.")
+
+ if traj_cols['location_id'] not in true_visits.columns:
+ # NOTE(review): the f-string below reads the bare name `location_id`, which is
+ # not a parameter or local of this function; unless it exists at module level
+ # this raises NameError instead of the intended ValueError. Probably meant
+ # traj_cols['location_id'] -- confirm and fix.
+ raise ValueError(f"Missing {traj_cols[location_id]} column in {true_visits.columns}."
+ "pass `location_id` as keyword argument or in traj_cols."
+ )
+
+ end_t_key = 'end_datetime' if use_datetime_r else 'end_timestamp'
+ if not end_col_present:
+ # Derive the end time from start + duration (duration * 60 interpreted as seconds).
+ if use_datetime_r:
+ true_visits[end_t_key] = true_visits[traj_cols[t_key_r]] + pd.to_timedelta(true_visits[traj_cols['duration']]*60, unit='s')
+ else:
+ true_visits[end_t_key] = true_visits[traj_cols[t_key_r]] + true_visits[traj_cols['duration']]*60
+
+
+ # t_key_l and t_key_r match in type, and end_t_key exists
+ data[traj_cols['location_id']] = pd.NA
+ for idx, row in true_visits.loc[~true_visits[traj_cols['location_id']].isna()].iterrows():
+ # NOTE(review): the derived end column above is written under the literal
+ # key end_t_key but read back here through traj_cols[end_t_key]; these
+ # agree only if the mapping keeps the default name -- verify.
+ start, end, loc = row[traj_cols[t_key_r]], row[traj_cols[end_t_key]], row[traj_cols['location_id']]
+ # NOTE(review): the next line looks truncated in this copy (the text between
+ # a '<' and a later '>' appears to be missing). The rest of oracle_map and the
+ # start of the following function are not visible; restore them from the
+ # original source before relying on this section.
+ data.loc[(data[traj_cols[t_key_l]]>=start)&(data[traj_cols[t_key_l]]= 0:
+ day_parts = [(start.time(), time.max), (time.min, end.time())]
+ else:
+ full_days = 0
+ day_parts = [(start.time(), end.time()), (start.time(), start.time())]
+ return full_days, day_parts
+
+def duration_at_night_fast(start, end, dawn_hour = 6, dusk_hour = 19):
+    """
+    Night-time portion of the interval [start, end], as an integer.
+
+    The interval is split by `slice_datetimes_interval_fast` into a count of
+    full days and two partial day segments. Each segment contributes its
+    before-dawn and after-dusk time; each full day contributes
+    `dawn_hour + (24 - dusk_hour)` hours, converted with a factor of 60
+    (presumably minutes -- confirm against `dawn_time` / `dusk_time`).
+    """
+    full_days, day_parts = slice_datetimes_interval_fast(start, end)
+    first_part, second_part = day_parts
+
+    before_dawn = dawn_time(first_part, dawn_hour) + dawn_time(second_part, dawn_hour)
+    after_dusk = dusk_time(first_part, dusk_hour) + dusk_time(second_part, dusk_hour)
+    whole_days = full_days * (dawn_hour + (24 - dusk_hour)) * 60
+
+    return int(before_dawn + after_dusk + whole_days)
+
+def clip_stays_date(traj, dates, dawn_hour = 6, dusk_hour = 19):
+    """
+    Clip every stay in `traj` to the window given by `dates` and recompute durations.
+
+    Parameters
+    ----------
+    traj : pd.DataFrame
+        Needs the columns 'id', 'start_datetime', 'duration' (read as minutes)
+        and 'location'.
+    dates : sequence of two str
+        Lower and upper bound of the window, parsed with `parse` and localized
+        to the timezone of the start times.
+    dawn_hour, dusk_hour : int
+        Forwarded to `duration_at_night_fast`.
+
+    Returns
+    -------
+    pd.DataFrame
+        Columns 'id', 'start' (clipped start), 'duration' (whole minutes after
+        clipping), 'duration_night' and 'location'.
+    """
+    stay_start = pd.to_datetime(traj['start_datetime'])
+    minutes = traj['duration']
+
+    # Build the bounds in the timezone of the data so that clip() can compare them.
+    zone = stay_start.dt.tz
+    window = {
+        'lower': pd.Timestamp(parse(dates[0]), tz=zone),
+        'upper': pd.Timestamp(parse(dates[1]), tz=zone),
+    }
+
+    stay_end = stay_start + pd.to_timedelta(minutes, unit='m')
+
+    clipped_start = stay_start.clip(**window)
+    clipped_end = stay_end.clip(**window)
+
+    # Durations after clipping, floored to whole minutes.
+    clipped_minutes = ((clipped_end - clipped_start).dt.total_seconds() // 60).astype(int)
+    night_minutes = []
+    for begin, finish in zip(clipped_start, clipped_end):
+        night_minutes.append(duration_at_night_fast(begin, finish, dawn_hour, dusk_hour))
+
+    columns = {
+        'id': traj['id'].values,
+        'start': clipped_start,
+        'duration': clipped_minutes,
+        'duration_night': night_minutes,
+        'location': traj['location']
+    }
+    return pd.DataFrame(columns)
+
+def count_nights(usr_polygon, dawn_hour = 6, dusk_hour = 19, min_dwell = 10):
+    """
+    Count the distinct nights and weeks covered by the stays in `usr_polygon`.
+
+    Parameters
+    ----------
+    usr_polygon : pd.DataFrame
+        Stays with columns 'id', 'location', 'start' and 'duration' (read as
+        minutes). The 'id' and 'location' of the first row label the result.
+    dawn_hour, dusk_hour : int
+        Forwarded to `dawn_time` / `dusk_time`.
+    min_dwell : number
+        Minimum dawn/dusk time a partial day needs in order to count as a night.
+
+    Returns
+    -------
+    pd.DataFrame
+        A single row with 'id', 'location', 'night_count' and 'week_count'.
+    """
+    nights = set()
+    weeks = set()
+
+    def _register(night):
+        # Record the night's date and the date of the Monday starting its week.
+        nights.add(night.date())
+        weeks.add((night - timedelta(days=night.weekday())).date())
+
+    for _, stay in usr_polygon.iterrows():
+        first_day = pd.to_datetime(stay['start'])
+        stay_end = first_day + pd.to_timedelta(stay['duration'], unit='m')
+        full_days, (head, tail) = slice_datetimes_interval_fast(first_day, stay_end)
+
+        dawn1 = dawn_time(head, dawn_hour)
+        dusk1 = dusk_time(head, dusk_hour)
+        dawn2 = dawn_time(tail, dawn_hour)
+        dusk2 = dusk_time(tail, dusk_hour)
+
+        # Time before dawn on the first day belongs to the previous night.
+        if dawn1 >= min_dwell:
+            _register(first_day - timedelta(days=1))
+
+        if full_days == 0:
+            # Evening of the first segment plus early morning of the second.
+            if (dusk1 + dawn2) >= min_dwell:
+                _register(first_day)
+        else:
+            # Every night from the first day through the last full day is counted.
+            for offset in range(full_days + 1):
+                _register(first_day + timedelta(days=offset))
+
+        # Time after dusk in the last segment starts one more night.
+        if dusk2 >= min_dwell:
+            _register(first_day + timedelta(days=full_days + 1))
+
+    summary = {
+        'id': usr_polygon['id'].iloc[0],
+        'location': usr_polygon['location'].iloc[0],
+        'night_count': len(nights),
+        'week_count': len(weeks)
+    }
+    return pd.DataFrame([summary])
+
+
+def night_stops(stop_table, user='user', dawn_hour = 6, dusk_hour = 19, min_dwell = 10):
+    """
+    Count nights and weeks with night-time stops for every ('id', 'location') pair.
+
+    Parameters
+    ----------
+    stop_table : pd.DataFrame
+        Stops with columns 'start_datetime', 'duration' and 'location', and
+        optionally 'id'. The table is not modified.
+    user : str, default 'user'
+        Value used for the 'id' column when `stop_table` has none.
+    dawn_hour, dusk_hour : int
+        Hours delimiting the night, forwarded to the helpers.
+    min_dwell : number
+        Minimum dwell forwarded to `count_nights`.
+
+    Returns
+    -------
+    pd.DataFrame
+        One row per ('id', 'location') pair with 'night_count' and
+        'week_count'; empty (same columns) when no stop qualifies.
+    """
+    result_columns = ['id', 'location', 'night_count', 'week_count']
+
+    # Work on a copy so the caller's table is left untouched.
+    stops = stop_table.copy()
+    if stops.empty:
+        return pd.DataFrame(columns=result_columns)
+
+    # Convert first: the `.dt` accessor and `.date()` below need real datetimes.
+    stops['start_datetime'] = pd.to_datetime(stops['start_datetime'])
+    if 'id' not in stops.columns:
+        stops['id'] = user
+
+    # Clipping window: starts on the first observed date and spans as many
+    # weeks as there are distinct '%Y-%U' week labels in the data.
+    start_date = str(stops['start_datetime'].min().date())
+    num_weeks = stops['start_datetime'].dt.strftime('%Y-%U').nunique()
+    end_date = (parse(start_date) + timedelta(weeks=num_weeks)).date().isoformat()
+    dates = (start_date, end_date)
+
+    df_clipped = clip_stays_date(stops, dates, dawn_hour, dusk_hour)
+    # NOTE(review): the night-duration threshold 15 is hard-coded and
+    # independent of `min_dwell`; confirm whether that is intended.
+    df_clipped = df_clipped[(df_clipped['duration'] > 0) & (df_clipped['duration_night'] >= 15)]
+
+    # Pass each group to count_nights explicitly. The previous code called
+    # count_nights(dawn_hour, ...) itself instead of handing the function to
+    # apply(). Iterating the groups also keeps the 'id' and 'location'
+    # columns that count_nights reads.
+    per_group = [
+        count_nights(group, dawn_hour, dusk_hour, min_dwell)
+        for _, group in df_clipped.groupby(['id', 'location'])
+    ]
+    if not per_group:
+        return pd.DataFrame(columns=result_columns)
+
+    return pd.concat(per_group, ignore_index=True)
+
+