Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 25 additions & 0 deletions _extensions/openproblems-bio/custommdx/_extension.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
title: Custom MDX
author: OpenProblems
contributes:
formats:
md:
output-ext: mdx
quarto-custom-format: custommdx-md
preserve-yaml: true
wrap: none
filters:
- at: post-render
path: custommdx_postrender.lua

variant: >-
gfm+pipe_tables+tex_math_dollars+header_attributes+raw_html
+all_symbols_escapable+backtick_code_blocks+fenced_code_blocks
+space_in_atx_header+intraword_underscores+lists_without_preceding_blankline
+shortcut_reference_links+footnotes+definition_lists+startnum

fig-width: 8
fig-height: 5
fig-format: svg

html-math-method: webtex

81 changes: 81 additions & 0 deletions _extensions/openproblems-bio/custommdx/custommdx_postrender.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
--- Build a raw "markdown" block from a JSX snippet.
-- @param content string with the JSX/MDX markup to emit
-- @return the result of pandoc.RawBlock("markdown", content)
local function jsx(content)
  local rawBlock = pandoc.RawBlock("markdown", content)
  return rawBlock
end

-- Predicate handed to add_renderer for the custom renderers in this file.
-- NOTE(review): it returns true unconditionally, so the renderers are not
-- restricted to a particular output format. The format check that follows the
-- return is disabled (commented out) - confirm whether it should be restored.
local function isCustomMdxOutput()
return true
-- local name = param("quarto-custom-format", "")
-- print("Shortcode: ", name)
-- return name == "custommdx-md"
end

-- Render Quarto callouts as an MDX <Callout> element. The variant is the
-- callout type with every "callout-" occurrence removed; the body is the
-- callout content written out by pandoc as Markdown.
quarto._quarto.ast.add_renderer("Callout", isCustomMdxOutput, function(node)
  -- gsub also returns a match count; the parentheses keep only the string.
  local variant = (node.type:gsub("callout%-", ""))
  local body = pandoc.write(pandoc.Pandoc(node.content), "markdown")
  return jsx(string.format('<Callout variant="%s">\n%s</Callout>\n', variant, body))
end)

-- Render a Quarto tabset as <Tabset> ... </Tabset>, with each tab's content
-- placed between <Tab value="..."> and </Tab> markers.
quarto._quarto.ast.add_renderer("Tabset", isCustomMdxOutput, function(node)
  -- Optional group="..." attribute, read from the tabset's own attributes.
  local group = node.attr.attributes["group"]
  local groupAttr = group and ([[ group="%s"]]):format(group) or ""

  -- Plain-text title of every tab, in tab order.
  local titles = {}
  for i = 1, #node.tabs do
    titles[i] = pandoc.utils.stringify(node.tabs[i].title)
  end

  local opening = "<Tabset" .. groupAttr
    .. ' values={["' .. table.concat(titles, '","') .. '"]}>'

  local tabs = pandoc.Div({})
  tabs.content:insert(jsx(opening))

  for i = 1, #node.tabs do
    local body = node.tabs[i].content
    tabs.content:insert(jsx(([[<Tab value="%s">]]):format(titles[i])))
    -- A table is spliced in element by element; anything else is added as one item.
    if type(body) ~= "table" then
      tabs.content:insert(body)
    else
      tabs.content:extend(body)
    end
    tabs.content:insert(jsx("</Tab>"))
  end

  tabs.content:insert(jsx("</Tabset>"))
  return tabs
end)

-- Render cross-referenceable floats as <Figure id="..." caption="..."> around
-- the float content.
quarto._quarto.ast.add_renderer("FloatRefTarget", isCustomMdxOutput, function(float)
  float = quarto.doc.crossref.decorate_caption_with_crossref(float)

  -- The caption is written out with the active writer (FORMAT and its options)
  -- so it can be placed in the caption attribute; there may be a better way.
  -- NOTE(review): the text is inserted unescaped - a double quote in a caption
  -- would presumably end the attribute early; confirm.
  local captionDoc = pandoc.Pandoc({ float.caption_long })
  local captionMd = pandoc.write(captionDoc, FORMAT, PANDOC_WRITER_OPTIONS)

  local opening = string.format('<Figure id="%s" caption="%s">', float.identifier, captionMd)
  local body = pandoc.Div(quarto.utils.as_blocks(float.content))

  return pandoc.Blocks({ jsx(opening), body, jsx('</Figure>') })
end)

--- Remove HTML comments from the text of a raw inline element.
-- The dashes are escaped because a bare "-" is a lazy quantifier in Lua
-- patterns, and ".-" (shortest match) is used so that each comment is removed
-- on its own instead of everything up to the last ">" in the text.
-- @param el element with a string `text` field
-- @return the same element, with `text` rewritten in place
local function RawInline(el)
  -- Parentheses drop gsub's second result (the replacement count).
  el.text = (el.text:gsub("<!%-%-.-%-%->", ""))
  return el
end

--- Remove HTML comments from the text of a raw block element.
-- The dashes are escaped because a bare "-" is a lazy quantifier in Lua
-- patterns, and ".-" (shortest match) is used so that each comment is removed
-- on its own instead of everything up to the last ">" in the text.
-- @param el element with a string `text` field
-- @return the same element, with `text` rewritten in place
local function RawBlock(el)
  -- Parentheses drop gsub's second result (the replacement count).
  el.text = (el.text:gsub("<!%-%-.-%-%->", ""))
  return el
end

-- Filter list returned by this script: a single filter table holding the
-- two raw-element handlers defined above.
return {
  { RawBlock = RawBlock, RawInline = RawInline },
}
Original file line number Diff line number Diff line change
Expand Up @@ -55,10 +55,6 @@ end
-- Main Glossary Function Shortcode
return {
["glossary"] = function(args, kwargs2, meta)
-- this will only run for HTML documents
if not quarto.doc.isFormat("html:js") then
return pandoc.Null()
end

-- fix issue with kwargs
local kwargs = {}
Expand Down Expand Up @@ -102,12 +98,7 @@ return {
label = pandoc.utils.stringify(kwargs["label"])
end

local url = "/reference/glossary/index.qmd#" .. term
local infoIcon = pandoc.RawInline('html', '<i class="bi bi-question-circle info-icon"></i>')
local combinedLabel = pandoc.Span({pandoc.Str(label), infoIcon})
local link = pandoc.Link(combinedLabel, url, "", pandoc.Attr("class", {"quarto-xref"}))

return link
return pandoc.RawInline("markdown", "<Glossary id=\"" .. term .. "\">" .. label .. "</Glossary>")
end
end
}
7 changes: 7 additions & 0 deletions _extensions/rcannood/openproblemsdocs/_extension.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
title: OpenProblemsDocs
author: Robrecht Cannoodt
version: 1.0.0
quarto-required: ">=1.2.0"
contributes:
filters:
- openproblemsdocs.lua
22 changes: 22 additions & 0 deletions _extensions/rcannood/openproblemsdocs/openproblemsdocs.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@

return {
{
-- Replace each callout with a raw markdown block holding a
-- <Callout variant="..."> element whose variant is the callout type.
-- NOTE(review): the body comes from pandoc.utils.stringify, i.e. plain text;
-- confirm that dropping the formatting of the callout content is intended.
Callout = function(el)
  local body = pandoc.utils.stringify(el.content)
  local opening = '<Callout variant="' .. el.type .. '">\n'
  return pandoc.RawBlock("markdown", opening .. body .. "\n</Callout>")
end,
-- Turn code blocks carrying the "mermaid" class into a <MermaidDiagram>
-- element; for any other code block the handler returns nothing.
CodeBlock = function(el)
  if not el.attr.classes:includes("mermaid") then
    return
  end
  -- Backticks are backslash-escaped because the chart text is emitted between
  -- backticks in the chart={`...`} attribute.
  local chart = el.text:gsub("`", "\\`")
  return pandoc.RawBlock("markdown", "<MermaidDiagram chart={`\n" .. chart .. "`} />")
end,
-- Remove HTML comments from the text of raw blocks.
-- The dashes are escaped because a bare "-" is a lazy quantifier in Lua
-- patterns, and ".-" (shortest match) is used so that each comment is removed
-- on its own instead of everything up to the last ">" in the text.
RawBlock = function(el)
  -- Parentheses drop gsub's second result (the replacement count).
  el.text = (el.text:gsub("<!%-%-.-%-%->", ""))
  return el
end
}
}
2 changes: 1 addition & 1 deletion _include/_clone_repo_openproblems.qmd
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@

```{r setup_openporblems, include=FALSE}
```{r setup_openproblems, include=FALSE}
# discard local changes from the /openproblems repo and
# sets the work directory to that directory.
repo_path <- system("git rev-parse --show-toplevel", intern = TRUE)
Expand Down
50 changes: 50 additions & 0 deletions _include/_quarto_helpers.qmd
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
```{r include=FALSE}
# contents is a glob, e.g. "./*/index.qmd"
# use it to list all files
library(purrr, warn.conflicts = FALSE)
library(dplyr, warn.conflicts = FALSE)
library(stringr, warn.conflicts = FALSE)

# List the .qmd pages found under a directory as markdown bullet links.
#
# Args:
#   dir: directory that is searched recursively.
#   pattern: regular expression matched against each file path relative to
#     `dir`. The default keeps `<name>.qmd` and `<name>/index.qmd` where
#     `<name>` does not start with an underscore. A top-level `index.qmd` is
#     always dropped.
#
# Returns:
#   A character vector with one "* [**title**](path): description" bullet per
#   page, sorted by `order`, then `title`, then `path`. Pages without a
#   description get no ": description" suffix; pages without an order sort as
#   order 0. Returns character(0) when no page could be read.
listing <- function(
  dir = ".",
  pattern = "^([^/_][^/]*|[^/_][^/]*/index)\\.qmd$"
) {
  qmd_files <- list.files(
    dir,
    recursive = TRUE
  ) |>
    keep(~grepl(pattern, .)) |>
    discard(~ . == "index.qmd")

  # One row per page, built from the page's YAML front matter.
  df <- map_dfr(qmd_files, function(qmd_file) {
    file <- file.path(dir, qmd_file)
    tryCatch({
      lines <- readr::read_lines(file)
      # Everything before the second "---" line is treated as front matter.
      yamlText <- lines[seq_len(which(lines == "---")[2] - 1)]
      data <- yaml::yaml.load(yamlText)
      # Wrap non-scalar fields so each field contributes a single list element.
      for (col in names(data)) {
        if (length(data[[col]]) != 1) {
          data[[col]] <- list(data[[col]])
        }
      }
      df_row <- as.data.frame(data)
      df_row$path <- file |>
        str_replace_all("/index\\.qmd", "") |>
        str_replace_all("\\.qmd", "")
      df_row
    }, error = function(e) {
      # Pages that cannot be read are reported and left out of the listing.
      message("Error reading ", file, ": ", e$message)
      return(NULL)
    })
  })

  # Nothing to list: the column handling below needs at least one row.
  if (nrow(df) == 0) {
    return(character(0))
  }

  # Defaults when no page declares the field at all.
  if (!"description" %in% names(df)) {
    df$description <- ""
  }
  if (!"order" %in% names(df)) {
    df$order <- 0
  }
  # Defaults when only some pages declare the field: the row binding fills the
  # others with NA, which would otherwise be printed as "NA" by glue and would
  # not sort as order 0.
  df$description[is.na(df$description)] <- ""
  df$order[is.na(df$order)] <- 0

  df |>
    arrange(order, title, path) |>
    mutate(str = glue::glue("* [**{title}**]({path}){ifelse(description != '', paste0(': ', description), '')}")) |>
    pull(str)
}
```
18 changes: 15 additions & 3 deletions _quarto.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,20 @@ project:
output-dir: _docs

format:
docusaurus-md:
toc: true
fig-format: svg
custommdx-md:
bibliography: library.bib
glossary: _glossary.md

# gfm:
# toc: false
# fig-format: svg
# bibliography: library.bib
# output-ext: mdx
# glossary: _glossary.md
# variant: "+yaml_metadata_block"

execute:
freeze: auto

# filters:
# - custommdx
37 changes: 23 additions & 14 deletions advanced_topics/create_a_dataset_loader.qmd
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ engine: knitr
{{< include /_include/_clone_repo_openproblems.qmd >}}
{{< include /_include/_evaluate_code.qmd >}}

A {{< glossary "dataset loader" >}} is used to generate one or more {{< glossary "raw dataset" label="raw datasets" >}}. A raw dataset is processed by the [Dataset preprocessing workflow](/reference/openproblems/src-datasets.qmd) to create a {{< glossary "common dataset" >}} that can be used by multiple benchmarking tasks.
A {{< glossary "dataset loader" >}} is used to generate one or more {{< glossary "raw dataset" label="raw datasets" >}}. A raw dataset is processed by the [Dataset preprocessing workflow](/reference/openproblems/src-datasets) to create a {{< glossary "common dataset" >}} that can be used by multiple benchmarking tasks.

This guide will show you how to create a new {{< glossary Viash >}} component to fetch or generate datasets.

Expand All @@ -21,7 +21,7 @@ Common datasets are created by generating raw datasets with a data loader and ru

{{< include ../_figures/_project_structure_dataflow.qmd >}}

See the [reference documentation](../reference/openproblems/src-datasets.qmd) for more information on how each of these steps works.
See the [reference documentation](../reference/openproblems/src-datasets) for more information on how each of these steps works.

```{r setup-packages, include=FALSE}
library(tidyverse)
Expand Down Expand Up @@ -78,17 +78,19 @@ platforms:
HERE
```

:::{.panel-tabset group="language"}
<TabsCodeBlocks type="language">

## Python
:::{#Python}

Contents of `src/datasets/loaders/myloader/config.vsh.yaml`:

```{embed, lang="yaml"}
src/datasets/loaders/myloader/config.vsh.yaml
```

## R
:::

:::{#R}

Contents of `src/datasets/loaders/myloader/config.vsh.yaml`:

Expand All @@ -114,8 +116,11 @@ platforms:
packages: [ libhdf5-dev, libgeos-dev, python3, python3-pip, python3-dev, python-is-python3 ]
- type: nextflow
```

:::

</TabsCodeBlocks>

For more parameter options, refer to the ["Parameters"](#parameters) section.


Expand Down Expand Up @@ -179,17 +184,19 @@ adata.write_h5ad(par["output"], compression="gzip")
HERE
```

:::{.panel-tabset group="language"}
<TabsCodeBlocks type="language">

## Python
:::{#Python}

Contents of `src/datasets/loaders/myloader/script.py`:

```{embed, lang="python"}
src/datasets/loaders/myloader/script.py
```

## R
:::

:::{#R}

Contents of `src/datasets/loaders/myloader/script.R`:

Expand Down Expand Up @@ -243,9 +250,10 @@ adata <- AnnData(
# Write to file
adata$write_h5ad(par[["output"]], compression = "gzip")
```

:::

</TabsCodeBlocks>

## Step 4: Run the component

Try running your component! You can start off by running your script inside your IDE.
Expand Down Expand Up @@ -284,9 +292,9 @@ arguments:

You can then use the `n_obs` and `n_vars` values in the `par` object to get access to the runtime parameters:

:::{.panel-tabset group="language"}
<TabsCodeBlocks type="language">

## Python
:::{#Python}

```python
obs = pd.DataFrame({
Expand All @@ -298,8 +306,8 @@ var = pd.DataFrame(
index=[f"Gene_{i}" for i in range(100)]
)
```

## R
:::
:::{#R}

```r
obs <- data.frame(
Expand All @@ -311,9 +319,10 @@ var <- data.frame(
row.names = paste0("Gene_", seq_len(par$n_vars))
)
```

:::

</TabsCodeBlocks>

## Format of a raw dataset object

```{r, include=FALSE}
Expand Down
2 changes: 1 addition & 1 deletion advanced_topics/technology_stack.qmd
Original file line number Diff line number Diff line change
Expand Up @@ -23,4 +23,4 @@ We use a variety of technologies to ensure the reliability, reproducibility, and

8. **Standardized Dataset Loaders and Processing Pipeline**: We offer a library of common datasets to be reused across tasks whenever possible. These datasets are derived from sources like CELLxGENE census and GEO and are processed by a standardized dataset processing pipeline. This uniformity in data processing ensures consistent data quality and structure across different tasks, facilitating comparability and consistency in benchmarking results.

![Overview of the OpenProblems repositories, mainly consisting of the main repository and the website. For detailed information on how this project is structured, see the [reference documentation](/reference).](../images/repositories-thin.svg){#fig-project-structure}
![Overview of the OpenProblems repositories, mainly consisting of the main repository and the website. For detailed information on how this project is structured, see the [reference documentation](/reference).](../images/repositories-thin.svg){#fig-project-structure}
Loading