Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
a2fbb34
Sundry updates and cleanup, pre-refactor
mbacon-renci Nov 25, 2025
f189e29
checkpointing the big move of dags/roger to src/roger. No packaging i…
mbacon-renci Nov 25, 2025
3fdc1f4
First round of code updates for new dug data model
mbacon-renci Dec 4, 2025
2474b71
Setuptools and python library adjustments
mbacon-renci Jan 8, 2026
edbd74b
Tweaking a couple of pipelines
mbacon-renci Jan 10, 2026
f301c06
First cut at a DDM pipeline class
mbacon-renci Jan 10, 2026
f6da839
Refactoring CLI to use pipelines instead of dug_util
mbacon-renci Jan 12, 2026
bf6857d
Starting DM2 ingestion pipeline.
mbacon-renci Jan 12, 2026
24d71c8
Finished refactor so that cli runs same code as dags (pipelines module)
mbacon-renci Jan 12, 2026
229d7c7
Removing dead code
mbacon-renci Jan 12, 2026
d8321d7
just a tiny PEP8 tweak
mbacon-renci Jan 14, 2026
2d8bdff
Breaking test dag into separate file for clarity's sake.
mbacon-renci Jan 14, 2026
0381fc1
Bringing old tests up to speed
mbacon-renci Jan 14, 2026
0996e49
Updated Makefile to use new cli entrypoint
mbacon-renci Jan 16, 2026
d23dea3
Fix to make subsubclasses of DugPipeline work as pipelines
mbacon-renci Jan 16, 2026
f12ac6a
Tweak to make simpler override of pipelines, just focusing on filenam…
mbacon-renci Jan 16, 2026
2a83305
Numerous fixes to CLI to make CLI-based testing easier, more pipeline…
mbacon-renci Jan 16, 2026
b6ea6fb
Update to allow disabling of redis http cache in config (pass to dug)
mbacon-renci Jan 16, 2026
a4c0a64
touchup heal studies pipeline
mbacon-renci Jan 16, 2026
812a514
Added ability to override config file location, test config (no redis…
mbacon-renci Jan 18, 2026
bb9db55
heal-cdes pipeline added
mbacon-renci Jan 18, 2026
4aa8931
Merge from develop
mbacon-renci Jan 20, 2026
0f1688a
Update dockerfile image, temp bump to github workflows to build
mbacon-renci Jan 22, 2026
3ac6162
Backing up to python 3.12 because of issues with airflow and python 3.13
mbacon-renci Jan 22, 2026
76316a9
Version subrevision bumped to trigger build for vulnerabilities
mbacon-renci Feb 9, 2026
43864f8
update healthcheck
frostyfan109 Feb 10, 2026
2245901
update pipeline indexing
frostyfan109 Feb 12, 2026
559fbad
add index config, fix typo
frostyfan109 Feb 12, 2026
fc617d3
Fix errors in failing subtasks, update dockerfile
frostyfan109 Feb 20, 2026
3109ef5
remove expand_to_dug_element and update pipeline to filter DDM2 json …
frostyfan109 Feb 20, 2026
5bfd7ca
update pipeline validation
frostyfan109 Feb 23, 2026
13ccad9
update validation
frostyfan109 Feb 26, 2026
0715546
add roger heal_studies pipeline back
frostyfan109 Feb 26, 2026
8b6b50a
update validation pagination
frostyfan109 Feb 27, 2026
fa6e57d
fix search validation for 2 char concept names
frostyfan109 Mar 1, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .github/workflows/build-push-dev-image.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ on:
push:
branches:
- develop
- package-refactor
paths-ignore:
- README.md
- .old_cicd/*
Expand Down Expand Up @@ -83,4 +84,4 @@ jobs:
containers.renci.org/${{ github.repository }}:develop
containers.renci.org/${{ github.repository }}:${{ steps.vars.outputs.short_sha }}
cache-from: type=registry,ref=${{ github.repository }}:buildcache-dev
cache-to: type=registry,ref=${{ github.repository }}:buildcache-dev,mode=max
cache-to: type=registry,ref=${{ github.repository }}:buildcache-dev,mode=max
15 changes: 12 additions & 3 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
# Use a Debian-based image for better compatibility
FROM python:3.11.14-slim-trixie
FROM python:3.12-slim-trixie

# Set Airflow version and home directory
ARG AIRFLOW_VERSION=3.1.5
ARG AIRFLOW_VERSION=3.1.7
ARG AIRFLOW_HOME=/opt/airflow

# Environment variables
Expand Down Expand Up @@ -37,6 +37,10 @@ RUN pip install --no-cache-dir \
"apache-airflow-providers-cncf-kubernetes" \
--constraint "https://raw.githubusercontent.com/apache/airflow/constraints-${AIRFLOW_VERSION}/constraints-3.11.txt"

# Fix auth rollback bug.
RUN pip install --no-cache-dir \
"apache-airflow-providers-fab==3.3.0rc1"

# Optional: install extra packages
RUN pip install --no-cache-dir psycopg2-binary redis

Expand All @@ -46,7 +50,8 @@ RUN pip install -r /tmp/requirements.txt

RUN rm /tmp/requirements.txt


# COPY . /opt/roger
# RUN pip install /opt/roger

RUN apt-get purge -y --auto-remove \
build-essential \
Expand All @@ -57,13 +62,17 @@ RUN apt-get purge -y --auto-remove \
git && \
apt-get clean

RUN if [ -n "$ROGER_SOURCE" ]; then pip install -e $ROGER_SOURCE; fi

# Set ownership
RUN chown -R airflow:airflow ${AIRFLOW_HOME}

# Switch to airflow user
USER airflow
WORKDIR ${AIRFLOW_HOME}

ENV PYTHONPATH=/opt/airflow/dags/repo/src/

# Expose Airflow webserver port
EXPOSE 8080

Expand Down
6 changes: 3 additions & 3 deletions bin/dug_annotate/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -33,12 +33,12 @@ clean:
$(RM) -rf ${KGX_DIR}

get_input_files:
$(TIME) python ${CLI_WRAPPER} -gd
$(TIME) roger -gd

annotate_and_normalize:
$(TIME) python ${CLI_WRAPPER} -l
$(TIME) roger -l

create_kgx_files:
$(TIME) python ${CLI_WRAPPER} -t
$(TIME) roger -t

all: get_input_files annotate_and_normalize create_kgx_files
11 changes: 5 additions & 6 deletions bin/dug_indexing/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ THIS_MAKEFILE_PATH:=$(word $(words $(MAKEFILE_LIST)),$(MAKEFILE_LIST))
THIS_DIR:=$(shell cd $(dir $(THIS_MAKEFILE_PATH));pwd)

ROGER_HOME=${THIS_DIR}/../..
CLI_WRAPPER=${ROGER_HOME}/cli.py

# Override Roger data dir ENV
INDEXING_DIR=${ROGERENV_DATA__ROOT}/dug/expanded_concepts
Expand All @@ -34,19 +33,19 @@ clean:
$(RM) -rf ${CRAWL_DIR}

crawl_concepts:
$(TIME) python ${CLI_WRAPPER} -C
$(TIME) roger -C

index_concepts: crawl_concepts
$(TIME) python ${CLI_WRAPPER} -ic
$(TIME) roger -ic

index_variables:
$(TIME) python ${CLI_WRAPPER} -iv
$(TIME) roger -iv

validate_indexed_concepts: index_concepts
$(TIME) python ${CLI_WRAPPER} -vc
$(TIME) roger -vc

validate_indexed_variables: index_variables
$(TIME) python ${CLI_WRAPPER} -vv
$(TIME) roger -vv

all: validate_indexed_concepts validate_indexed_variables

Expand Down
112 changes: 0 additions & 112 deletions cli.py

This file was deleted.

36 changes: 3 additions & 33 deletions dags/annotate_and_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@
import os

from airflow.models import DAG
from airflow.operators.empty import EmptyOperator
from airflow.operators.python import PythonOperator
from airflow.providers.standard.operators.empty import EmptyOperator
from airflow.providers.standard.operators.python import PythonOperator
from roger.tasks import default_args, create_pipeline_taskgroup, logger, create_python_task

env_enabled_datasets = os.getenv(
Expand Down Expand Up @@ -52,35 +52,5 @@

init >> create_pipeline_taskgroup(dag, pipeline_class, config) >> finish




with DAG(
dag_id='dag_test',
default_args=default_args,
params=
{
"repository_id": None,
"branch_name": None,
"commitid_from": None,
"commitid_to": None
},
# schedule_interval=None
) as dag:

init = EmptyOperator(task_id="init", dag=dag)
finish = EmptyOperator(task_id="finish", dag=dag)

def print_context(ds=None, **kwargs):
print(">>>All kwargs")
print(kwargs)
print(">>>All ds")
print(ds)


init >> create_python_task(dag, "get_from_lakefs", print_context) >> finish

#run_this = PythonOperator(task_id="print_the_context", python_callable=print_context)

if __name__ == "__main__":
dag.test()
dag.test()
3 changes: 0 additions & 3 deletions dags/dug_helpers/__init__.py

This file was deleted.

Loading
Loading