Skip to content

Commit abd3e44

Browse files
authored
Merge pull request #20 from openaleph/develop
ftm-datalake
2 parents 8914365 + 7bce5ba commit abd3e44

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

64 files changed

+939
-1130
lines changed

.bumpversion.cfg

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,4 +10,4 @@ message = 🔖 Bump version: {current_version} → {new_version}
1010
search = version = "{current_version}"
1111
replace = version = "{new_version}"
1212

13-
[bumpversion:file:leakrfc/__init__.py]
13+
[bumpversion:file:ftm-datalake/__init__.py]

.github/dependabot.yml

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,14 +5,11 @@ updates:
55
open-pull-requests-limit: 99
66
schedule:
77
interval: "daily"
8-
target-branch: "develop"
98
- package-ecosystem: "github-actions"
109
directory: "/"
1110
schedule:
1211
interval: "daily"
13-
target-branch: "develop"
1412
- package-ecosystem: "docker"
1513
directory: "/"
1614
schedule:
1715
interval: "weekly"
18-
target-branch: "develop"

.github/workflows/docker.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,12 +19,12 @@ jobs:
1919
id: meta
2020
uses: docker/metadata-action@v5
2121
with:
22-
images: ghcr.io/investigativedata/leakrfc
22+
images: ghcr.io/openaleph/ftm-datalake
2323
tags: |
2424
type=ref,event=branch
2525
type=semver,pattern={{version}}
2626
type=sha
27-
type=raw,value=latest
27+
type=raw,value=latest,enable=${{ startsWith(github.ref, 'refs/tags') }}
2828
- name: Set up Docker Buildx
2929
uses: docker/setup-buildx-action@v3
3030
with:

.github/workflows/python.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ jobs:
4343
path: ~/.cache/pre-commit
4444
key: pre-commit-${{ runner.os }}-${{ env.PY }}-${{ hashFiles('.pre-commit-config.yaml') }}
4545
- name: Install dependencies
46-
run: poetry install --with dev
46+
run: poetry install --with dev --all-extras
4747
- name: Run pre-commit hooks
4848
run: poetry run pre-commit run
4949
- name: Lint with flake8

.gitignore

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
_wip
22
archive/*
33
.anystore/*
4-
tests/fixtures/**/.leakrfc/*
4+
tests/fixtures/**/.ftm-datalake/*
55
# Byte-compiled / optimized / DLL files
66
__pycache__/
77
*.py[cod]

.pre-commit-config.yaml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -32,13 +32,13 @@ repos:
3232
- id: absolufy-imports
3333

3434
- repo: https://github.com/pycqa/isort
35-
rev: 5.13.2
35+
rev: 6.0.1
3636
hooks:
3737
- id: isort
3838
args: ["--profile", "black"]
3939

4040
- repo: https://github.com/psf/black
41-
rev: 24.10.0
41+
rev: 25.1.0
4242
hooks:
4343
- id: black
4444

@@ -51,7 +51,7 @@ repos:
5151
exclude: (test_[\w]+\.py|\.csv|\.json|\.lock)$
5252

5353
- repo: https://github.com/codespell-project/codespell
54-
rev: v2.3.0
54+
rev: v2.4.1
5555
hooks:
5656
- id: codespell
5757
exclude: (test_[\w]+\.py|\.csv|\.i?json|\.lock)$
@@ -69,7 +69,7 @@ repos:
6969
- id: rst-inline-touching-normal
7070

7171
- repo: https://github.com/python-poetry/poetry
72-
rev: 2.0.1
72+
rev: 2.1.3
7373
hooks:
7474
- id: poetry-check
7575
- id: poetry-lock

Dockerfile

Lines changed: 3 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,15 @@
1-
FROM python:3.13-bookworm
1+
FROM ghcr.io/dataresearchcenter/ftmq:latest
22

3-
RUN apt-get -qq update && apt-get -qq -y upgrade
4-
RUN apt-get install -qq -y pkg-config libicu-dev
5-
RUN apt-get -qq -y autoremove && apt-get clean
63

7-
RUN pip install --no-cache-dir -q -U pip setuptools
8-
9-
COPY leakrfc /src/leakrfc
4+
COPY ftm_datalake /src/ftm_datalake
105
COPY setup.py /src/setup.py
11-
# COPY requirements.txt /src/requirements.txt
126
COPY README.md /src/README.md
137
COPY pyproject.toml /src/pyproject.toml
148
COPY VERSION /src/VERSION
159
COPY LICENSE /src/LICENSE
1610
COPY NOTICE /src/NOTICE
1711

1812
WORKDIR /src
19-
# RUN pip install -r requirements.txt
2013
RUN pip install --no-cache-dir -q "."
21-
RUN pip install --no-cache-dir -q -U redis sqlalchemy psycopg2-binary
2214

23-
ENTRYPOINT ["leakrfc"]
15+
ENTRYPOINT ["ftm-datalake"]

Makefile

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,24 +1,24 @@
11
all: clean install test
22

33
api:
4-
LEAKRFC_ARCHIVE__URI=./tests/fixtures/archive DEBUG=1 uvicorn leakrfc.api:app --reload --port 5000
4+
LEAKRFC_ARCHIVE__URI=./tests/fixtures/archive DEBUG=1 uvicorn ftm_datalake.api:app --reload --port 5000
55

66
install:
7-
poetry install --with dev
7+
poetry install --with dev --all-extras
88

99
lint:
10-
poetry run flake8 leakrfc --count --select=E9,F63,F7,F82 --show-source --statistics
11-
poetry run flake8 leakrfc --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
10+
poetry run flake8 ftm_datalake --count --select=E9,F63,F7,F82 --show-source --statistics
11+
poetry run flake8 ftm_datalake --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
1212

1313
pre-commit:
1414
poetry run pre-commit install
1515
poetry run pre-commit run -a
1616

1717
typecheck:
18-
poetry run mypy --strict leakrfc
18+
poetry run mypy --strict ftm_datalake
1919

2020
test:
21-
poetry run pytest -v --capture=sys --cov=leakrfc --cov-report lcov
21+
poetry run pytest -v --capture=sys --cov=ftm_datalake --cov-report lcov
2222

2323
build:
2424
poetry run build
@@ -36,4 +36,4 @@ clean:
3636

3737
documentation:
3838
mkdocs build
39-
aws --endpoint-url https://s3.investigativedata.org s3 sync ./site s3://docs.investigraph.dev/lib/leakrfc
39+
aws --profile nbg1 --endpoint-url https://s3.investigativedata.org s3 sync ./site s3://openaleph.org/docs/lib/ftm-datalake

NOTICE

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
LEAKRFC, (C) 2024 investigativedata.io
2-
LEAKRFC, (C) 2025 investigativedata.io
2+
LEAKRFC, (C) 2025 Data and Research Center – DARC
33

4-
This product includes software developed at investigativedata.io
5-
(https://investigativedata.io)
4+
This product includes software developed at the Data and Research Center
5+
(https://dataresearchcenter.org)
66

77
LEAKRFC contains unmodified subcomponents too with separate copyright notices
88
and license terms. Your use of the source code for these subcomponents is

README.md

Lines changed: 15 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,24 +1,26 @@
1-
# leakrfc
1+
[![ftm-datalake on pypi](https://img.shields.io/pypi/v/ftm-datalake)](https://pypi.org/project/ftm-datalake/)
2+
[![Python test and package](https://github.com/dataresearchcenter/ftm-datalake/actions/workflows/python.yml/badge.svg)](https://github.com/dataresearchcenter/ftm-datalake/actions/workflows/python.yml)
3+
[![pre-commit](https://img.shields.io/badge/pre--commit-enabled-brightgreen?logo=pre-commit)](https://github.com/pre-commit/pre-commit)
4+
[![Coverage Status](https://coveralls.io/repos/github/dataresearchcenter/ftm-datalake/badge.svg?branch=main)](https://coveralls.io/github/dataresearchcenter/ftm-datalake?branch=main)
5+
[![AGPLv3+ License](https://img.shields.io/pypi/l/ftm-datalake)](./LICENSE)
26

3-
"_A RFC for leaks_"
7+
# ftm-datalake
48

5-
[leak-rfc.org](https://leak-rfc.org)
9+
`ftm-datalake` provides a _data standard_ and _archive storage_ for structured [FollowTheMoney](https://followthemoney.tech) data, leaked data, private and public document collections. The concepts and implementations are originally inspired by [mmmeta](https://github.com/simonwoerpel/mmmeta) and [Aleph's servicelayer archive](https://github.com/alephdata/servicelayer) and are [discussed here](https://aleph.discourse.group/t/rfc-followthemoney-data-lake-specification/276/3).
610

7-
`leakrfc` provides a _data standard_ and _archive storage_ for leaked data, private and public document collections. The concepts and implementations are originally inspired by [mmmeta](https://github.com/simonwoerpel/mmmeta) and [Aleph's servicelayer archive](https://github.com/alephdata/servicelayer).
8-
9-
`leakrfc` acts as a multi-tenant storage and retrieval mechanism for documents and their metadata. It provides a high-level interface for generating and sharing document collections and importing them into various search and analysis platforms, such as [_ICIJ Datashare_](https://datashare.icij.org/), [_Liquid Investigations_](https://github.com/liquidinvestigations/), and [_Aleph_](https://docs.aleph.occrp.org/).
11+
`ftm-datalake` acts as a multi-tenant storage and retrieval mechanism for structured entity data, documents and their metadata. It provides a high-level interface for generating and sharing document collections and importing them into various search and analysis platforms, such as [_ICIJ Datashare_](https://datashare.icij.org/), [_Liquid Investigations_](https://github.com/liquidinvestigations/), and [_OpenAleph_](https://openaleph.org/).
1012

1113
## Installation
1214

1315
Requires python 3.11 or later.
1416

1517
```bash
16-
pip install leakrfc
18+
pip install ftm-datalake
1719
```
1820

1921
## Documentation
2022

21-
[docs.investigraph.dev/lib/leakrfc](https://docs.investigraph.dev/lib/leakrfc)
23+
[openaleph.org/docs/lib/ftm-datalake](https://openaleph.org/docs/lib/ftm-datalake)
2224

2325
## Development
2426

@@ -38,15 +40,16 @@ Before creating a commit, this checks for correct code formatting (isort, black)
3840

3941
### testing
4042

41-
`leakrfc` uses [pytest](https://docs.pytest.org/en/stable/) as the testing framework.
43+
`ftm-datalake` uses [pytest](https://docs.pytest.org/en/stable/) as the testing framework.
4244

4345
make test
4446

4547
## License and Copyright
4648

47-
`leakrfc`, (C) 2024 investigativedata.io
48-
`leakrfc`, (C) 2025 investigativedata.io
49+
`ftm-datalake`, (C) 2024 investigativedata.io
50+
51+
`ftm-datalake`, (C) 2025 [Data and Research Center – DARC](https://dataresearchcenter.org)
4952

50-
`leakrfc` is licensed under the AGPLv3 or later license.
53+
`ftm-datalake` is licensed under the AGPLv3 or later license.
5154

5255
see [NOTICE](./NOTICE) and [LICENSE](./LICENSE)

0 commit comments

Comments
 (0)