Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions metrics/datagov_metrics/__main__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from datagov_metrics import ckan, ga
from datagov_metrics import catalog, ga

ga.main()
ckan.main()
catalog.main()
47 changes: 47 additions & 0 deletions metrics/datagov_metrics/catalog.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
import requests
import csv
import io
from datagov_metrics.s3_util import put_data_to_s3
from datagov_metrics.ga import date_range_last_month


def get_data() -> dict:
    """Fetch per-organization counts from the harvest and catalog APIs.

    Returns:
        A dict mapping each report name ("harvest_sources",
        "datasets_per_org") to a list of ``[slug, count]`` rows. A report
        whose request does not come back with an OK status is left out of
        the dict rather than raising.

    Raises:
        requests.exceptions.Timeout: if an endpoint does not answer within
            the timeout below (previously the call could block forever).
    """
    # report name -> (url, key wrapping the org list or None when the JSON
    # payload is itself the list, name of the per-org count field)
    reports = {
        "harvest_sources": (
            "https://harvest.data.gov/organizations/?paginate=false",
            None,
            "source_count",
        ),
        "datasets_per_org": (
            "https://catalog.data.gov/api/organizations",
            "organizations",
            "dataset_count",
        ),
    }

    output = {}
    for report_name, (url, envelope_key, count_key) in reports.items():
        # Without a timeout, requests waits indefinitely on a stalled server.
        res = requests.get(url, timeout=30)
        if not res.ok:
            continue  # best-effort: skip this report, keep the others

        data = res.json()
        if envelope_key is not None:
            data = data[envelope_key]
        output[report_name] = [[org["slug"], org[count_key]] for org in data]

    return output


def write_data_to_csv(response):
    """Serialize rows to CSV text under an ``organization,count`` header.

    ``response`` is an iterable of rows (each row an iterable of cell
    values). Returns the whole CSV document as a single string.
    """
    with io.StringIO() as buffer:
        # Header first, then every data row, emitted in a single call.
        csv.writer(buffer, delimiter=",").writerows(
            [["organization", "count"], *response]
        )
        return buffer.getvalue()


def main():
    """Build every catalog report and upload each one to S3 as a CSV file."""
    reports = get_data()
    # The end date of last month's range stamps the object names,
    # for example 2024-10-31.
    last_month = date_range_last_month()[0]
    end_date = last_month["endDate"]
    for report_name, rows in reports.items():
        payload = write_data_to_csv(rows)
        object_name = f"global__{report_name}.{end_date}.csv"
        put_data_to_s3(object_name, payload)


# Allow running this module directly as a script.
if __name__ == "__main__":
    main()
45 changes: 0 additions & 45 deletions metrics/datagov_metrics/ckan.py

This file was deleted.

11 changes: 5 additions & 6 deletions metrics/datagov_metrics/ga.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,20 +35,19 @@ def date_range_last_month():


def get_org_list():
    """Fetch the organization list from the harvest.data.gov API.

    Returns:
        The decoded JSON payload when the response status is OK, otherwise
        an empty list. Returning an empty list (instead of falling through
        and returning ``None``) keeps callers that iterate over the result
        from failing with a ``TypeError``.

    Raises:
        requests.exceptions.Timeout: if the endpoint does not answer within
            the timeout below.
    """
    url = "https://harvest.data.gov/organizations/?paginate=false"
    # Without a timeout, requests waits indefinitely on a stalled server.
    repo = requests.get(url, timeout=30)
    if not repo.ok:
        return []
    return repo.json()


def setup_organization_reports():
orgs = get_org_list()
org_reports = {}

for org in orgs:
org_name = org["name"]
org_display_name = escape(org["display_name"])
org_name = org["slug"]
org_display_name = escape(org["name"])
org_dimension_filter = {
"filter": {
"fieldName": "customEvent:DATAGOV_dataset_organization",
Expand Down
Loading