diff --git a/Makefile b/Makefile index 89da3c9..8b7bfa7 100644 --- a/Makefile +++ b/Makefile @@ -48,40 +48,8 @@ run_idus_download: "https://helix-copilot-prod-helix-media-external.s3.amazonaws.com/external-media/api-dump/idus-all/2025-06-04-10-00-32/5mndO/idus_all.json" @echo "✅ Saved (decompressed): data/idmc_idu/idus_all.json" -run_glide_normal: - @echo "Running Glide normalisation" - @poetry run python -m src.glide.data_normalisation_glide - -run_gdacs_normal: - @echo "Running GDACS normalisation" - @poetry run python -m src.gdacs.data_normalisation_gdacs - -run_dc_normal: - @echo "Running Disaster-Charter normalisation" - @poetry run python -m src.disaster_charter.data_normalisation_dc - -run_emdat_normal: - @echo "Running EmDat normalisation" - @poetry run python -m src.emdat.data_normalisation_emdat - -run_idmc_normal: - @echo "Running IDMC normalisation" - @poetry run python -m src.idmc.data_normalisation_idmc - -run_cerf_normal: - @echo "Running CERF normalisation" - @poetry run python -m src.cerf.data_normalisation_cerf - -run_ifrc_normal: - @echo "Running IFRC normalisation" - @poetry run python -m src.ifrc_eme.data_normalisation_ifrc_eme - -run_all_normal: | run_glide_normal run_gdacs_normal run_dc_normal run_emdat_normal run_idmc_normal run_cerf_normal run_ifrc_normal - @echo "Running all normalisation scripts.." - -run_all_clean: | run_all_normal - @echo "Running all cleaning scripts.." - @poetry run python -m src.utils.splitter +run_all_download: | run_gdacs_download run_glide_download run_cerf_download run_disaster_charter_download run_idus_download + @echo "Running all download scripts.." help: @echo "Available make commands for setup:" diff --git a/src/glide/data_acquisition_scrape.py b/src/glide/data_acquisition_scrape.py index adf0b0b..d77e19c 100644 --- a/src/glide/data_acquisition_scrape.py +++ b/src/glide/data_acquisition_scrape.py @@ -12,13 +12,13 @@ from selenium.webdriver.support.ui import WebDriverWait URL = "https://glidenumber.net/glide/public/result/report.jsp" -# GECKODRIVER_PATH = "/usr/local/bin/geckodriver" -GECKODRIVER_PATH = "/opt/homebrew/bin/geckodriver" -# PROFILE_PATH = ( -# "/home/evangelos/snap/firefox/common/.mozilla/firefox/" -# "cf7shfvv.selenium_profile" -# ) -PROFILE_PATH = "/Users/evangelosdiakatossaoulas/Library/Application Support/Firefox/Profiles/r0zabgcj.default-release" +GECKODRIVER_PATH = "/usr/local/bin/geckodriver" + +PROFILE_PATH = ( + "/home/evangelos/snap/firefox/common/.mozilla/firefox/" + "cf7shfvv.selenium_profile" + ) + Path("./data/glide/").mkdir(parents=True, exist_ok=True) CSV_OUTPUT = "./data/glide/glide_events.csv" @@ -30,8 +30,8 @@ def scrape_with_selenium() -> str: """ options = FirefoxOptions() options.headless = False # type: ignore[attr-defined] - # options.add_argument("-profile") - # options.add_argument(PROFILE_PATH) + options.add_argument("-profile") + options.add_argument(PROFILE_PATH) service = FirefoxService(GECKODRIVER_PATH) driver = webdriver.Firefox(service=service, options=options)