diff --git a/CHANGELOG.md b/CHANGELOG.md index 44f30f99a9..00288494fc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,23 @@ ======= +## 0.152.0 (2025-05-02) + +### Feat + +- Added Fermanagh Omagh +- Added Tewkesbury +- added Slough council +- Added Angus Council +- added Angus to input.json + +### Fix + +- Chichester now only requires postcode and house number +- Broadland now only requires postcode and house number +- Barking now only requires postcode and house number +- Brighton now only requires postcode and house number +- ensured all bins for this council +- added skip_get_url to hyndburn + ## 0.151.0 (2025-04-27) ### Feat diff --git a/custom_components/uk_bin_collection/config_flow.py b/custom_components/uk_bin_collection/config_flow.py index 16ad3ba442..6035f4febc 100644 --- a/custom_components/uk_bin_collection/config_flow.py +++ b/custom_components/uk_bin_collection/config_flow.py @@ -253,7 +253,7 @@ async def async_step_reconfigure_confirm( async def get_councils_json(self) -> Dict[str, Any]: """Fetch and return the supported councils data, including aliases and sorted alphabetically.""" - url = "https://raw.githubusercontent.com/robbrad/UKBinCollectionData/0.151.0/uk_bin_collection/tests/input.json" + url = "https://raw.githubusercontent.com/robbrad/UKBinCollectionData/0.152.0/uk_bin_collection/tests/input.json" try: async with aiohttp.ClientSession() as session: async with session.get(url) as response: diff --git a/custom_components/uk_bin_collection/manifest.json b/custom_components/uk_bin_collection/manifest.json index 827f33c168..976a21b459 100644 --- a/custom_components/uk_bin_collection/manifest.json +++ b/custom_components/uk_bin_collection/manifest.json @@ -9,7 +9,7 @@ "integration_type": "service", "iot_class": "cloud_polling", "issue_tracker": "https://github.com/robbrad/UKBinCollectionData/issues", - "requirements": ["uk-bin-collection>=0.151.0"], - "version": "0.151.0", + "requirements": ["uk-bin-collection>=0.152.0"], + "version": 
"0.152.0", "zeroconf": [] } diff --git a/pyproject.toml b/pyproject.toml index 5ac89ea49e..df4c3cb59e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "uk_bin_collection" -version = "0.151.0" +version = "0.152.0" description = "Python Lib to collect UK Bin Data" readme = "README.md" authors = ["Robert Bradley "] diff --git a/uk_bin_collection/tests/input.json b/uk_bin_collection/tests/input.json index 877238b9d7..2b2ce2d500 100755 --- a/uk_bin_collection/tests/input.json +++ b/uk_bin_collection/tests/input.json @@ -28,6 +28,16 @@ "wiki_note": "You will need to use [FindMyAddress](https://www.findmyaddress.co.uk/search) to find the UPRN.", "LAD24CD": "E07000032" }, + "AngusCouncil": { + "uprn": "117053733", + "skip_get_url": true, + "postcode": "DD7 7LE", + "url": "https://www.angus.gov.uk/bins_litter_and_recycling/bin_collection_days", + "web_driver": "http://selenium:4444", + "wiki_name": "Angus", + "wiki_note": "You will need to use [FindMyAddress](https://www.findmyaddress.co.uk/search) to find the UPRN. Requires Selenium", + "LAD24CD": "S12000041" + }, "AntrimAndNewtonabbeyCouncil": { "LAD24CD": "N09000001", "url": "https://antrimandnewtownabbey.gov.uk/residents/bins-recycling/bins-schedule/?Id=643", @@ -117,13 +127,13 @@ "LAD24CD": "E07000200" }, "BarkingDagenham": { - "house_number": "19 KELLY WAY, CHADWELL HEATH, RM6 6XH", + "house_number": "19", "postcode": "RM6 6XH", "skip_get_url": true, "web_driver": "http://selenium:4444", "url": "https://www.lbbd.gov.uk/rubbish-recycling/household-bin-collection/check-your-bin-collection-days", "wiki_name": "Barking and Dagenham", - "wiki_note": "Use the full address as it appears on the drop-down on the site when you search by postcode.", + "wiki_note": "Use house number and postcode. 
Requires Selenium.", "LAD24CD": "E09000002" }, "BarnetCouncil": { @@ -311,13 +321,13 @@ "LAD24CD": "E09000005" }, "BrightonandHoveCityCouncil": { - "house_number": "44 Carden Avenue, Brighton, BN1 8NE", + "house_number": "44", "postcode": "BN1 8NE", "skip_get_url": true, "url": "https://cityclean.brighton-hove.gov.uk/link/collections", "web_driver": "http://selenium:4444", "wiki_name": "Brighton and Hove", - "wiki_note": "Use the full address as it appears on the drop-down on the site when you search by postcode.", + "wiki_note": "Use house number and postcode. Requires Selenium", "LAD24CD": "E06000043" }, "BristolCityCouncil": { @@ -330,12 +340,12 @@ }, "BroadlandDistrictCouncil": { "skip_get_url": true, - "house_number": "1 Park View, Horsford, Norfolk, NR10 3FD", + "house_number": "1", "postcode": "NR10 3FD", "url": "https://area.southnorfolkandbroadland.gov.uk/FindAddress", "web_driver": "http://selenium:4444", "wiki_name": "Broadland", - "wiki_note": "Use the full address as it appears on the drop-down on the site when you search by postcode.", + "wiki_note": "Use house number and postcode. Requires Selenium.", "LAD24CD": "E07000144" }, "BromleyBoroughCouncil": { @@ -521,7 +531,7 @@ "LAD24CD": "E07000034" }, "ChichesterDistrictCouncil": { - "house_number": "7, Plaistow Road, Kirdford, Billingshurst, West Sussex", + "house_number": "7", "postcode": "RH14 0JT", "skip_get_url": true, "url": "https://www.chichester.gov.uk/checkyourbinday", @@ -901,6 +911,15 @@ "wiki_note": "Pass the UPRN. 
You can find it using [FindMyAddress](https://www.findmyaddress.co.uk/search).", "LAD24CD": "E07000010" }, + "FermanaghOmaghDistrictCouncil": { + "house_number": "20", + "postcode": "BT74 6DQ", + "skip_get_url": true, + "url": "https://www.fermanaghomagh.com/services/environment-and-waste/waste-collection-calendar/", + "wiki_name": "Fermanagh and Omagh", + "wiki_note": "Pass the house number and postcode in their respective parameters.", + "LAD24CD": "N09000006" + }, "FifeCouncil": { "uprn": "320203521", "url": "https://www.fife.gov.uk", @@ -1199,6 +1218,7 @@ "LAD24CD": "E07000120", "uprn": "100010448773", "url": "https://iapp.itouchvision.com/iappcollectionday/collection-day/?uuid=FEBA68993831481FD81B2E605364D00A8DC017A4", + "skip_get_url": true, "web_driver": "http://selenium:4444", "wiki_name": "Hyndburn", "wiki_note": "Pass the UPRN. You can find it using [FindMyAddress](https://www.findmyaddress.co.uk/search). This parser requires a Selenium webdriver." @@ -1622,7 +1642,7 @@ "NorthEastDerbyshireDistrictCouncil": { "postcode": "S42 5RB", "skip_get_url": true, - "uprn": "010034492221", + "uprn": "010034492222", "url": "https://myselfservice.ne-derbyshire.gov.uk/service/Check_your_Bin_Day", "web_driver": "http://selenium:4444", "wiki_name": "North East Derbyshire", @@ -2013,6 +2033,15 @@ "wiki_note": "Follow the instructions [here](https://bins.shropshire.gov.uk/) until you get the page showing your bin collection dates, then copy the URL and replace the URL in the command.", "LAD24CD": "E06000051" }, + "SloughBoroughCouncil": { + "postcode": "SL2 2EW", + "skip_get_url": true, + "url": "https://www.slough.gov.uk/bin-collections", + "web_driver": "http://selenium:4444", + "wiki_name": "Slough", + "wiki_note": "Pass the postcode. 
This parser requires a Selenium webdriver.", + "LAD24CD": "E06000039" + }, "SolihullCouncil": { "url": "https://digital.solihull.gov.uk/BinCollectionCalendar/Calendar.aspx?UPRN=100071005444", "wiki_command_url_override": "https://digital.solihull.gov.uk/BinCollectionCalendar/Calendar.aspx?UPRN=XXXXXXXX", @@ -2332,6 +2361,14 @@ "wiki_note": "Provide your UPRN. Find it using [FindMyAddress](https://www.findmyaddress.co.uk/search).", "LAD24CD": "E06000020" }, + "TewkesburyBoroughCouncil": { + "skip_get_url": true, + "uprn": "10067626314", + "url": "https://tewkesbury.gov.uk/services/waste-and-recycling/", + "wiki_name": "Tewkesbury", + "wiki_note": "Provide your UPRN. Find it using [FindMyAddress](https://www.findmyaddress.co.uk/search).", + "LAD24CD": "E07000083" + }, "TendringDistrictCouncil": { "postcode": "CO15 4EU", "skip_get_url": true, diff --git a/uk_bin_collection/uk_bin_collection/councils/AngusCouncil.py b/uk_bin_collection/uk_bin_collection/councils/AngusCouncil.py new file mode 100644 index 0000000000..b28f262172 --- /dev/null +++ b/uk_bin_collection/uk_bin_collection/councils/AngusCouncil.py @@ -0,0 +1,149 @@ +import time +import re +from datetime import datetime + +from bs4 import BeautifulSoup +from selenium.webdriver.common.by import By +from selenium.webdriver.common.keys import Keys +from selenium.webdriver.support import expected_conditions as EC +from selenium.webdriver.support.ui import Select, WebDriverWait + +from uk_bin_collection.uk_bin_collection.common import * +from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass + + +class CouncilClass(AbstractGetBinDataClass): + def parse_data(self, page: str, **kwargs) -> dict: + driver = None + try: + user_postcode = kwargs.get("postcode") + if not user_postcode: + raise ValueError("No postcode provided.") + check_postcode(user_postcode) + + user_uprn = kwargs.get("uprn") + check_uprn(user_uprn) + + headless = kwargs.get("headless") + web_driver = 
kwargs.get("web_driver") + driver = create_webdriver(web_driver, headless, None, __name__) + page = "https://www.angus.gov.uk/bins_litter_and_recycling/bin_collection_days" + + driver.get(page) + + wait = WebDriverWait(driver, 10) + accept_cookies_button = wait.until( + EC.element_to_be_clickable((By.ID, "ccc-recommended-settings")) + ) + accept_cookies_button.click() + + find_your_collection_button = wait.until( + EC.element_to_be_clickable( + (By.XPATH, "/html/body/div[2]/div[2]/div/div/section/div[2]/div/article/div/div/p[2]/a") + ) + ) + find_your_collection_button.click() + + iframe = wait.until(EC.presence_of_element_located((By.ID, "fillform-frame-1"))) + driver.switch_to.frame(iframe) + + postcode_input = wait.until(EC.presence_of_element_located((By.ID, "searchString"))) + postcode_input.send_keys(user_postcode + Keys.TAB + Keys.ENTER) + + time.sleep(15) + + select_elem = wait.until(EC.presence_of_element_located((By.ID, "customerAddress"))) + WebDriverWait(driver, 10).until( + lambda d: len(select_elem.find_elements(By.TAG_NAME, "option")) > 1 + ) + dropdown = Select(select_elem) + dropdown.select_by_value(user_uprn) + + time.sleep(10) + + wait.until( + EC.presence_of_element_located( + (By.CSS_SELECTOR, "span.fieldInput.content.html.non-input")) + ) + + soup = BeautifulSoup(driver.page_source, "html.parser") + bin_data = {"bins": []} + current_date = datetime.now() + current_formatted_date = None + + spans = soup.select("span.fieldInput.content.html.non-input") + print(f"Found {len(spans)} bin info spans.") + + for i, span in enumerate(spans): + try: + # Look for any non-empty tag recursively + date_tag = next( + (u for u in span.find_all("u") if u and u.text.strip()), + None + ) + bin_type_tag = span.find("b") + + if date_tag: + raw_date = date_tag.text.strip().replace(",", "") + full_date_str = f"{raw_date} {current_date.year}" + full_date_str = re.sub(r"\s+", " ", full_date_str) + + try: + parsed_date = datetime.strptime(full_date_str, "%A %d %B %Y") 
+ if parsed_date.date() < current_date.date(): + parsed_date = parsed_date.replace(year=current_date.year + 1) + current_formatted_date = parsed_date.strftime("%d/%m/%Y") + print(f"[{i}] Parsed date: {current_formatted_date}") + except ValueError as ve: + print(f"[{i}] Could not parse date: '{full_date_str}' - {ve}") + continue + else: + print(f"[{i}] No date tag found, using last valid date: {current_formatted_date}") + + if not current_formatted_date: + print(f"[{i}] No current date to associate bin type with — skipping.") + continue + + if not bin_type_tag or not bin_type_tag.text.strip(): + print(f"[{i}] No bin type found — skipping.") + continue + + bin_type = bin_type_tag.text.strip() + + # Optional seasonal override + try: + overrides_dict = get_seasonal_overrides() + if current_formatted_date in overrides_dict: + current_formatted_date = overrides_dict[current_formatted_date] + except Exception: + pass + + print(f"[{i}] Found bin: {bin_type} on {current_formatted_date}") + + bin_data["bins"].append({ + "type": bin_type, + "collectionDate": current_formatted_date + }) + + except Exception as inner_e: + print(f"[{i}] Skipping span due to error: {inner_e}") + continue + + except Exception as inner_e: + print(f"Skipping span due to error: {inner_e}") + continue + + if not bin_data["bins"]: + raise ValueError("No bin data found.") + + print(bin_data) + + return bin_data + + except Exception as e: + print(f"An error occurred: {e}") + raise + + finally: + if driver: + driver.quit() \ No newline at end of file diff --git a/uk_bin_collection/uk_bin_collection/councils/BarkingDagenham.py b/uk_bin_collection/uk_bin_collection/councils/BarkingDagenham.py index a76bd41f69..7fe52600c6 100644 --- a/uk_bin_collection/uk_bin_collection/councils/BarkingDagenham.py +++ b/uk_bin_collection/uk_bin_collection/councils/BarkingDagenham.py @@ -84,10 +84,19 @@ def parse_data(self, page: str, **kwargs) -> dict: EC.element_to_be_clickable((By.ID, "address")), message="Address dropdown 
not found", ) + dropdown = Select(address_select) - dropdown.select_by_visible_text(user_paon) - print("Address selected successfully") + found = False + for option in dropdown.options: + if user_paon in option.text: + option.click() + found = True + print("Address selected successfully") + break + + if not found: + raise Exception(f"No matching address containing '{user_paon}' found.") driver.switch_to.active_element.send_keys(Keys.TAB + Keys.ENTER) print("Pressed ENTER on Next button") diff --git a/uk_bin_collection/uk_bin_collection/councils/BrightonandHoveCityCouncil.py b/uk_bin_collection/uk_bin_collection/councils/BrightonandHoveCityCouncil.py index d6d6064f36..32d54790a2 100644 --- a/uk_bin_collection/uk_bin_collection/councils/BrightonandHoveCityCouncil.py +++ b/uk_bin_collection/uk_bin_collection/councils/BrightonandHoveCityCouncil.py @@ -63,8 +63,16 @@ def parse_data(self, page: str, **kwargs) -> dict: # Create a 'Select' for it, then select the first address in the list # (Index 0 is "Make a selection from the list") - dropdownSelect = Select(parent_element) - dropdownSelect.select_by_visible_text(str(user_paon)) + options = parent_element.find_elements(By.TAG_NAME, "option") + found = False + for option in options: + if user_paon in option.text: + option.click() + found = True + break + + if not found: + raise Exception(f"Address containing '{user_paon}' not found in dropdown options") submit_btn = wait.until( EC.presence_of_element_located( @@ -125,6 +133,7 @@ def parse_data(self, page: str, **kwargs) -> dict: break dict_data = {"type": bin_type, "collectionDate": bin_date} data["bins"].append(dict_data) + print(data) except Exception as e: # Here you can log the exception if needed print(f"An error occurred: {e}") diff --git a/uk_bin_collection/uk_bin_collection/councils/BroadlandDistrictCouncil.py b/uk_bin_collection/uk_bin_collection/councils/BroadlandDistrictCouncil.py index ed5b6a6b4e..f454bf352f 100644 --- 
a/uk_bin_collection/uk_bin_collection/councils/BroadlandDistrictCouncil.py +++ b/uk_bin_collection/uk_bin_collection/councils/BroadlandDistrictCouncil.py @@ -83,15 +83,30 @@ def parse_data(self, page: str, **kwargs) -> dict: ) print("Found address dropdown") - # Create a Select object for the dropdown dropdown_select = Select(address_dropdown) - # Search for the exact address - print(f"Looking for address: {user_paon}") + print(f"Looking for address containing: {user_paon}") - # Select the address by visible text - dropdown_select.select_by_visible_text(user_paon) - print(f"Selected address: {user_paon}") + found = False + user_paon_clean = user_paon.lower().strip() + + for option in dropdown_select.options: + option_text_clean = option.text.lower().strip() + + if ( + option_text_clean == user_paon_clean # Exact match if full address given + or option_text_clean.startswith(f"{user_paon_clean} ") # Startswith match if just a number + ): + option.click() + found = True + print(f"Selected address: {option.text.strip()}") + break + + if not found: + all_options = [opt.text for opt in dropdown_select.options] + raise Exception( + f"Could not find a matching address for '{user_paon}'. 
Available options: {all_options}" + ) print("Looking for submit button after address selection...") submit_btn = wait.until( diff --git a/uk_bin_collection/uk_bin_collection/councils/ChichesterDistrictCouncil.py b/uk_bin_collection/uk_bin_collection/councils/ChichesterDistrictCouncil.py index 63e88a3744..4223507709 100644 --- a/uk_bin_collection/uk_bin_collection/councils/ChichesterDistrictCouncil.py +++ b/uk_bin_collection/uk_bin_collection/councils/ChichesterDistrictCouncil.py @@ -1,110 +1,162 @@ import time from datetime import datetime -from selenium.webdriver.support.ui import Select from bs4 import BeautifulSoup from selenium.webdriver.common.by import By -from selenium.webdriver.support import expected_conditions as EC -from selenium.webdriver.support.ui import Select -from selenium.webdriver.support.wait import WebDriverWait from selenium.webdriver.common.keys import Keys +from selenium.webdriver.support.ui import WebDriverWait, Select +from selenium.webdriver.support import expected_conditions as EC +from selenium.common.exceptions import StaleElementReferenceException, TimeoutException from uk_bin_collection.uk_bin_collection.common import * from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass +date_format = "%d/%m/%Y" -# import the wonderful Beautiful Soup and the URL grabber class CouncilClass(AbstractGetBinDataClass): - """ - Concrete classes have to implement all abstract operations of the - base class. They can also override some operations with a default - implementation. 
- """ - def parse_data(self, page: str, **kwargs) -> dict: driver = None try: - # Make a BS4 object - page = "https://www.chichester.gov.uk/checkyourbinday" user_postcode = kwargs.get("postcode") - user_uprn = kwargs.get("uprn") + house_number = kwargs.get("paon") web_driver = kwargs.get("web_driver") headless = kwargs.get("headless") - house_number = kwargs.get("paon") driver = create_webdriver(web_driver, headless, None, __name__) driver.get(page) wait = WebDriverWait(driver, 60) - inputElement_postcodesearch = wait.until( + input_postcode = wait.until( EC.visibility_of_element_located( (By.ID, "WASTECOLLECTIONCALENDARV5_CALENDAR_ADDRESSLOOKUPPOSTCODE") ) ) + input_postcode.send_keys(user_postcode) - inputElement_postcodesearch.send_keys(user_postcode) - - inputElement_postcodesearch_btn = wait.until( - EC.visibility_of_element_located( - (By.ID, "WASTECOLLECTIONCALENDARV5_CALENDAR_ADDRESSLOOKUPSEARCH") - ) - ) - inputElement_postcodesearch_btn.send_keys(Keys.ENTER) - - inputElement_select_address = wait.until( + search_button = wait.until( EC.element_to_be_clickable( - (By.ID, "WASTECOLLECTIONCALENDARV5_CALENDAR_ADDRESSLOOKUPADDRESS") + (By.ID, "WASTECOLLECTIONCALENDARV5_CALENDAR_ADDRESSLOOKUPSEARCH") ) ) - dropdown_element = driver.find_element( - By.ID, "WASTECOLLECTIONCALENDARV5_CALENDAR_ADDRESSLOOKUPADDRESS" - ) + search_button.send_keys(Keys.ENTER) - # Now create a Select object based on the found element - dropdown = Select(dropdown_element) + self.smart_select_address(driver, house_number) - # Select the option by visible text - dropdown.select_by_visible_text(house_number) - - results = wait.until( - EC.element_to_be_clickable( + wait.until( + EC.presence_of_element_located( (By.CLASS_NAME, "bin-collection-dates-container") ) ) soup = BeautifulSoup(driver.page_source, features="html.parser") - soup.prettify() + table = soup.find("table", class_="defaultgeneral bin-collection-dates") + rows = table.find_all("tr") if table else [] - # Extract data from the 
table bin_collection_data = [] - rows = soup.find( - "table", class_="defaultgeneral bin-collection-dates" - ).find_all("tr") for row in rows: cells = row.find_all("td") if cells: date_str = cells[0].text.strip() bin_type = cells[1].text.strip() - # Convert date string to the required format DD/MM/YYYY date_obj = datetime.strptime(date_str, "%d %B %Y") - date_formatted = date_obj.strftime(date_format) - bin_collection_data.append( - {"collectionDate": date_formatted, "type": bin_type} - ) + formatted_date = date_obj.strftime(date_format) + bin_collection_data.append({ + "collectionDate": formatted_date, + "type": bin_type + }) - # Convert to JSON - json_data = {"bins": bin_collection_data} + print(bin_collection_data) + + return {"bins": bin_collection_data} except Exception as e: - # Here you can log the exception if needed print(f"An error occurred: {e}") - # Optionally, re-raise the exception if you want it to propagate raise finally: - # This block ensures that the driver is closed regardless of an exception if driver: driver.quit() - return json_data + + def smart_select_address(self, driver, house_number: str): + dropdown_id = "WASTECOLLECTIONCALENDARV5_CALENDAR_ADDRESSLOOKUPADDRESS" + + print("Waiting for address dropdown...") + + def dropdown_has_addresses(d): + try: + dropdown_el = d.find_element(By.ID, dropdown_id) + select = Select(dropdown_el) + return len(select.options) > 1 + except StaleElementReferenceException: + return False + + WebDriverWait(driver, 30).until(dropdown_has_addresses) + + dropdown_el = driver.find_element(By.ID, dropdown_id) + dropdown = Select(dropdown_el) + + print("Address dropdown options:") + for opt in dropdown.options: + print(f"- {opt.text.strip()}") + + user_input_clean = house_number.lower().strip() + found = False + + for option in dropdown.options: + option_text_clean = option.text.lower().strip() + print(f"Comparing: {repr(option_text_clean)} == {repr(user_input_clean)}") + + if ( + option_text_clean == 
user_input_clean + or option_text_clean.startswith(f"{user_input_clean},") + ): + try: + option.click() + found = True + print(f"Strict match clicked: {option.text.strip()}") + break + except StaleElementReferenceException: + print("Stale during click, retrying...") + dropdown_el = driver.find_element(By.ID, dropdown_id) + dropdown = Select(dropdown_el) + for fresh_option in dropdown.options: + if fresh_option.text.lower().strip() == option_text_clean: + fresh_option.click() + found = True + print(f"Strict match clicked after refresh: {fresh_option.text.strip()}") + break + + if found: + break + + if not found: + print("No strict match found, trying fuzzy match...") + for option in dropdown.options: + option_text_clean = option.text.lower().strip() + if user_input_clean in option_text_clean: + try: + option.click() + found = True + print(f"Fuzzy match clicked: {option.text.strip()}") + break + except StaleElementReferenceException: + print("Stale during fuzzy click, retrying...") + dropdown_el = driver.find_element(By.ID, dropdown_id) + dropdown = Select(dropdown_el) + for fresh_option in dropdown.options: + if fresh_option.text.lower().strip() == option_text_clean: + fresh_option.click() + found = True + print(f"Fuzzy match clicked after refresh: {fresh_option.text.strip()}") + break + + if found: + break + + if not found: + all_opts = [opt.text.strip() for opt in dropdown.options] + raise Exception( + f"Could not find address '{house_number}' in options: {all_opts}" + ) \ No newline at end of file diff --git a/uk_bin_collection/uk_bin_collection/councils/FermanaghOmaghDistrictCouncil.py b/uk_bin_collection/uk_bin_collection/councils/FermanaghOmaghDistrictCouncil.py new file mode 100644 index 0000000000..1dd104f27f --- /dev/null +++ b/uk_bin_collection/uk_bin_collection/councils/FermanaghOmaghDistrictCouncil.py @@ -0,0 +1,102 @@ +import difflib +from datetime import date, datetime + +import requests +from bs4 import BeautifulSoup + +from 
uk_bin_collection.uk_bin_collection.common import * +from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass + + +# import the wonderful Beautiful Soup and the URL grabber +class CouncilClass(AbstractGetBinDataClass): + """ + Concrete classes have to implement all abstract operations of the + base class. They can also override some operations with a default + implementation. + """ + + base_url = "https://fermanaghomagh.isl-fusion.com/" + + def parse_data(self, page: str, **kwargs) -> dict: + """ + This function will make a request to the search endpoint with the postcode, extract the + house numbers from the responses, then retrieve the ID of the entry with the house number that matches, + to then retrieve the bin schedule. + + The API here is a weird combination of HTML in json responses. + """ + postcode = kwargs.get("postcode") + paon = kwargs.get("paon") + + if not postcode: + raise ValueError("Must provide a postcode") + + if not paon: + raise ValueError("Must provide a house number") + + search_url = f"{self.base_url}/address/{postcode}" + + requests.packages.urllib3.disable_warnings() + s = requests.Session() + response = s.get(search_url) + response.raise_for_status() + + address_data = response.json() + + address_list = address_data["html"] + + soup = BeautifulSoup(address_list, features="html.parser") + + address_by_id = {} + + for li in soup.find_all("li"): + link = li.find_all("a")[0] + address_id = link.attrs["href"] + address = link.text + + address_by_id[address_id] = address + + addresses = list(address_by_id.values()) + + common = difflib.SequenceMatcher( + a=addresses[0], b=addresses[1] + ).find_longest_match() + extra_bit = addresses[0][common.a : common.a + common.size] + + ids_by_paon = { + a.replace(extra_bit, ""): a_id.replace("/view/", "").replace("/", "") + for a_id, a in address_by_id.items() + } + + property_id = ids_by_paon.get(paon) + if not property_id: + raise ValueError( + f"Invalid house number, valid 
values are {', '.join(ids_by_paon.keys())}" + ) + + today = date.today() + calendar_url = ( + f"{self.base_url}/calendar/{property_id}/{today.strftime('%Y-%m-%d')}" + ) + response = s.get(calendar_url) + response.raise_for_status() + calendar_data = response.json() + next_collections = calendar_data["nextCollections"] + + collections = list(next_collections["collections"].values()) + + data = {"bins": []} + + for collection in collections: + collection_date = datetime.strptime(collection["date"], "%Y-%m-%d") + bins = [c["name"] for c in collection["collections"].values()] + + for bin in bins: + data["bins"].append( + { + "type": bin, + "collectionDate": collection_date.strftime(date_format), + } + ) + return data diff --git a/uk_bin_collection/uk_bin_collection/councils/NorthEastDerbyshireDistrictCouncil.py b/uk_bin_collection/uk_bin_collection/councils/NorthEastDerbyshireDistrictCouncil.py index 4191ded543..58d5b48ef9 100644 --- a/uk_bin_collection/uk_bin_collection/councils/NorthEastDerbyshireDistrictCouncil.py +++ b/uk_bin_collection/uk_bin_collection/councils/NorthEastDerbyshireDistrictCouncil.py @@ -1,4 +1,5 @@ from datetime import datetime +from time import sleep from bs4 import BeautifulSoup from selenium.webdriver.common.by import By @@ -9,8 +10,6 @@ from uk_bin_collection.uk_bin_collection.common import * from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass -# import the wonderful Beautiful Soup and the URL grabber - class CouncilClass(AbstractGetBinDataClass): """ @@ -34,82 +33,105 @@ def parse_data(self, page: str, **kwargs) -> dict: headless = kwargs.get("headless") check_uprn(user_uprn) check_postcode(user_postcode) - # Create Selenium webdriver + driver = create_webdriver(web_driver, headless, None, __name__) driver.get(page) - # If you bang in the house number (or property name) and postcode in the box it should find your property - iframe_presense = WebDriverWait(driver, 30).until( 
EC.presence_of_element_located((By.ID, "fillform-frame-1")) ) driver.switch_to.frame(iframe_presense) wait = WebDriverWait(driver, 60) + inputElement_postcodesearch = wait.until( EC.element_to_be_clickable((By.NAME, "postcode_search")) ) - inputElement_postcodesearch.send_keys(str(user_postcode)) - # Wait for the 'Select your property' dropdown to appear and select the first result dropdown = wait.until(EC.element_to_be_clickable((By.NAME, "selAddress"))) - dropdown_options = wait.until( EC.presence_of_element_located((By.CLASS_NAME, "lookup-option")) ) - # Create a 'Select' for it, then select the first address in the list - # (Index 0 is "Make a selection from the list") drop_down_values = Select(dropdown) option_element = wait.until( EC.presence_of_element_located( (By.CSS_SELECTOR, f'option.lookup-option[value="{str(user_uprn)}"]') ) ) - drop_down_values.select_by_value(str(user_uprn)) - # Wait for the 'View more' link to appear, then click it to get the full set of dates h3_element = wait.until( EC.presence_of_element_located( (By.XPATH, "//th[contains(text(), 'Waste Collection')]") ) ) + sleep(10) + soup = BeautifulSoup(driver.page_source, features="html.parser") + print("Parsing HTML content...") + + collection_rows = soup.find_all("tr") + + for row in collection_rows: + cells = row.find_all("td") + if len(cells) == 3: # Date, Image, Bin Type + # Extract date carefully + date_labels = cells[0].find_all("label") + collection_date = None + for label in date_labels: + label_text = label.get_text().strip() + if contains_date(label_text): + collection_date = label_text + break + + # Extract bin type + bin_label = cells[2].find("label") + bin_types = bin_label.get_text().strip() if bin_label else None + + if collection_date and bin_types: + print(f"Found collection: {collection_date} - {bin_types}") + + # Handle combined collections + if "&" in bin_types: + if "Burgundy" in bin_types: + data["bins"].append( + { + "type": "Burgundy Bin", + "collectionDate": 
datetime.strptime( + collection_date, "%d/%m/%Y" + ).strftime(date_format), + } + ) + if "Green" in bin_types: + data["bins"].append( + { + "type": "Green Bin", + "collectionDate": datetime.strptime( + collection_date, "%d/%m/%Y" + ).strftime(date_format), + } + ) + else: + if "Black" in bin_types: + data["bins"].append( + { + "type": "Black Bin", + "collectionDate": datetime.strptime( + collection_date, "%d/%m/%Y" + ).strftime(date_format), + } + ) + + print(f"Found {len(data['bins'])} collections") + print(f"Final data: {data}") - target_h3 = soup.find("h3", string="Collection Details") - tables_after_h3 = target_h3.parent.parent.find_next("table") - - table_rows = tables_after_h3.find_all("tr") - for row in table_rows: - rowdata = row.find_all("td") - if len(rowdata) == 3: - labels = rowdata[0].find_all("label") - # Strip the day (i.e., Monday) out of the collection date string for parsing - if len(labels) >= 2: - date_label = labels[1] - datestring = date_label.text.strip() - - # Add the bin type and collection date to the 'data' dictionary - data["bins"].append( - { - "type": rowdata[2].text.strip(), - "collectionDate": datetime.strptime( - datestring, "%d/%m/%Y" - ).strftime( - date_format - ), # Format the date as needed - } - ) except Exception as e: - # Here you can log the exception if needed print(f"An error occurred: {e}") - # Optionally, re-raise the exception if you want it to propagate raise finally: - # This block ensures that the driver is closed regardless of an exception if driver: driver.quit() return data diff --git a/uk_bin_collection/uk_bin_collection/councils/SloughBoroughCouncil.py b/uk_bin_collection/uk_bin_collection/councils/SloughBoroughCouncil.py new file mode 100644 index 0000000000..1811cd8f39 --- /dev/null +++ b/uk_bin_collection/uk_bin_collection/councils/SloughBoroughCouncil.py @@ -0,0 +1,140 @@ +import time +import re +import requests +from datetime import datetime +from bs4 import BeautifulSoup +from selenium.webdriver.common.by 
import By +from selenium.webdriver.common.keys import Keys +from selenium.webdriver.support import expected_conditions as EC +from selenium.webdriver.support.ui import WebDriverWait +from uk_bin_collection.uk_bin_collection.common import * +from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass + +def get_street_from_postcode(postcode: str, api_key: str) -> str: + url = "https://maps.googleapis.com/maps/api/geocode/json" + params = {"address": postcode, "key": api_key} + response = requests.get(url, params=params) + data = response.json() + + if data["status"] != "OK": + raise ValueError(f"API error: {data['status']}") + + for component in data["results"][0]["address_components"]: + if "route" in component["types"]: + return component["long_name"] + + raise ValueError("No street (route) found in the response.") + +class CouncilClass(AbstractGetBinDataClass): + def parse_data(self, page: str, **kwargs) -> dict: + driver = None + bin_data = {"bins": []} + try: + user_postcode = kwargs.get("postcode") + if not user_postcode: + raise ValueError("No postcode provided.") + check_postcode(user_postcode) + + headless = kwargs.get("headless") + web_driver = kwargs.get("web_driver") + driver = create_webdriver(web_driver, headless, None, __name__) + page = "https://www.slough.gov.uk/bin-collections" + driver.get(page) + + # Accept cookies + WebDriverWait(driver, 10).until( + EC.element_to_be_clickable((By.ID, "ccc-recommended-settings")) + ).click() + + # Enter the street name into the address search + address_input = WebDriverWait(driver, 10).until( + EC.presence_of_element_located((By.ID, "keyword_directory25")) + ) + user_address = get_street_from_postcode(user_postcode, "AIzaSyBDLULT7EIlNtHerswPtfmL15Tt3Oc0bV8") + address_input.send_keys(user_address + Keys.ENTER) + + # Wait for address results to load + WebDriverWait(driver, 10).until( + EC.presence_of_all_elements_located((By.CSS_SELECTOR, "span.list__link-text")) + ) + span_elements = 
driver.find_elements(By.CSS_SELECTOR, "span.list__link-text") + + for span in span_elements: + if user_address.lower() in span.text.lower(): + span.click() + break + else: + raise Exception(f"No link found containing address: {user_address}") + + # Wait for address detail page + WebDriverWait(driver, 10).until( + EC.presence_of_element_located((By.CSS_SELECTOR, "section.site-content")) + ) + soup = BeautifulSoup(driver.page_source, "html.parser") + + # Extract each bin link and type + for heading in soup.select("dt.definition__heading"): + heading_text = heading.get_text(strip=True) + if "bin day details" in heading_text.lower(): + bin_type = heading_text.split()[0].capitalize() + " bin" + dd = heading.find_next_sibling("dd") + link = dd.find("a", href=True) + + if link: + bin_url = link["href"] + if not bin_url.startswith("http"): + bin_url = "https://www.slough.gov.uk" + bin_url + + # Visit the child page + print(f"Navigating to {bin_url}") + driver.get(bin_url) + WebDriverWait(driver, 10).until( + EC.presence_of_element_located((By.CSS_SELECTOR, "div.page-content")) + ) + child_soup = BeautifulSoup(driver.page_source, "html.parser") + + editor_div = child_soup.find("div", class_="editor") + if not editor_div: + print("No editor div found on bin detail page.") + continue + + ul = editor_div.find("ul") + if not ul: + print("No