Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
3595d7e
fix: used canonical 'nice name'
davida72 Apr 23, 2025
0ef8f15
fix: input.json changes
davida72 Apr 23, 2025
b53cf3a
fix: selenium navigation
davida72 Apr 23, 2025
376c63c
fix: parsed bin info
davida72 Apr 23, 2025
44664c9
fix: output check
davida72 Apr 23, 2025
9165280
fix: cloudflare fix - switch to selenium method
davida72 Apr 24, 2025
0ba03af
fix: updated input.json
davida72 Apr 24, 2025
729fb1e
feat: added pembrokeshire
davida72 Apr 24, 2025
5c71d0a
fix: processed all bins for Moray
davida72 Apr 27, 2025
3e9eeb4
fix: added melton
davida72 Apr 27, 2025
0ba5e6c
feat: added melton
davida72 Apr 27, 2025
93ef09b
fix: simplified blackburn
davida72 Apr 27, 2025
ec5c2fd
fix: Rugby fix
davida72 Apr 27, 2025
e208b3b
Merge pull request #1367 from davida72/input-cleanup
robbrad Apr 27, 2025
1d7242f
fix: input.json requires web_driver
davida72 Apr 27, 2025
31f9f38
bump: version 0.148.3 → 0.148.4
github-actions[bot] Apr 27, 2025
7367f37
Merge pull request #1371 from davida72/torbay
robbrad Apr 27, 2025
22366ef
bump: version 0.148.4 → 0.148.5
github-actions[bot] Apr 27, 2025
ffbf945
Merge branch 'master' into thanet
robbrad Apr 27, 2025
a312ea0
Merge branch 'master' into rugby-fix
davida72 Apr 27, 2025
7a5c459
Merge pull request #1373 from davida72/thanet
robbrad Apr 27, 2025
fa610d4
bump: version 0.148.5 → 0.148.6
github-actions[bot] Apr 27, 2025
c1de79d
Merge pull request #1374 from davida72/pembrokeshire
robbrad Apr 27, 2025
1fb913e
Merge pull request #1383 from davida72/moray-fix
robbrad Apr 27, 2025
cee431c
bump: version 0.148.6 → 0.149.0
github-actions[bot] Apr 27, 2025
b09334c
Merge pull request #1385 from davida72/melton
robbrad Apr 27, 2025
41b13e3
bump: version 0.149.0 → 0.150.0
github-actions[bot] Apr 27, 2025
d6feec7
Merge branch 'master' into Blackburn-no-selenium
robbrad Apr 27, 2025
eb70eba
Merge pull request #1386 from davida72/Blackburn-no-selenium
robbrad Apr 27, 2025
c32d109
Merge pull request #1387 from davida72/rugby-fix
robbrad Apr 27, 2025
ea08e53
fix: more robust brent date handling
davida72 Apr 27, 2025
20f21b4
Merge pull request #1388 from davida72/brent-fix
robbrad Apr 27, 2025
fe49d9f
feat: version bump
robbrad Apr 27, 2025
7e3c438
Merge pull request #1389 from robbrad/versionbump
robbrad Apr 27, 2025
1136eed
bump: version 0.150.0 → 0.151.0
github-actions[bot] Apr 27, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 52 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,56 @@
=======
## 0.151.0 (2025-04-27)

### Feat

- version bump

### Fix

- more robust brent date handling
- input.json requires web_driver
- Rugby fix
- simplified blackburn

## 0.150.0 (2025-04-27)

### Feat

- added melton

### Fix

- added melton
- processed all bins for Moray

## 0.149.0 (2025-04-27)

### Feat

- added pembrokeshire

## 0.148.6 (2025-04-27)

### Fix

- updated input.json
- cloudflare fix - switch to selenium method

## 0.148.5 (2025-04-27)

### Fix

- output check
- parsed bin info
- selenium navigation
- input.json changes

## 0.148.4 (2025-04-27)

### Fix

- used canonical 'nice name'

## 0.148.3 (2025-04-25)

### Fix
Expand Down
2 changes: 1 addition & 1 deletion custom_components/uk_bin_collection/config_flow.py
Original file line number Diff line number Diff line change
Expand Up @@ -253,7 +253,7 @@ async def async_step_reconfigure_confirm(

async def get_councils_json(self) -> Dict[str, Any]:
"""Fetch and return the supported councils data, including aliases and sorted alphabetically."""
url = "https://raw.githubusercontent.com/robbrad/UKBinCollectionData/0.148.3/uk_bin_collection/tests/input.json"
url = "https://raw.githubusercontent.com/robbrad/UKBinCollectionData/0.151.0/uk_bin_collection/tests/input.json"
try:
async with aiohttp.ClientSession() as session:
async with session.get(url) as response:
Expand Down
4 changes: 2 additions & 2 deletions custom_components/uk_bin_collection/manifest.json
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
"integration_type": "service",
"iot_class": "cloud_polling",
"issue_tracker": "https://github.com/robbrad/UKBinCollectionData/issues",
"requirements": ["uk-bin-collection>=0.148.3"],
"version": "0.148.3",
"requirements": ["uk-bin-collection>=0.151.0"],
"version": "0.151.0",
"zeroconf": []
}
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "uk_bin_collection"
version = "0.148.3"
version = "0.151.0"
description = "Python Lib to collect UK Bin Data"
readme = "README.md"
authors = ["Robert Bradley <robbrad182@gmail.com>"]
Expand Down
653 changes: 334 additions & 319 deletions uk_bin_collection/tests/input.json

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,6 @@


# import the wonderful Beautiful Soup and the URL grabber


class CouncilClass(AbstractGetBinDataClass):
"""
Concrete classes have to implement all abstract operations of the
Expand Down
80 changes: 47 additions & 33 deletions uk_bin_collection/uk_bin_collection/councils/BrentCouncil.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,42 +74,56 @@ def parse_data(self, page: str, **kwargs) -> dict:
)

if service_details:

# Extract next collection date
# Extract next collection date only
next_collection_row = service_details.find(
"dt", string="Next collection"
)
next_collection = (
next_collection_row.find_next_sibling("dd").get_text(
strip=True
)
if next_collection_row
else "Unknown"
)

# Parse dates into standard dd/mm/yyyy format
next_collection_date = datetime.strptime(
remove_ordinal_indicator_from_date_string(next_collection),
"%A, %d %B",
)

if (datetime.now().month == 12) and (
next_collection.month == 1
):
next_collection_date = next_collection_date.replace(
year=next_year
if next_collection_row:
next_collection = next_collection_row.find_next_sibling(
"dd"
).get_text(strip=True)

# Remove the adjusted collection time message
if (
"(this collection has been adjusted from its usual time)"
in next_collection
):
next_collection = next_collection.replace(
"(this collection has been adjusted from its usual time)",
"",
).strip()

# Parse date from format like "Wednesday, 7th May"
next_collection = remove_ordinal_indicator_from_date_string(
next_collection
)
else:
next_collection_date = next_collection_date.replace(
year=current_year
)

dict_data = {
"type": collection_type.strip(),
"collectionDate": next_collection_date.strftime(
date_format
),
}
data["bins"].append(dict_data)
try:
next_collection_date = datetime.strptime(
next_collection, "%A, %d %B"
)

# Handle year rollover
if (
datetime.now().month == 12
and next_collection_date.month == 1
):
next_collection_date = next_collection_date.replace(
year=next_year
)
else:
next_collection_date = next_collection_date.replace(
year=current_year
)

dict_data = {
"type": collection_type.strip(),
"collectionDate": next_collection_date.strftime(
date_format
),
}
data["bins"].append(dict_data)
print(dict_data)
except ValueError as e:
print(f"Error parsing date {next_collection}: {e}")

return data
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
import time
from datetime import datetime

from bs4 import BeautifulSoup
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import Select
from selenium.webdriver.support.wait import WebDriverWait

from uk_bin_collection.uk_bin_collection.common import *
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass

# import the wonderful Beautiful Soup and the URL grabber


class CouncilClass(AbstractGetBinDataClass):
    """
    Scrape bin-collection dates for Maidstone Borough Council.

    Drives the council's "Find your bin day" form (which is rendered
    inside an iframe) with Selenium, then parses the resulting page with
    BeautifulSoup.  Requires ``postcode``, ``paon`` (house name/number),
    ``web_driver`` and ``headless`` kwargs.
    """

    def parse_data(self, page: str, **kwargs) -> dict:
        """Return ``{"bins": [{"type": ..., "collectionDate": ...}, ...]}``
        sorted by collection date.

        The ``page`` argument is ignored; the council URL is fixed below.
        Any failure is printed and re-raised; the webdriver is always
        quit, success or failure.
        """
        driver = None
        try:
            page = "https://my.maidstone.gov.uk/service/Find-your-bin-day"
            bin_data = {"bins": []}
            user_paon = kwargs.get("paon")
            user_postcode = kwargs.get("postcode")
            web_driver = kwargs.get("web_driver")
            headless = kwargs.get("headless")
            check_postcode(user_postcode)

            # Create Selenium webdriver
            driver = create_webdriver(web_driver, headless, None, __name__)
            driver.get(page)

            # The form lives inside an iframe — wait for it, then enter it.
            # (typo fixed: was "iframe_presense")
            iframe_presence = WebDriverWait(driver, 30).until(
                EC.presence_of_element_located((By.ID, "fillform-frame-1"))
            )
            driver.switch_to.frame(iframe_presence)

            wait = WebDriverWait(driver, 60)

            # Postal code input
            postcode_input = wait.until(
                EC.element_to_be_clickable((By.NAME, "postcode"))
            )
            postcode_input.send_keys(user_postcode)

            # Wait for the 'Select address' dropdown to be updated
            dropdown_select = wait.until(
                EC.presence_of_element_located(
                    (By.XPATH, "//span[contains(text(), 'Select...')]")
                )
            )
            dropdown_select.click()

            # Pick the entry matching the supplied house name/number.
            dropdown = wait.until(
                EC.element_to_be_clickable(
                    (By.XPATH, f"//div[contains(text(), ' {user_paon}')]")
                )
            )
            dropdown.click()

            # Wait for 'Searching for...' to be added to page
            WebDriverWait(driver, timeout=15).until(
                EC.text_to_be_present_in_element(
                    (By.CSS_SELECTOR, "span[data-name=html1]"), "Searching"
                )
            )

            # Wait for 'Searching for...' to be removed from page
            WebDriverWait(driver, timeout=15).until(
                EC.none_of(
                    EC.text_to_be_present_in_element(
                        (By.CSS_SELECTOR, "span[data-name=html1]"), "Searching"
                    )
                )
            )

            # Even then it can still be adding data to the page...
            time.sleep(5)

            soup = BeautifulSoup(driver.page_source, features="html.parser")

            # This is ugly but there is literally no consistency to the HTML
            def is_a_collection_date(t):
                return any("Next collection" in c for c in t.children)

            for next_collection in soup.find_all(is_a_collection_date):
                bin_info = list(
                    next_collection.parent.select_one("div:nth-child(1)").children
                )
                if not bin_info:
                    continue
                # Renamed from ``bin`` to avoid shadowing the builtin.
                bin_type = bin_info[0].get_text()
                date = next_collection.select_one("strong").get_text(strip=True)
                bin_date = datetime.strptime(date, "%d %b %Y")
                dict_data = {
                    "type": bin_type,
                    "collectionDate": bin_date.strftime(date_format),
                }
                bin_data["bins"].append(dict_data)

            bin_data["bins"].sort(
                key=lambda x: datetime.strptime(x.get("collectionDate"), date_format)
            )

        except Exception as e:
            # Log the failure, then re-raise so callers still see it.
            print(f"An error occurred: {e}")
            raise
        finally:
            # Always release the webdriver, regardless of outcome.
            if driver:
                driver.quit()
        return bin_data
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
import json
from datetime import datetime, timedelta

import requests
from bs4 import BeautifulSoup

from uk_bin_collection.uk_bin_collection.common import *
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass


# import the wonderful Beautiful Soup and the URL grabber
class CouncilClass(AbstractGetBinDataClass):
    """
    Scrape bin-collection dates for Melton Borough Council.

    Fetches the council's collections page for a UPRN over HTTP and
    parses each "box item" for a bin type and its upcoming dates.
    Requires a ``uprn`` kwarg.
    """

    def extract_dates(self, date_string: str) -> list:
        """
        Extract dates from strings like "01/05/2025, and then 15/05/2025".

        Returns a list of datetime objects; fragments that are not in
        dd/mm/yyyy form are silently skipped.
        """
        # Normalise "and then" into a comma so every date is comma-separated.
        date_string = date_string.replace("and then", ",")
        date_parts = [part.strip() for part in date_string.split(",") if part.strip()]

        dates = []
        for part in date_parts:
            try:
                dates.append(datetime.strptime(part, "%d/%m/%Y"))
            except ValueError:
                # Not a dd/mm/yyyy token — ignore it.
                continue

        return dates

    def parse_data(self, page: str, **kwargs) -> dict:
        """
        Return ``{"bins": [{"type": ..., "collectionDate": ...}, ...]}``
        sorted by collection date.
        """
        user_uprn = kwargs.get("uprn")
        check_uprn(user_uprn)

        url = f"https://my.melton.gov.uk/set-location?id={user_uprn}&redirect=collections&rememberloc="
        # Timeout so a hung council server cannot block forever;
        # raise_for_status surfaces HTTP errors instead of silently
        # parsing an error page into an empty bin list.
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, "html.parser")

        collections = []

        # Each schedule entry is rendered as an <li class="box-item...">.
        box_items = soup.find_all("li", class_=lambda x: x and x.startswith("box-item"))

        for box in box_items:
            bin_type = box.find("h2")
            # Skip the "Missed bin" section — it is not a schedule entry.
            if bin_type and "Missed bin" not in bin_type.text:
                bin_name = bin_type.text.strip()

                # The <strong> tag holds the date string(s) for this bin.
                dates_element = box.find("strong")
                if dates_element:
                    for date in self.extract_dates(dates_element.text.strip()):
                        collections.append((bin_name, date))

        # Sort the collections by date.
        ordered_data = sorted(collections, key=lambda x: x[1])

        # Format the data as required.  (Leftover debug print removed.)
        data = {"bins": []}
        for bin_name, bin_date in ordered_data:
            data["bins"].append(
                {
                    "type": bin_name,
                    "collectionDate": bin_date.strftime(date_format),
                }
            )

        return data
Loading
Loading