Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 36 additions & 12 deletions web_programming/fetch_jobs.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
"""
Scraping jobs given job title and location from indeed website
Scraping jobs given job title and location from Indeed website
"""

# /// script
Expand All @@ -10,25 +10,49 @@
# ]
# ///

from __future__ import annotations

from collections.abc import Generator

import httpx
from bs4 import BeautifulSoup

Check failure on line 16 in web_programming/fetch_jobs.py

View workflow job for this annotation

GitHub Actions / ruff

Ruff (I001)

web_programming/fetch_jobs.py:13:1: I001 Import block is un-sorted or un-formatted

# Base search endpoint; the query ("q") and location ("l") parameters are
# supplied per request via httpx `params`, which handles URL-encoding.
BASE_URL = "https://www.indeed.co.in/jobs"


def fetch_jobs(
    job_title: str = "mobile app development", location: str = "mumbai"
) -> Generator[tuple[str, str]]:
    """
    Scrape job postings from Indeed for a given job title and location.

    Args:
        job_title: Keywords to search for (default: "mobile app development").
        location: City or region to search jobs in (default: "mumbai").

    Yields:
        Tuples of (job title, company name).

    Raises:
        httpx.HTTPStatusError: If the request returns a 4xx/5xx response.

    Example:
        >>> jobs = list(fetch_jobs("python developer", "Bangalore"))  # doctest: +SKIP
        >>> isinstance(jobs[0], tuple)  # doctest: +SKIP
        True
    """
    # Identify the scraper honestly rather than spoofing a full browser UA.
    headers = {"User-Agent": "Mozilla/5.0 (compatible; JobScraper/1.0)"}
    # Let httpx URL-encode the query string instead of concatenating by hand.
    params = {"q": job_title, "l": location}

    response = httpx.get(BASE_URL, params=params, headers=headers, timeout=10)
    # Fail loudly on HTTP errors instead of parsing an error page.
    response.raise_for_status()

    soup = BeautifulSoup(response.content, "html.parser")

    # Each "organicJob" container holds the details of one posting.
    for job in soup.find_all("div", attrs={"data-tn-component": "organicJob"}):
        title_tag = job.find("a", attrs={"data-tn-element": "jobTitle"})
        company_tag = job.find("span", {"class": "company"})
        # Skip malformed listings that are missing either field.
        if title_tag and company_tag:
            yield title_tag.text.strip(), company_tag.text.strip()


if __name__ == "__main__":
    # Demo run: print numbered (title, company) pairs for a sample query.
    for i, (title, company) in enumerate(
        fetch_jobs("python developer", "Bangalore"), 1
    ):
        print(f"Job {i:>2} is {title} at {company}")
Loading