From 6a12e34d69312a5c72bd77bc8266ec74b22578e9 Mon Sep 17 00:00:00 2001
From: Thrive <147845726+VAIBHAVVARORA@users.noreply.github.com>
Date: Thu, 25 Sep 2025 19:58:52 +0530
Subject: [PATCH 1/2] Refactor fetch_jobs function to include job_title parameter

Updated the fetch_jobs function to accept job_title as an argument and improved the docstring for clarity.
---
 web_programming/fetch_jobs.py | 46 ++++++++++++++++++++++++++---------
 1 file changed, 34 insertions(+), 12 deletions(-)

diff --git a/web_programming/fetch_jobs.py b/web_programming/fetch_jobs.py
index 7a95f997078d..ee0dcb4fab27 100644
--- a/web_programming/fetch_jobs.py
+++ b/web_programming/fetch_jobs.py
@@ -1,5 +1,5 @@
 """
-Scraping jobs given job title and location from indeed website
+Scraping jobs given job title and location from Indeed website
 """
 
 # /// script
@@ -11,24 +11,46 @@
 # ///
 
 from __future__ import annotations
-
 from collections.abc import Generator
-
 import httpx
 from bs4 import BeautifulSoup
 
-url = "https://www.indeed.co.in/jobs?q=mobile+app+development&l="
+BASE_URL = "https://www.indeed.co.in/jobs"
+
+
+def fetch_jobs(
+    job_title: str = "mobile app development", location: str = "mumbai"
+) -> Generator[tuple[str, str], None, None]:
+    """
+    Scrape job postings from Indeed for a given job title and location.
+
+    Args:
+        job_title: Keywords to search for (default: "mobile app development").
+        location: City or region to search jobs in (default: "mumbai").
+
+    Yields:
+        Tuples of (job title, company name).
+
+    Example:
+        >>> jobs = list(fetch_jobs("python developer", "Bangalore"))
+        >>> isinstance(jobs[0], tuple)
+        True
+    """
+    headers = {"User-Agent": "Mozilla/5.0 (compatible; JobScraper/1.0)"}
+    params = {"q": job_title, "l": location}
+
+    response = httpx.get(BASE_URL, params=params, headers=headers, timeout=10)
+    response.raise_for_status()
+    soup = BeautifulSoup(response.content, "html.parser")
 
 
-def fetch_jobs(location: str = "mumbai") -> Generator[tuple[str, str]]:
-    soup = BeautifulSoup(httpx.get(url + location, timeout=10).content, "html.parser")
-    # This attribute finds out all the specifics listed in a job
     for job in soup.find_all("div", attrs={"data-tn-component": "organicJob"}):
-        job_title = job.find("a", attrs={"data-tn-element": "jobTitle"}).text.strip()
-        company_name = job.find("span", {"class": "company"}).text.strip()
-        yield job_title, company_name
+        title_tag = job.find("a", attrs={"data-tn-element": "jobTitle"})
+        company_tag = job.find("span", {"class": "company"})
+        if title_tag and company_tag:
+            yield title_tag.text.strip(), company_tag.text.strip()
 
 
 if __name__ == "__main__":
-    for i, job in enumerate(fetch_jobs("Bangalore"), 1):
-        print(f"Job {i:>2} is {job[0]} at {job[1]}")
+    for i, (title, company) in enumerate(fetch_jobs("python developer", "Bangalore"), 1):
+        print(f"Job {i:>2} is {title} at {company}")

From 1c7244cb0d0563a61d2b6333ab0aeb83fce50032 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Thu, 25 Sep 2025 14:39:52 +0000
Subject: [PATCH 2/2] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 web_programming/fetch_jobs.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/web_programming/fetch_jobs.py b/web_programming/fetch_jobs.py
index ee0dcb4fab27..0f6acaaf7acf 100644
--- a/web_programming/fetch_jobs.py
+++ b/web_programming/fetch_jobs.py
@@ -52,5 +52,7 @@ def fetch_jobs(
 
 
 if __name__ == "__main__":
-    for i, (title, company) in enumerate(fetch_jobs("python developer", "Bangalore"), 1):
+    for i, (title, company) in enumerate(
+        fetch_jobs("python developer", "Bangalore"), 1
+    ):
         print(f"Job {i:>2} is {title} at {company}")