container.
+The script includes fallbacks for slight HTML variations to ensure robustness.
+"""
+
+from __future__ import annotations
+import csv
+import sys
+from typing import List, Dict
+
+import requests
+from bs4 import BeautifulSoup
+
# Address of the Real Python "fake jobs" demo page.
SOURCE_URL = "https://realpython.github.io/fake-jobs/"

# NOTE(review): presumably the path the scraped rows are written to -- the
# writing code is outside this view; confirm against the caller.
OUTPUT_CSV = "fake_jobs.csv"

# Column names; these match the dictionary keys documented by parse_jobs().
CSV_HEADERS = [
    "Job Title",
    "Company",
    "Location",
    "Date Posted",
]
+
+
def fetch_page(url: str, *, timeout: float = 10.0) -> str:
    """
    Fetch HTML content from the given URL.

    Args:
        url: The URL to fetch
        timeout: Seconds to wait for the server before giving up
            (defaults to 10 seconds)

    Returns:
        The HTML content as a string

    Raises:
        requests.HTTPError: If the server responds with an error status
        requests.RequestException: For other request failures, such as a
            connection error or the timeout expiring
    """
    # requests applies no timeout unless one is passed, so an unresponsive
    # server would otherwise block the script indefinitely.
    resp = requests.get(url, timeout=timeout)
    resp.raise_for_status()
    return resp.text
+
+
def _get_text(elem) -> str:
    """
    Return the stripped text of a BeautifulSoup element, tolerating misses.

    Args:
        elem: A BeautifulSoup element, or None when a lookup found nothing

    Returns:
        The result of elem.get_text(strip=True), or an empty string when
        elem is falsy (e.g. None)
    """
    # Guard first so a failed find() never triggers an attribute error.
    if not elem:
        return ""
    return elem.get_text(strip=True)
+
+
+def parse_jobs(html: str) -> List[Dict[str, str]]:
+ """
+ Parse job postings from HTML content.
+
+ Uses class-based find/find_all per https://blog.apify.com/beautifulsoup-find-by-class/
+ to extract job data with fallbacks for HTML variations.
+
+ Args:
+ html: The HTML content to parse
+
+ Returns:
+ A list of dictionaries, each containing job data with keys:
+ "Job Title", "Company", "Location", "Date Posted"
+ """
+ soup = BeautifulSoup(html, "html.parser")
+
+ # The Real Python fake-jobs demo wraps each job card inside
+ # <div class="card-content">. We use the class_ parameter of find_all
+ # as demonstrated in https://blog.apify.com/beautifulsoup-find-by-class/
+ job_cards = soup.find_all("div", class_="card-content")
+
+ jobs = []
+ for card in job_cards:
+ # Extract job title from <h2 class="title">
+ # Fallback: try "title" class first, then any h2
+ title_elem = card.find("h2", class_="title") or card.find("h2")
+ title = _get_text(title_elem)
+
+ # Extract company from <h3 class="company">
+ # Fallback: try "company" class first, then any h3
+ company_elem = card.find("h3", class_="company") or card.find("h3")
+ company = _get_text(company_elem)
+
+ # Extract location from <p class="location">
+ # Fallback: try "location" class first, then first p tag
+ location_elem = card.find("p", class_="location") or card.find("p")
+ location = _get_text(location_elem)
+
+ # Extract date posted from