|
2 | 2 | import re |
3 | 3 | from bs4 import BeautifulSoup |
4 | 4 |
|
5 | | -class Forecast(): |
| 5 | +class Forecast: |
| 6 | + BASE_URL = "https://weather.umd.edu/" |
6 | 7 |
|
7 | 8 | def get_hourly_forecast(self): |
| 9 | + """Fetches the hourly weather forecast for the next five hours.""" |
8 | 10 | hourly_forecast = [] |
9 | | - url = "https://weather.umd.edu/" |
10 | | - |
11 | | - response = requests.get(url) |
12 | | - soup = BeautifulSoup(response.text, 'html.parser') |
13 | | - a = soup.find('div', id="umdwx_weeklyfcst_widget-9") |
14 | | - |
15 | | - for day in a.find_all('div', class_='fcst_day')[:5]: |
16 | | - time = day.find('div', class_='fcst_txt-day').text.strip() |
17 | | - temp = day.find('div', class_='fcst_txt-temp') |
18 | | - wind = day.find('div', class_='fcst_txt-wind') |
19 | | - |
20 | | - temp = temp.text.replace('\u2009', ' ').replace('F', ' ').strip() |
21 | | - |
22 | | - wind = re.sub(r'[^\x00-\x7F]+', '', wind.text) |
23 | | - |
24 | | - wind = wind.replace('mph', '').strip() |
25 | | - |
26 | | - hour, period = time.split() |
27 | | - |
28 | | - hour = int(hour) |
29 | | - |
30 | | - if period == 'PM' and hour != 12: |
31 | | - hour += 12 |
32 | | - elif period == 'AM' and hour == 12: |
33 | | - hour = 0 |
34 | | - |
35 | | - time = hour |
36 | | - |
37 | | - |
38 | | - if time and temp and wind: |
39 | | - hourly_forecast.append({ |
40 | | - 'time': time, |
41 | | - 'temperature': temp, |
42 | | - 'wind': wind |
43 | | - }) |
44 | | - return hourly_forecast |
| 11 | + soup = self._fetch_soup() |
| 12 | + |
| 13 | + forecast_div = soup.find('div', id="umdwx_weeklyfcst_widget-9") |
| 14 | + if not forecast_div: |
| 15 | + raise ValueError("Hourly forecast data not found on the page.") |
45 | 16 |
|
46 | | - def get_weekly_forecast(self): |
47 | | - url = "https://weather.umd.edu/" |
| 17 | + for day in forecast_div.find_all('div', class_='fcst_day')[:5]: |
| 18 | + forecast = self._parse_hourly_forecast(day) |
| 19 | + if forecast: |
| 20 | + hourly_forecast.append(forecast) |
48 | 21 |
|
49 | | - response = requests.get(url) |
50 | | - soup = BeautifulSoup(response.text, 'html.parser') |
| 22 | + return hourly_forecast |
51 | 23 |
|
52 | | - a = soup.find('div', id="umdwx_weeklyfcst_widget-9") |
53 | | - |
| 24 | + def get_weekly_forecast(self): |
| 25 | + """Fetches the weekly weather forecast.""" |
54 | 26 | weekly_forecast = [] |
55 | | - weekly_forecast_start = 5 # Skip first part for weekly forecast |
56 | | - for day in a.find_all('div', class_='fcst_day')[weekly_forecast_start:]: |
57 | | - day_name = day.find('div', class_='fcst_txt-day') |
58 | | - temp_range = day.find('div', class_='fcst_txt-temp') |
59 | | - |
60 | | - if day_name and temp_range: |
61 | | - # Fix Thursday typo in website |
62 | | - if day_name.text == 'Thi': |
63 | | - day_name = 'Thu' |
64 | | - else: |
65 | | - day_name = day_name.text |
66 | | - |
67 | | - # Extract high and low temperatures |
68 | | - temp_values = temp_range.text.split(' / ') # Split by the separator |
69 | | - if len(temp_values) == 2: # Ensure we have both high and low values |
70 | | - high_temp = temp_values[0].replace('\u2009', ' ').strip() # Remove narrow space |
71 | | - low_temp = temp_values[1].replace('\u2009', ' ').strip() # Remove narrow space |
72 | | - high_temp = temp_values[0].replace('F', ' ').strip() |
73 | | - low_temp = temp_values[1].replace('F', ' ').strip() |
74 | | - weekly_forecast.append({ |
75 | | - 'day': day_name, |
76 | | - 'high': high_temp, |
77 | | - 'low': low_temp |
78 | | - }) |
| 27 | + soup = self._fetch_soup() |
| 28 | + |
| 29 | + forecast_div = soup.find('div', id="umdwx_weeklyfcst_widget-9") |
| 30 | + if not forecast_div: |
| 31 | + raise ValueError("Weekly forecast data not found on the page.") |
| 32 | + |
| 33 | + for day in forecast_div.find_all('div', class_='fcst_day')[5:]: |
| 34 | + forecast = self._parse_weekly_forecast(day) |
| 35 | + if forecast: |
| 36 | + weekly_forecast.append(forecast) |
79 | 37 |
|
80 | 38 | return weekly_forecast |
| 39 | + |
| 40 | + def _fetch_soup(self): |
| 41 | + """Fetches the HTML content and returns a BeautifulSoup object.""" |
| 42 | + response = requests.get(self.BASE_URL) |
| 43 | + response.raise_for_status() # Raise an error for bad responses |
| 44 | + return BeautifulSoup(response.text, 'html.parser') |
| 45 | + |
| 46 | + def _parse_hourly_forecast(self, day): |
| 47 | + """Parses the hourly forecast data from a single day element.""" |
| 48 | + time = day.find('div', class_='fcst_txt-day').text.strip() |
| 49 | + temp_div = day.find('div', class_='fcst_txt-temp') |
| 50 | + wind_div = day.find('div', class_='fcst_txt-wind') |
| 51 | + |
| 52 | + if temp_div and wind_div: |
| 53 | + temp = self._clean_temperature(temp_div.text) |
| 54 | + wind = self._clean_wind_speed(wind_div.text) |
| 55 | + |
| 56 | + hour, period = self._convert_time_to_24_hour_format(time) |
| 57 | + return {'time': hour, 'temperature': temp, 'wind': wind} |
| 58 | + |
| 59 | + return None |
| 60 | + |
| 61 | + def _parse_weekly_forecast(self, day): |
| 62 | + """Parses the weekly forecast data from a single day element.""" |
| 63 | + day_name_div = day.find('div', class_='fcst_txt-day') |
| 64 | + temp_range_div = day.find('div', class_='fcst_txt-temp') |
| 65 | + |
| 66 | + if day_name_div and temp_range_div: |
| 67 | + day_name = self._fix_day_name(day_name_div.text) |
| 68 | + high_temp, low_temp = self._extract_temperatures(temp_range_div.text) |
| 69 | + return {'day': day_name, 'high': high_temp, 'low': low_temp} |
| 70 | + |
| 71 | + return None |
| 72 | + |
| 73 | + def _clean_temperature(self, temp_text): |
| 74 | + """Cleans and formats the temperature text.""" |
| 75 | + return temp_text.replace('\u2009', ' ').replace('F', '').strip() |
| 76 | + |
| 77 | + def _clean_wind_speed(self, wind_text): |
| 78 | + """Cleans and formats the wind speed text.""" |
| 79 | + return re.sub(r'[^\x00-\x7F]+', '', wind_text).replace('mph', '').strip() |
| 80 | + |
| 81 | + def _convert_time_to_24_hour_format(self, time_text): |
| 82 | + """Converts the 12-hour format time to 24-hour format.""" |
| 83 | + hour, period = time_text.split() |
| 84 | + hour = int(hour) |
| 85 | + |
| 86 | + if period == 'PM' and hour != 12: |
| 87 | + hour += 12 |
| 88 | + elif period == 'AM' and hour == 12: |
| 89 | + hour = 0 |
| 90 | + |
| 91 | + return hour, period |
| 92 | + |
| 93 | + def _fix_day_name(self, day_name): |
| 94 | + """Fixes specific typos in day names.""" |
| 95 | + return 'Thu' if day_name == 'Thi' else day_name |
| 96 | + |
| 97 | + def _extract_temperatures(self, temp_range_text): |
| 98 | + """Extracts high and low temperatures from the temperature range text.""" |
| 99 | + temp_values = temp_range_text.split(' / ') |
| 100 | + if len(temp_values) == 2: |
| 101 | + high_temp = self._clean_temperature(temp_values[0]) |
| 102 | + low_temp = self._clean_temperature(temp_values[1]) |
| 103 | + return high_temp, low_temp |
| 104 | + return None, None |
0 commit comments