Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 38 additions & 8 deletions hestia/hestia_utils/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -232,14 +232,44 @@ def parse_woonnet_rijnmond(self, r: requests.models.Response):
def parse_woonin(self, r: requests.models.Response):
    """Append every available Woonin rental listing in ``r`` to ``self.homes``.

    The response body is JSON with an ``objects`` list. A listing is kept
    only when all of the following hold:

    * its ``type`` is ``"huur"``;
    * it is not flagged ``verhuurd`` and none of its status fields contain
      an "unavailable" marker;
    * it has an address that contains a digit (street plus house number);
    * it has a price with at least one digit and a non-empty ``url``.

    Listings that fail a check are skipped rather than raising, so one
    malformed entry does not abort parsing of the rest of the response.

    Args:
        r: HTTP response whose ``content`` is the Woonin JSON payload.
    """
    # Substrings that mark a listing as rented or otherwise unavailable.
    unavailable_tokens = ("verhuurd", "onder optie", "withdrawn", "rentedwithreservation", "unavailable")
    # Fields that may carry one of the markers above.
    status_keys = ("className", "status", "statusLabel", "verhuurStatus")

    def clean(value: object) -> str:
        # A JSON null must become "" and not the literal string "None",
        # which would otherwise pass as a street or city name.
        return "" if value is None else str(value).strip()

    results = json.loads(r.content)['objects']
    for res in results:
        if res.get("type") != "huur":
            continue

        # Exclude unavailable/rented listings.
        status_fields = " ".join(str(res.get(field, "")).lower() for field in status_keys)
        if res.get("verhuurd", False) or any(token in status_fields for token in unavailable_tokens):
            continue

        street = clean(res.get("straat"))
        house_number = clean(res.get("huisnummer"))
        if not street:
            continue

        # Woonin now returns house number in a separate field for many listings.
        if house_number and not re.search(r"\d", street):
            address = f"{street} {house_number}"
        else:
            address = street

        # Project listings do not have a house number and should be skipped.
        if not re.search(r"\d", address):
            continue

        # Take only the first number in the price text. Dots are treated as
        # thousands separators; parsing stops at a decimal comma or any other
        # character, so "€ 1.800,50" yields 1800 instead of 180050.
        price_match = re.search(r"\d[\d.]*", clean(res.get("vraagPrijs")))
        digits = price_match.group().replace(".", "") if price_match else ""
        if not digits:
            continue

        # Without a link the listing cannot be opened, so skip it instead of
        # raising KeyError and losing every other listing in the response.
        url_path = clean(res.get("url"))
        if not url_path:
            continue

        home = Home(agency="woonin")
        home.address = address
        home.city = clean(res.get("plaats"))
        home.url = f"https://ik-zoek.woonin.nl{url_path}"  # Given URL links directly to listing
        home.price = int(digits)
        self.homes.append(home)

def parse_vesteda(self, r: requests.models.Response):
results = json.loads(r.content)["results"]["objects"]
Expand Down
28 changes: 27 additions & 1 deletion tests/test_parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -484,7 +484,8 @@ def test_basic_parsing(self, mock_response):
{
"type": "huur",
"verhuurd": False,
"straat": "Prinsengracht 100",
"straat": "Prinsengracht",
"huisnummer": "100",
"plaats": "Amsterdam",
"url": "/woning/prinsengracht-100",
"vraagPrijs": "€ 1.800"
Expand Down Expand Up @@ -524,6 +525,31 @@ def test_filters_non_huur(self, mock_response):
results = HomeResults("woonin", r)
assert len(results.homes) == 0

def test_filters_unavailable_and_requires_house_number(self, mock_response):
    """Neither an unavailable listing nor one without a house number is kept."""
    # Rental with a house number, but its "className" marks it unavailable.
    unavailable_listing = {
        "type": "huur",
        "verhuurd": False,
        "className": "unavailable",
        "straat": "Straat",
        "huisnummer": "1",
        "plaats": "Amsterdam",
        "url": "/1",
        "vraagPrijs": "€ 1.000",
    }
    # Rental with no "huisnummer" and no digit in its street name.
    listing_without_number = {
        "type": "huur",
        "verhuurd": False,
        "straat": "Projectnaam",
        "plaats": "Amsterdam",
        "url": "/2",
        "vraagPrijs": "€ 1.000",
    }
    payload = {"objects": [unavailable_listing, listing_without_number]}

    parsed = HomeResults("woonin", mock_response(payload))

    assert len(parsed.homes) == 0


class TestParsePararius:
def _make_listing_html(
Expand Down