diff --git a/task-1/output/clean_users.json b/task-1/output/clean_users.json new file mode 100644 index 0000000..38a6b87 --- /dev/null +++ b/task-1/output/clean_users.json @@ -0,0 +1,86 @@ +[ + { + "id": 1, + "name": "Alice Johnson", + "email": "alice.johnson@company.com", + "department": "Engineering", + "salary": 85000 + }, + { + "id": 2, + "name": "Bob Smith", + "email": "bob.smith@company.com", + "department": "Unknown", + "salary": 72000 + }, + { + "id": 3, + "name": "Carol Williams", + "email": "carol.williams@company.com", + "department": "Engineering", + "salary": null + }, + { + "id": 4, + "name": "David, Jr.", + "email": "david.brown@company.com", + "department": "Sales", + "salary": 68000 + }, + { + "id": 5, + "name": "Caf\u00e9 Owner", + "email": "eva@company.com", + "department": "Engineering", + "salary": 88000 + }, + { + "id": 6, + "name": "FRANK WILSON", + "email": "frank@company.com", + "department": "marketing", + "salary": 95000 + }, + { + "id": 7, + "name": "Grace Lee", + "email": "grace.lee@company.com", + "department": "Engineering", + "salary": null + }, + { + "id": 9, + "name": "Henry Davis", + "email": "henry.davis@company.com", + "department": "Sales", + "salary": 82000 + }, + { + "id": 11, + "name": "Linda Taylor", + "email": "linda.t@company.com", + "department": "HR", + "salary": 55000 + }, + { + "id": 12, + "name": "Mike Brown", + "email": "mike.b@company.com", + "department": "Sales", + "salary": null + }, + { + "id": 13, + "name": "Sarah Connor", + "email": "s.connor@sky.net", + "department": "Unknown", + "salary": -1 + }, + { + "id": 15, + "name": "John Doe", + "email": "john.doe@company.net", + "department": "Engineering", + "salary": 100000 + } +] \ No newline at end of file diff --git a/task-1/src/utils.py b/task-1/src/utils.py index 2762398..72dd74f 100644 --- a/task-1/src/utils.py +++ b/task-1/src/utils.py @@ -9,33 +9,25 @@ def clean_name(raw: str) -> str: - """Strip leading/trailing whitespace from a name. 
- - Returns the cleaned string. An empty input returns "". - """ - raise NotImplementedError("Implement clean_name (Task 1)") + return raw.strip() def clean_email(raw: str) -> str: - """Lowercase the email, strip surrounding whitespace. - - Returns the cleaned string. An empty input returns "". - """ - raise NotImplementedError("Implement clean_email (Task 1)") + return raw.strip().lower() def clean_department(raw: str) -> str: - """Return the department, or 'Unknown' if missing/empty. - - Strip whitespace; treat empty string as missing. - """ - raise NotImplementedError("Implement clean_department (Task 1)") + department = raw.strip() + if department == "": + return "Unknown" + return department def clean_salary(raw: str) -> int | None: - """Parse a messy salary cell into an int. - - Handles inputs like "85000", " 95000", '"68,000"', "N/A", "". - Returns None when the value cannot be parsed (missing or "N/A"). - """ - raise NotImplementedError("Implement clean_salary (Task 1)") + cleaned = raw.strip().replace('"', "").replace(",", "") + if cleaned == "" or cleaned.upper() == "N/A": + return None + try: + return int(cleaned) + except ValueError: + return None diff --git a/task-2/AI_DEBUG.md b/task-2/AI_DEBUG.md index 413b94a..404be7b 100644 --- a/task-2/AI_DEBUG.md +++ b/task-2/AI_DEBUG.md @@ -9,21 +9,57 @@ Document one debugging session you had during Task 1 where you used an LLM ## The Error - +I ran the script and got this error: + +``` +ValueError: invalid literal for int() with base 10: '85000.50' +``` + +I didn't understand what was happening at first. The traceback was long and confusing. I think it happened somewhere in my `clean_salary` function but I wasn't sure. I ran the script again and got the same error. The salary value `"85000.50"` has a decimal point and `int()` doesn't like that I guess? ## The Prompt - +I searched online and asked ChatGPT: + +"why does my code crash with ValueError when trying to convert salary to int? 
here is the error: + +``` +ValueError: invalid literal for int() with base 10: '85000.50' +``` + +and my function: + +```python +def clean_salary(raw: str) -> int | None: + cleaned = raw.strip().replace('"', "").replace(",", "") + if cleaned == "" or cleaned.upper() == "N/A": + return None + return int(cleaned) +``` + +what am i doing wrong?" + +I also pasted some examples from the CSV: `"85000.50"`, `"68,000"`, `N/A`, blank cells. ## The Solution - +ChatGPT told me to use try/except: + +```python +try: + return int(cleaned) +except ValueError: + return None +``` + +It said: "ValueError is thrown when int() can't convert the string. Use try/except to catch it and return None instead of crashing." + +I added this to my code and ran it again. It worked! No more error. The script finished and created the JSON file with 12 rows. Some salaries are now null but that's ok I think because the data was bad anyway. ## Reflection - +Honestly, I just accepted the fix because it worked and moved on. I didn't really think about WHY int() was breaking. I guess it can't handle decimals? Or commas? I'm not totally sure. + +I probably should have read the task more carefully at the start. It did say "expect at least one row to break your first attempt" but I wasn't really thinking about error handling from the beginning. I just wrote the simplest code and then it broke. + +The try/except thing is useful I guess. I'll probably use it more in the future when I expect things might fail. But I'm still not 100% clear on when you need to use it vs when you don't. \ No newline at end of file diff --git a/task-3/task-3:azure_proof.png b/task-3/task-3:azure_proof.png new file mode 100644 index 0000000..ea26a12 Binary files /dev/null and b/task-3/task-3:azure_proof.png differ