diff --git a/.github/workflows/functional_test.yml b/.github/workflows/functional_test.yml index eaa50adf3..5ec4dc2b9 100644 --- a/.github/workflows/functional_test.yml +++ b/.github/workflows/functional_test.yml @@ -27,5 +27,4 @@ jobs: - name: Run Tests run: | uv run --python ${{ matrix.env }} pytest \ - tests/test_classes \ --color=yes diff --git a/collectoss/tasks/github/__init__.py b/collectoss/tasks/github/__init__.py index de3f37bd8..e69de29bb 100644 --- a/collectoss/tasks/github/__init__.py +++ b/collectoss/tasks/github/__init__.py @@ -1,7 +0,0 @@ -from collectoss.tasks.github.contributors import * -from collectoss.tasks.github.events import * -from collectoss.tasks.github.issues import * -from collectoss.tasks.github.messages import * -from collectoss.tasks.github.pull_requests.tasks import * -from collectoss.tasks.github.repo_info.tasks import * -from collectoss.tasks.github.releases.tasks import * diff --git a/collectoss/tasks/start_tasks.py b/collectoss/tasks/start_tasks.py index 644b6cbc4..8e130f926 100644 --- a/collectoss/tasks/start_tasks.py +++ b/collectoss/tasks/start_tasks.py @@ -7,7 +7,13 @@ import sqlalchemy as s -from collectoss.tasks.github import * +from collectoss.tasks.github.contributors import * +from collectoss.tasks.github.events import * +from collectoss.tasks.github.issues import * +from collectoss.tasks.github.messages import * +from collectoss.tasks.github.pull_requests.tasks import * +from collectoss.tasks.github.repo_info.tasks import * +from collectoss.tasks.github.releases.tasks import * if os.environ.get('AUGUR_DOCKER_DEPLOY') != "1": from collectoss.tasks.data_analysis import * from collectoss.tasks.github.detect_move.tasks import detect_github_repo_move_core, detect_github_repo_move_secondary diff --git a/pyproject.toml b/pyproject.toml index 6445d832e..d14c871f7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -146,7 +146,9 @@ addopts = "-ra -s" testpaths = [ "tests/test_classes", 
"tests/test_application/test_cli/test_csv_utils.py", - # "tests/test_routes", # runs, but needs a fixture for connecting to the web interface of CollectOSS + "tests/test_tasks/test_task_utilities/test_util/test_worker_util.py", + "tests/test_tasks/test_task_utilities/test_util/test_contributor_uuid.py", + # "tests/test_routes", # runs, but needs a fixture for connecting to the web interface of Augur # "tests/test_metrics", # "tests/test_tasks", # "tests/test_application", @@ -154,6 +156,11 @@ testpaths = [ # "tests/test_workers/worker_persistence/", # "tests/test_routes/runner.py" ] +markers = [ + "unit: pure logic tests with no external dependencies", + "integration: tests requiring a database, Redis, or network access", +] + [tool.mypy] files = ['collectoss/application/db/*.py'] diff --git a/tests/test_application/test_cli/test_csv_utils.py b/tests/test_application/test_cli/test_csv_utils.py index 395ed0936..d15a7f04b 100644 --- a/tests/test_application/test_cli/test_csv_utils.py +++ b/tests/test_application/test_cli/test_csv_utils.py @@ -14,7 +14,7 @@ MAX_FILE_SIZE_BYTES, ) - +@pytest.mark.unit class TestValidateGitUrl: """Tests for validate_git_url function""" @@ -40,7 +40,7 @@ def test_whitespace_handling(self): """Test that whitespace is properly stripped""" assert validate_git_url(" https://github.com/chaoss/collectoss ") - +@pytest.mark.unit class TestValidatePositiveInt: """Tests for validate_positive_int function""" @@ -71,7 +71,7 @@ def test_whitespace_handling(self): """Test that whitespace is properly stripped""" assert validate_positive_int(" 42 ") - +@pytest.mark.unit class TestDetectColumnOrder: """Tests for detect_column_order function""" @@ -153,7 +153,7 @@ def test_no_match_found_raises_error(self): with pytest.raises(ValueError, match="Could not detect column"): detect_column_order(sample_rows, validators) - +@pytest.mark.unit class TestProcessCsv: """Tests for process_csv function""" @@ -252,7 +252,7 @@ def test_whitespace_in_values(self, 
tmp_path): result = process_csv(str(csv_file), validators) assert result[0] == {"repo_url": "https://github.com/chaoss/collectoss", "repo_group_id": "10"} - +@pytest.mark.unit class TestProcessRepoCsv: """Tests for process_repo_csv function""" @@ -275,6 +275,7 @@ def test_process_repo_csv_without_headers(self, tmp_path): assert len(result) == 2 +@pytest.mark.unit class TestProcessRepoGroupCsv: """Tests for process_repo_group_csv function""" @@ -310,6 +311,7 @@ def test_empty_group_name_invalid(self, tmp_path): assert len(result) >= 1 +@pytest.mark.unit class TestEdgeCases: """Tests for edge cases and error conditions""" diff --git a/tests/test_classes/test_config_stores.py b/tests/test_classes/test_config_stores.py index 8c15fd020..cf23f646f 100644 --- a/tests/test_classes/test_config_stores.py +++ b/tests/test_classes/test_config_stores.py @@ -14,75 +14,128 @@ def mock_session(): return Mock() -def test_jsonconfig_readonly_flags(mock_logger): - cfg = JsonConfig({"A": {"x": 1}}, mock_logger) - assert cfg.writable is False - assert cfg.empty is False - - -def test_jsonconfig_empty_true_false(mock_logger): - assert JsonConfig({}, mock_logger).empty is True - assert JsonConfig({"A": {}}, mock_logger).empty is False - - -def test_jsonconfig_write_protection(mock_logger): - # JsonConfig should be not writeable by default, so we should be unable to change - # its values, even by abusing references - - data = {"Alpha": {"a": 1, "b": "str"}, "Beta": {}} - cfg = JsonConfig(data, mock_logger) - - # mutation via input - data["Alpha"]["a"] = 2 - - config_test = cfg.retrieve_dict() - assert config_test != data # the data in the config should not change - - # mutation via output - config_test["Alpha"]["a"] = 3 - - config_test = cfg.retrieve_dict() - assert config_test != data # the data in the config should not change - -def test_jsonconfig_retrieve_has_get(mock_logger): - data = {"Alpha": {"a": 1, "b": "str"}, "Beta": {}} - cfg = JsonConfig(data, mock_logger) - - # retrieve 
full dict - assert cfg.retrieve_dict() == data - - # has/get section - assert cfg.has_section("Alpha") is True - assert cfg.has_section("Missing") is False - assert cfg.get_section("Alpha") == {"a": 1, "b": "str"} - assert cfg.get_section("Missing") is None - - # has/get value - assert cfg.has_value("Alpha", "a") is True - assert cfg.has_value("Alpha", "missing") is False - assert cfg.has_value("Missing", "a") is False - assert cfg.get_value("Alpha", "a") == 1 - assert cfg.get_value("Alpha", "missing") is None - assert cfg.get_value("Missing", "a") is None - - -@pytest.mark.parametrize( - "callable_name, args, kwargs", - [ - ("load_dict", ({"X": {"y": 2}},), {"ignore_existing": False}), - ("clear", tuple(), {}), - ("remove_section", ("X",), {}), - ("create_section", ("X", {"y": 2}), {"ignore_existing": False}), - ("remove_value", ("X", "y"), {}), - ("add_value", ("X", "y", 2), {"ignore_existing": False}), - ], -) -def test_jsonconfig_mutations_raise_not_writable(mock_logger, callable_name, args, kwargs): - cfg = JsonConfig({"A": {"x": 1}}, mock_logger) - with pytest.raises(NotWriteableException): - getattr(cfg, callable_name)(*args, **kwargs) +class TestJSONConfig: + def test_jsonconfig_readonly_flags(self, mock_logger): + cfg = JsonConfig({"A": {"x": 1}}, mock_logger) + assert cfg.writable is False + assert cfg.empty is False + + def test_jsonconfig_empty_true_false(self, mock_logger): + assert JsonConfig({}, mock_logger).empty is True + assert JsonConfig({"A": {}}, mock_logger).empty is False + + + def test_jsonconfig_write_protection(self, mock_logger): + # JsonConfig should be not writeable by default, so we should be unable to change + # its values, even by abusing references + + data = {"Alpha": {"a": 1, "b": "str"}, "Beta": {}} + cfg = JsonConfig(data, mock_logger) + + # mutation via input + data["Alpha"]["a"] = 2 + + config_test = cfg.retrieve_dict() + assert config_test != data # the data in the config should not change + + # mutation via output + 
config_test["Alpha"]["a"] = 3 + + config_test = cfg.retrieve_dict() + assert config_test != data # the data in the config should not change + + def test_jsonconfig_retrieve_has_get(self, mock_logger): + data = {"Alpha": {"a": 1, "b": "str"}, "Beta": {}} + cfg = JsonConfig(data, mock_logger) + + # retrieve full dict + assert cfg.retrieve_dict() == data + + # has/get section + assert cfg.has_section("Alpha") is True + assert cfg.has_section("Missing") is False + assert cfg.get_section("Alpha") == {"a": 1, "b": "str"} + assert cfg.get_section("Missing") is None + + # has/get value + assert cfg.has_value("Alpha", "a") is True + assert cfg.has_value("Alpha", "missing") is False + assert cfg.has_value("Missing", "a") is False + assert cfg.get_value("Alpha", "a") == 1 + assert cfg.get_value("Alpha", "missing") is None + assert cfg.get_value("Missing", "a") is None + + + @pytest.mark.parametrize( + "callable_name, args, kwargs", + [ + ("load_dict", ({"X": {"y": 2}},), {"ignore_existing": False}), + ("clear", tuple(), {}), + ("remove_section", ("X",), {}), + ("create_section", ("X", {"y": 2}), {"ignore_existing": False}), + ("remove_value", ("X", "y"), {}), + ("add_value", ("X", "y", 2), {"ignore_existing": False}), + ], + ) + def test_jsonconfig_mutations_raise_not_writable(self, mock_logger, callable_name, args, kwargs): + cfg = JsonConfig({"A": {"x": 1}}, mock_logger) + with pytest.raises(NotWriteableException): + getattr(cfg, callable_name)(*args, **kwargs) + + + def test_fetching_real_defaults(self, mock_logger, mock_session): + cfg = SystemConfig(mock_logger, mock_session) + cfg.config_sources = [JsonConfig(default_config, mock_logger)] + + assert cfg.get_value("Redis", "cache_group") == 0 + + + def test_load_config_utilizes_hierarchy(self, mock_logger): + """load_config() merges sources in order: later sources override earlier values and add new keys/sections.""" + default_dict = { + "Section1": {"alpha": 1, "beta": "x"}, + "Section2": {"gamma": False, "delta": 3.14}, + } + + override_dict = { + "Section1": {"beta": "y"}, + "Section2": {"Epsilon": True, "delta": 6.28}, + "Section3": 
{"hi": "there"} + } + + cfg = SystemConfig(None, None, [JsonConfig(default_dict, mock_logger), JsonConfig(override_dict, mock_logger)]) + + expected_dict = { + "Section1": {"alpha": 1, "beta": "y"}, + "Section2": {"gamma": False, "Epsilon": True, "delta": 6.28}, + "Section3": {"hi": "there"} # test that new sections are accounted for too + } + + assert cfg.load_config() == expected_dict + + + def test_get_section_incorporates_hierarchy(self, mock_logger): + """get_section() returns the section merged across sources, with later sources overriding earlier values.""" + default_dict = { + "Section1": {"alpha": 1, "beta": "x"}, + "Section2": {"gamma": False, "delta": 3.14}, + } + + override_dict = { + "Section1": {"beta": "y"}, + "Section2": {"gamma": False, "delta": 3.14}, + } + + cfg = SystemConfig(None, None, [JsonConfig(default_dict, mock_logger), JsonConfig(override_dict, mock_logger)]) + + expected_dict = {"alpha": 1, "beta": "y"} + + assert cfg.get_section("Section1") == expected_dict + + +@pytest.mark.unit def test_dict_to_config_table_happy_path(): input_dict = { "Section1": {"alpha": 1, "beta": "x"}, @@ -122,53 +175,3 @@ def test_dict_to_config_table_happy_path(): assert rows == expected - -def test_fetching_real_defaults(mock_logger, mock_session): - cfg = SystemConfig(mock_logger, mock_session) - cfg.config_sources = [JsonConfig(default_config, mock_logger)] - - assert cfg.get_value("Redis", "cache_group") == 0 - - -def test_load_config_utilizes_hierarchy(): - - default_dict = { - "Section1": {"alpha": 1, "beta": "x"}, - "Section2": {"gamma": False, "delta": 3.14}, - } - - override_dict = { - "Section1": {"beta": "y"}, - "Section2": {"Epsilon": True, "delta": 6.28}, - "Section3": {"hi": "there"} - } - - cfg = SystemConfig(None, None, [JsonConfig(default_dict, mock_logger), JsonConfig(override_dict, mock_logger)]) - - expected_dict = { - "Section1": {"alpha": 1, "beta": "y"}, - "Section2": {"gamma": False, "Epsilon": True, "delta": 6.28}, - "Section3": {"hi": "there"} # test that new sections are accounted for too - } - - assert cfg.load_config() == expected_dict - - -def 
test_get_section_incorporates_hierarchy(): - - default_dict = { - "Section1": {"alpha": 1, "beta": "x"}, - "Section2": {"gamma": False, "delta": 3.14}, - } - - override_dict = { - "Section1": {"beta": "y"}, - "Section2": {"gamma": False, "delta": 3.14}, - } - - cfg = SystemConfig(None, None, [JsonConfig(default_dict, mock_logger), JsonConfig(override_dict, mock_logger)]) - - expected_dict = {"alpha": 1, "beta": "y"} - - assert cfg.get_section("Section1") == expected_dict - diff --git a/tests/test_classes/test_github_data_access.py b/tests/test_classes/test_github_data_access.py new file mode 100644 index 000000000..3ebd4db79 --- /dev/null +++ b/tests/test_classes/test_github_data_access.py @@ -0,0 +1,57 @@ +# SPDX-License-Identifier: MIT +import pytest +from unittest.mock import Mock, patch + +from collectoss.tasks.github.util.github_data_access import GithubDataAccess + + +@pytest.fixture +def mock_logger(): + return Mock() + + +@pytest.fixture +def mock_key_manager(): + return Mock() + + +@pytest.fixture +def gda(mock_key_manager, mock_logger): + with patch("collectoss.tasks.github.util.github_data_access.KeyClient"): + return GithubDataAccess(mock_key_manager, mock_logger) + +@pytest.mark.unit +class TestEndpointUrl: + + def test_basic_path(self, gda): + result = gda.endpoint_url("/users/MoralCode") + assert result == "https://api.github.com/users/MoralCode" + + def test_path_without_leading_slash(self, gda): + result = gda.endpoint_url("repos/owner/repo") + assert result == "https://api.github.com/repos/owner/repo" + + def test_with_single_param(self, gda): + result = gda.endpoint_url("/users/MoralCode", {"per_page": "100"}) + assert "per_page=100" in result + assert result.startswith("https://api.github.com/users/MoralCode") + + def test_with_multiple_params(self, gda): + result = gda.endpoint_url("/repos/owner/repo/pulls", {"per_page": "50", "state": "open"}) + assert "per_page=50" in result + assert "state=open" in result + assert 
result.startswith("https://api.github.com/repos/owner/repo/pulls") + + def test_none_params_produces_no_query_string(self, gda): + result = gda.endpoint_url("/users/MoralCode", None) + assert result == "https://api.github.com/users/MoralCode" + + def test_empty_params_produces_no_query_string(self, gda): + result = gda.endpoint_url("/users/MoralCode", {}) + assert result == "https://api.github.com/users/MoralCode" + + def test_path_with_existing_query_params(self, gda): + result = gda.endpoint_url("/search/repositories?q=python", {"per_page": "10"}) + assert "q=python" in result + assert "per_page=10" in result + assert result.startswith("https://api.github.com/search/repositories") diff --git a/tests/test_tasks/test_task_utilities/test_paginators/test_github_paginator.py b/tests/test_tasks/test_task_utilities/test_paginators/test_github_paginator.py index a8ea375f3..ec0c27745 100644 --- a/tests/test_tasks/test_task_utilities/test_paginators/test_github_paginator.py +++ b/tests/test_tasks/test_task_utilities/test_paginators/test_github_paginator.py @@ -92,7 +92,7 @@ def test_github_paginator_len(key_auth): assert len_contributors_list == 0 -def test_github_paginator_get_item(key_auth): +def test_github_paginator_get_item_2(key_auth): owner = "chaoss" name = "whitepaper" diff --git a/tests/test_tasks/test_task_utilities/test_util/test_contributor_uuid.py b/tests/test_tasks/test_task_utilities/test_util/test_contributor_uuid.py new file mode 100644 index 000000000..40f5cdc27 --- /dev/null +++ b/tests/test_tasks/test_task_utilities/test_util/test_contributor_uuid.py @@ -0,0 +1,161 @@ +import pytest +import uuid +from collectoss.tasks.util.ContributorUUID import ContributorUUID, GithubUUID, GitlabUUID, UnresolvableUUID + +# ContributorUUID tests +@pytest.mark.unit +class TestContributorUUID: + # this checks whether a brand new ContributorUUID object starts as 16 zero bytes + def test_augur_uuid_initializes_with_16_zero_bytes(self): + uid = ContributorUUID() + assert 
len(uid.bytes) == 16 + assert all(b == 0 for b in uid.bytes) + + # checks that GithubUUID sets its platform number to 1 + def test_github_uuid_platform_is_1(self): + uid = GithubUUID() + assert uid["platform"] == 1 + + # checks that GitlabUUID sets its platform number to 2 + def test_gitlab_uuid_platform_is_2(self): + uid = GitlabUUID() + assert uid["platform"] == 2 + + # checks that you can store a value in the user field + def test_github_uuid_set_user(self): + uid = GithubUUID() + uid["user"] = 12345 + assert uid["user"] == 12345 + + # tests platform_id edge cases + def test_set_platform_id_raises_on_non_integer(self): + uid = ContributorUUID() + with pytest.raises(ValueError): + uid.set_platform_id("github") + + def test_set_platform_id_raises_on_overflow(self): + uid = ContributorUUID() + with pytest.raises(ValueError): + uid.set_platform_id(256) # too big for 1 byte + + # checks that writing to one field doesn't accidentally overwrite bytes belonging to another field + def test_fields_dont_overlap(self): + uid = GithubUUID() + + uid["user"] = 12345 + uid["repo"] = 99999 + + assert uid["user"] == 12345 + assert uid["repo"] == 99999 + + # checks that to_UUID returns a uuid.UUID object + def test_to_uuid_returns_valid_uuid(self): + uid = GithubUUID() + uid["user"] = 15 + result = uid.to_UUID() + assert isinstance(result, uuid.UUID) + + # checks that set_bytes rejects a start_byte outside range(0, 16) + def test_set_bytes_raises_on_invalid_start_byte(self): + uid = ContributorUUID() + with pytest.raises(ValueError): + uid.set_bytes([1, 2, 3], 16) + + # checks that set_bytes correctly raises an error when you write more bytes than will fit in the UUID starting at a given position + def test_set_bytes_raises_on_too_many_bytes(self): + uid = ContributorUUID() + with pytest.raises(ValueError): + uid.set_bytes([1] * 10, 10) + + # checks that writing an integer too large for its field is rejected + def test_write_int_raises_on_overflow(self): + uid = GithubUUID() + with 
pytest.raises(ValueError): + uid["user"] = 99999999999 # too big for 4 bytes + + def test_write_int_with_non_integer(self): + uid = GithubUUID() + + with pytest.raises(ValueError): + uid.write_int("abc", 1, 4) + + def test_write_int_and_get_int_roundtrip(self): + uid = ContributorUUID() + uid.write_int(65535, 1, 2) + assert uid.get_int(1, 2) == 65535 + + # checks __int__ method + def test_int_conversion(self): + uid = ContributorUUID() + uid.set_byte(15, 1) + assert int(uid) == 1 + + def test_get_byte_invalid_index(self): + uid = ContributorUUID() + with pytest.raises(IndexError): + uid.get_byte(20) + + # checks that set_byte correctly rejects a value that is too large + def test_set_byte_raises_on_invalid_value(self): + uid = ContributorUUID() + with pytest.raises(ValueError): + uid.set_byte(0, 256) # too big for one byte + + # checks that set_byte rejects an index that doesn't exist + def test_set_byte_raises_on_out_of_range_index(self): + uid = ContributorUUID() + with pytest.raises(IndexError): + uid.set_byte(16, 1) # index 16 is out of bounds + + def test_set_byte_raises_on_non_integer(self): + uid = ContributorUUID() + with pytest.raises(ValueError): + uid.set_byte(0, "hello") + + # checks that 2 UUIDs with the same values are considered equal. 
+ def test_equality(self): + uid1 = GithubUUID() + uid2 = GithubUUID() + uid1["user"] = 100 + uid2["user"] = 100 + assert uid1 == uid2 + + # checks that 2 UUIDs with different values are not equal + def test_inequality(self): + uid1 = GithubUUID() + uid2 = GithubUUID() + uid1["user"] = 100 + uid2["user"] = 200 + assert uid1 != uid2 + + # checks that the same user produces different user IDs across platforms + def test_github_and_gitlab_different_for_same_user(self): + github_uid = GithubUUID() + gitlab_uid = GitlabUUID() + github_uid["user"] = 100 + gitlab_uid["user"] = 100 + assert github_uid != gitlab_uid + + def test_dict_representation(self): + uid = GithubUUID() + uid["user"] = 10 + + result = uid.__dict__() + + assert result["platform"] == 1 + assert result["user"] == 10 + + def test_string_representation(self): + uid = GithubUUID() + uid["user"] = 10 + + result = str(uid) + + assert "user" in result + assert "platform" in result + + def test_setting_same_field_twice(self): + uid = GithubUUID() + uid["user"] = 42 + uid["user"] = 100 # overwrite with different value + assert uid["user"] == 100 \ No newline at end of file diff --git a/tests/test_tasks/test_task_utilities/test_util/test_worker_util.py b/tests/test_tasks/test_task_utilities/test_util/test_worker_util.py index affd40248..410c1ef70 100644 --- a/tests/test_tasks/test_task_utilities/test_util/test_worker_util.py +++ b/tests/test_tasks/test_task_utilities/test_util/test_worker_util.py @@ -2,11 +2,12 @@ import pytest import sqlalchemy as s -from collectoss.tasks.util.worker_util import * +from collectoss.tasks.util.worker_util import remove_duplicates_by_uniques logger = logging.getLogger(__name__) -def test_remove_duplicates_by_uniques(test_db_engine): +@pytest.mark.unit +def test_remove_duplicates_by_uniques(): data_1 = {"cntrb_login": "Bob", "gh_user_id": 4, "gh_login": "bob", "cntrb_id": "01003f7a-8500-0000-0000-000000000000"} data_2 = {"cntrb_login": "amazing", "gh_user_id": 1700, "gh_login": 
"hello", "cntrb_id": "01003f7a-8500-0000-0000-000123002000"}