Skip to content

Handle moved repositories with no redirect (404) when looking for commits in get_login_with_commit_hash phase of contributor resolution #299

@cdolfi

Description

@cdolfi

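There are UrlNotFoundExceptions raised during the commit lookup in the get_login_with_commit_hash function (e.g., when a repository has been moved and the GitHub API returns a 404 with no redirect). I think some basic error handling around that lookup would solve this issue.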
/

Details

Stack Trace
Traceback (most recent call last):
  File "/collectoss/.venv/lib/python3.11/site-packages/celery/app/trace.py", line 453, in trace_task
    R = retval = fun(*args, **kwargs)
                 ^^^^^^^^^^^^^^^^^^^^
  File "/collectoss/.venv/lib/python3.11/site-packages/celery/app/trace.py", line 736, in __protected_call__
    return self.run(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/collectoss/collectoss/tasks/github/facade_github/tasks.py", line 248, in insert_facade_contributors
    process_commit_metadata(logger, key_auth, batch, repo_id, platform_id, tool_source, tool_version, data_source)
  File "/collectoss/collectoss/tasks/github/facade_github/tasks.py", line 66, in process_commit_metadata
    login = get_login_with_commit_hash(logger, auth, contributor, repo_id)
            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/collectoss/collectoss/tasks/github/facade_github/contributor_interfaceable/contributor_interface.py", line 371, in get_login_with_commit_hash
    login_json = github_data_access.get_resource(url)
                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/collectoss/collectoss/tasks/github/util/github_data_access.py", line 108, in get_resource
    response = self.make_request_with_retries(url)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/collectoss/collectoss/tasks/github/util/github_data_access.py", line 164, in make_request_with_retries
    return self.__make_request_with_retries(url, method, timeout)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/collectoss/.venv/lib/python3.11/site-packages/tenacity/__init__.py", line 330, in wrapped_f
    return self(f, *args, **kw)
           ^^^^^^^^^^^^^^^^^^^^
  File "/collectoss/.venv/lib/python3.11/site-packages/tenacity/__init__.py", line 467, in __call__
    do = self.iter(retry_state=retry_state)
         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/collectoss/.venv/lib/python3.11/site-packages/tenacity/__init__.py", line 368, in iter
    result = action(retry_state)
             ^^^^^^^^^^^^^^^^^^^
  File "/collectoss/.venv/lib/python3.11/site-packages/tenacity/__init__.py", line 390, in <lambda>
    self._add_action_func(lambda rs: rs.outcome.result())
                                     ^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/concurrent/futures/_base.py", line 449, in result
    return self.__get_result()
           ^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/concurrent/futures/_base.py", line 401, in __get_result
    raise self._exception
  File "/collectoss/.venv/lib/python3.11/site-packages/tenacity/__init__.py", line 470, in __call__
    result = fn(*args, **kwargs)
             ^^^^^^^^^^^^^^^^^^^
  File "/collectoss/collectoss/tasks/github/util/github_data_access.py", line 186, in __make_request_with_retries
    result = self.make_request(url, method, timeout)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/collectoss/collectoss/tasks/github/util/github_data_access.py", line 134, in make_request
    raise UrlNotFoundException(f"Could not find {url}")
collectoss.tasks.github.util.github_data_access.UrlNotFoundException: Could not find https://api.github.com/repos/TheAlaskanPhysicist/simulation-physics-droplet/commits/427e12127328230ebb6a931029590ec4bf597d52

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/collectoss/.venv/lib/python3.11/site-packages/celery/backends/redis.py", line 533, in on_chord_part_return
    resl = [unpack(tup, decode) for tup in resl]
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/collectoss/.venv/lib/python3.11/site-packages/celery/backends/redis.py", line 533, in <listcomp>
    resl = [unpack(tup, decode) for tup in resl]
            ^^^^^^^^^^^^^^^^^^^
  File "/collectoss/.venv/lib/python3.11/site-packages/celery/backends/redis.py", line 439, in _unpack_chord_result
    raise ChordError(f'Dependency {tid} raised {retval!r}')
celery.exceptions.ChordError: Dependency 2d71d7c2-aa42-49f7-a59b-8a1b42bfe05d raised UrlNotFoundException('Could not find https://api.github.com/repos/TheAlaskanPhysicist/simulation-physics-droplet/commits/427e12127328230ebb6a931029590ec4bf597d52')

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/collectoss/.venv/lib/python3.11/site-packages/celery/app/trace.py", line 470, in trace_task
    I, R, state, retval = on_error(task_request, exc)
                          ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/collectoss/.venv/lib/python3.11/site-packages/celery/app/trace.py", line 381, in on_error
    R = I.handle_error_state(
        ^^^^^^^^^^^^^^^^^^^^^
  File "/collectoss/.venv/lib/python3.11/site-packages/celery/app/trace.py", line 175, in handle_error_state
    return {
           ^
  File "/collectoss/.venv/lib/python3.11/site-packages/celery/app/trace.py", line 227, in handle_failure
    task.backend.mark_as_failure(
  File "/collectoss/.venv/lib/python3.11/site-packages/celery/backends/base.py", line 172, in mark_as_failure
    self.on_chord_part_return(request, state, exc)
  File "/collectoss/.venv/lib/python3.11/site-packages/celery/backends/redis.py", line 552, in on_chord_part_return
    return self.chord_error_from_stack(callback, exc)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/collectoss/.venv/lib/python3.11/site-packages/celery/backends/base.py", line 304, in chord_error_from_stack
    return backend.fail_from_current_stack(callback.id, exc=exc)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/collectoss/.venv/lib/python3.11/site-packages/celery/backends/base.py", line 311, in fail_from_current_stack
    self.mark_as_failure(task_id, exc, exception_info.traceback)
  File "/collectoss/.venv/lib/python3.11/site-packages/celery/backends/base.py", line 167, in mark_as_failure
    self.store_result(task_id, exc, state,
  File "/collectoss/.venv/lib/python3.11/site-packages/celery/backends/base.py", line 526, in store_result
    self._store_result(task_id, result, state, traceback,
  File "/collectoss/.venv/lib/python3.11/site-packages/celery/backends/base.py", line 975, in _store_result
    current_meta = self._get_task_meta_for(task_id)
                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/collectoss/.venv/lib/python3.11/site-packages/celery/backends/base.py", line 997, in _get_task_meta_for
    meta = self.get(self.get_key_for_task(task_id))
                    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/collectoss/.venv/lib/python3.11/site-packages/celery/backends/base.py", line 871, in get_key_for_task
    raise ValueError(f'task_id must not be empty. Got {task_id} instead.')
ValueError: task_id must not be empty. Got None instead

Metadata

Metadata

Assignees

No one assigned

    Labels

    deployed version: Live problems with deployed versions

    Type

    No type
    No fields configured for issues without a type.

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions