Skip to content

chore: evals#20

Open
brodeynewman wants to merge 3 commits into
main from
brodey/evals
Open

chore: evals#20
brodeynewman wants to merge 3 commits into
main from
brodey/evals

Conversation

@brodeynewman
Copy link
Copy Markdown
Contributor

No description provided.

)

@classmethod
def from_dataframe(cls, df: "pandas.DataFrame", name: str) -> "Dataset":
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ [ruff] <F821> reported by reviewdog 🐶
Undefined name pandas

Comment thread sdk/python/tests/test_log_eval.py Outdated
import time
from unittest import mock

import pytest
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[ruff-check] reported by reviewdog 🐶

Suggested change
import pytest

Comment thread sdk/python/tests/test_log_eval.py Outdated
Comment on lines +9 to +10
import pytest

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ [ruff] <F401> reported by reviewdog 🐶
pytest imported but unused

Suggested change
import pytest

Comment thread sdk/python/tests/test_log_eval.py Outdated
mock_batcher_class.return_value = mock_batcher

# Patch the imports within run module
with mock.patch.object(Run, "_init_remote_mode") as mock_init:
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ [ruff] <F841> reported by reviewdog 🐶
Local variable mock_init is assigned to but never used

Suggested change
with mock.patch.object(Run, "_init_remote_mode") as mock_init:
with mock.patch.object(Run, "_init_remote_mode"):

# Early training - lower quality
words = base.split()
np.random.shuffle(words)
return " ".join(words[:len(words)//2])
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[ruff-format] reported by reviewdog 🐶

Suggested change
return " ".join(words[:len(words)//2])
return " ".join(words[: len(words) // 2])

Comment on lines +102 to +105
run.log_metrics({
"train/loss": loss,
"train/perplexity": perplexity,
}, step=epoch)
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[ruff-format] reported by reviewdog 🐶

Suggested change
run.log_metrics({
"train/loss": loss,
"train/perplexity": perplexity,
}, step=epoch)
run.log_metrics(
{
"train/loss": loss,
"train/perplexity": perplexity,
},
step=epoch,
)


# Log the evaluation annotation
run.log_eval(
message=f"Epoch {epoch}: {message}\nSample output: \"{output}\"",
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[ruff-format] reported by reviewdog 🐶

Suggested change
message=f"Epoch {epoch}: {message}\nSample output: \"{output}\"",
message=f'Epoch {epoch}: {message}\nSample output: "{output}"',

"epoch": epoch,
"output_length": len(output),
"sample_output": output,
}
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[ruff-format] reported by reviewdog 🐶

Suggested change
}
},

metadata={
"final_output": final_output,
"total_epochs": config["epochs"],
}
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[ruff-format] reported by reviewdog 🐶

Suggested change
}
},

)
return response.get("scorers", [])

def list_evaluations(self, limit: int = 50, offset: int = 0, status: Optional[str] = None) -> List[Evaluation]:
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[ruff-format] reported by reviewdog 🐶

Suggested change
def list_evaluations(self, limit: int = 50, offset: int = 0, status: Optional[str] = None) -> List[Evaluation]:
def list_evaluations(
self, limit: int = 50, offset: int = 0, status: Optional[str] = None
) -> List[Evaluation]:

Comment on lines +713 to +724
evaluations.append(Evaluation(
id=item["id"],
name=item["name"],
status=item["status"],
dataset_id=item["dataset_id"],
scorer_ids=item["scorer_ids"],
target=item["target"],
overall_scores=item.get("overall_scores"),
rows_processed=item.get("rows_processed", 0),
rows_failed=item.get("rows_failed", 0),
created_at=item.get("created_at"),
))
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[ruff-format] reported by reviewdog 🐶

Suggested change
evaluations.append(Evaluation(
id=item["id"],
name=item["name"],
status=item["status"],
dataset_id=item["dataset_id"],
scorer_ids=item["scorer_ids"],
target=item["target"],
overall_scores=item.get("overall_scores"),
rows_processed=item.get("rows_processed", 0),
rows_failed=item.get("rows_failed", 0),
created_at=item.get("created_at"),
))
evaluations.append(
Evaluation(
id=item["id"],
name=item["name"],
status=item["status"],
dataset_id=item["dataset_id"],
scorer_ids=item["scorer_ids"],
target=item["target"],
overall_scores=item.get("overall_scores"),
rows_processed=item.get("rows_processed", 0),
rows_failed=item.get("rows_failed", 0),
created_at=item.get("created_at"),
)
)

assert "eval_logs" in meta
assert len(meta["eval_logs"]) == 1
assert meta["eval_logs"][0]["message"] == "This output looks great"
assert meta["eval_logs"][0]["step"] == 11 # Step after log_metrics incremented it
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[ruff-format] reported by reviewdog 🐶

Suggested change
assert meta["eval_logs"][0]["step"] == 11 # Step after log_metrics incremented it
assert (
meta["eval_logs"][0]["step"] == 11
) # Step after log_metrics incremented it

Comment on lines +153 to +155
with mock.patch("p95.client.P95Client") as mock_client_class, \
mock.patch("p95.metrics.MetricsBatcher") as mock_batcher_class:

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[ruff-format] reported by reviewdog 🐶

Suggested change
with mock.patch("p95.client.P95Client") as mock_client_class, \
mock.patch("p95.metrics.MetricsBatcher") as mock_batcher_class:
with (
mock.patch("p95.client.P95Client") as mock_client_class,
mock.patch("p95.metrics.MetricsBatcher") as mock_batcher_class,
):

Comment on lines +255 to +256
threading.Thread(target=log_evals, args=(10,))
for _ in range(5)
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[ruff-format] reported by reviewdog 🐶

Suggested change
threading.Thread(target=log_evals, args=(10,))
for _ in range(5)
threading.Thread(target=log_evals, args=(10,)) for _ in range(5)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

None yet

Projects

None yet

Development

Successfully merging this pull request may close these issues.

1 participant