Skip to content

chore: evals#20

Open
brodeynewman wants to merge 3 commits into
main from
brodey/evals
Open

chore: evals#20
brodeynewman wants to merge 3 commits into
main from
brodey/evals

Conversation

@brodeynewman
Copy link
Copy Markdown
Contributor

No description provided.

)

@classmethod
def from_dataframe(cls, df: "pandas.DataFrame", name: str) -> "Dataset":
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ [ruff] <F821> reported by reviewdog 🐶
Undefined name pandas

Comment thread sdk/python/tests/test_log_eval.py Outdated
import time
from unittest import mock

import pytest
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[ruff-check] reported by reviewdog 🐶

Suggested change
import pytest

Comment thread sdk/python/tests/test_log_eval.py Outdated
Comment on lines +9 to +10
import pytest

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ [ruff] <F401> reported by reviewdog 🐶
pytest imported but unused

Suggested change
import pytest

Comment thread sdk/python/tests/test_log_eval.py Outdated
mock_batcher_class.return_value = mock_batcher

# Patch the imports within run module
with mock.patch.object(Run, "_init_remote_mode") as mock_init:
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ [ruff] <F841> reported by reviewdog 🐶
Local variable mock_init is assigned to but never used

Suggested change
with mock.patch.object(Run, "_init_remote_mode") as mock_init:
with mock.patch.object(Run, "_init_remote_mode"):

# Early training - lower quality
words = base.split()
np.random.shuffle(words)
return " ".join(words[:len(words)//2])
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[ruff-format] reported by reviewdog 🐶

Suggested change
return " ".join(words[:len(words)//2])
return " ".join(words[: len(words) // 2])

Comment on lines +102 to +105
run.log_metrics({
"train/loss": loss,
"train/perplexity": perplexity,
}, step=epoch)
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[ruff-format] reported by reviewdog 🐶

Suggested change
run.log_metrics({
"train/loss": loss,
"train/perplexity": perplexity,
}, step=epoch)
run.log_metrics(
{
"train/loss": loss,
"train/perplexity": perplexity,
},
step=epoch,
)


# Log the evaluation annotation
run.log_eval(
message=f"Epoch {epoch}: {message}\nSample output: \"{output}\"",
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[ruff-format] reported by reviewdog 🐶

Suggested change
message=f"Epoch {epoch}: {message}\nSample output: \"{output}\"",
message=f'Epoch {epoch}: {message}\nSample output: "{output}"',

"epoch": epoch,
"output_length": len(output),
"sample_output": output,
}
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[ruff-format] reported by reviewdog 🐶

Suggested change
}
},

metadata={
"final_output": final_output,
"total_epochs": config["epochs"],
}
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[ruff-format] reported by reviewdog 🐶

Suggested change
}
},

)
return response.get("scorers", [])

def list_evaluations(self, limit: int = 50, offset: int = 0, status: Optional[str] = None) -> List[Evaluation]:
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[ruff-format] reported by reviewdog 🐶

Suggested change
def list_evaluations(self, limit: int = 50, offset: int = 0, status: Optional[str] = None) -> List[Evaluation]:
def list_evaluations(
self, limit: int = 50, offset: int = 0, status: Optional[str] = None
) -> List[Evaluation]:

Comment on lines +713 to +724
evaluations.append(Evaluation(
id=item["id"],
name=item["name"],
status=item["status"],
dataset_id=item["dataset_id"],
scorer_ids=item["scorer_ids"],
target=item["target"],
overall_scores=item.get("overall_scores"),
rows_processed=item.get("rows_processed", 0),
rows_failed=item.get("rows_failed", 0),
created_at=item.get("created_at"),
))
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[ruff-format] reported by reviewdog 🐶

Suggested change
evaluations.append(Evaluation(
id=item["id"],
name=item["name"],
status=item["status"],
dataset_id=item["dataset_id"],
scorer_ids=item["scorer_ids"],
target=item["target"],
overall_scores=item.get("overall_scores"),
rows_processed=item.get("rows_processed", 0),
rows_failed=item.get("rows_failed", 0),
created_at=item.get("created_at"),
))
evaluations.append(
Evaluation(
id=item["id"],
name=item["name"],
status=item["status"],
dataset_id=item["dataset_id"],
scorer_ids=item["scorer_ids"],
target=item["target"],
overall_scores=item.get("overall_scores"),
rows_processed=item.get("rows_processed", 0),
rows_failed=item.get("rows_failed", 0),
created_at=item.get("created_at"),
)
)

assert "eval_logs" in meta
assert len(meta["eval_logs"]) == 1
assert meta["eval_logs"][0]["message"] == "This output looks great"
assert meta["eval_logs"][0]["step"] == 11 # Step after log_metrics incremented it
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[ruff-format] reported by reviewdog 🐶

Suggested change
assert meta["eval_logs"][0]["step"] == 11 # Step after log_metrics incremented it
assert (
meta["eval_logs"][0]["step"] == 11
) # Step after log_metrics incremented it

Comment on lines +153 to +155
with mock.patch("p95.client.P95Client") as mock_client_class, \
mock.patch("p95.metrics.MetricsBatcher") as mock_batcher_class:

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[ruff-format] reported by reviewdog 🐶

Suggested change
with mock.patch("p95.client.P95Client") as mock_client_class, \
mock.patch("p95.metrics.MetricsBatcher") as mock_batcher_class:
with (
mock.patch("p95.client.P95Client") as mock_client_class,
mock.patch("p95.metrics.MetricsBatcher") as mock_batcher_class,
):

Comment on lines +255 to +256
threading.Thread(target=log_evals, args=(10,))
for _ in range(5)
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[ruff-format] reported by reviewdog 🐶

Suggested change
threading.Thread(target=log_evals, args=(10,))
for _ in range(5)
threading.Thread(target=log_evals, args=(10,)) for _ in range(5)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

None yet

Projects

None yet

Development

Successfully merging this pull request may close these issues.

1 participant