Add fancy Metrics plot to the together-py#344
Conversation
5f57c82 to
1c34308
Compare
948bbfd to
7aa34f9
Compare
There was a problem hiding this comment.
do we need this file?
| global_step_from: Annotated[int | Omit, Parameter(help="Filter metrics from this global step (inclusive).")] = omit, | ||
| global_step_to: Annotated[int | Omit, Parameter(help="Filter metrics to this global step (inclusive).")] = omit, | ||
| logged_at_from: Annotated[datetime | Omit, Parameter(help="Filter metrics logged at or after this time.")] = omit, | ||
| logged_at_to: Annotated[datetime | Omit, Parameter(help="Filter metrics logged at or before this time.")] = omit, | ||
| resolution: Annotated[int | Omit, Parameter(help="Number of data points to return (used for JSON output).")] = omit, |
| if config.json: | ||
| response = await show_loading_status( | ||
| "Fetching metrics...", | ||
| config.client.fine_tuning.list_metrics( | ||
| fine_tune_id, | ||
| global_step_from=global_step_from, | ||
| global_step_to=global_step_to, | ||
| logged_at_from=logged_at_from, | ||
| logged_at_to=logged_at_to, | ||
| resolution=resolution, | ||
| ), | ||
| ) | ||
| console.print_json(openapi_dumps(response.metrics or []).decode("utf-8")) | ||
| return | ||
|
|
||
| # For the ASCII chart always fetch at terminal width resolution for best fidelity. | ||
| response = await show_loading_status( | ||
| "Fetching metrics...", | ||
| config.client.fine_tuning.list_metrics( | ||
| fine_tune_id, | ||
| global_step_from=global_step_from, | ||
| global_step_to=global_step_to, | ||
| logged_at_from=logged_at_from, | ||
| logged_at_to=logged_at_to, | ||
| resolution=console.width - METRICS_WIDTH_PADDING, | ||
| ), | ||
| ) |
There was a problem hiding this comment.
| if config.json: | |
| response = await show_loading_status( | |
| "Fetching metrics...", | |
| config.client.fine_tuning.list_metrics( | |
| fine_tune_id, | |
| global_step_from=global_step_from, | |
| global_step_to=global_step_to, | |
| logged_at_from=logged_at_from, | |
| logged_at_to=logged_at_to, | |
| resolution=resolution, | |
| ), | |
| ) | |
| console.print_json(openapi_dumps(response.metrics or []).decode("utf-8")) | |
| return | |
| # For the ASCII chart always fetch at terminal width resolution for best fidelity. | |
| response = await show_loading_status( | |
| "Fetching metrics...", | |
| config.client.fine_tuning.list_metrics( | |
| fine_tune_id, | |
| global_step_from=global_step_from, | |
| global_step_to=global_step_to, | |
| logged_at_from=logged_at_from, | |
| logged_at_to=logged_at_to, | |
| resolution=console.width - METRICS_WIDTH_PADDING, | |
| ), | |
| ) | |
| # For the ASCII chart always fetch at terminal width resolution for best fidelity. | |
| resolution_value = console.width - METRICS_WIDTH_PADDING if config.json else resolution | |
| response = await show_loading_status( | |
| "Fetching metrics...", | |
| config.client.fine_tuning.list_metrics( | |
| fine_tune_id, | |
| global_step_from=global_step_from, | |
| global_step_to=global_step_to, | |
| logged_at_from=logged_at_from, | |
| logged_at_to=logged_at_to, | |
| resolution=resolution_value, | |
| ), | |
| ) | |
| if config.json: | |
| console.print_json(openapi_dumps(response.metrics or []).decode("utf-8")) | |
| return |
There was a problem hiding this comment.
a bit cleaner I think? not a big deal
There was a problem hiding this comment.
Yes, cleaner, applied
| fine_tune_id: str, | ||
| *, | ||
| config: CLIConfigParameter, | ||
| plots: Annotated[bool, Parameter(help="Print training metric sparklines.")] = True, |
There was a problem hiding this comment.
Cyclopts converts bools to an auto negative version. So if you do --help on this you'll see you can pass either --plots or --no-plots.
since --plots is the default true it basically does nothing to pass --plots and the user would have to pass --no-plots. Given there is a default of True I would say we should change the behavior to be only a negative param that defaults to false like this:
| plots: Annotated[bool, Parameter(help="Print training metric sparklines.")] = True, | |
| no_plots: Annotated[bool, Parameter(help="Print training metric sparklines.", negative=())] = False, |
| "render_line_chart", | ||
| "render_sparklines", | ||
| "should_log", | ||
| ] |
There was a problem hiding this comment.
can we move this from utils to components? I'm trying to put presentational utilities in the components folder
| help_epilogue=FINE_TUNING_DOWNLOAD_HELP_EXAMPLES, | ||
| ) | ||
| fine_tuning_app.command((f"{_CLI}.fine_tuning.delete:delete"), alias="-d", help="Delete a fine-tuning job") | ||
| fine_tuning_app.command( |
There was a problem hiding this comment.
I'd suggest adding some examples to the usage like we do with other commands that have different parameters
blainekasten
left a comment
There was a problem hiding this comment.
Looking great! Several minor nits but nothing too mind bending
| global_step_to: Annotated[int | Omit, Parameter(help="Filter metrics to this global step (inclusive).")] = omit, | ||
| logged_at_from: Annotated[datetime | Omit, Parameter(help="Filter metrics logged at or after this time.")] = omit, | ||
| logged_at_to: Annotated[datetime | Omit, Parameter(help="Filter metrics logged at or before this time.")] = omit, | ||
| resolution: Annotated[int | Omit, Parameter(help="Number of data points to return (used for JSON output).")] = omit, |
There was a problem hiding this comment.
I'm not sure if we want to expose this parameter at all. we can simply use resolution=None for any request, we rarely have jobs more than 20k steps, it's not that much. Originally the motivation to have it was the UI (so it can quickly fetch lets say 100 metrics and render them quickly too)
| resolution=resolution, | ||
| ), | ||
| ) | ||
| console.print_json(openapi_dumps(response.metrics or []).decode("utf-8")) |
There was a problem hiding this comment.
Are se sure we want to print them all?
There was a problem hiding this comment.
Probably we can separate the list into two commands - one is download (as we do for files, either output or stdout) and the second is plot.
There was a problem hiding this comment.
Added --save option to save metrics into the file
| global_step_to=global_step_to, | ||
| logged_at_from=logged_at_from, | ||
| logged_at_to=logged_at_to, | ||
| resolution=console.width - METRICS_WIDTH_PADDING, |
There was a problem hiding this comment.
What does happen if we set lets say resolution 1000 or no resolution at all? Will the plot be the same size with just some "grouping" of the metrics or plot size will explode?
There was a problem hiding this comment.
The _engine has a logic to compress the plot to the specific width, so it's not a big deal. However, if we can request fewer points to render -- it's better to do so.
| fine_tune_id: str, | ||
| *, | ||
| config: CLIConfigParameter, | ||
| plots: Annotated[bool, Parameter(help="Print training metric sparklines.")] = True, |
There was a problem hiding this comment.
plot_training_curves as an alternative
| ) | ||
| metrics = metrics_response.metrics or [] | ||
| except Exception: | ||
| # Metrics are optional; silently skip if unavailable. |
There was a problem hiding this comment.
Are we sure we want to skip silently here?
|
|
||
| def _get_step(row: dict[str, Any], fallback: int) -> int: | ||
| """Extract global step, trying several field names before falling back to index.""" | ||
| gs = row.get("global_step", row.get("train/global_step", row.get("step"))) |
There was a problem hiding this comment.
this nested get is a bit hard to read. can we probably loop over the keys here?
| to ``-inf`` so the rendering engine plots them at the very bottom of the | ||
| chart rather than silently dropping them. | ||
| """ | ||
| series: dict[str, tuple[list[float], list[float]]] = {} |
|
|
||
| def should_log(vals: list[float]) -> bool: | ||
| """Return True when values span more than 100×, suggesting log scale.""" | ||
| nz = [v for v in vals if v > 0] |
There was a problem hiding this comment.
nz = non zero? if so it's not quite accurate. I'd suggest positive_vals
| Non-finite values (e.g. the -inf sentinel used for NaN data points) are | ||
| excluded from the range computation so they don't corrupt the grid. | ||
| """ | ||
| finite = [v for v in vals if math.isfinite(v)] |
There was a problem hiding this comment.
finite_vals as an alternative
| logged_at_to: Annotated[Optional[datetime], Parameter(help="Filter metrics logged at or before this time.")] = None, | ||
| resolution: Annotated[ | ||
| Optional[int], | ||
| Parameter(help="Number of training metric points to return. Does not limit the number of eval metric points."), |
There was a problem hiding this comment.
Please add the "uniformly sampled training metrics" or something
| from together.lib.cli.components.plot_finetune_metrics import METRICS_WIDTH_PADDING, metrics_ascii_charts | ||
|
|
||
|
|
||
| async def list_metrics( |
There was a problem hiding this comment.
I still not sure if we want to keep it as one command. just as an example, lets say I want to only save metrics to the file but using this command I would get a bunch of plots I can be not interested in
Wouldn't it be more convenient for us to have save_metrics and plot_metrics? What do you think?

This PR adds Metrics Plot for the Fine-tuning Jobs. This PR includes:
list_metrics;Example of the output: