@@ -551,38 +551,45 @@ def run_evaluation(
551551 self ,
552552 perf_annotations : Union [PathLike , np .ndarray ],
553553 level : str = "note" ,
554- tolerances : list = TOLERANCES_IN_MILLISECONDS ,
555- musical_beat : bool = False , # beat annots are difference in some dataset
554+ tolerances : list = None ,
555+ musical_beat : bool = False ,
556556 debug : bool = False ,
557557 save_dir : PathLike = None ,
558558 run_name : str = None ,
559- domain : str = "performance" , # " score" or "performance"
559+ domain : str = "score" ,
560560 plot_dist_matrix : bool = True ,
561561 ) -> dict :
562562 """
563- Evaluate the score following process
563+ Evaluate the score following process.
564+
565+ When domain="score" (default), returns beat-based metrics as primary
566+ and ms-based metrics under "ms" key. When domain="performance",
567+ returns ms-based metrics only (legacy behavior).
564568
565569 Parameters
566570 ----------
567571 perf_annotations : PathLike or np.ndarray
568- Path to the performance annotations file (tab-separated),
569- or numpy array of annotation times in seconds.
572+ Path to the performance annotations file or numpy array of onset times (seconds).
570573 level : str
571- Level of annotations to use: bar, beat or note
572- tolerance : list
573- Tolerances to use for evaluation (in milliseconds)
574+ Annotation level: "beat" or "note"
575+ tolerances : list or None
576+ Tolerances for evaluation. If None, uses default for the domain.
577+ musical_beat : bool
578+ Whether to use musical beat
574579 debug : bool
575- Whether to save the score and performance audio with beat annotations
580+ Whether to save debug outputs
576581 domain : str
577- Evaluation domain, either "score" or "performance".
578- "score" domain evaluates in beat unit, "performance" domain evaluates in second unit. (Default: "performance")
582+ "score" (default, beat-based primary) or "performance" (ms-based, legacy)
579583
580584 Returns
581585 -------
582586 dict
583- Evaluation results with mean, median, std, skewness, kurtosis, and
584- accuracy for each tolerance
587+ Evaluation results. If domain="score", includes both beat and ms metrics.
585588 """
589+ if tolerances is None :
590+ tolerances = (
591+ TOLERANCES_IN_BEATS if domain == "score" else TOLERANCES_IN_MILLISECONDS
592+ )
586593 if not self ._has_run :
587594 raise ValueError ("Must call run() before evaluation" )
588595
@@ -643,26 +650,43 @@ def run_evaluation(
643650 wp_perf_sec ,
644651 total_counts = len (wp_score ),
645652 tolerances = tolerances ,
646- perf_times = wp_perf_sec ,
647- alignment_duration = self .alignment_duration ,
648653 )
649654 else :
650- # Score domain: compare predicted beats vs GT beats
655+ # Score domain: beat-based (primary) + ms-based (secondary)
651656 score_annots_predicted = transfer_positions (
652657 wp , perf_annots , frame_rate = self .frame_rate , domain = "score"
653658 )
654659 score_annots = score_annots [: len (score_annots_predicted )]
655- if tolerances == TOLERANCES_IN_MILLISECONDS :
656- tolerances = TOLERANCES_IN_BEATS
657- eval_results = get_evaluation_results (
660+ beat_tolerances = (
661+ tolerances
662+ if tolerances != TOLERANCES_IN_MILLISECONDS
663+ else TOLERANCES_IN_BEATS
664+ )
665+ beat_results = get_evaluation_results (
658666 score_annots ,
659667 score_annots_predicted ,
660668 total_counts = original_perf_annots_counts ,
661- tolerances = tolerances ,
669+ tolerances = beat_tolerances ,
662670 in_seconds = False ,
663- perf_times = perf_annots ,
664- alignment_duration = self .alignment_duration ,
665671 )
672+ ms_results = get_evaluation_results (
673+ gt_perf_times ,
674+ wp_perf_sec ,
675+ total_counts = len (wp_score ),
676+ tolerances = TOLERANCES_IN_MILLISECONDS ,
677+ )
678+ eval_results = {"beat" : beat_results , "ms" : ms_results }
679+
680+ # Real-Time Factor (domain-independent)
681+ if self .alignment_duration is not None :
682+ finite_perf = perf_annots [np .isfinite (perf_annots )]
683+ if len (finite_perf ) > 0 :
684+ perf_duration = float (np .max (finite_perf ) - np .min (finite_perf ))
685+ if perf_duration > 0 :
686+ eval_results ["rtf" ] = float (
687+ f"{ self .alignment_duration / perf_duration :.4f} "
688+ )
689+
666690 if self .input_type == "audio" :
667691 latency_results = self .get_latency_stats ()
668692 eval_results .update (latency_results )