diff --git a/modelling/data/activity_heatmap.png b/modelling/data/activity_heatmap.png index 263ffd3..c8b4ef3 100644 Binary files a/modelling/data/activity_heatmap.png and b/modelling/data/activity_heatmap.png differ diff --git a/modelling/data/calendar_activity.json b/modelling/data/calendar_activity.json index 7ea6fb4..8099c33 100644 --- a/modelling/data/calendar_activity.json +++ b/modelling/data/calendar_activity.json @@ -11,6 +11,7 @@ { "cluster_id": 1, "calendar_label": "Redwing neighbourhood", + "description": "Single-species cluster containing Redwing, mainly representing winter visitor with autumn arrival component. The defining pattern is a winter peak around January, a moderate autumn component, moderate summer suppression, and slow arrival fast departure response dynamics. Its defining traits include year wrapping winter presence, core winter winter peak, and moderate autumn component. Compared with the full species set, autumn to winter weight ratio is higher than the whole-set average and decay to growth ratio is higher than the whole-set average.", "n_species": 1, "species": [ "Redwing" @@ -93,6 +94,7 @@ { "cluster_id": 2, "calendar_label": "Extended Spring Seasonal Presence", + "description": "Cluster of 13 species, mainly representing extended spring seasonal presence. The fitted active window runs roughly from March to October, with a mean peak around June, and and an average width of 6.6 months. It is characterised by very broad season and moderate active window. Common high-support traits include strong offseason suppression and early peak alignment.", "n_species": 13, "species": [ "Dandelion", @@ -187,6 +189,7 @@ { "cluster_id": 3, "calendar_label": "Rosebay Willowherb neighbourhood", + "description": "Single-species cluster containing Rosebay Willowherb, mainly representing moderate autumn seasonal presence. The fitted active window runs roughly from June to September, with a mean peak around September, and and an average width of 3.1 months. 
It is characterised by moderate season and sharp active window. Its defining traits include autumn peak, moderate season, and sharp seasonal window. Compared with the full species set, season start month is higher than the whole-set average and peak month is higher than the whole-set average.", "n_species": 1, "species": [ "Rosebay Willowherb" @@ -269,6 +272,7 @@ { "cluster_id": 4, "calendar_label": "Snowdrop neighbourhood", + "description": "Single-species cluster containing Snowdrop, mainly representing narrow winter seasonal presence. The fitted active window runs roughly from February to March, with a mean peak around February, and and an average width of 1.9 months. It is characterised by narrow season and moderate active window. Its defining traits include winter peak, narrow season, and moderate seasonal window. Compared with the full species set, season midpoint month is lower than the whole-set average and season end month is lower than the whole-set average.", "n_species": 1, "species": [ "Snowdrop" @@ -351,6 +355,7 @@ { "cluster_id": 5, "calendar_label": "Moderate Spring Seasonal Presence", + "description": "Cluster of 5 species, mainly representing moderate spring seasonal presence. The fitted active window runs roughly from April to June, with a mean peak around May, and and an average width of 2.3 months. It is characterised by moderate season and sharp active window. Common high-support traits include spring peak, central peak alignment, and sharp seasonal window. Compared with the full species set, fit score is lower than the whole-set average and season end month is lower than the whole-set average.", "n_species": 5, "species": [ "Bluebell", @@ -437,6 +442,7 @@ { "cluster_id": 6, "calendar_label": "Jay neighbourhood", + "description": "Single-species cluster containing Jay, mainly representing resident with summer detectability collapse. Detectability peaks around October and and is lowest around August. 
The shared pattern includes weak baseline presence, moderate summer suppression, weak autumn component, and decline biased response dynamics. Its defining traits include resident detectability pattern, weak baseline presence, and autumn detectability peak. Compared with the full species set, peak month is higher than the whole-set average and target amplitude is lower than the whole-set average.", "n_species": 1, "species": [ "Jay" @@ -519,6 +525,7 @@ { "cluster_id": 7, "calendar_label": "Resident With Spring Persistence And Summer Suppression", + "description": "Cluster of 7 species, mainly representing resident with spring persistence and summer suppression. Detectability peaks around February and and is lowest around September. The shared pattern includes strong baseline presence, strong summer suppression, weak autumn component, and rapid decline biased response dynamics. Common high-support traits include resident detectability pattern, meaningful year end component, and strong baseline presence. Compared with the full species set, year end to winter weight ratio is higher than the whole-set average and baseline to peak ratio is higher than the whole-set average.", "n_species": 7, "species": [ "Mute Swan", @@ -607,6 +614,7 @@ { "cluster_id": 8, "calendar_label": "Resident With Summer Detectability Collapse", + "description": "Cluster of 10 species, mainly representing resident with summer detectability collapse. Detectability peaks around April and and is lowest around September. The shared pattern includes weak baseline presence, moderate summer suppression, weak autumn component, and rapid decline biased response dynamics. 
Common high-support traits include resident detectability pattern, moderate summer suppression, and rapid decline biased response dynamics.", "n_species": 10, "species": [ "House Sparrow", diff --git a/modelling/data/cluster_analysis.json b/modelling/data/cluster_analysis.json index fc9e792..97f93f6 100644 --- a/modelling/data/cluster_analysis.json +++ b/modelling/data/cluster_analysis.json @@ -1,6 +1,6 @@ { "schema_version": "species-similarity-clusters/v2", - "created_utc": "2026-05-12T19:31:33.748438+00:00", + "created_utc": "2026-05-13T06:03:40.943790+00:00", "source_similarity_schema_version": "species-similarity/v1", "source_feature_schema_version": "species-feature-table/v1", "n_species": 39, @@ -1509,7 +1509,7 @@ "species": [ "Redwing" ], - "description": "Single-species cluster containing Redwing, mainly representing core winter winter visitor with autumn arrival component. The defining pattern is a winter peak around January, a moderate autumn component, moderate summer suppression, and slow arrival fast departure response dynamics. Its defining traits include year wrapping winter presence, core winter winter peak, and moderate autumn component. Compared with the full species set, autumn to winter weight ratio is higher than the whole-set average and decay to growth ratio is higher than the whole-set average.", + "description": "Single-species cluster containing Redwing, mainly representing winter visitor with autumn arrival component. The defining pattern is a winter peak around January, a moderate autumn component, moderate summer suppression, and slow arrival fast departure response dynamics. Its defining traits include year wrapping winter presence, core winter winter peak, and moderate autumn component. 
Compared with the full species set, autumn to winter weight ratio is higher than the whole-set average and decay to growth ratio is higher than the whole-set average.", "dominant_model_family": "winter_presence", "dominant_primary_class": "winter_visitor_with_autumn_arrival_component", "numeric_summary": { @@ -1755,7 +1755,7 @@ "Red Admiral Butterfly", "Speckled Wood Butterfly" ], - "description": "Cluster of 13 species, mainly representing spring extended spring seasonal presence. The fitted active window runs roughly from March to October, with a mean peak around June, and and an average width of 6.6 months. It is characterised by very broad season and moderate active window. Common high-support traits include strong offseason suppression and early peak alignment.", + "description": "Cluster of 13 species, mainly representing extended spring seasonal presence. The fitted active window runs roughly from March to October, with a mean peak around June, and and an average width of 6.6 months. It is characterised by very broad season and moderate active window. Common high-support traits include strong offseason suppression and early peak alignment.", "dominant_model_family": "seasonal_presence", "dominant_primary_class": "extended_spring_seasonal_presence", "numeric_summary": { @@ -2034,7 +2034,7 @@ "species": [ "Rosebay Willowherb" ], - "description": "Single-species cluster containing Rosebay Willowherb, mainly representing autumn moderate autumn seasonal presence. The fitted active window runs roughly from June to September, with a mean peak around September, and and an average width of 3.1 months. It is characterised by moderate season and sharp active window. Its defining traits include autumn peak, moderate season, and sharp seasonal window. 
Compared with the full species set, season start month is higher than the whole-set average and peak month is higher than the whole-set average.", + "description": "Single-species cluster containing Rosebay Willowherb, mainly representing moderate autumn seasonal presence. The fitted active window runs roughly from June to September, with a mean peak around September, and and an average width of 3.1 months. It is characterised by moderate season and sharp active window. Its defining traits include autumn peak, moderate season, and sharp seasonal window. Compared with the full species set, season start month is higher than the whole-set average and peak month is higher than the whole-set average.", "dominant_model_family": "seasonal_presence", "dominant_primary_class": "moderate_autumn_seasonal_presence", "numeric_summary": { @@ -2258,7 +2258,7 @@ "species": [ "Snowdrop" ], - "description": "Single-species cluster containing Snowdrop, mainly representing winter narrow winter seasonal presence. The fitted active window runs roughly from February to March, with a mean peak around February, and and an average width of 1.9 months. It is characterised by narrow season and moderate active window. Its defining traits include winter peak, narrow season, and moderate seasonal window. Compared with the full species set, season midpoint month is lower than the whole-set average and season end month is lower than the whole-set average.", + "description": "Single-species cluster containing Snowdrop, mainly representing narrow winter seasonal presence. The fitted active window runs roughly from February to March, with a mean peak around February, and and an average width of 1.9 months. It is characterised by narrow season and moderate active window. Its defining traits include winter peak, narrow season, and moderate seasonal window. 
Compared with the full species set, season midpoint month is lower than the whole-set average and season end month is lower than the whole-set average.", "dominant_model_family": "seasonal_presence", "dominant_primary_class": "narrow_winter_seasonal_presence", "numeric_summary": { @@ -2486,7 +2486,7 @@ "Cowslip", "Orange Tip Butterfly" ], - "description": "Cluster of 5 species, mainly representing spring moderate spring seasonal presence. The fitted active window runs roughly from April to June, with a mean peak around May, and and an average width of 2.3 months. It is characterised by moderate season and sharp active window. Common high-support traits include spring peak, central peak alignment, and sharp seasonal window. Compared with the full species set, fit score is lower than the whole-set average and season end month is lower than the whole-set average.", + "description": "Cluster of 5 species, mainly representing moderate spring seasonal presence. The fitted active window runs roughly from April to June, with a mean peak around May, and and an average width of 2.3 months. It is characterised by moderate season and sharp active window. Common high-support traits include spring peak, central peak alignment, and sharp seasonal window. Compared with the full species set, fit score is lower than the whole-set average and season end month is lower than the whole-set average.", "dominant_model_family": "seasonal_presence", "dominant_primary_class": "moderate_spring_seasonal_presence", "numeric_summary": { @@ -2774,7 +2774,7 @@ "species": [ "Jay" ], - "description": "Single-species cluster containing Jay, mainly representing autumn resident with summer detectability collapse. Detectability peaks around October and and is lowest around August. The shared pattern includes weak baseline presence, moderate summer suppression, weak autumn component, and decline biased response dynamics. 
Its defining traits include resident detectability pattern, weak baseline presence, and autumn detectability peak. Compared with the full species set, peak month is higher than the whole-set average and target amplitude is lower than the whole-set average.", + "description": "Single-species cluster containing Jay, mainly representing resident with summer detectability collapse. Detectability peaks around October and and is lowest around August. The shared pattern includes weak baseline presence, moderate summer suppression, weak autumn component, and decline biased response dynamics. Its defining traits include resident detectability pattern, weak baseline presence, and autumn detectability peak. Compared with the full species set, peak month is higher than the whole-set average and target amplitude is lower than the whole-set average.", "dominant_model_family": "resident_detectability", "dominant_primary_class": "resident_with_summer_detectability_collapse", "numeric_summary": { @@ -3071,7 +3071,7 @@ "Common Starling", "Magpie" ], - "description": "Cluster of 7 species, mainly representing winter resident with spring persistence and summer suppression. Detectability peaks around February and and is lowest around September. The shared pattern includes strong baseline presence, strong summer suppression, weak autumn component, and rapid decline biased response dynamics. Common high-support traits include resident detectability pattern, meaningful year end component, and strong baseline presence. Compared with the full species set, year end to winter weight ratio is higher than the whole-set average and baseline to peak ratio is higher than the whole-set average.", + "description": "Cluster of 7 species, mainly representing resident with spring persistence and summer suppression. Detectability peaks around February and and is lowest around September. 
The shared pattern includes strong baseline presence, strong summer suppression, weak autumn component, and rapid decline biased response dynamics. Common high-support traits include resident detectability pattern, meaningful year end component, and strong baseline presence. Compared with the full species set, year end to winter weight ratio is higher than the whole-set average and baseline to peak ratio is higher than the whole-set average.", "dominant_model_family": "resident_detectability", "dominant_primary_class": "resident_with_spring_persistence_and_summer_suppression", "numeric_summary": { @@ -3406,7 +3406,7 @@ "Daisy", "Song Thrush" ], - "description": "Cluster of 10 species, mainly representing spring resident with summer detectability collapse. Detectability peaks around April and and is lowest around September. The shared pattern includes weak baseline presence, moderate summer suppression, weak autumn component, and rapid decline biased response dynamics. Common high-support traits include resident detectability pattern, moderate summer suppression, and rapid decline biased response dynamics.", + "description": "Cluster of 10 species, mainly representing resident with summer detectability collapse. Detectability peaks around April and and is lowest around September. The shared pattern includes weak baseline presence, moderate summer suppression, weak autumn component, and rapid decline biased response dynamics. 
Common high-support traits include resident detectability pattern, moderate summer suppression, and rapid decline biased response dynamics.", "dominant_model_family": "resident_detectability", "dominant_primary_class": "resident_with_summer_detectability_collapse", "numeric_summary": { diff --git a/modelling/data/cluster_dendrogram.png b/modelling/data/cluster_dendrogram.png index 1318c68..42851dc 100644 Binary files a/modelling/data/cluster_dendrogram.png and b/modelling/data/cluster_dendrogram.png differ diff --git a/modelling/data/cluster_summary.txt b/modelling/data/cluster_summary.txt index cc4897a..8af98f0 100644 --- a/modelling/data/cluster_summary.txt +++ b/modelling/data/cluster_summary.txt @@ -4,7 +4,7 @@ Species clusters Cluster 1 --------- -Single-species cluster containing Redwing, mainly representing core winter winter visitor with autumn arrival component. The defining pattern is a winter peak around January, a moderate autumn component, moderate summer suppression, and slow arrival fast departure response dynamics. Its defining traits include year wrapping winter presence, core winter winter peak, and moderate autumn component. Compared with the full species set, autumn to winter weight ratio is higher than the whole-set average and decay to growth ratio is higher than the whole-set average. +Single-species cluster containing Redwing, mainly representing winter visitor with autumn arrival component. The defining pattern is a winter peak around January, a moderate autumn component, moderate summer suppression, and slow arrival fast departure response dynamics. Its defining traits include year wrapping winter presence, core winter winter peak, and moderate autumn component. Compared with the full species set, autumn to winter weight ratio is higher than the whole-set average and decay to growth ratio is higher than the whole-set average. 
Species (1): Redwing @@ -24,7 +24,7 @@ Distinguishing numeric features: Cluster 2 --------- -Cluster of 13 species, mainly representing spring extended spring seasonal presence. The fitted active window runs roughly from March to October, with a mean peak around June, and and an average width of 6.6 months. It is characterised by very broad season and moderate active window. Common high-support traits include strong offseason suppression and early peak alignment. +Cluster of 13 species, mainly representing extended spring seasonal presence. The fitted active window runs roughly from March to October, with a mean peak around June, and and an average width of 6.6 months. It is characterised by very broad season and moderate active window. Common high-support traits include strong offseason suppression and early peak alignment. Species (13): Dandelion @@ -57,7 +57,7 @@ Distinguishing numeric features: Cluster 3 --------- -Single-species cluster containing Rosebay Willowherb, mainly representing autumn moderate autumn seasonal presence. The fitted active window runs roughly from June to September, with a mean peak around September, and and an average width of 3.1 months. It is characterised by moderate season and sharp active window. Its defining traits include autumn peak, moderate season, and sharp seasonal window. Compared with the full species set, season start month is higher than the whole-set average and peak month is higher than the whole-set average. +Single-species cluster containing Rosebay Willowherb, mainly representing moderate autumn seasonal presence. The fitted active window runs roughly from June to September, with a mean peak around September, and and an average width of 3.1 months. It is characterised by moderate season and sharp active window. Its defining traits include autumn peak, moderate season, and sharp seasonal window. 
Compared with the full species set, season start month is higher than the whole-set average and peak month is higher than the whole-set average. Species (1): Rosebay Willowherb @@ -78,7 +78,7 @@ Distinguishing numeric features: Cluster 4 --------- -Single-species cluster containing Snowdrop, mainly representing winter narrow winter seasonal presence. The fitted active window runs roughly from February to March, with a mean peak around February, and and an average width of 1.9 months. It is characterised by narrow season and moderate active window. Its defining traits include winter peak, narrow season, and moderate seasonal window. Compared with the full species set, season midpoint month is lower than the whole-set average and season end month is lower than the whole-set average. +Single-species cluster containing Snowdrop, mainly representing narrow winter seasonal presence. The fitted active window runs roughly from February to March, with a mean peak around February, and and an average width of 1.9 months. It is characterised by narrow season and moderate active window. Its defining traits include winter peak, narrow season, and moderate seasonal window. Compared with the full species set, season midpoint month is lower than the whole-set average and season end month is lower than the whole-set average. Species (1): Snowdrop @@ -99,7 +99,7 @@ Distinguishing numeric features: Cluster 5 --------- -Cluster of 5 species, mainly representing spring moderate spring seasonal presence. The fitted active window runs roughly from April to June, with a mean peak around May, and and an average width of 2.3 months. It is characterised by moderate season and sharp active window. Common high-support traits include spring peak, central peak alignment, and sharp seasonal window. Compared with the full species set, fit score is lower than the whole-set average and season end month is lower than the whole-set average. 
+Cluster of 5 species, mainly representing moderate spring seasonal presence. The fitted active window runs roughly from April to June, with a mean peak around May, and and an average width of 2.3 months. It is characterised by moderate season and sharp active window. Common high-support traits include spring peak, central peak alignment, and sharp seasonal window. Compared with the full species set, fit score is lower than the whole-set average and season end month is lower than the whole-set average. Species (5): Bluebell @@ -124,7 +124,7 @@ Distinguishing numeric features: Cluster 6 --------- -Single-species cluster containing Jay, mainly representing autumn resident with summer detectability collapse. Detectability peaks around October and and is lowest around August. The shared pattern includes weak baseline presence, moderate summer suppression, weak autumn component, and decline biased response dynamics. Its defining traits include resident detectability pattern, weak baseline presence, and autumn detectability peak. Compared with the full species set, peak month is higher than the whole-set average and target amplitude is lower than the whole-set average. +Single-species cluster containing Jay, mainly representing resident with summer detectability collapse. Detectability peaks around October and and is lowest around August. The shared pattern includes weak baseline presence, moderate summer suppression, weak autumn component, and decline biased response dynamics. Its defining traits include resident detectability pattern, weak baseline presence, and autumn detectability peak. Compared with the full species set, peak month is higher than the whole-set average and target amplitude is lower than the whole-set average. Species (1): Jay @@ -144,7 +144,7 @@ Distinguishing numeric features: Cluster 7 --------- -Cluster of 7 species, mainly representing winter resident with spring persistence and summer suppression. 
Detectability peaks around February and and is lowest around September. The shared pattern includes strong baseline presence, strong summer suppression, weak autumn component, and rapid decline biased response dynamics. Common high-support traits include resident detectability pattern, meaningful year end component, and strong baseline presence. Compared with the full species set, year end to winter weight ratio is higher than the whole-set average and baseline to peak ratio is higher than the whole-set average. +Cluster of 7 species, mainly representing resident with spring persistence and summer suppression. Detectability peaks around February and and is lowest around September. The shared pattern includes strong baseline presence, strong summer suppression, weak autumn component, and rapid decline biased response dynamics. Common high-support traits include resident detectability pattern, meaningful year end component, and strong baseline presence. Compared with the full species set, year end to winter weight ratio is higher than the whole-set average and baseline to peak ratio is higher than the whole-set average. Species (7): Mute Swan @@ -170,7 +170,7 @@ Distinguishing numeric features: Cluster 8 --------- -Cluster of 10 species, mainly representing spring resident with summer detectability collapse. Detectability peaks around April and and is lowest around September. The shared pattern includes weak baseline presence, moderate summer suppression, weak autumn component, and rapid decline biased response dynamics. Common high-support traits include resident detectability pattern, moderate summer suppression, and rapid decline biased response dynamics. +Cluster of 10 species, mainly representing resident with summer detectability collapse. Detectability peaks around April and and is lowest around September. The shared pattern includes weak baseline presence, moderate summer suppression, weak autumn component, and rapid decline biased response dynamics. 
Common high-support traits include resident detectability pattern, moderate summer suppression, and rapid decline biased response dynamics. Species (10): House Sparrow diff --git a/modelling/data/extracted_clusters.json b/modelling/data/extracted_clusters.json index 1f36912..204336e 100644 --- a/modelling/data/extracted_clusters.json +++ b/modelling/data/extracted_clusters.json @@ -1,7 +1,7 @@ { "schema_version": "seasonal-ecological-calendar-clusters/v1", "source_schema_version": "species-similarity-clusters/v2", - "source_created_utc": "2026-05-12T19:31:33.748438+00:00", + "source_created_utc": "2026-05-13T06:03:40.943790+00:00", "n_species": 39, "n_clusters": 8, "cluster_caveat": "Clusters should be interpreted as exploratory seasonal assemblages rather than fixed ecological categories.", @@ -9,7 +9,7 @@ { "cluster_id": 1, "calendar_label": "Redwing neighbourhood", - "description": "Single-species cluster containing Redwing, mainly representing core winter winter visitor with autumn arrival component. The defining pattern is a winter peak around January, a moderate autumn component, moderate summer suppression, and slow arrival fast departure response dynamics. Its defining traits include year wrapping winter presence, core winter winter peak, and moderate autumn component. Compared with the full species set, autumn to winter weight ratio is higher than the whole-set average and decay to growth ratio is higher than the whole-set average.", + "description": "Single-species cluster containing Redwing, mainly representing winter visitor with autumn arrival component. The defining pattern is a winter peak around January, a moderate autumn component, moderate summer suppression, and slow arrival fast departure response dynamics. Its defining traits include year wrapping winter presence, core winter winter peak, and moderate autumn component. 
Compared with the full species set, autumn to winter weight ratio is higher than the whole-set average and decay to growth ratio is higher than the whole-set average.", "n_species": 1, "species": [ "Redwing" @@ -18,7 +18,7 @@ { "cluster_id": 2, "calendar_label": "Extended Spring Seasonal Presence", - "description": "Cluster of 13 species, mainly representing spring extended spring seasonal presence. The fitted active window runs roughly from March to October, with a mean peak around June, and and an average width of 6.6 months. It is characterised by very broad season and moderate active window. Common high-support traits include strong offseason suppression and early peak alignment.", + "description": "Cluster of 13 species, mainly representing extended spring seasonal presence. The fitted active window runs roughly from March to October, with a mean peak around June, and and an average width of 6.6 months. It is characterised by very broad season and moderate active window. Common high-support traits include strong offseason suppression and early peak alignment.", "n_species": 13, "species": [ "Dandelion", @@ -39,7 +39,7 @@ { "cluster_id": 3, "calendar_label": "Rosebay Willowherb neighbourhood", - "description": "Single-species cluster containing Rosebay Willowherb, mainly representing autumn moderate autumn seasonal presence. The fitted active window runs roughly from June to September, with a mean peak around September, and and an average width of 3.1 months. It is characterised by moderate season and sharp active window. Its defining traits include autumn peak, moderate season, and sharp seasonal window. Compared with the full species set, season start month is higher than the whole-set average and peak month is higher than the whole-set average.", + "description": "Single-species cluster containing Rosebay Willowherb, mainly representing moderate autumn seasonal presence. 
The fitted active window runs roughly from June to September, with a mean peak around September, and and an average width of 3.1 months. It is characterised by moderate season and sharp active window. Its defining traits include autumn peak, moderate season, and sharp seasonal window. Compared with the full species set, season start month is higher than the whole-set average and peak month is higher than the whole-set average.", "n_species": 1, "species": [ "Rosebay Willowherb" @@ -48,7 +48,7 @@ { "cluster_id": 4, "calendar_label": "Snowdrop neighbourhood", - "description": "Single-species cluster containing Snowdrop, mainly representing winter narrow winter seasonal presence. The fitted active window runs roughly from February to March, with a mean peak around February, and and an average width of 1.9 months. It is characterised by narrow season and moderate active window. Its defining traits include winter peak, narrow season, and moderate seasonal window. Compared with the full species set, season midpoint month is lower than the whole-set average and season end month is lower than the whole-set average.", + "description": "Single-species cluster containing Snowdrop, mainly representing narrow winter seasonal presence. The fitted active window runs roughly from February to March, with a mean peak around February, and and an average width of 1.9 months. It is characterised by narrow season and moderate active window. Its defining traits include winter peak, narrow season, and moderate seasonal window. Compared with the full species set, season midpoint month is lower than the whole-set average and season end month is lower than the whole-set average.", "n_species": 1, "species": [ "Snowdrop" @@ -57,7 +57,7 @@ { "cluster_id": 5, "calendar_label": "Moderate Spring Seasonal Presence", - "description": "Cluster of 5 species, mainly representing spring moderate spring seasonal presence. 
The fitted active window runs roughly from April to June, with a mean peak around May, and and an average width of 2.3 months. It is characterised by moderate season and sharp active window. Common high-support traits include spring peak, central peak alignment, and sharp seasonal window. Compared with the full species set, fit score is lower than the whole-set average and season end month is lower than the whole-set average.", + "description": "Cluster of 5 species, mainly representing moderate spring seasonal presence. The fitted active window runs roughly from April to June, with a mean peak around May, and and an average width of 2.3 months. It is characterised by moderate season and sharp active window. Common high-support traits include spring peak, central peak alignment, and sharp seasonal window. Compared with the full species set, fit score is lower than the whole-set average and season end month is lower than the whole-set average.", "n_species": 5, "species": [ "Bluebell", @@ -70,7 +70,7 @@ { "cluster_id": 6, "calendar_label": "Jay neighbourhood", - "description": "Single-species cluster containing Jay, mainly representing autumn resident with summer detectability collapse. Detectability peaks around October and and is lowest around August. The shared pattern includes weak baseline presence, moderate summer suppression, weak autumn component, and decline biased response dynamics. Its defining traits include resident detectability pattern, weak baseline presence, and autumn detectability peak. Compared with the full species set, peak month is higher than the whole-set average and target amplitude is lower than the whole-set average.", + "description": "Single-species cluster containing Jay, mainly representing resident with summer detectability collapse. Detectability peaks around October and and is lowest around August. 
The shared pattern includes weak baseline presence, moderate summer suppression, weak autumn component, and decline biased response dynamics. Its defining traits include resident detectability pattern, weak baseline presence, and autumn detectability peak. Compared with the full species set, peak month is higher than the whole-set average and target amplitude is lower than the whole-set average.", "n_species": 1, "species": [ "Jay" @@ -79,7 +79,7 @@ { "cluster_id": 7, "calendar_label": "Resident With Spring Persistence And Summer Suppression", - "description": "Cluster of 7 species, mainly representing winter resident with spring persistence and summer suppression. Detectability peaks around February and and is lowest around September. The shared pattern includes strong baseline presence, strong summer suppression, weak autumn component, and rapid decline biased response dynamics. Common high-support traits include resident detectability pattern, meaningful year end component, and strong baseline presence. Compared with the full species set, year end to winter weight ratio is higher than the whole-set average and baseline to peak ratio is higher than the whole-set average.", + "description": "Cluster of 7 species, mainly representing resident with spring persistence and summer suppression. Detectability peaks around February and is lowest around September. The shared pattern includes strong baseline presence, strong summer suppression, weak autumn component, and rapid decline biased response dynamics. Common high-support traits include resident detectability pattern, meaningful year end component, and strong baseline presence.
Compared with the full species set, year end to winter weight ratio is higher than the whole-set average and baseline to peak ratio is higher than the whole-set average.", "n_species": 7, "species": [ "Mute Swan", @@ -94,7 +94,7 @@ { "cluster_id": 8, "calendar_label": "Resident With Summer Detectability Collapse", - "description": "Cluster of 10 species, mainly representing spring resident with summer detectability collapse. Detectability peaks around April and and is lowest around September. The shared pattern includes weak baseline presence, moderate summer suppression, weak autumn component, and rapid decline biased response dynamics. Common high-support traits include resident detectability pattern, moderate summer suppression, and rapid decline biased response dynamics.", + "description": "Cluster of 10 species, mainly representing resident with summer detectability collapse. Detectability peaks around April and is lowest around September. The shared pattern includes weak baseline presence, moderate summer suppression, weak autumn component, and rapid decline biased response dynamics.
Common high-support traits include resident detectability pattern, moderate summer suppression, and rapid decline biased response dynamics.", "n_species": 10, "species": [ "House Sparrow", diff --git a/modelling/data/feature_matrix.json b/modelling/data/feature_matrix.json index 3077bdc..278bb9a 100644 --- a/modelling/data/feature_matrix.json +++ b/modelling/data/feature_matrix.json @@ -1,6 +1,6 @@ { "schema_version": "species-feature-table/v1", - "created_utc": "2026-05-12T19:31:33.002951+00:00", + "created_utc": "2026-05-13T06:03:40.064999+00:00", "description": "Whole-set seasonal ecology feature table compiled from per-species classification JSON files.", "n_species": 39, "source_files": [ diff --git a/modelling/data/species_similarity.json b/modelling/data/species_similarity.json index c2c30e2..e51358c 100644 --- a/modelling/data/species_similarity.json +++ b/modelling/data/species_similarity.json @@ -1,8 +1,8 @@ { "schema_version": "species-similarity/v1", - "created_utc": "2026-05-12T19:31:33.012198+00:00", + "created_utc": "2026-05-13T06:03:40.076291+00:00", "source_feature_schema_version": "species-feature-table/v1", - "source_feature_created_utc": "2026-05-12T19:31:33.002951+00:00", + "source_feature_created_utc": "2026-05-13T06:03:40.064999+00:00", "n_species": 39, "top_n": 5, "method": { diff --git a/modelling/data/species_similarity_heatmap.png b/modelling/data/species_similarity_heatmap.png index 3d79768..d6632c2 100644 Binary files a/modelling/data/species_similarity_heatmap.png and b/modelling/data/species_similarity_heatmap.png differ diff --git a/modelling/src/seasonal/calendar/activity.py b/modelling/src/seasonal/calendar/activity.py index 20da98e..1fcd32d 100644 --- a/modelling/src/seasonal/calendar/activity.py +++ b/modelling/src/seasonal/calendar/activity.py @@ -83,14 +83,9 @@ def build_neighbourhood_monthly_activity( output_clusters.append( { "cluster_id": cluster_id, - "calendar_label": cluster.get( - "calendar_label", - f"Cluster 
{cluster_id}", - ), - "n_species": cluster.get( - "n_species", - len(cluster.get("species", [])), - ), + "calendar_label": cluster.get("calendar_label", f"Cluster {cluster_id}"), + "description": cluster.get("description", f"Cluster {cluster_id}"), + "n_species": cluster.get("n_species", len(cluster.get("species", []))), "species": cluster.get("species", []), "monthly_activity": monthly_activity, } diff --git a/modelling/src/seasonal/calendar/heatmap.py b/modelling/src/seasonal/calendar/heatmap.py index 44a9b62..5b655b8 100644 --- a/modelling/src/seasonal/calendar/heatmap.py +++ b/modelling/src/seasonal/calendar/heatmap.py @@ -6,6 +6,7 @@ import matplotlib.pyplot as plt import numpy as np +from seasonal.support.clustering import first_sentence MONTH_NAMES = ["Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"] @@ -36,7 +37,6 @@ def plot_neighbourhood_calendar_heatmap( for cluster in clusters: cluster_id = cluster.get("cluster_id") - # labels.append(f"Cluster {cluster_id}") labels.append(cluster_id) month_values = [0.0] * 12 @@ -99,7 +99,8 @@ def plot_neighbourhood_calendar_heatmap( for cluster in clusters: cluster_id = cluster.get("cluster_id") - label = cluster.get("calendar_label", cluster_id) + # label = cluster.get("calendar_label", cluster_id) + label = first_sentence(cluster.get("description", str(cluster_id))) legend_lines.append(f"{cluster_id}: {label}") legend_text = "\n".join(legend_lines) diff --git a/modelling/src/seasonal/similarity/clustering.py b/modelling/src/seasonal/similarity/clustering.py index 4851d8a..7f5f509 100644 --- a/modelling/src/seasonal/similarity/clustering.py +++ b/modelling/src/seasonal/similarity/clustering.py @@ -420,7 +420,10 @@ def _cluster_opening_sentence( descriptors = [] if timing: - descriptors.append(_humanise_token(timing)) + # This results in a duplication of the season that looks odd + # descriptors.append(_humanise_token(timing)) + pass + if primary_class: 
descriptors.append(_humanise_token(primary_class)) elif model_family: diff --git a/modelling/src/seasonal/similarity/dendrogram.py b/modelling/src/seasonal/similarity/dendrogram.py index 9f7b11b..d5bd05b 100644 --- a/modelling/src/seasonal/similarity/dendrogram.py +++ b/modelling/src/seasonal/similarity/dendrogram.py @@ -1,6 +1,5 @@ from pathlib import Path from typing import Any, Dict -import re import numpy as np import matplotlib.pyplot as plt @@ -8,18 +7,7 @@ from matplotlib.patches import Patch from scipy.cluster.hierarchy import dendrogram - -def _first_sentence(text: str) -> str: - """ - Extract the first sentence, excluding trainling full-stop, from a cluster description - - :param text: Full text - :return: First sentence of the text - """ - if not text: - return "" - match = re.search(r"(?<=[.!?])\s+", text.strip()) - return text.strip() if not match else text[: match.start()].strip() +from seasonal.support.clustering import first_sentence def _get_cluster_colour( @@ -228,7 +216,7 @@ def link_colour_func(node_id: int) -> str: # Get the descriptions for all the clusters cluster_descriptions = { - c["cluster_id"]: _first_sentence(c.get("description", "")) + c["cluster_id"]: first_sentence(c.get("description", "")) for c in cluster_data.get("clusters", []) } diff --git a/modelling/src/seasonal/support/clustering.py b/modelling/src/seasonal/support/clustering.py index 489b18a..c70b16f 100644 --- a/modelling/src/seasonal/support/clustering.py +++ b/modelling/src/seasonal/support/clustering.py @@ -4,6 +4,7 @@ from typing import Any, Dict, List, Sequence, Tuple import numpy as np +import re from scipy.cluster.hierarchy import leaves_list, linkage from scipy.spatial.distance import squareform @@ -131,3 +132,17 @@ def serialise_linkage_matrix( "matrix": scipy_rows, "merges": merges, } + + +def first_sentence(text: str) -> str: + """ + Extract the first sentence, excluding trailing full-stop, from a cluster description + + :param text: Full text + :return: First
sentence of the text + """ + if not text: + return "" + + match = re.search(r"(?<=[.!?])\s+", text.strip()) + return text.strip() if not match else text[: match.start()].strip().removesuffix(".")