hedwig/algorithm.yaml at main · minsing-jin/hedwig · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
# Hedwig Algorithm Config — user-owned recommendation algorithm definition.
#
# This file is a first-class user asset, peer to criteria.yaml.
# It defines the Hybrid Ensemble structure and weights used by the ranking stage.
# All changes are audit-logged to evolution/algorithm_log.jsonl.
#
# Meta-Evolution mutates this file via shadow-mode experiments.
# Users may edit it directly or via natural-language commands.
#
# See docs/VISION_v3.md sections 8-9 for the full architecture.

# Version number of this config (an integer).
# NOTE(review): unclear from here whether this is a schema version or a
# revision counter — confirm with the consumer.
version: 1
# Date of the last update. Quoted, so it loads as a string rather than being
# implicitly typed as a date by the YAML parser.
updated_at: "2026-04-22"
# Provenance label for this config.
# NOTE(review): the value suggests the initial v3 hybrid default — confirm.
origin: initial_default_v3_hybrid

# ---------------------------------------------------------------------------
# Stage A — Retrieval (cheap, fast candidate generation)
# Output: top_n candidates forwarded to Stage B
# ---------------------------------------------------------------------------
retrieval:
  # Number of candidates forwarded to Stage B.
  top_n: 200
  # pre_filter drops candidates below this numeric pre-score
  threshold: 0.10
  components:
    # uses hedwig/engine/pre_scorer.py (5-factor numeric)
    pre_scorer:
      enabled: true
      weight: 1.0
    # Off by default: retrieval-stage embedding of ALL posts is costly;
    # enable when your usage warrants.
    embed_sim:
      enabled: false
      weight: 0.8
      model: text-embedding-3-small
      criteria_vector_from:
        - signal_preferences.care_about
        - context.interests
    # Off by default: enable once a user pool exists.
    collaborative_filter:
      enabled: false
      weight: 0.5

# ---------------------------------------------------------------------------
# Stage B — Ranking Ensemble
# Output: top_k signals for delivery. Final score = Σ wᵢ · normalize(scoreᵢ)
# ---------------------------------------------------------------------------
ranking:
  # Number of signals output for delivery.
  top_k: 30
  # v3 default is a true 4-component hybrid.
  # The cheap components (ltr/content/popularity) rank all retrieval candidates;
  # llm_judge reranks only the top_k — see apply_to below.
  #
  # The four enabled weights sum to 0.90 (0.35 + 0.25 + 0.20 + 0.10); the
  # disabled bandit holds the remaining 0.10.
  # NOTE(review): confirm whether weights are renormalized over the enabled
  # components only.
  components:
    llm_judge:
      enabled: true
      weight: 0.35
      # Quoted so the value is always a string, even if the placeholder is
      # expanded as text (possibly to nothing) before the YAML is parsed.
      # NOTE(review): presumably resolved from the OPENAI_MODEL_FAST
      # environment variable — confirm where the substitution happens.
      model: "${OPENAI_MODEL_FAST}"
      apply_to: top_k         # honored: runs only on top_k AFTER cheap-component ranking
    ltr:
      enabled: true           # pure-Python logistic with default prior; online-trainable
      weight: 0.25
      features:
        - text_relevance
        - source_authority
        - engagement_velocity
        - recency_decay
        - convergence_count
        - past_upvote_similarity
        - past_downvote_similarity
        - dwell_time_proxy
    content_based:
      enabled: true           # OpenAI embeddings when key present, Jaccard fallback otherwise
      weight: 0.20
    popularity_prior:
      enabled: true
      weight: 0.10
      decay_hours: 48
    bandit:
      enabled: false          # meaningful only after feedback history accrues; meta-evolution can flip this
      weight: 0.10
      strategy: thompson_sampling
      exploration_rate: 0.1

# ---------------------------------------------------------------------------
# Fitness function — how we measure if an algorithm version is better
# Meta-Evolution uses this to accept/reject mutations. The sandbox reads
# these fields directly — changing the metrics here changes how mutations
# are judged.
# ---------------------------------------------------------------------------
fitness:
  # The two horizon weights below sum to 1.0 (0.6 short + 0.4 long).
  short_horizon:                # daily/weekly
    metric: upvote_ratio        # (#up / (#up + #down))
    weight: 0.6
  long_horizon:                 # monthly
    metric: retention_x_acceptance
    weight: 0.4
    # Named inputs listed for the long-horizon metric.
    # NOTE(review): the metric name suggests a product of the two — confirm.
    components:
      retention: days_with_any_feedback_in_last_28_days
      acceptance: on_demand_query_acceptance_rate
  diversity_bonus:
    enabled: true
    magnitude: 0.1              # adjustment applied for healthy component mix
  # NOTE(review): confirm whether the +5% is a relative or an absolute
  # improvement in the fitness score.
  adoption_threshold: 0.05      # +5% required to adopt a mutation

# ---------------------------------------------------------------------------
# Meta-Evolution — Karpathy autoresearch pattern on the algorithm itself
# Phase 4 activates this. Runs every 28 days (roughly monthly) by default; see cadence_days.
# ---------------------------------------------------------------------------
meta_evolution:
  enabled: false                # user flips on when they have enough feedback history
  # Run interval in days.
  cadence_days: 28
  # Mutation strategies listed for use.
  # NOTE(review): what each strategy does is defined by the consumer, not
  # visible in this file.
  mutation_strategies:
    - weight_perturbation
    - feature_toggle
    - feature_suggest_from_papers
    - structural_change
  # Shadow-mode experiment settings: duration in days and number of variants
  # run in parallel.
  shadow_mode:
    duration_days: 7
    parallel_variants: 3
  # NOTE(review): presumably reverts a mutation when fitness falls — confirm
  # the trigger and the baseline it is compared against.
  rollback_if_fitness_drops: true

# ---------------------------------------------------------------------------
# Personal Algorithm Engine — additive post-ranking policy
# Preserves the hybrid/SOTA ensemble final_score above; these policies control
# feed UX, reward interpretation, exploration, media metadata, and delivery
# after ranking. User-owned and natural-language editable.
# ---------------------------------------------------------------------------
personal_algorithm:
  ranking_boundary:
    canonical_pre_layer_score: final_score
    # Score fields named by the contract below as not to be overwritten.
    immutable_fields:
      - ensemble_score
      - final_score
    post_ranking_layers:
      - feed
      - exploration
      - media
      - delivery
      - reward
    # Folded scalar: loads as one single-line string with no trailing newline.
    contract: >-
      Post-ranking layers may annotate, reserve slots, route, and measure;
      they must not overwrite ensemble_score/final_score.
  feed:
    # The default is one of the modes listed in available_modes.
    default_mode: grid
    available_modes:
      - grid
      - detail_swipe
      - dense_reader
  swipe_policy:
    # Baseline gesture semantics. The left/right/next entries further down
    # currently repeat these values exactly.
    immutable_defaults:
      left:
        action: save_later
        reward: 0.8
        strength: strong_positive
      right:
        action: skip
        reward: -0.1
        strength: weak_negative
      next:
        action: skip
        reward: 0.0
        strength: weak_neutral
    # No overrides are set.
    user_overrides: {}
    # Gesture mappings at the policy's top level; identical to
    # immutable_defaults at present.
    # NOTE(review): how immutable_defaults, user_overrides, and these entries
    # are resolved against each other is not visible here — confirm with the
    # consumer before editing only one copy.
    left:
      action: save_later
      reward: 0.8
      strength: strong_positive
    right:
      action: skip
      reward: -0.1
      strength: weak_negative
    next:
      action: skip
      reward: 0.0
      strength: weak_neutral
    skip_strength: weak
    # NOTE(review): presumably gates changes to gesture meaning behind a
    # shadow test — confirm.
    shadow_test_required_for_semantic_change: true
  # Per-signal reward weights. The skip value (-0.1) equals the reward on the
  # right-swipe "skip" action in swipe_policy.
  # NOTE(review): how these weights combine with the swipe_policy rewards is
  # not visible here — confirm with the consumer.
  reward_weights:
    save: 1.0
    open: 0.8
    not_interested: -1.0
    dwell: 0.2
    skip: -0.1
    swipe: 0.1
  exploration:
    enabled: true
    # Current rate; it lies between min_rate and max_rate.
    rate: 0.10
    min_rate: 0.05
    max_rate: 0.15
    labels:
      - anomaly
      - contrarian
      - critical
      - opposing
  media:
    default_strategy: text_thumbnail_transcript
    # Default mode: text, thumbnail, and transcript are all switched on.
    default_media_mode:
      active_mode: Text+Thumbnail+Transcript
      text: true
      thumbnail: true
      transcript: true
    # Currently off: enabled and policy_enabled are both false and no
    # extracted features are listed.
    # NOTE(review): presumably also requires the HEDWIG_FULL_MEDIA_UNDERSTANDING
    # environment flag — confirm how the three switches combine.
    advanced_media_capability:
      name: Full Media Understanding
      enabled: false
      required_env_flag: HEDWIG_FULL_MEDIA_UNDERSTANDING
      policy_enabled: false
      extracted_features: []
    # NOTE(review): appears to overlap with advanced_media_capability.enabled —
    # confirm which one the consumer reads.
    full_understanding_enabled: false
  delivery:
    schema_version: delivery_policy_config.v1
    enabled: true
    surfaces:
      - critical
      - daily
      - weekly
      - pwa
      - tray
    preferred_surfaces:
      - daily
    channels:
      - dashboard
      - email
      - slack
      - discord
      - pwa
      - tray
    # The default is one of the channels listed above.
    default_channel: dashboard
    # Clock times are quoted so they load as strings, not numbers.
    timing:
      critical_timing: now
      daily_digest_time: "09:00"
      weekly_digest_day: monday
      weekly_digest_time: "09:00"
      timezone: local
      defer_to_quiet_hours: true
    repeat:
      enabled: true
      max_count: 2
      min_interval_minutes: 240
      snooze_minutes: 60
    # Quiet hours are switched off; the window below is 22:00 to 07:00.
    quiet_hours:
      enabled: false
      start: "22:00"
      end: "07:00"
      timezone: local
      allow_critical_override: true
    urgency:
      critical_urgencies:
        - alert
      critical_score_threshold: 0.85
      daily_score_threshold: 0.65
      exploration_surface: pwa
    # Declarations that delivery is a post-ranking layer: it is not a ranking
    # input and does not mutate scores or rank identity.
    policy_layer: post_ranking_delivery
    post_ranking_only: true
    ranking_input: false
    mutates_scores: false
    mutates_rank_identity: false
  # Currently empty: two empty mappings and two empty lists.
  # NOTE(review): presumably filled in by policy edits or experiments —
  # confirm who writes these.
  active_post_ranking_policy: {}
  safe_preferences: {}
  risky_pending_policy: []
  future_ranking_experiments: []
  # Composite fitness is not optimized against (optimization_enabled: false);
  # its stated role is a shadow-test evaluation metric only.
  composite_fitness:
    optimization_enabled: false
    current_generation_role: shadow_test_evaluation_metric_only
    future_experiment_link: Composite Fitness future work
  # NOTE(review): looks like a list of staged work items; the last two are
  # labelled "future work" — confirm how the consumer uses this list.
  staged_issues:
    - Feed UX
    - Behavior Raw+Reward
    - NL policy control
    - Delivery policy
    - optional Multimodal
    - Composite Fitness future work
    - Ranking Experimentation future work