From 03ce9f884d52abcd8d29ded4989bc946ce561529 Mon Sep 17 00:00:00 2001
From: Nawfal <nawfal1022@gmail.com>
Date: Sat, 30 May 2026 15:53:15 +0200
Subject: [PATCH 1/3] Add Group V project card

Add Group V project card for action/event-focused captioning.
---
 index.html | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)
diff --git a/index.html b/index.html
index 7ceb546..300f3e6 100644
--- a/index.html
+++ b/index.html
@@ -110,6 +110,32 @@ <h3>Semantic Change Maps from Everyday Walks</h3>
             </div>
           </article>
 
+          
+          
+          <article class="project-card">
+            <div class="teaser" role="img" aria-label="AI image captioning system turning video frames into short action labels.">
+              <img src="assets/group_V.png" alt="Group V image captioning preview" style="position:absolute; inset:0; width:100%; height:100%; object-fit:cover; z-index:2;">
+              <span class="teaser-label" style="z-index:3;">Group V</span>
+            </div>
+            <div class="project-content">
+              <p class="project-meta">Video understanding, vision-language models, action captioning</p>
+              <h3>Action/Event-Focused Captioning: A Three-Model Comparison</h3>
+              <p class="project-abstract">
+                This project explores how pretrained image-captioning models can be adapted to produce short action-focused captions for video activity timelines. Instead of generating long descriptive captions, we fine-tune BLIP, ViT-GPT2, and Microsoft GIT on COCO action captions so that the models output compact labels such as “person walking” or “coffee being poured.”
+                <br><br>
+                For video inference, frames are sampled over time, captioned by the fine-tuned models, and de-duplicated into a simple activity timeline. The project compares original and fine-tuned models using BLEU-1, BLEU-2, METEOR, and ROUGE-L, and analyzes whether architecture choice still matters after all models are adapted to the same action-caption task.
+              </p>
+              <label class="project-toggle-label">
+                <input class="project-toggle" type="checkbox" aria-label="Toggle full project pitch">
+                <span class="project-toggle-more">Read more</span>
+                <span class="project-toggle-less">Show less</span>
+              </label>
+            </div>
+          </article>
+
+
+
+          
           <article class="project-card">
             <div class="teaser" role="img" aria-label="Two hands playing rock-paper-scissors, but one holds a banana instead of a valid sign, illustrating anomaly detection.">
               <img src="assets/group_J.png" alt="" style="position:absolute; inset:0; width:100%; height:100%; object-fit:cover; z-index:2;">

From 6d0721a7e7bc23f0ae7a11501486ced5794eb2df Mon Sep 17 00:00:00 2001
From: Nawfal <nawfal1022@gmail.com>
Date: Sat, 30 May 2026 16:10:49 +0200
Subject: [PATCH 2/3] Add Group V teaser image


From d33f9d506a3b28f4108e261fc38766e98c2a5152 Mon Sep 17 00:00:00 2001
From: nihermann <hermann.nicolai@googlemail.com>
Date: Mon, 1 Jun 2026 14:38:30 +0200
Subject: [PATCH 3/3] Move Group V card down and remove stray blank lines

---
 index.html | 93 ++++++++++++------------------------------------------
 1 file changed, 21 insertions(+), 72 deletions(-)

diff --git a/index.html b/index.html
index 300f3e6..7c25518 100644
--- a/index.html
+++ b/index.html
@@ -110,32 +110,6 @@ <h3>Semantic Change Maps from Everyday Walks</h3>
             </div>
           </article>
 
-          
-          
-          <article class="project-card">
-            <div class="teaser" role="img" aria-label="AI image captioning system turning video frames into short action labels.">
-              <img src="assets/group_V.png" alt="Group V image captioning preview" style="position:absolute; inset:0; width:100%; height:100%; object-fit:cover; z-index:2;">
-              <span class="teaser-label" style="z-index:3;">Group V</span>
-            </div>
-            <div class="project-content">
-              <p class="project-meta">Video understanding, vision-language models, action captioning</p>
-              <h3>Action/Event-Focused Captioning: A Three-Model Comparison</h3>
-              <p class="project-abstract">
-                This project explores how pretrained image-captioning models can be adapted to produce short action-focused captions for video activity timelines. Instead of generating long descriptive captions, we fine-tune BLIP, ViT-GPT2, and Microsoft GIT on COCO action captions so that the models output compact labels such as “person walking” or “coffee being poured.”
-                <br><br>
-                For video inference, frames are sampled over time, captioned by the fine-tuned models, and de-duplicated into a simple activity timeline. The project compares original and fine-tuned models using BLEU-1, BLEU-2, METEOR, and ROUGE-L, and analyzes whether architecture choice still matters after all models are adapted to the same action-caption task.
-              </p>
-              <label class="project-toggle-label">
-                <input class="project-toggle" type="checkbox" aria-label="Toggle full project pitch">
-                <span class="project-toggle-more">Read more</span>
-                <span class="project-toggle-less">Show less</span>
-              </label>
-            </div>
-          </article>
-
-
-
-          
           <article class="project-card">
             <div class="teaser" role="img" aria-label="Two hands playing rock-paper-scissors, but one holds a banana instead of a valid sign, illustrating anomaly detection.">
               <img src="assets/group_J.png" alt="" style="position:absolute; inset:0; width:100%; height:100%; object-fit:cover; z-index:2;">
@@ -327,51 +301,6 @@ <h3>Open-Vocabulary Object Tracking with Grounding DINO, SAM 2 and CLIP</h3>
             </div>
           </article>
 
-
-
-
-
-
-
-
-
-          
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-          
-
           <article class="project-card">
             <div class="teaser" role="img" aria-label="Image retrieval with CLIP.">
               <img src="assets/group_Q.png" alt="Image retrieval preview" style="position:absolute; inset:0; width:100%; height:100%; object-fit:cover; z-index:2;">
@@ -461,7 +390,27 @@ <h3>From Raw Footage to Recipe: Extracting Cooking Steps from Egocentric Video</
               </label>
             </div>
           </article>
-
+          <!-- Group V project card: action/event-focused captioning -->
+          <article class="project-card">
+            <div class="teaser" role="img" aria-label="AI image captioning system turning video frames into short action labels.">
+              <img src="assets/group_V.png" alt="Group V image captioning preview" style="position:absolute; inset:0; width:100%; height:100%; object-fit:cover; z-index:2;">
+              <span class="teaser-label" style="z-index:3;">Group V</span>
+            </div>
+            <div class="project-content">
+              <p class="project-meta">Video understanding, vision-language models, action captioning</p>
+              <h3>Action/Event-Focused Captioning: A Three-Model Comparison</h3>
+              <p class="project-abstract">
+                This project explores how pretrained image-captioning models can be adapted to produce short action-focused captions for video activity timelines. Instead of generating long descriptive captions, we fine-tune BLIP, ViT-GPT2, and Microsoft GIT on COCO action captions so that the models output compact labels such as “person walking” or “coffee being poured.”
+                <br><br>
+                For video inference, frames are sampled over time, captioned by the fine-tuned models, and de-duplicated into a simple activity timeline. The project compares original and fine-tuned models using BLEU-1, BLEU-2, METEOR, and ROUGE-L, and analyzes whether architecture choice still matters after all models are adapted to the same action-caption task.
+              </p>
+              <label class="project-toggle-label">
+                <input class="project-toggle" type="checkbox" aria-label="Toggle full project pitch">
+                <span class="project-toggle-more">Read more</span>
+                <span class="project-toggle-less">Show less</span>
+              </label>
+            </div>
+          </article>
 
 
           <article class="project-card add-project-card">