From 0fb42e880252d5c21bab0370ea4d9f4cea4fe592 Mon Sep 17 00:00:00 2001
From: ravencore06 <srinidhisadhanala@gmail.com>
Date: Sun, 3 May 2026 07:18:32 +0530
Subject: [PATCH 1/6] Basic voice AI model added

---
 __pycache__/asr.cpython-313.pyc | Bin 0 -> 1287 bytes
 __pycache__/llm.cpython-313.pyc | Bin 0 -> 2274 bytes
 __pycache__/tts.cpython-313.pyc | Bin 0 -> 675 bytes
 asr.py                          |  21 ++++++++++++++++
 llm.py                          |  41 ++++++++++++++++++++++++++++++++
 main.py                         |  25 +++++++++++++++++++
 requirements.txt                |   5 ++++
 tts.py                          |  13 ++++++++++
 8 files changed, 105 insertions(+)
 create mode 100644 __pycache__/asr.cpython-313.pyc
 create mode 100644 __pycache__/llm.cpython-313.pyc
 create mode 100644 __pycache__/tts.cpython-313.pyc
 create mode 100644 asr.py
 create mode 100644 llm.py
 create mode 100644 main.py
 create mode 100644 requirements.txt
 create mode 100644 tts.py

diff --git a/__pycache__/asr.cpython-313.pyc b/__pycache__/asr.cpython-313.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..814af6e9877c1bf683bb3b815732b22a76099edf
GIT binary patch
literal 1287
zcmZuwTW=dh6rR1sn{|9?9H5Pq?zlo#-4?4=6N!{dlo(oZi;%Z&1+`=?yJKftXU5Fz
z0`*fL0|FI+5P9UWh(Ev+5@~;cqee=*^+P0{c#CAzO7w-Z>kEs@v3%ytxqaU`+8Jar
z1_Jqb@rn0DM(B^|G8VYUGMI(q2V@~jdJl2QlIIbZTT)5+GYYReaneXl!KuK1a9@gQ
zu1{jsakq&Fp%Z9nbfd{kHIXth|IL&Um9D488J4sJIFVpfL)Gb_ozykpYbnce98V`B
zL%TNVKxLQ^eoG^!7#)v#W}dIPCXZ36lHj7&lq|Y1i2<VWGAg38=+vl%3tXGP#00h&
zt)F{CF;P=0CTGt+kCs9<cri*KX=D7TS1Vbc|KIwf2vIdrKUGuz%hZIqsk*ARMrTuM
zq9MCdYQ38=-uHx0s7G7na=A5xI9Mr3q3#CU_B}?s#z!o`!uH&YxO>WCoZmW&-@y(G
z+AgNd#{qQ-7rsqh>~9ck2d>A;-L=Xjfs;=IB7Dq=2-?2DP0lv)WyV@<f~^iA&IYa$
zhqb5|;bB4eXP!e|!=>p^7CbaY!7c&&1JB{Cv%x3{wGQ{FA0D;c8-ef}O~xDc<|iJZ
zeuFYkkZ`IU&lcwSuzQy@S}|&Pq(*O2_Bp+3w*&Gv=ZuFl)gjCHR?<RaQpCe4!2<38
zVmw=@`s52gl!(w^UpSI*xpJ{y1LC4CfRtB%i#fq2FcJ02;^ixK*vo3C!@&WAeaP7j
z;`nu2@N(x?m~rfmA8^u$oAWv789g)c8>ri;f28o^!gmYbo44QXPp9uJe6{ekxqYdx
zXL{M=d%Ag`&+hAIcJ(uNFa2uuB5U<wetth++|3t%I{6@9+R~m*BjecNjG@gP<c{s<
zPVeSU-(7l;JHMqumv4>j#qE>5Y;jLN@i14|N<V!8vcEB({q-W6$?m7k-LyGCvNjhZ
zzPhI`Kgt#Q>CBy#FIOi1e6?p?=~ZjJ?A1N}T0d8KcBnzg-=YK~{PN10to&ZYYX!6;
z8*BO0PQk30%Fbc|^u1KJGOyg5*T60*;jD<ihQ?_1(bvGE9mh-g84bWhyZ|cHU*BYI
f&?awkcmN0<9}>}PK2#)0dW_70IxEfbm%#H66T=_S

literal 0
HcmV?d00001

diff --git a/__pycache__/llm.cpython-313.pyc b/__pycache__/llm.cpython-313.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..77cd0cf4cb7862f1bf2b1737f043c953c840e44c
GIT binary patch
literal 2274
zcma(SOKclObaw4^9oKeBK5gva;-qPnJv2pW`bnr#npCaSHlbwWDm12}tvzv;tarCF
zYiPoyCzJyhBqG!bspSORxPp*?6W86g$ktGaN+3ioC~Ei|;mz7kLXt{6*|YD>oA>7Z
z=DqbqB8C9whyF5_A_)B~h$e&^$ok8GtRM|((q(i>qS7Uq%2`3nmqRqvg?f<|N+T_N
zLas~Da7k8`RY9mS?>JX+?FqX`%nLRh*DFjnuT1cS*qyc)h-EAhI$5Yo)>o-PtY-mP
zL3u<a4N+N>Xh@UOh=#>=RMEmTQc~0iZy&d<w+UssYuJ`<o+}Z{T@}_Bnwo^dP~*S;
zAs{Pgc7vx$CmMJb%{E*>XsYSHkZk}sDmTyOkSR0<FdJ=R|3gY?p4AZOfS}i*gw)XF
z(#b2fUNo#y*0c+{nJxUsv20mHG24IM&`tZ|)#;<Gted7fFfg#xRW=ILX7-$WY#VnE
zR~%|sE*1Xp);VgIu|tVVb;BY>-r_cMK!rP83GU$g3iG;)=MCoCbP*dxrb;}_h&jhZ
zsIq)`d?cS^5c50(<wpLzU8s~H>?}V%ba6WG&XaQv++w*#fn25bTcqITO|v}UEb=JE
zAmL)nU)&{c15K_76B~x*4tf$zc*zToqZfZk9jT^HK1`kTPQCspHMJamKl!9nt#%GS
z>>Tz+rXO|YU@KP3_AQ5hh-TJ77!@`W3Pc?K!prp}?5;rgr-0ZFQyXdB<jm~b_2e!k
zaukF#Nt1_VO9uTL>D|<Ivj?Fq1rxl#O|HOtg=M=)=Hz3t7Sh7Q@@rB~wnAI92+y_w
zXbDXvw`~HsMQYzpuBL$HVYt6-CvU2A=Uy(93u}?*L~{Sm^V%LQs<mh_ZLby|ZrS>X
zo}(kRQ^)SnfqsMr<=uVOLZX}me@G6nM6gD+BzUXjls=@kPIB3Cq0E!;I4ab_F0q(R
z+3G(4upKHIW%wvXchMonZ%`zwqEMz+2VpJE1@QaX2J-7!HOiB^3pzwk!eyN;@K{;D
zjZI>e+<6{%^db(P9y~7IVcD1xhrpAwN>ndn22GK-6m87(vSSjixVD2A>K}*ibxGMF
zR0lH&74;2vwX+4o!OS5AXbyGFOij?ez~vEQ6`*tQ7GkqHizdWAXr+aD9xCWA58D=D
zTw(LNLwMM<!46SVJPOTJTt#?%Lt2;|DH5SmjZjfyJXV2b0PotVxV+6G@8C^ZbPt}m
zX}fi{AZd4?niA&N79&j5dNu&sDmX7RFNLVokMP60^@<(Ggvks+i@JlJW_s@5xO=0T
z8T2!QUyOX8{4QC2b;gI^@fpu3dznEmV|mf^S_DP`ma5lnU;N(vSh?SFujRo|_0Vbm
z&}nbviq|{gX}O18*VYgklc(k7#2-<Ve(8StUi!gAweOVQck1iPH%nhG`Ke3G$*0-A
zPh+3Ns@day_INFxs&%Wi1KqV$=KjIE2dk+eKQ&bA>8<Tg*V;O2ZTo9o-D~ZrXJ`$@
zdSc7r$I;AM8g+DiH2T44wY}eO@Bgf$s*d{V=vTMCJz5>h`C~cn+KfN;hCg=QJA2(%
zXTAKJzIwxRZ+q?iUi;#5wAOlHxs{6NtA>I+2Z;<SVN%^A+OMUT3AEoB#?b65roiJE
z->T?lgVT!foI#mu!ia#<#8fm2Dhi!<V$56^v;_>+O^ursmoSV!N5O?b_Nd5au!#1^
zL`vR6&tye8{AV(ybkz@vGpfSlForBPXVWr_l3)M@V~QSWs-R*5Z0hFGGj)?k=_|kx
dvxteNv=)*i=_hpNM-=}%+#_{8L$Cy5{{r1J8dm@S

literal 0
HcmV?d00001

diff --git a/__pycache__/tts.cpython-313.pyc b/__pycache__/tts.cpython-313.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..8a04799021a776db37ef1cebca8f0b8265d415db
GIT binary patch
literal 675
zcmZ8eL2DC16n?WiO>EXgt!XVnVG%s+p<db?EFy#k#VSR*E+SnlaXU@7*vT?8k*=qB
zGU&lv3iamIpA(F<j7NLvt+3$PHzAv9AMBg=&G)_U&CX=yatSc@mjA@h2*7Ux^03B5
zHiN|>_~4VLAi%e7gCH&9*=I(u9%0`krm(W{%T6#I!sdk7$lWG80C%7^Mw|p>!}MMX
zpVY?g+(Je15{=L&f#q9^_!qVD1Q#}^bDeJoP=O^YcX0t-oSeN_1Ncu-zF_hyKdB2X
z@aVdxqN&(f73uf1{$$1Vin^GjO38yIO=BJ_?X;MBF4_qbD%Fns2TUa*=1SYLnd*}0
z@H*dq*^HHEYpU3R(gns_F=v9JyEM1yx~{&zx)KJG2^mW4j>ASf>g+SFWVpKg_(iC8
zSv^SvZp0D8v3o31As**mlIn_a-BRr>nJ^aZitA`;Y~?FVT{taQzl5K{&#$|W&I@pL
z=Ga*nI15KRKb*VW#tAJQ)42hi>%Tiv-`;#phtxZz)4fK&+As7LhjbnlAJ;yt_0*77
zN9x%Do&8B~WoQnbB?`EVMt%oz!_?O>=_?QR+uNPD?4h`c&^)p<w|;IDLViIdqa`vw
GlK%x_;)*E%

literal 0
HcmV?d00001

diff --git a/asr.py b/asr.py
new file mode 100644
index 0000000..a7fcb05
--- /dev/null
+++ b/asr.py
@@ -0,0 +1,21 @@
+import speech_recognition as sr
+
+def capture_audio():
+    """Record from the microphone and return Google's transcription, or None on failure."""
+    listener = sr.Recognizer()
+    with sr.Microphone() as mic:
+        print("\nListening...")
+        # Calibrate for ambient noise before recording to reduce background issues.
+        listener.adjust_for_ambient_noise(mic, duration=0.5)
+        recording = listener.listen(mic)
+
+    try:
+        transcript = listener.recognize_google(recording)
+    except sr.UnknownValueError:
+        print("Sorry, I could not understand the audio.")
+        return None
+    except sr.RequestError as e:
+        print(f"Could not request results from Google Speech Recognition service; {e}")
+        return None
+    print(f"You said: {transcript}")
+    return transcript
diff --git a/llm.py b/llm.py
new file mode 100644
index 0000000..85acfe9
--- /dev/null
+++ b/llm.py
@@ -0,0 +1,41 @@
+from transformers import AutoModelForCausalLM, AutoTokenizer
+import torch
+
+class ConversationalAgent:
+    """Chat agent backed by a locally loaded DialoGPT-small model."""
+
+    def __init__(self):
+        """Load the tokenizer and model and start with no chat history."""
+        print("Loading local conversational model (DialoGPT-small)...")
+        self.tokenizer = AutoTokenizer.from_pretrained("microsoft/DialoGPT-small")
+        self.model = AutoModelForCausalLM.from_pretrained("microsoft/DialoGPT-small")
+        # Token ids of the conversation so far; None until the first reply.
+        self.chat_history_ids = None
+
+    def generate_response(self, user_input):
+        """Return the model's reply to ``user_input`` and update the history.
+
+        The prompt is the last 100 history tokens (if any) followed by the
+        new input terminated with the tokenizer's EOS token.
+        """
+        turn_ids = self.tokenizer.encode(
+            user_input + self.tokenizer.eos_token, return_tensors='pt'
+        )
+        if self.chat_history_ids is None:
+            prompt_ids = turn_ids
+        else:
+            # Only the most recent 100 history tokens are kept as context.
+            recent_history = self.chat_history_ids[:, -100:]
+            prompt_ids = torch.cat([recent_history, turn_ids], dim=-1)
+
+        generation_kwargs = dict(
+            attention_mask=torch.ones(prompt_ids.shape, dtype=torch.long),
+            max_length=1000, pad_token_id=self.tokenizer.eos_token_id,
+            no_repeat_ngram_size=3, do_sample=True,
+            top_k=50, top_p=0.95, temperature=0.7,
+        )
+        self.chat_history_ids = self.model.generate(prompt_ids, **generation_kwargs)
+
+        # Everything generated after the prompt is the reply.
+        reply_ids = self.chat_history_ids[:, prompt_ids.shape[-1]:][0]
+        return self.tokenizer.decode(reply_ids, skip_special_tokens=True)
diff --git a/main.py b/main.py
new file mode 100644
index 0000000..49e3999
--- /dev/null
+++ b/main.py
@@ -0,0 +1,25 @@
+from asr import capture_audio
+from llm import ConversationalAgent
+from tts import text_to_speech
+
+def main():
+    """Run the listen/respond/speak loop until the user says an exit word."""
+    print("=====================================================")
+    print("Initializing Voice-Based Conversational AI System...")
+    print("=====================================================")
+    agent = ConversationalAgent()
+    print("\nSystem ready! Speak into your microphone.")
+    print("Say 'exit', 'quit', or 'stop' to end the conversation.")
+
+    while True:
+        heard = capture_audio()
+        if not heard:
+            continue  # nothing was recognized; listen again
+        if heard.lower() in ['exit', 'quit', 'stop']:
+            text_to_speech("Goodbye!")
+            break
+        text_to_speech(agent.generate_response(heard))
+
+
+if __name__ == "__main__":
+    main()
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..fd578bf
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,5 @@
+SpeechRecognition
+PyAudio==0.2.14
+transformers==4.38.2
+torch
+pyttsx3==2.90
diff --git a/tts.py b/tts.py
new file mode 100644
index 0000000..017e83f
--- /dev/null
+++ b/tts.py
@@ -0,0 +1,13 @@
+import pyttsx3
+
+def text_to_speech(text):
+    """Print ``text`` as the AI's line and speak it with the pyttsx3 engine."""
+    tts_engine = pyttsx3.init()
+
+    # Slightly slower than the engine's current rate, for clarity.
+    current_rate = tts_engine.getProperty('rate')
+    tts_engine.setProperty('rate', current_rate - 20)
+
+    print(f"AI: {text}")
+    tts_engine.say(text)
+    tts_engine.runAndWait()

From 12cc486399230f1e534d8f9a19699aa6e8e42def Mon Sep 17 00:00:00 2001
From: ravencore06 <srinidhisadhanala@gmail.com>
Date: Sat, 9 May 2026 13:14:45 +0530
Subject: [PATCH 2/6]  Initial Contribution

---
 README.md                       | 14 +++++++++++++-
 vlm_evaluation/requirements.txt |  7 +++++++
 2 files changed, 20 insertions(+), 1 deletion(-)
 create mode 100644 vlm_evaluation/requirements.txt

diff --git a/README.md b/README.md
index 04e1239..75b9c74 100644
--- a/README.md
+++ b/README.md
@@ -1 +1,13 @@
-# C4GT_2026
\ No newline at end of file
+# theApprenticeProject (C4GT 2026)
+
+This repository contains two distinct AI initiatives developed for The Apprentice Project:
+
+## 1. Voice-Based Conversational AI System
+A voice-based AI system that captures audio, generates conversational responses using an LLM, and converts the responses back to speech.
+- **Key Files**: `asr.py`, `llm.py`, `tts.py`, `main.py`
+- **Dependencies**: See `./requirements.txt`
+
+## 2. VLM Evaluation Pipeline
+A cost-efficient Vision Language Model (VLM) pipeline designed to evaluate student artifacts (images/videos) against 21st-century skills rubrics.
+- **Key Directory**: `vlm_evaluation/`
+- **Dependencies**: See `vlm_evaluation/requirements.txt`
\ No newline at end of file
diff --git a/vlm_evaluation/requirements.txt b/vlm_evaluation/requirements.txt
new file mode 100644
index 0000000..7cd0725
--- /dev/null
+++ b/vlm_evaluation/requirements.txt
@@ -0,0 +1,7 @@
+transformers>=4.38.2
+torch
+peft
+bitsandbytes
+Pillow
+accelerate
+datasets

From eb5b74e190438680d32a29ea21b270e0efa47a13 Mon Sep 17 00:00:00 2001
From: ravencore06 <srinidhisadhanala@gmail.com>
Date: Thu, 14 May 2026 22:40:19 +0530
Subject: [PATCH 3/6]  VLM Evaluation Pipeline

---
 .../__pycache__/dataset.cpython-313.pyc       | Bin 0 -> 2739 bytes
 .../__pycache__/evaluate.cpython-313.pyc      | Bin 0 -> 7956 bytes
 .../__pycache__/prompts.cpython-313.pyc       | Bin 0 -> 1043 bytes
 vlm_evaluation/dataset.py                     |  49 +++++
 vlm_evaluation/evaluate.py                    | 171 ++++++++++++++++++
 vlm_evaluation/generate_sample_data.py        |  57 ++++++
 vlm_evaluation/prompts.py                     |  21 +++
 vlm_evaluation/run_benchmark.ps1              |  29 +++
 vlm_evaluation/sample_data/sample_drawing.jpg | Bin 0 -> 3177 bytes
 vlm_evaluation/sample_data/sample_model.jpg   | Bin 0 -> 3177 bytes
 vlm_evaluation/sample_data/sample_origami.jpg | Bin 0 -> 3177 bytes
 vlm_evaluation/sample_dataset.json            |  23 +++
 12 files changed, 350 insertions(+)
 create mode 100644 vlm_evaluation/__pycache__/dataset.cpython-313.pyc
 create mode 100644 vlm_evaluation/__pycache__/evaluate.cpython-313.pyc
 create mode 100644 vlm_evaluation/__pycache__/prompts.cpython-313.pyc
 create mode 100644 vlm_evaluation/dataset.py
 create mode 100644 vlm_evaluation/evaluate.py
 create mode 100644 vlm_evaluation/generate_sample_data.py
 create mode 100644 vlm_evaluation/prompts.py
 create mode 100644 vlm_evaluation/run_benchmark.ps1
 create mode 100644 vlm_evaluation/sample_data/sample_drawing.jpg
 create mode 100644 vlm_evaluation/sample_data/sample_model.jpg
 create mode 100644 vlm_evaluation/sample_data/sample_origami.jpg
 create mode 100644 vlm_evaluation/sample_dataset.json

diff --git a/vlm_evaluation/__pycache__/dataset.cpython-313.pyc b/vlm_evaluation/__pycache__/dataset.cpython-313.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..deda7c917213902676ddfdcd6ba6009d329f7bcb
GIT binary patch
literal 2739
zcmbVOTWl2989sB{o7ecZHnwrh!7**Nv6fQoP`0gA+n`{B4BG9cw7b=GynA*TYj$Qi
zbCzIN1*A%qQBwg`kV#qzl9#$~Jmx7<nwRD&iy*?9xKGJT-rSgqDt+ny&+gd9NUhX=
z<^Np%%Q^F3zT>U-_80+*exWV?CPc^=xClsXO}55ia))F|Rysp?bCkFQYx#^kr*MUm
zelkL`${@+A6LQmvs}(7!eT9qpq~vL{Rina+O<9w-6v<W!)^|vOpi3@iC9Y)Uf;ujv
z!$p;AS#^-`P*wvT&W3oT5>7_Ejwvq8vQZQ#4Pm%Ue5HXGFIs|?UNgkSV&EPRgD)cr
z{A~?Oc+`bX3wM1wLUP1{-6rLOZzdVE*+ZG=o()*$q=}aGD=ndMl#qN|pqZ1hQV<iF
z*m6D|NHY6^+0HmZa&q8r_r0<*{sKRgROEbDV3k#J(msg%F(t61WS3Oi*{#Tj$QTLk
zS%Q0)Wl9bb3crf{Bjq3|OUaP&1_)xawkb@*GMAZ4#YILjun=ZyIYx=`RBXz1>s63#
z2b|U%(-xrYP=n58&&|`aX)#)KY+;zTX;&z_Y*=+en2t@WOc-dHj^$$%=vO|+*@%gS
z;-_#VO-E4ah`;3?aYenvY;iQTSgVYT?O3$-b+d#PN56M$x6o*ni7u9E4CUvzSuv{S
zZXI4<;AYV`PSbgZvU1tsVvJ6t>3PP>tSD&Nu}UsA>=LbU=E5!f@7V36!kxNZ(gm-J
zi@IBMIG#4{-}2XEG?hxlei8E|KEK@eQ-fnX()462rhcH7N2%@LLEt4tGfdQdEy}7j
zu@r<qwLEd&Vvq=0<jfEZQe(Q!Ks%gPn9Vru8q_taHHbcj53-g@MtK<O#Zz&^lO5Mn
zu}QoTyJETmR!)uCp1SBdz%j+1R^yP)q~fVAv&v0tsqA7GL6*F;GwDLkW!x>epl23N
zIz=c4$bVsG{Pmv{pkk(K5Gh-jMRuM$ix9)YWvi<1)IcG)TW!gU=sGwNy6*MwY3o!V
z!FF`3z^;?cAu{;#Tl>}%eJko)(J$NDZyvvK{O071$-BuXZKDs{MmM^8Z<+Ud9(NsH
z>)IHi_h#Rny>I;S>@D@}=re_Mf2+}T_-Tl=cQ+E>Sv#@zYGdMOjqaa6j^{VxU5)P9
z&*F2>qM-Fz*Wqtm?BfrIrY4n-CS%j_a1mN#n=lsmw>1Xr4#{uJawM}GeOd-VAC8iw
zG@q0`#bP!`^i4{<8wMBcDNg|R!nzJph{ygH<KYX2(V!F9b+XaXeQSL6#O)KmyV{7n
zv;}+QIq<{8``7#%9B>EVE`ut2C?_F)+Hy|Lpza<52y1<utqha@10+ete`cO{yX{l`
zA0FGEx5HI#8|=2>>>$AIe~0gm7yVqA2|)Fnlv8Xik1_QrgE`ql0MFZSn(5smge5?9
z5T{Tsl#{bsN**PLNWL%74HM%fh;ukU5bOYaD+u4Y@IG=(4eTjAvv%kp^6*r6{@cy5
zER{(B)AL?z2P!=!^ZN91;uPlq1K7Wax4xJz%V`SLrui!`5+HA~-LE}QsW8E@$~itv
zUbtj(+o-Z;!)vB%EC4zz0<?3i3yyD=7Y0zj%((EPr>+!P4H2E=bCituh3qM2>58YC
zf>k+uEb$@>075!=_BzUS%hGr5%^_*Tlb9QU5pcz)W(&0GI$RB%KpPx*u?@C`hrrbR
z0N8c1-qCY&>BiFCo*P$J)b-wh)eE;T+_fI{zPb|IRKwAMFZU0uzIpr2)oZt}-4}np
z`rg$?`(Il*y>WW()9@$ZPuo9fZ@lr#C#Q{vr;R@?+^f7(xo5p&t$qLT@Q)jnODpF#
z5(BGqf0(;_?NQ>$lf?0diQ^w=j}ntBr#9oH@4$Nh&_6V#Gqy>TXzX84qoi|Si^$Od
z1eL@qYlCa?2h$HOJt#K1(~sjnSWoo-{?xxWHBk7*?T5?1KixB(CVx%GW)3MIt35MA
z%Ev<*%#*U8CqEza$tXf4<_BUD)Ag!Ts#`dZ>-wd-VYMV3x?VQ9D=gDywgY0t6`W(e
zax6QJc;)r#x{I$^Q8$F(=0aUCSJ(eS_yJTw6ygX8KEsJS2<*3HQ|{BwNcUfUN^t!8
zyOK8i&rqipYnq{AQsdZYeo=YK`PsA0%JH4yf08=~c#$7AzhIKT1_EpyKNg#cBuSr>
Vw$DlQ@1*ycIwr-wCcu2l{{UqiUm*Yh

literal 0
HcmV?d00001

diff --git a/vlm_evaluation/__pycache__/evaluate.cpython-313.pyc b/vlm_evaluation/__pycache__/evaluate.cpython-313.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..9b7b3414831ddaa86bf94de9817b9872e95a0b65
GIT binary patch
literal 7956
zcmbt3TWlLwcJtzU_}0smL{SebrfpG{ABm+zwiQ`^NT%fFh*<|xK#a(dOq&|gcZPlt
z>xHn}1?V(eOW3BClPoL)i;V&Vs0tLV+n;99Vv(X)fHakgJ5jeT)<6D}V;8}0KiYGL
z9FlfqZ-QQ%Gk5Mi=iGDdJ@>ry#A-DY(EjK5x(oev1o3<Pp*5=9dGbr>yiafhM~)Gw
z^d=Fhep852-!!7tH-i{>Q)9YO9n$$QEIp<lH6X*N5gA8K$fS-jW9Cr{vZ#IC7&~f3
z)=?X>!MEN+AUnr$1`lz8L=Mgf-%id1PZwu~r+d1NvkaUg2;N8#)9w!9P?<>|F-Ojk
ze(NLrJ?<wJM!poA_tQ%2nFN2CKaoVkNii0e<4KWEoD@U>@wl&8hthKLEJ{WNNlGHc
z^=4d_hQ!#L*JMEgjM?}c(6SF9IX=rr<>S1}OM>jD6f5^OH#riFoDEF`&rT|yxe`es
zB3$MZX_a;)g_83rIa=mWeHq|M!|%yq67T6$WiF)cc4dShI$*~+s-F~zFcH$#Oq9DE
zJ*C%1Ifm15`hLduB3MHWtdTSI(=UWHa~7b+z68R`**H7rcnO5FW;NGKS94FnNtPN{
zSAn!>W7W5I(p`kVemv9o)|sGhq_Wq(v+<OWh>HT8mL(=gQ4~+%^UYX#dSY;BkrXeV
zpUl*qg_loG`tTSWq;Hg)7zZU`<ZCHGF(TnoI*x>x!kiZpsSMlGGoOqJiHOM03z?<_
zR-~Tuv%RHGU*J&BYcuferw|KlDu$Ra%cm1^#@y3$Db0&=JR>M(NlqdmBBQhbTB8RY
z0<Mr9;4{{q9x)lIKw3(WvwT7VR<vOt?!?3y5K6`<$+VnG%c?vX76}q8C<QJ^AgTk{
z<*!CW;YvhKUKB*>5dli(*DFRI&82t*WNkxeE<G=Z@>%?e6qb+0A~36tDYPUbg^r7|
zVphTNj0791SHF-1)Civv93Ges1F|$Np|}{IKAw!K=+gA?!IP8I@_At>l|ryaJSxD&
zx*$a5>C1`vnu{@Az9T{)bqzHGQT5?~j`S%!7Klx2{f4#czO^gYH=4JOEu1Ja<_)GX
z&omZ|^&btdcI6I@ZPA3T=`lg+^jmsDXDLlD+dk!T1Cu!n8?GQNv#Zme9aINTRR`^B
z*3U-O!HJr|PowvasDq=G!Cceszjfb>s-tffEzS)~OWx8_uy}Kf_X+SzL~HME<%a(n
zX5J^l<Sq;XjR#4m@D{3#Bd01@Ax0amzVXfH=zd0|CcwgqRP{cED)*%d-2t~gOmWPg
zps53T^ptAxG46{f9V&AvGUpB+q~N~k$0rq9oIU7g6gH9MW0ANRIXDxSm4+E{M(stS
zVCvGc5Q(WbX-8!|Ex}kaJ(Cbhny$DjQ>uzbRCtC5D-8G0T~_;&{fw&iS24$g%b-h<
zc|N6B;5J3imsauX5jKB{P90MWv%s3%_bS4>sn}=1_<{u%WW<A6h#_o0{W`^5z0t@G
zL#SINp^hphu-GL>AYpJHZm1Olm{qY=DAWrBcu>NYeS!F$!M^F;SKQIM*}3cE!MlUS
z?%hS-&c`NQn>lZ6-m(#GojGIkL)O2+_TOjwi$+(mrMKwbv01m{R&Xf@fL8s&$WxYZ
zdN&+B_Z>Z}7YdI4h0{fgBWG#OG0n&e>qIM>10R?<)r<TcbiR!j3D<g>ZM5wWrUp$U
zYxH$5=quJ{wS~!$N$XeN9Hr@s_7*9Q3R4m_NDT~>yfjCvJ>c03I@(2mr-pxiW;|o#
zh9^QJ1Jly(UDL5W{zq8EN9A*(kf7)#frryp%%@L|1K{uwH-bC>qv+;PGM!TGws2Ke
zbo0CnU^*!ZrdGnDRL;!`^9YP?M4dyx60skC(g}EgondV^PrrLQ*YL_}-KXfUuYYpA
zzz!~q{La8`I^2sRx5k#nR>+@?7aW~gUD3iW>VA53+4<hlqPr=32J8>p{9hJ(>2xZ0
z8R+8ssh$o3f#7s1Cl@BGy9BqDRO1-H+(&c~Q{@vNAbRo;Q+OBDc8)w$rtPl8Iu$B(
z4Ho^av6vbx=G$V0sUGmjqOJy4=K-s;4OjXwRn?nw`!OD;?*WN8gC<MO{F^VI59>JN
zHdr&;VR5E_4y=`V9N{f{=EmUNJpjKahvCr)ItZ2-cs~VBCk?<U*EttHjTrQU;LJJV
zGV#97M+992VFBnl@-=#v{DeY%fIj{S@d>R^fqe>@QpmYy70&bqjs&l!&=I9Wvx)Oy
zgyK;t@J1pT1@C?&P=PrH<Rq-pk3bu9_EY!0gG;eR{xj#V8g7wGBz*os!s_%Ri-%k?
zdB|sE!aAbLbQU@_GPP(jRW<LNs|T#UrZ*gcZ=@<~Q;n=(W~#EHK_L1GsnGn4gpNZ0
z88Oi?jG*K27{XJX@K7aiP2O)mcDrub`7gttlmDvw=h0k<`_;s)zNJ1GFN@OHP5#`}
z{Q^;r0i_yIASEds&x=GUA5%k_bpjugpBzW{e!q<EH(_3ajViHi9l5{r`0dH%@4XjY
zY+u@W^FsCl4E#Z20N_V0-e-Bmx}|P=06I0if|@6tI1~ti(d5W7XVzypqsCPYLPr1<
zBD-VoLh24;qGexG-CMlL5@=wFNk{(&V~dw=IhULtjFq=iUr)YgswW@CFbpQ~2y+Zx
z&MGHmJ`s`l`BXxX6q|4r+dw`VO@lGKrZ}&}<@4af`>I3kd4U%raM;NNcH;s<kPVf?
zKuDDYaQ$>b5FtiP&nt8$o>FvRgT<KNpjg1m#^P#VD=AiNn#C9nc;)jF_RYYEqEi`A
zjM};iJ<o%ESL4c<gq;jnY}`uT*8behLGJ*>>+qBA!2{cE`=YhLdKX5D26mDBai(Z@
zW=#)Sd)|KPqv*<>b?;!oeyYHpS{T{1yYjBdU$|GM*4vL2T$2U+<ie><vo&uW`31My
zyuR~z!8%efk9@Ann}Z9(n^s5O(SLgkJaEC<zi<L#OHbRTr)@=d*SbdMeFxUQSMa>H
z+1jykINy0-Et>D_zt>l29o%f|TH#jPKfJ!#(7b%&_V_B7Z`rrjUT8S@*rI3kTZCSx
zFL??Opwe)bgDuk&2DYHqQ)s!4NW46v3swAcg=WP|XmMu@R+*~!Zm!~ke6^z#WSEt>
zuu97Y`TGG{vm1Z`>Rf8u!)uTtIfYXkwN<?y$EY3;>}w4_It>TM1vTS(SOKR5&o$u4
z05kYjJl9~xc%X*bRJ{hUJ*DlbKn6;wCa1HP5E=OC;HRHz)M!oZsPwjxP?Lp&9J-%!
z6DP@2%iuK)=NhrG7L8#e$ErOO^sH*n95$;)16G*AxsckzrVx%WpKCbl)_ma$ARK=Z
z!pXTb8l1bI7A;}AgE;q(kbBqn)1kKKmW0p$zXtMPnz)7ul55oVM`UZL`o6hlwXQ>f
z_YG5_j_qK$J^P3-dz^%m?E2<B*Tn4zP{+wvNzn>*DEOzz4HW6nE8oQUzLjI}nQ$Ji
zxt|hkTua!7tqu>XhixJMH!-*tO(L!pV}|y86Gpqs6Lz5Sl6lcS)uUmD?HXOt@q&}~
zRv?^VCWLd6=h_SKYIVpFc82XQfUD6Yt}Re=ufmQf*sPOeFa^8fEcY)%U$-q64vkOP
zb^W#w&ME{@{jlUPr*NdDwsUW3=O((o%S{H@Z7dq+3(#R#nBm$3wCJ9~j$Ca3eNgLH
z-&}{bHbsbaQ?F_>p~G6Q`i30b9j@a#xh`B6dtT#eEZfeiyfvfU+m60c9SyreSQ(#d
znsz}f3l<~|H%+V;8*1q83fF^W^oJX4!~v#CEeLiDM|(ApHEY$3?%sZ9fLbN@!<M_9
z*h^GA4oMKRl#gfw`aaN<@q2@G8!=117-oF21mWL1o_YQIJQCyL+<<R5e0<0!CS@Pg
z-Qo!zY6t<}X>nG7;!;%bU5O_Wz8S$MC6ZTsNzpfaHXO**pMmlOAXf@}xD4RS(1Ace
zO$sueatYTLmwW<LXRif(BUhm;EY1ZoHVs+L^L-hMZ<khv8_Y0%py4OgqJd%_DOVmv
zTpq@?xu{}<FrUYDMMW2%=jYVYUpfYLut+?n7}DZJF?j{*&3w6(Tq=C%QF;b)1BLL@
zC<;f51pJtVheD-f7<>(P^(sM#<Di_8Mq)&U;$9L-3KLDoc!iFp(kGy}XnVH$r<h{N
zas)UDOuD6u@l-@g3DG!2hNS{w#yrGvr@6_Y@yUTl{|ilV)rN1Sq}i`iIaBQAFi(h;
zlj%#CH5Tz#B9L{bk|NXuG55eUR1Tn2kEadgLM!Cz`f@qPP)>&wL%DjXkXO(cMubXZ
zT*Nz;Iba_A&Mo5_d28rQcxdwU#JDdwG8sBO%w_g}XEK8s%IC{a1HOziRI0A}B>u7x
z!&T!_-0m&aEU`O47qO6RT#8gwRMDlNEGon9Yh`DIgESPvkIKfVMI?k{KZv7waZW%u
zLR08DK~^X^sTiO#Jcsz0V$}YyfaWoPJ|;w=Bn`EWR08U6I;hyjQ)*cQYRjpVAjZ^K
zSz(gUS1dKfVuis)V$jv}JjCvCS(umnjS7QHyhz5pVdsNzPJ^!Cl|T$3CMo)IAWz1j
z9HXWa3WF6L&0ru1{qdhTKp~+F1BVACKuIx|SB(QY)Wk8rAV)<<5JqHc^$dac0MSZl
z3OTEg7Zvid1on);Il(rqlqN9qP})P_Fo<#ZNjqRWAvdtp|M?pW#~)f8HwSJEY*;$-
zmW~ZeSKiXKX|(2SW9!B<4;fRAZC~m7IB++RYwufg=gf!JnO8S$t;?}H;%%{D>sdIt
zX)tGB`|%GJFBJ?;D(3EW<DR0?2FTg*<%4&Q-aeZ1>{)$%P5%4qe{+2;`PpR7Ji5+|
zZ8l=cy?1->9sj4`KLqoQqj{!oVdR0$v2e1;+8|}I+80hdv^xKC=STGN$q(6rYv<~j
zFRcApvgmAD@?`Zzd;Q{tyuCfk6detVSC;Hq-2;o`#^7(+x`z$>ijB>;t}b0&VOQgY
z#=}LA_s*`{yH<{_oi2Ee6y1%t4lf;EKD9DaaQhz{m_~Ef@|Y!D%`2AAyYkM74QD9t
z3_*@^>%!87jfS3lL(fJ-Z@!`T#)-A?6Pj|Fv-+n-!sfpD&b#mYG*aB*z2p7B`<P%n
zwxX+Pi-z}uw$6>V{rR^2MNiuk0}NzGzorRWD-=52EgP<mysKknV3of+_*pFH>d3jq
zvIa<^94*UN3ifU&npx{^25$tH_ZHY!R;fI@XM^p_vwiFAfd|f}Tdhm2KWp1E5GE(o
zLz{Q48VjDjti3F9&UdiTcqq$0Fxqe0ZrC1pJMNsneSYN!Yi}34M~j{Ni{1N+zFo!k
zogX*dZCdTU*I#HKf$Ym_drBKk=B+xY*R5N+SKnCQGhAeyH_zNSv)s1M`ircq#L))(
z%Hs%0*an`?kc8d4EEH^e7fx;&iRQLDy|;UdO)Yn<x2>3&A1?h6HrH+5G81*pMYrd%
z-DuM<oOoJCm~GkEhRvI|c~|IlQ`aiFX=HDj-Zd?DFSoA@-8Xi_UYU13g}+#sv%Tw#
zXA@M>>|JNtpp)J8XA?z7<1Oow^{?zVm@Jv?1I1gU7W)=E7emYBqLiKgcVo+AnsEBC
z76Ld%XF+Q>yn(zokZbMDnfHCc^nD!%F<Sp3^2e{=B4N2NOkIDJpwdJ9n(YX->Hf{p
z80@2e(`F6^Ouy+f2ag&QV+5+}P!Ndt$w_}bf^rIhvhYx-q=aycgj8#%+A9_cs-iTT
zMDy@BAhp=8*4EWpe94kvd#4ymra?luex+KslG((Os%o%QL#5tcygdFBTO^_yST#>B
z-EiX@;EK};;Rw15Xb@Hq(m8l+(IiQJN!Y(6EMF2P^*{R`ME%zkLsFouB=z5Bg6#TF
zV(=@ynQVLNagi-w?befyuUrmN|5f+fB)Na<Dn&4CHdCN?{FX8P<OFCY<55@sKhHd&
A(f|Me

literal 0
HcmV?d00001

diff --git a/vlm_evaluation/__pycache__/prompts.cpython-313.pyc b/vlm_evaluation/__pycache__/prompts.cpython-313.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..fd31c656084ce186d086af42ea205a706b2f278b
GIT binary patch
literal 1043
zcmY*YL2uJA6n2t!>%tO-#Ay<eHzBbKsDw5SRMSN13QVxAYFeRbnW{8(yNooBY^S3=
z^-pl;gy0vzaYv3jM;wsg2Slp4ao{;^2D9Y%Y`?tE{=WAzH4GiWFn{IGzNiTOl$&f6
z=8Ne%EZ!pvF=Q#WVyQ4Sm^oY7Dp>hdbe3akf1$WI{&Pel>@b3z0F!qiVFHt(<42C5
z3_F|?&b^?Ixrn+X5b&k&dX6hN?m@hHN^m6%8ALpn;5wsc#1$nSFd6Ps$HyMWf@X*W
z3rGkV9eQ2T#ooZ_6HI&9$>8sCd`{V0+;KR>X>k2}7Im2Crs#MOae+IK-H|tXL;3!0
z1}y<yDmnQS-hS?hQ#gWT(vuv0Cvb#EgOVQWb6#z)8ygw7wityZUYK16o`{GAu9j&G
zehH@BPdC7T%*rwN*)Wl%wh&mYrGU+~RIvHYrMsxOa4<GoR;^LS+bQ)OeY~#gF9|#p
zWV#aJN@KjSw_C>J+r?CCDV^&G(x+?$r>iAhZ=`~x(b}wLz^bf|Kk9njC!pbV;WK{M
z<;_%u;>oh$k*pmfJfr~tc(WSxsPEHr*$7bffg{SgRXuFf%J?`fwUw&<=;7lN{dujn
z`>axZm4z6*g9bA56o{Vadd0H#tY+n)SuQHEVI5h`+J3v<INYx{iyDK5Modb}a$vYz
zVT|L~)w12<gmIfQFYxSL>P7?TI&N3DUNmjc=3g7myU-r`gZ8|YK^7a@ZV(Dy3P<s3
zp9F+~mjA`uvpAEbU6fZp0W(2~s;nsA6?1YkLF&rdkG1<BmM<SXy|{A$2U=ZGk_Ds|
zCWQnoF5O5BW7$lQkvFH1j1GX0go-sTSPJ|ZG5PN@`J2Xt?E&pZKG|UgY!#Ti1SZiG
iKvlI{xrvc1n~M2qGeO|K^3;8+rtY%&=C?de@%{oP`Zi?%

literal 0
HcmV?d00001

diff --git a/vlm_evaluation/dataset.py b/vlm_evaluation/dataset.py
new file mode 100644
index 0000000..7802cee
--- /dev/null
+++ b/vlm_evaluation/dataset.py
@@ -0,0 +1,49 @@
+import json
+import os
+from PIL import Image
+
+class ArtifactDataset:
+    """List-like loader for artifact evaluation records stored as JSON."""
+
+    def __init__(self, data_path: str):
+        """Load the evaluation records from the JSON file at ``data_path``.
+
+        The file is expected to contain a list of objects such as:
+            {"image_path": "data/images/student1.jpg", "student_id": "123",
+             "artifact_type": "Origami", "ground_truth_score": 4,
+             "rubric": "1: No effort, 5: Perfect folds and presentation"}
+        A missing file only prints a warning and leaves the dataset empty.
+        """
+        self.data_path = data_path
+        self.data = []
+
+        if os.path.exists(data_path):
+            # JSON text is UTF-8; do not rely on the platform default encoding.
+            with open(data_path, 'r', encoding='utf-8') as f:
+                self.data = json.load(f)
+        else:
+            print(f"Warning: Dataset file {data_path} not found. Returning empty dataset.")
+            print("Please create this file or generate a sample dataset.")
+
+    def __len__(self):
+        """Return the number of loaded records."""
+        return len(self.data)
+
+    def __getitem__(self, idx):
+        """Return ``{"image": ..., "metadata": ...}`` for record ``idx``.
+
+        Relative image paths are resolved against the JSON file's directory.
+        ``image`` is an RGB PIL image, or None when loading fails.
+        """
+        item = self.data[idx]
+        image_path = item.get("image_path")
+        try:
+            full_image_path = image_path
+            if not os.path.isabs(image_path):
+                base_dir = os.path.dirname(self.data_path)
+                full_image_path = os.path.join(base_dir, image_path)
+            image = Image.open(full_image_path).convert("RGB")
+        except Exception as e:  # best effort: report the failure, yield no image
+            print(f"Error loading image {image_path}: {e}")
+            image = None
+        return {"image": image, "metadata": item}
diff --git a/vlm_evaluation/evaluate.py b/vlm_evaluation/evaluate.py
new file mode 100644
index 0000000..70f5dfb
--- /dev/null
+++ b/vlm_evaluation/evaluate.py
@@ -0,0 +1,171 @@
+import argparse
+import json
+import os
+import re
+import torch
+from tqdm import tqdm
+from transformers import (
+    LlavaForConditionalGeneration,
+    AutoProcessor,
+    BitsAndBytesConfig,
+)
+from dataset import ArtifactDataset
+from prompts import SYSTEM_PROMPT, generate_evaluation_prompt
+
+
+def parse_args():
+    """Parse the command-line options of the evaluation pipeline."""
+    p = argparse.ArgumentParser(description="VLM Evaluation Pipeline")
+    p.add_argument("--data_path", type=str, required=True, help="Path to dataset JSON")
+    p.add_argument("--model_name", type=str, default="llava-hf/llava-1.5-7b-hf")
+    # Quantization is on by default; --no_quantize is the only way to disable it.
+    p.add_argument("--quantize", action="store_true", default=True)
+    p.add_argument("--no_quantize", action="store_false", dest="quantize")
+    p.add_argument("--output_path", type=str, default="results.json")
+    p.add_argument("--max_new_tokens", type=int, default=256)
+    return p.parse_args()
+
+
+def load_model(model_name, quantize=True):
+    """Load the LLaVA model and its processor, optionally 4-bit quantized.
+
+    Returns a ``(model, processor)`` tuple.
+    """
+    # NF4 4-bit weights with double quantization and float16 compute.
+    bnb_config = (
+        BitsAndBytesConfig(
+            load_in_4bit=True, bnb_4bit_compute_dtype=torch.float16,
+            bnb_4bit_use_double_quant=True, bnb_4bit_quant_type="nf4",
+        )
+        if quantize
+        else None
+    )
+    model = LlavaForConditionalGeneration.from_pretrained(
+        model_name, quantization_config=bnb_config,
+        device_map="auto", torch_dtype=torch.float16,
+    )
+    return model, AutoProcessor.from_pretrained(model_name)
+
+
+def extract_score(text):
+    """Return the 1-5 integer following "SCORE:" in ``text``, or None.
+
+    An opening bracket is tolerated ("SCORE: [4]"), mirroring the prompt format.
+    """
+    match = re.search(r"SCORE:\s*\[?\s*(\d+)", text, re.IGNORECASE)
+    return int(match.group(1)) if match and 1 <= int(match.group(1)) <= 5 else None
+
+
+def compute_metrics(predictions, ground_truths):
+    """Summarize score predictions against ground truth; ``{}`` if there is none.
+
+    A ``None`` prediction counts as unparsed: it lowers ``parse_rate`` and the
+    accuracies but is excluded from the mean absolute error.
+    """
+    total = len(ground_truths)
+    if total == 0:
+        return {}
+    errors = [abs(p - g) for p, g in zip(predictions, ground_truths) if p is not None]
+    return {
+        "total_samples": total,
+        "exact_accuracy": round(sum(e == 0 for e in errors) / total * 100, 2),
+        "within_1_accuracy": round(sum(e <= 1 for e in errors) / total * 100, 2),
+        "mean_absolute_error": round(sum(errors) / len(errors), 4) if errors else None,
+        "parse_rate": round(len(errors) / total * 100, 2),
+    }
+
+
+def main():
+    """Evaluate every dataset sample with the VLM and save the results.
+
+    Writes config, metrics and per-sample results as JSON to
+    ``--output_path``; samples whose image failed to load are skipped.
+    """
+    args = parse_args()
+
+    if not torch.cuda.is_available():
+        print("Warning: CUDA not available. Inference will be slow on CPU.")
+
+    print(f"Loading dataset from {args.data_path}...")
+    dataset = ArtifactDataset(args.data_path)
+    if len(dataset) == 0:
+        print("Dataset is empty. Exiting.")
+        return
+
+    print(f"Loading model {args.model_name} (quantize={args.quantize})...")
+    model, processor = load_model(args.model_name, quantize=args.quantize)
+    device = "cuda" if torch.cuda.is_available() else "cpu"
+
+    results, preds, truths = [], [], []
+
+    for i in tqdm(range(len(dataset)), desc="Evaluating"):
+        sample = dataset[i]
+        meta = sample["metadata"]
+        image = sample["image"]
+
+        if image is None:
+            continue
+
+        prompt_text = generate_evaluation_prompt(
+            student_id=meta.get("student_id", "unknown"),
+            artifact_type=meta.get("artifact_type", "unknown"),
+            rubric=meta.get("rubric", ""),
+        )
+
+        inputs = processor(text=prompt_text, images=image, return_tensors="pt").to(device)
+
+        with torch.no_grad():
+            output_ids = model.generate(
+                **inputs, max_new_tokens=args.max_new_tokens, do_sample=False
+            )
+
+        decoded = processor.decode(output_ids[0], skip_special_tokens=True)
+        # Keep only the text after the last "ASSISTANT:" marker (all of it if absent).
+        response = decoded.split("ASSISTANT:")[-1].strip()
+
+        predicted_score = extract_score(response)
+        ground_truth = meta.get("ground_truth_score")
+
+        results.append({
+            "student_id": meta.get("student_id", "unknown"),
+            "predicted_score": predicted_score,
+            "ground_truth_score": ground_truth,
+            "raw_response": response,
+            "artifact_type": meta.get("artifact_type", "unknown"),
+        })
+
+        if predicted_score is not None and ground_truth is not None:
+            preds.append(predicted_score)
+            truths.append(ground_truth)
+
+    metrics = compute_metrics(preds, truths)
+    if results:
+        # compute_metrics only receives samples whose score parsed, so its own
+        # parse rate cannot drop; report the rate over every generated response.
+        parsed = sum(r["predicted_score"] is not None for r in results)
+        metrics["parse_rate"] = round(parsed / len(results) * 100, 2)
+
+    output = {
+        "config": {
+            "model_name": args.model_name,
+            "quantize": args.quantize,
+            "dataset": args.data_path,
+        },
+        "metrics": metrics,
+        "results": results,
+    }
+
+    with open(args.output_path, "w") as f:
+        json.dump(output, f, indent=2)
+
+    print("\n" + "=" * 50)
+    print("EVALUATION METRICS")
+    print("=" * 50)
+    for k, v in metrics.items():
+        print(f"  {k}: {v}")
+    print("=" * 50)
+    print(f"Results saved to {args.output_path}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/vlm_evaluation/generate_sample_data.py b/vlm_evaluation/generate_sample_data.py
new file mode 100644
index 0000000..ccdce04
--- /dev/null
+++ b/vlm_evaluation/generate_sample_data.py
@@ -0,0 +1,57 @@
+import json
+import os
+from PIL import Image, ImageDraw
+
+SAMPLE_DATA = [  # "image_path" is a bare file name here; main() prefixes it with sample_data
+    {
+        "image_path": "sample_origami.jpg",
+        "student_id": "S001",
+        "artifact_type": "Origami",
+        "rubric": "1: No recognizable shape, 5: Perfect folds with clean edges and symmetry",
+        "ground_truth_score": 4,
+    },
+    {
+        "image_path": "sample_drawing.jpg",
+        "student_id": "S002",
+        "artifact_type": "Drawing",
+        "rubric": "1: No effort, 5: Detailed and creative composition",
+        "ground_truth_score": 3,
+    },
+    {
+        "image_path": "sample_model.jpg",
+        "student_id": "S003",
+        "artifact_type": "Clay Model",
+        "rubric": "1: Unrecognizable, 5: Realistic and well-finished model",
+        "ground_truth_score": 5,
+    },
+]
+
+
+def create_dummy_image(path, size=(224, 224), color=(200, 100, 50)):
+    """Draw a placeholder image (outlined square with a filled disc) and save it to ``path``."""
+    canvas = Image.new("RGB", size, color)
+    ImageDraw.Draw(canvas).rectangle([50, 50, 174, 174], outline=(255, 255, 255), width=3)
+    ImageDraw.Draw(canvas).ellipse([80, 80, 144, 144], fill=(100, 200, 100))
+    canvas.save(path)
+    print(f"Created {path}")
+
+
+def main():
+    """Create the dummy images and write ``sample_dataset.json`` beside this script."""
+    output_dir = os.path.dirname(os.path.abspath(__file__))
+    data_dir = os.path.join(output_dir, "sample_data")
+    os.makedirs(data_dir, exist_ok=True)
+    # Build new records instead of mutating SAMPLE_DATA (a second call would stack
+    # the directory prefix); "/" keeps the stored paths portable across platforms.
+    records = []
+    for item in SAMPLE_DATA:
+        create_dummy_image(os.path.join(data_dir, item["image_path"]))
+        records.append({**item, "image_path": f"sample_data/{item['image_path']}"})
+    json_path = os.path.join(output_dir, "sample_dataset.json")
+    with open(json_path, "w") as f:
+        json.dump(records, f, indent=2)
+    print(f"Sample dataset saved to {json_path}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/vlm_evaluation/prompts.py b/vlm_evaluation/prompts.py
new file mode 100644
index 0000000..aa5a1ee
--- /dev/null
+++ b/vlm_evaluation/prompts.py
@@ -0,0 +1,21 @@
+SYSTEM_PROMPT = """You are an expert evaluator assessing student artifacts for The Apprentice Project.
+Your goal is to evaluate the provided image of a student's work based on the provided rubric.
+You must be objective and provide a score along with a brief explanation.
+"""
+
+
+def generate_evaluation_prompt(student_id: str, artifact_type: str, rubric: str) -> str:
+    return f"""USER: <image>
+{SYSTEM_PROMPT}
+
+Here is a student artifact (ID: {student_id}) for the category: {artifact_type}.
+
+Rubric for Evaluation:
+{rubric}
+
+Please evaluate the artifact based on the rubric.
+Provide your response in the following format:
+SCORE: [Your Score 1-5]
+FEEDBACK: [Your reasoning here]
+
+ASSISTANT:"""
diff --git a/vlm_evaluation/run_benchmark.ps1 b/vlm_evaluation/run_benchmark.ps1
new file mode 100644
index 0000000..47c69e2
--- /dev/null
+++ b/vlm_evaluation/run_benchmark.ps1
@@ -0,0 +1,29 @@
+# Runs evaluate.py from the current directory with the chosen benchmark settings.
+param(
+    [string]$DataPath = "sample_dataset.json",
+    [string]$ModelName = "llava-hf/llava-1.5-7b-hf",
+    [switch]$NoQuantize,
+    [string]$OutputPath = "results.json",
+    [int]$MaxNewTokens = 256
+)
+
+Write-Host "=== VLM Evaluation Benchmark ===" -ForegroundColor Cyan
+Write-Host "Dataset : $DataPath"
+Write-Host "Model   : $ModelName"
+Write-Host "Quantize: $(-not $NoQuantize)"
+Write-Host "Output  : $OutputPath"
+Write-Host ""
+
+# Collect arguments in an array and splat them: an empty-string placeholder for
+# the optional flag can be forwarded to python as a stray "" argument.
+$EvalArgs = @("--data_path", $DataPath, "--model_name", $ModelName,
+              "--output_path", $OutputPath, "--max_new_tokens", $MaxNewTokens)
+if ($NoQuantize) { $EvalArgs += "--no_quantize" }
+python evaluate.py @EvalArgs
+
+if ($LASTEXITCODE -eq 0) {
+    Write-Host "Benchmark completed successfully." -ForegroundColor Green
+} else {
+    Write-Host "Benchmark failed with exit code $LASTEXITCODE." -ForegroundColor Red
+    exit $LASTEXITCODE  # let callers and CI see the failure
+}
diff --git a/vlm_evaluation/sample_data/sample_drawing.jpg b/vlm_evaluation/sample_data/sample_drawing.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..68eafd6f7a099fae3825e5e490d383e0d363d052
GIT binary patch
literal 3177
zcmdUwdo<MR8pq!;!_1%<ji`)qE91H&ZMQN-<GM~7<C^9;NQ|QGmdnBvQhSKdxSidR
z+m0BrlMy=#p;>z;a?3rp3gsGx;de%zbxy2v)~UVrAA7&Q^}K6+f4^tF>-qei=XoEf
z8yW)QmZlb_00sj9#(x0R3rqk?NC+t;h(aQfXf#S#6uV7SL_~C##11hmZZ}>Qw_8R=
zPDxW)PC;E!MuwoPqP|~CM@I**tbbTf`;g`V9qo-nU}!X2R76yI+cs%!c^P@_uU^n2
zfI)#FFa(Dw009gPj)6f90MCz85cY9_e=V2*93d!#M4^R6_zw5QfdC8+7eK%T1rZ3o
zcLe`CKwtzV<h70p?I61%75uQ;my)tkipFJ+C2d+~ln%K0N1=sxN=fh9y?387K}A*P
z;J3PZ`UWP)NTz1y7M2uSJ9`I5r<3ldJ<fP~(Yyl!gDwPzgoZ`OT#mhR^?OEg$_-{}
zT6#uiPHtZQkGBg7@BDPH{C)+y@<CNyeZ!N+rsk)dwioRkon758dtMEXjE;@Je)D!>
zc5Z&*!{X91cV&YM2H>Bt`1>cYEnFBrmjD6*M<6%2U;@E>gJTea@>)U?N6AQ6za0wN
zmrz*ar0lZCXvG6IGm>upt-?E%bYAV9-9Y<D_GMsEzlrSMVE^Ij0itjiKY4HrAc6!2
zp)}Lz5g~^G{!I36O8t<&$Iv9s!S2P_&GDpM4eYh#U?aUgB&mR29*%~<5oZXjPC;On
za}dwnn`yQd!Jbd8oR}=6q%aArG?Q57H?&{+{xMzj{sOVb2mygx8W31pq%V)*H%q^E
z`^bI>sIDfJcBIW)O?$ep<qlb|2n<4C&=Ud)<jv9nN0R;Mem%m8u2Zx4e@`dB8=NbB
zfc&GMeJZM@uBs-k=oGEz^sJSq@3Y-bd!~KdP~2yc(_uqJWi!FNxEcZkO0CzDE!&vK
zL;WjD9v+H6+q(GOczo9J(ki_PLe7KjGdT<b*kbUqcKT>44gX<r@Y%_cqRlF7wW&4I
z?Atvw`uFRuB&<EsoJ_)kt>)l9G|iO?Cqv+`S*uFSrY*W=&vM*r?t#Bc@suZOb^O}N
zq`w7T#*&W+ZZTX!E(DbBne#C!*8RJ1)GgY%YrrUOPXBJN70-HJn2>YVz&I}!t?W})
z;@7^2Jt|_o|5`{3<&By@Az8Xk?h&44>mJbfYk*z!sq3BRXXZUZ0^higK_Fy+C&nqh
zqbjO!S@xpaO_F!A*^{m(T_w}@e}MqC0>h+Q-bKhZ*wN0(U6+t@4=Y`^yF;L9dkvpG
z5uCp--Z0Kdqj!9l&3Dql1!~!WPU5-og%fH!C%SvB*!mPsCWl<M=yd%2huVI-HhLCM
zf*)w!l5sU2NpG$mPFz`lz)PhPgUy;XGuYrleELIo?JJ@kH#Z@Np$5<UZ#TkJ(C6*6
zSq;?8F1RH98r$PCOiw41Brmm{qdn(dSZ(HJW!1B}OGSb0C$1J45Wtz6&iSV!J>p6x
zyO<6g5U7uYk+2l~ItJmEz^$LJJge|%QXD@O*1lj9mPsyBwNtb)ydCgEPL^s+#`8dX
zxicA#_}h(^WE<+2QKh><AZE>BJi&lLDBga3OZjb-<JXm-MXMU}1jirMoPVc6f}6@g
zb5q8MZ7ln@{rGe@uPVuWiv@nEQndI#{-Q9Upu}U&r++>={+rIEz(P)5yJGe8hK%SI
zMU`fCbKct<WJi2~u_}g471*4T|B<5K9p*6WF*{v%uk`@@p^swmRor&lg@Y}&i(NL(
zJJ`&_wjHMShtvmERtBeO;b&(d5amlgVG=6Zk4LVu9v4nJ_bpeId)uUGn3wbF7^*71
zt}(dx+Qb-tFG&MRp{Ao+t%C6pM9M{j6vrjDi&u)Ub?U>zEi5UAQa>hMJ@jqgnm5Z+
z#pPH;f_2KzhKn6FnsIENcn(8VEL(Ug90vh~X$Eof1q4nfS59ns!LFxc!XdCP;CIgN
z$18@?=_Kx|<RN=;ID>@XXeF03o66g&m^7K_?y?`rB$%|RaH~B(!%<NiRr^J$``YfB
zJ~#9th%P|u32MAY^H8@bFIEcDSjvjZk2PBK&f>}U5ZAVK;*;sKoXJ8<%9!6&Rc_R^
z5XyLbbxmSzb`zPJ__+`9)%I2hyz#xuOW}_#d%AsL@2tzX&xb1NU>^jUM+!=s*2Ul9
z*JCHeo)Y<EaOR2^1Qv81BP}4%L|D0he2@Ww{VV;N)9>k{e(etK5I|i>gn&uVmC-Do
zf8w%9IeleMF;NKuBR3*W-ah|?o*v<w7Q959)0<E$<F}xp83eMc>gTN2_02wy%!ox{
z57+);#I~wl&Em=gNp?e^4#&TWpZB|Rl_4OLyD6|wf#aR?``21{{GOhoZzAIlflYB0
z*}hvYofl)Zy<dO5Zib^KM%Ru~XU*2a=u^pCe9Nbb%USTFog;}ZKUF7BbTI9xwxeo8
vu*%nlMV30A=y@xju#!f-_+zRjsk#rl%=$xRgGpEi`2j4M1pZ7$hhF{@ua>LK

literal 0
HcmV?d00001

diff --git a/vlm_evaluation/sample_data/sample_model.jpg b/vlm_evaluation/sample_data/sample_model.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..68eafd6f7a099fae3825e5e490d383e0d363d052
GIT binary patch
literal 3177
zcmdUwdo<MR8pq!;!_1%<ji`)qE91H&ZMQN-<GM~7<C^9;NQ|QGmdnBvQhSKdxSidR
z+m0BrlMy=#p;>z;a?3rp3gsGx;de%zbxy2v)~UVrAA7&Q^}K6+f4^tF>-qei=XoEf
z8yW)QmZlb_00sj9#(x0R3rqk?NC+t;h(aQfXf#S#6uV7SL_~C##11hmZZ}>Qw_8R=
zPDxW)PC;E!MuwoPqP|~CM@I**tbbTf`;g`V9qo-nU}!X2R76yI+cs%!c^P@_uU^n2
zfI)#FFa(Dw009gPj)6f90MCz85cY9_e=V2*93d!#M4^R6_zw5QfdC8+7eK%T1rZ3o
zcLe`CKwtzV<h70p?I61%75uQ;my)tkipFJ+C2d+~ln%K0N1=sxN=fh9y?387K}A*P
z;J3PZ`UWP)NTz1y7M2uSJ9`I5r<3ldJ<fP~(Yyl!gDwPzgoZ`OT#mhR^?OEg$_-{}
zT6#uiPHtZQkGBg7@BDPH{C)+y@<CNyeZ!N+rsk)dwioRkon758dtMEXjE;@Je)D!>
zc5Z&*!{X91cV&YM2H>Bt`1>cYEnFBrmjD6*M<6%2U;@E>gJTea@>)U?N6AQ6za0wN
zmrz*ar0lZCXvG6IGm>upt-?E%bYAV9-9Y<D_GMsEzlrSMVE^Ij0itjiKY4HrAc6!2
zp)}Lz5g~^G{!I36O8t<&$Iv9s!S2P_&GDpM4eYh#U?aUgB&mR29*%~<5oZXjPC;On
za}dwnn`yQd!Jbd8oR}=6q%aArG?Q57H?&{+{xMzj{sOVb2mygx8W31pq%V)*H%q^E
z`^bI>sIDfJcBIW)O?$ep<qlb|2n<4C&=Ud)<jv9nN0R;Mem%m8u2Zx4e@`dB8=NbB
zfc&GMeJZM@uBs-k=oGEz^sJSq@3Y-bd!~KdP~2yc(_uqJWi!FNxEcZkO0CzDE!&vK
zL;WjD9v+H6+q(GOczo9J(ki_PLe7KjGdT<b*kbUqcKT>44gX<r@Y%_cqRlF7wW&4I
z?Atvw`uFRuB&<EsoJ_)kt>)l9G|iO?Cqv+`S*uFSrY*W=&vM*r?t#Bc@suZOb^O}N
zq`w7T#*&W+ZZTX!E(DbBne#C!*8RJ1)GgY%YrrUOPXBJN70-HJn2>YVz&I}!t?W})
z;@7^2Jt|_o|5`{3<&By@Az8Xk?h&44>mJbfYk*z!sq3BRXXZUZ0^higK_Fy+C&nqh
zqbjO!S@xpaO_F!A*^{m(T_w}@e}MqC0>h+Q-bKhZ*wN0(U6+t@4=Y`^yF;L9dkvpG
z5uCp--Z0Kdqj!9l&3Dql1!~!WPU5-og%fH!C%SvB*!mPsCWl<M=yd%2huVI-HhLCM
zf*)w!l5sU2NpG$mPFz`lz)PhPgUy;XGuYrleELIo?JJ@kH#Z@Np$5<UZ#TkJ(C6*6
zSq;?8F1RH98r$PCOiw41Brmm{qdn(dSZ(HJW!1B}OGSb0C$1J45Wtz6&iSV!J>p6x
zyO<6g5U7uYk+2l~ItJmEz^$LJJge|%QXD@O*1lj9mPsyBwNtb)ydCgEPL^s+#`8dX
zxicA#_}h(^WE<+2QKh><AZE>BJi&lLDBga3OZjb-<JXm-MXMU}1jirMoPVc6f}6@g
zb5q8MZ7ln@{rGe@uPVuWiv@nEQndI#{-Q9Upu}U&r++>={+rIEz(P)5yJGe8hK%SI
zMU`fCbKct<WJi2~u_}g471*4T|B<5K9p*6WF*{v%uk`@@p^swmRor&lg@Y}&i(NL(
zJJ`&_wjHMShtvmERtBeO;b&(d5amlgVG=6Zk4LVu9v4nJ_bpeId)uUGn3wbF7^*71
zt}(dx+Qb-tFG&MRp{Ao+t%C6pM9M{j6vrjDi&u)Ub?U>zEi5UAQa>hMJ@jqgnm5Z+
z#pPH;f_2KzhKn6FnsIENcn(8VEL(Ug90vh~X$Eof1q4nfS59ns!LFxc!XdCP;CIgN
z$18@?=_Kx|<RN=;ID>@XXeF03o66g&m^7K_?y?`rB$%|RaH~B(!%<NiRr^J$``YfB
zJ~#9th%P|u32MAY^H8@bFIEcDSjvjZk2PBK&f>}U5ZAVK;*;sKoXJ8<%9!6&Rc_R^
z5XyLbbxmSzb`zPJ__+`9)%I2hyz#xuOW}_#d%AsL@2tzX&xb1NU>^jUM+!=s*2Ul9
z*JCHeo)Y<EaOR2^1Qv81BP}4%L|D0he2@Ww{VV;N)9>k{e(etK5I|i>gn&uVmC-Do
zf8w%9IeleMF;NKuBR3*W-ah|?o*v<w7Q959)0<E$<F}xp83eMc>gTN2_02wy%!ox{
z57+);#I~wl&Em=gNp?e^4#&TWpZB|Rl_4OLyD6|wf#aR?``21{{GOhoZzAIlflYB0
z*}hvYofl)Zy<dO5Zib^KM%Ru~XU*2a=u^pCe9Nbb%USTFog;}ZKUF7BbTI9xwxeo8
vu*%nlMV30A=y@xju#!f-_+zRjsk#rl%=$xRgGpEi`2j4M1pZ7$hhF{@ua>LK

literal 0
HcmV?d00001

diff --git a/vlm_evaluation/sample_data/sample_origami.jpg b/vlm_evaluation/sample_data/sample_origami.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..68eafd6f7a099fae3825e5e490d383e0d363d052
GIT binary patch
literal 3177
zcmdUwdo<MR8pq!;!_1%<ji`)qE91H&ZMQN-<GM~7<C^9;NQ|QGmdnBvQhSKdxSidR
z+m0BrlMy=#p;>z;a?3rp3gsGx;de%zbxy2v)~UVrAA7&Q^}K6+f4^tF>-qei=XoEf
z8yW)QmZlb_00sj9#(x0R3rqk?NC+t;h(aQfXf#S#6uV7SL_~C##11hmZZ}>Qw_8R=
zPDxW)PC;E!MuwoPqP|~CM@I**tbbTf`;g`V9qo-nU}!X2R76yI+cs%!c^P@_uU^n2
zfI)#FFa(Dw009gPj)6f90MCz85cY9_e=V2*93d!#M4^R6_zw5QfdC8+7eK%T1rZ3o
zcLe`CKwtzV<h70p?I61%75uQ;my)tkipFJ+C2d+~ln%K0N1=sxN=fh9y?387K}A*P
z;J3PZ`UWP)NTz1y7M2uSJ9`I5r<3ldJ<fP~(Yyl!gDwPzgoZ`OT#mhR^?OEg$_-{}
zT6#uiPHtZQkGBg7@BDPH{C)+y@<CNyeZ!N+rsk)dwioRkon758dtMEXjE;@Je)D!>
zc5Z&*!{X91cV&YM2H>Bt`1>cYEnFBrmjD6*M<6%2U;@E>gJTea@>)U?N6AQ6za0wN
zmrz*ar0lZCXvG6IGm>upt-?E%bYAV9-9Y<D_GMsEzlrSMVE^Ij0itjiKY4HrAc6!2
zp)}Lz5g~^G{!I36O8t<&$Iv9s!S2P_&GDpM4eYh#U?aUgB&mR29*%~<5oZXjPC;On
za}dwnn`yQd!Jbd8oR}=6q%aArG?Q57H?&{+{xMzj{sOVb2mygx8W31pq%V)*H%q^E
z`^bI>sIDfJcBIW)O?$ep<qlb|2n<4C&=Ud)<jv9nN0R;Mem%m8u2Zx4e@`dB8=NbB
zfc&GMeJZM@uBs-k=oGEz^sJSq@3Y-bd!~KdP~2yc(_uqJWi!FNxEcZkO0CzDE!&vK
zL;WjD9v+H6+q(GOczo9J(ki_PLe7KjGdT<b*kbUqcKT>44gX<r@Y%_cqRlF7wW&4I
z?Atvw`uFRuB&<EsoJ_)kt>)l9G|iO?Cqv+`S*uFSrY*W=&vM*r?t#Bc@suZOb^O}N
zq`w7T#*&W+ZZTX!E(DbBne#C!*8RJ1)GgY%YrrUOPXBJN70-HJn2>YVz&I}!t?W})
z;@7^2Jt|_o|5`{3<&By@Az8Xk?h&44>mJbfYk*z!sq3BRXXZUZ0^higK_Fy+C&nqh
zqbjO!S@xpaO_F!A*^{m(T_w}@e}MqC0>h+Q-bKhZ*wN0(U6+t@4=Y`^yF;L9dkvpG
z5uCp--Z0Kdqj!9l&3Dql1!~!WPU5-og%fH!C%SvB*!mPsCWl<M=yd%2huVI-HhLCM
zf*)w!l5sU2NpG$mPFz`lz)PhPgUy;XGuYrleELIo?JJ@kH#Z@Np$5<UZ#TkJ(C6*6
zSq;?8F1RH98r$PCOiw41Brmm{qdn(dSZ(HJW!1B}OGSb0C$1J45Wtz6&iSV!J>p6x
zyO<6g5U7uYk+2l~ItJmEz^$LJJge|%QXD@O*1lj9mPsyBwNtb)ydCgEPL^s+#`8dX
zxicA#_}h(^WE<+2QKh><AZE>BJi&lLDBga3OZjb-<JXm-MXMU}1jirMoPVc6f}6@g
zb5q8MZ7ln@{rGe@uPVuWiv@nEQndI#{-Q9Upu}U&r++>={+rIEz(P)5yJGe8hK%SI
zMU`fCbKct<WJi2~u_}g471*4T|B<5K9p*6WF*{v%uk`@@p^swmRor&lg@Y}&i(NL(
zJJ`&_wjHMShtvmERtBeO;b&(d5amlgVG=6Zk4LVu9v4nJ_bpeId)uUGn3wbF7^*71
zt}(dx+Qb-tFG&MRp{Ao+t%C6pM9M{j6vrjDi&u)Ub?U>zEi5UAQa>hMJ@jqgnm5Z+
z#pPH;f_2KzhKn6FnsIENcn(8VEL(Ug90vh~X$Eof1q4nfS59ns!LFxc!XdCP;CIgN
z$18@?=_Kx|<RN=;ID>@XXeF03o66g&m^7K_?y?`rB$%|RaH~B(!%<NiRr^J$``YfB
zJ~#9th%P|u32MAY^H8@bFIEcDSjvjZk2PBK&f>}U5ZAVK;*;sKoXJ8<%9!6&Rc_R^
z5XyLbbxmSzb`zPJ__+`9)%I2hyz#xuOW}_#d%AsL@2tzX&xb1NU>^jUM+!=s*2Ul9
z*JCHeo)Y<EaOR2^1Qv81BP}4%L|D0he2@Ww{VV;N)9>k{e(etK5I|i>gn&uVmC-Do
zf8w%9IeleMF;NKuBR3*W-ah|?o*v<w7Q959)0<E$<F}xp83eMc>gTN2_02wy%!ox{
z57+);#I~wl&Em=gNp?e^4#&TWpZB|Rl_4OLyD6|wf#aR?``21{{GOhoZzAIlflYB0
z*}hvYofl)Zy<dO5Zib^KM%Ru~XU*2a=u^pCe9Nbb%USTFog;}ZKUF7BbTI9xwxeo8
vu*%nlMV30A=y@xju#!f-_+zRjsk#rl%=$xRgGpEi`2j4M1pZ7$hhF{@ua>LK

literal 0
HcmV?d00001

diff --git a/vlm_evaluation/sample_dataset.json b/vlm_evaluation/sample_dataset.json
new file mode 100644
index 0000000..52812f1
--- /dev/null
+++ b/vlm_evaluation/sample_dataset.json
@@ -0,0 +1,23 @@
+[
+  {
+    "image_path": "sample_data\\sample_origami.jpg",
+    "student_id": "S001",
+    "artifact_type": "Origami",
+    "rubric": "1: No recognizable shape, 5: Perfect folds with clean edges and symmetry",
+    "ground_truth_score": 4
+  },
+  {
+    "image_path": "sample_data\\sample_drawing.jpg",
+    "student_id": "S002",
+    "artifact_type": "Drawing",
+    "rubric": "1: No effort, 5: Detailed and creative composition",
+    "ground_truth_score": 3
+  },
+  {
+    "image_path": "sample_data\\sample_model.jpg",
+    "student_id": "S003",
+    "artifact_type": "Clay Model",
+    "rubric": "1: Unrecognizable, 5: Realistic and well-finished model",
+    "ground_truth_score": 5
+  }
+]
\ No newline at end of file

From 002561bffec8e20a42a4084b674c97b920cf04c9 Mon Sep 17 00:00:00 2001
From: ravencore06 <srinidhisadhanala@gmail.com>
Date: Thu, 21 May 2026 13:17:30 +0530
Subject: [PATCH 4/6] Add Input Validation

---
 asr.py  | 16 ++++++++++++++--
 llm.py  | 37 +++++++++++++++++++++++++------------
 main.py | 27 +++++++++++++++++----------
 3 files changed, 56 insertions(+), 24 deletions(-)

diff --git a/asr.py b/asr.py
index a7fcb05..3a7e099 100644
--- a/asr.py
+++ b/asr.py
@@ -1,15 +1,14 @@
 import speech_recognition as sr
 
+
 def capture_audio():
     recognizer = sr.Recognizer()
     with sr.Microphone() as source:
         print("\nListening...")
-        # Adjust for ambient noise to reduce background noise issues
         recognizer.adjust_for_ambient_noise(source, duration=0.5)
         audio = recognizer.listen(source)
 
     try:
-        # Use Google's free Web Speech API without API key
         text = recognizer.recognize_google(audio)
         print(f"You said: {text}")
         return text
@@ -19,3 +18,16 @@ def capture_audio():
     except sr.RequestError as e:
         print(f"Could not request results from Google Speech Recognition service; {e}")
         return None
+
+
+def validate_transcription(text):
+    """Return ``(is_valid, reason)``; ``reason`` is None when ``text`` is acceptable."""
+    if text is None:
+        return False, "No speech detected."
+    n_chars = len(text.strip())
+    if n_chars == 0:
+        return False, "Empty transcription."
+    if n_chars > 500:
+        return False, "Input too long."
+    # lengths 2..500 (after trimming) pass; only a single character is left to reject
+    return (True, None) if n_chars >= 2 else (False, "Input too short.")
diff --git a/llm.py b/llm.py
index 85acfe9..368b9e7 100644
--- a/llm.py
+++ b/llm.py
@@ -1,29 +1,40 @@
 from transformers import AutoModelForCausalLM, AutoTokenizer
 import torch
 
+
 class ConversationalAgent:
     def __init__(self):
         print("Loading local conversational model (DialoGPT-small)...")
-        # Use DialoGPT-small for lightweight local generation without API keys
         self.tokenizer = AutoTokenizer.from_pretrained("microsoft/DialoGPT-small")
         self.model = AutoModelForCausalLM.from_pretrained("microsoft/DialoGPT-small")
         self.chat_history_ids = None
 
+    MAX_INPUT_TOKENS = 200
+
     def generate_response(self, user_input):
-        # Encode the new user input, add the eos_token and return a tensor in Pytorch
-        new_user_input_ids = self.tokenizer.encode(user_input + self.tokenizer.eos_token, return_tensors='pt')
+        if not user_input or not user_input.strip():
+            return "I didn't catch that. Could you please repeat?"
+
+        if len(user_input) > 1000:
+            return "That's quite long! Could you keep it shorter?"
+
+        input_ids = self.tokenizer.encode(user_input, return_tensors="pt")
+        if input_ids.shape[1] > self.MAX_INPUT_TOKENS:
+            return "I can only process about 200 words at a time. Please say that in fewer words."
+
+        new_user_input_ids = self.tokenizer.encode(
+            user_input + self.tokenizer.eos_token, return_tensors="pt"
+        )
 
-        # Append the new user input tokens to the chat history
-        # We limit the history to the last 100 tokens to prevent repetitive loops
         if self.chat_history_ids is not None:
-            bot_input_ids = torch.cat([self.chat_history_ids[:, -100:], new_user_input_ids], dim=-1)
+            bot_input_ids = torch.cat(
+                [self.chat_history_ids[:, -100:], new_user_input_ids], dim=-1
+            )
         else:
             bot_input_ids = new_user_input_ids
 
-        # Generate a response
-        # Using a fixed attention_mask for open-end generation
         attention_mask = torch.ones(bot_input_ids.shape, dtype=torch.long)
-        
+
         self.chat_history_ids = self.model.generate(
             bot_input_ids,
             attention_mask=attention_mask,
@@ -33,9 +44,11 @@ def generate_response(self, user_input):
             do_sample=True,
             top_k=50,
             top_p=0.95,
-            temperature=0.7
+            temperature=0.7,
         )
 
-        # Decode and return the response
-        response = self.tokenizer.decode(self.chat_history_ids[:, bot_input_ids.shape[-1]:][0], skip_special_tokens=True)
+        response = self.tokenizer.decode(
+            self.chat_history_ids[:, bot_input_ids.shape[-1] :][0],
+            skip_special_tokens=True,
+        )
         return response
diff --git a/main.py b/main.py
index 49e3999..adbf9f9 100644
--- a/main.py
+++ b/main.py
@@ -1,7 +1,8 @@
-from asr import capture_audio
+from asr import capture_audio, validate_transcription
 from llm import ConversationalAgent
 from tts import text_to_speech
 
+
 def main():
     print("=====================================================")
     print("Initializing Voice-Based Conversational AI System...")
@@ -9,17 +10,23 @@ def main():
     agent = ConversationalAgent()
     print("\nSystem ready! Speak into your microphone.")
     print("Say 'exit', 'quit', or 'stop' to end the conversation.")
-    
+
     while True:
         user_input = capture_audio()
-        
-        if user_input:
-            if user_input.lower() in ['exit', 'quit', 'stop']:
-                text_to_speech("Goodbye!")
-                break
-                
-            response = agent.generate_response(user_input)
-            text_to_speech(response)
+
+        valid, error_msg = validate_transcription(user_input)
+        if not valid:
+            print(f"Validation: {error_msg}")
+            continue
+
+        user_input = user_input.strip()
+        if user_input.lower() in ["exit", "quit", "stop"]:
+            text_to_speech("Goodbye!")
+            break
+
+        response = agent.generate_response(user_input)
+        text_to_speech(response)
+
 
 if __name__ == "__main__":
     main()

From 43ba3cdde6f30f846e331385c07b63591e0bbce4 Mon Sep 17 00:00:00 2001
From: ravencore06 <srinidhisadhanala@gmail.com>
Date: Mon, 25 May 2026 20:07:51 +0530
Subject: [PATCH 5/6] feat: Implement structured JSON outputs and constrained
 decoding

---
 vlm_evaluation/evaluate.py             | 41 ++++++++++++++++++--------
 vlm_evaluation/generate_sample_data.py | 21 +++++++++++--
 vlm_evaluation/prompts.py              | 19 ++++++++----
 vlm_evaluation/requirements.txt        |  2 ++
 vlm_evaluation/sample_dataset.json     | 21 +++++++++++--
 5 files changed, 80 insertions(+), 24 deletions(-)

diff --git a/vlm_evaluation/evaluate.py b/vlm_evaluation/evaluate.py
index 70f5dfb..b1d4634 100644
--- a/vlm_evaluation/evaluate.py
+++ b/vlm_evaluation/evaluate.py
@@ -4,6 +4,9 @@
 import re
 import torch
 from tqdm import tqdm
+from pydantic import BaseModel
+from lmformatenforcer import JsonSchemaParser
+from lmformatenforcer.integrations.transformers import build_transformers_prefix_allowed_tokens_fn
 from transformers import (
     LlavaForConditionalGeneration,
     AutoProcessor,
@@ -13,6 +16,13 @@
 from prompts import SYSTEM_PROMPT, generate_evaluation_prompt
 
 
+class EvaluationOutput(BaseModel):  # its JSON schema is handed to JsonSchemaParser in main()
+    skill: str
+    dimension: str
+    score: int  # the value extract_score() reads back from the decoded JSON
+    max: int
+
+
 def parse_args():
     parser = argparse.ArgumentParser(description="VLM Evaluation Pipeline")
     parser.add_argument(
@@ -48,11 +58,13 @@ def load_model(model_name, quantize=True):
 
 
 def extract_score(text):
-    match = re.search(r"SCORE:\s*(\d+)", text, re.IGNORECASE)
-    if match:
-        score = int(match.group(1))
-        if 1 <= score <= 5:
-            return score
+    try:
+        data = json.loads(text)
+        return data.get("score")
+    except json.JSONDecodeError:
+        match = re.search(r'"score"\s*:\s*(\d+)', text, re.IGNORECASE)
+        if match:
+            return int(match.group(1))
     return None
 
 
@@ -105,26 +117,31 @@ def main():
         prompt_text = generate_evaluation_prompt(
             student_id=meta.get("student_id", "unknown"),
             artifact_type=meta.get("artifact_type", "unknown"),
-            rubric=meta.get("rubric", ""),
+            rubric=meta.get("rubric", {}),
         )
 
         inputs = processor(text=prompt_text, images=image, return_tensors="pt").to(
             "cuda" if torch.cuda.is_available() else "cpu"
         )
 
+        try:
+            schema = EvaluationOutput.model_json_schema()
+        except AttributeError:
+            schema = EvaluationOutput.schema()
+            
+        parser = JsonSchemaParser(schema)
+        prefix_function = build_transformers_prefix_allowed_tokens_fn(processor.tokenizer, parser)
+
         with torch.no_grad():
             output_ids = model.generate(
                 **inputs,
                 max_new_tokens=args.max_new_tokens,
                 do_sample=False,
+                prefix_allowed_tokens_fn=prefix_function,
             )
 
-        decoded = processor.decode(output_ids[0], skip_special_tokens=True)
-        response = (
-            decoded.split("ASSISTANT:")[-1].strip()
-            if "ASSISTANT:" in decoded
-            else decoded.strip()
-        )
+        decoded = processor.decode(output_ids[0][inputs['input_ids'].shape[1]:], skip_special_tokens=True)
+        response = decoded.strip()
 
         predicted_score = extract_score(response)
         ground_truth = meta.get("ground_truth_score")
diff --git a/vlm_evaluation/generate_sample_data.py b/vlm_evaluation/generate_sample_data.py
index ccdce04..b22fee3 100644
--- a/vlm_evaluation/generate_sample_data.py
+++ b/vlm_evaluation/generate_sample_data.py
@@ -7,21 +7,36 @@
         "image_path": "sample_origami.jpg",
         "student_id": "S001",
         "artifact_type": "Origami",
-        "rubric": "1: No recognizable shape, 5: Perfect folds with clean edges and symmetry",
+        "rubric": {
+            "skill": "creativity",
+            "dimension": "originality",
+            "max": 5,
+            "criteria": "1: No recognizable shape, 5: Perfect folds with clean edges and symmetry"
+        },
         "ground_truth_score": 4,
     },
     {
         "image_path": "sample_drawing.jpg",
         "student_id": "S002",
         "artifact_type": "Drawing",
-        "rubric": "1: No effort, 5: Detailed and creative composition",
+        "rubric": {
+            "skill": "creativity",
+            "dimension": "composition",
+            "max": 5,
+            "criteria": "1: No effort, 5: Detailed and creative composition"
+        },
         "ground_truth_score": 3,
     },
     {
         "image_path": "sample_model.jpg",
         "student_id": "S003",
         "artifact_type": "Clay Model",
-        "rubric": "1: Unrecognizable, 5: Realistic and well-finished model",
+        "rubric": {
+            "skill": "problem_solving",
+            "dimension": "execution",
+            "max": 5,
+            "criteria": "1: Unrecognizable, 5: Realistic and well-finished model"
+        },
         "ground_truth_score": 5,
     },
 ]
diff --git a/vlm_evaluation/prompts.py b/vlm_evaluation/prompts.py
index aa5a1ee..a09d83c 100644
--- a/vlm_evaluation/prompts.py
+++ b/vlm_evaluation/prompts.py
@@ -1,21 +1,28 @@
+import json
+
 SYSTEM_PROMPT = """You are an expert evaluator assessing student artifacts for The Apprentice Project.
 Your goal is to evaluate the provided image of a student's work based on the provided rubric.
-You must be objective and provide a score along with a brief explanation.
+You must be objective and provide a score.
 """
 
 
-def generate_evaluation_prompt(student_id: str, artifact_type: str, rubric: str) -> str:
+def generate_evaluation_prompt(student_id: str, artifact_type: str, rubric: dict) -> str:
+    rubric_str = json.dumps(rubric, indent=2)
     return f"""USER: <image>
 {SYSTEM_PROMPT}
 
 Here is a student artifact (ID: {student_id}) for the category: {artifact_type}.
 
 Rubric for Evaluation:
-{rubric}
+{rubric_str}
 
 Please evaluate the artifact based on the rubric.
-Provide your response in the following format:
-SCORE: [Your Score 1-5]
-FEEDBACK: [Your reasoning here]
+Provide your response as a JSON object matching the following schema:
+{{
+  "skill": "{rubric.get('skill', 'skill')}",
+  "dimension": "{rubric.get('dimension', 'dimension')}",
+  "score": <Your Score 1-{rubric.get('max', 5)}>,
+  "max": {rubric.get('max', 5)}
+}}
 
 ASSISTANT:"""
diff --git a/vlm_evaluation/requirements.txt b/vlm_evaluation/requirements.txt
index 7cd0725..35836a8 100644
--- a/vlm_evaluation/requirements.txt
+++ b/vlm_evaluation/requirements.txt
@@ -5,3 +5,5 @@ bitsandbytes
 Pillow
 accelerate
 datasets
+lm-format-enforcer
+pydantic
diff --git a/vlm_evaluation/sample_dataset.json b/vlm_evaluation/sample_dataset.json
index 52812f1..2e5e0b1 100644
--- a/vlm_evaluation/sample_dataset.json
+++ b/vlm_evaluation/sample_dataset.json
@@ -3,21 +3,36 @@
     "image_path": "sample_data\\sample_origami.jpg",
     "student_id": "S001",
     "artifact_type": "Origami",
-    "rubric": "1: No recognizable shape, 5: Perfect folds with clean edges and symmetry",
+    "rubric": {
+      "skill": "creativity",
+      "dimension": "originality",
+      "max": 5,
+      "criteria": "1: No recognizable shape, 5: Perfect folds with clean edges and symmetry"
+    },
     "ground_truth_score": 4
   },
   {
     "image_path": "sample_data\\sample_drawing.jpg",
     "student_id": "S002",
     "artifact_type": "Drawing",
-    "rubric": "1: No effort, 5: Detailed and creative composition",
+    "rubric": {
+      "skill": "creativity",
+      "dimension": "composition",
+      "max": 5,
+      "criteria": "1: No effort, 5: Detailed and creative composition"
+    },
     "ground_truth_score": 3
   },
   {
     "image_path": "sample_data\\sample_model.jpg",
     "student_id": "S003",
     "artifact_type": "Clay Model",
-    "rubric": "1: Unrecognizable, 5: Realistic and well-finished model",
+    "rubric": {
+      "skill": "problem_solving",
+      "dimension": "execution",
+      "max": 5,
+      "criteria": "1: Unrecognizable, 5: Realistic and well-finished model"
+    },
     "ground_truth_score": 5
   }
 ]
\ No newline at end of file

From 624bbe29342cb2a326f7aedc126f2d1d35962a52 Mon Sep 17 00:00:00 2001
From: ravencore06 <srinidhisadhanala@gmail.com>
Date: Mon, 25 May 2026 20:25:26 +0530
Subject: [PATCH 6/6] feat: Implement JSON rubric schema and constrained
 decoding

---
 vlm_evaluation/evaluate.py         | 11 ++++++-----
 vlm_evaluation/prompts.py          | 29 +++++++++--------------------
 vlm_evaluation/sample_dataset.json | 27 ++++++++++++++++++---------
 3 files changed, 33 insertions(+), 34 deletions(-)

diff --git a/vlm_evaluation/evaluate.py b/vlm_evaluation/evaluate.py
index b1d4634..3aaaf10 100644
--- a/vlm_evaluation/evaluate.py
+++ b/vlm_evaluation/evaluate.py
@@ -59,12 +59,21 @@ def load_model(model_name, quantize=True):
 
 def extract_score(text):
+    """Return the integer score (1-5) found in a JSON object string, else None.
+
+    None covers malformed JSON, JSON that is not an object, and a score that
+    is missing, not an integer, a boolean, or outside the 1-5 range.
+    """
     try:
-        data = json.loads(text)
-        return data.get("score")
+        parsed = json.loads(text)
     except json.JSONDecodeError:
-        match = re.search(r'"score"\s*:\s*(\d+)', text, re.IGNORECASE)
-        if match:
-            return int(match.group(1))
+        return None
+    # Valid JSON is not always an object ("5", "[1]"); only a dict has .get().
+    if not isinstance(parsed, dict):
+        return None
+    score = parsed.get("score")
+    # bool is a subclass of int, so true/false must be excluded explicitly.
+    if isinstance(score, int) and not isinstance(score, bool) and 1 <= score <= 5:
+        return score
     return None
 
 
diff --git a/vlm_evaluation/prompts.py b/vlm_evaluation/prompts.py
index a09d83c..2ae472f 100644
--- a/vlm_evaluation/prompts.py
+++ b/vlm_evaluation/prompts.py
@@ -1,28 +1,25 @@
 import json
-
 SYSTEM_PROMPT = """You are an expert evaluator assessing student artifacts for The Apprentice Project.
-Your goal is to evaluate the provided image of a student's work based on the provided rubric.
-You must be objective and provide a score.
-"""
-
+You must output your evaluation STRICTLY as a valid JSON object. Do not include any other conversational text."""
 
-def generate_evaluation_prompt(student_id: str, artifact_type: str, rubric: dict) -> str:
-    rubric_str = json.dumps(rubric, indent=2)
-    return f"""USER: <image>
+def generate_evaluation_prompt(student_id: str, artifact_type: str, rubric: "str | dict") -> str:
+    """Build the ``USER: ... ASSISTANT:`` evaluation prompt for one artifact.
+
+    ``rubric`` may be a string (inserted as-is) or a JSON-serializable object
+    such as a dict, which is rendered as indented JSON.
+    """
+    # Keep the <image> placeholder: evaluate.py passes an image with this text,
+    # and the LLaVA processor needs the token to position it (per HF docs).
+    rubric_text = rubric if isinstance(rubric, str) else json.dumps(rubric, indent=2)
+    return f"""USER: <image>
 {SYSTEM_PROMPT}
 
-Here is a student artifact (ID: {student_id}) for the category: {artifact_type}.
-
-Rubric for Evaluation:
-{rubric_str}
+Artifact ID: {student_id}
+Category: {artifact_type}
+Rubric Schema:
+{rubric_text}
 
 Please evaluate the artifact based on the rubric.
-Provide your response as a JSON object matching the following schema:
-{{
-  "skill": "{rubric.get('skill', 'skill')}",
-  "dimension": "{rubric.get('dimension', 'dimension')}",
-  "score": <Your Score 1-{rubric.get('max', 5)}>,
-  "max": {rubric.get('max', 5)}
-}}
-
+Output strictly in this JSON format:
+{{"score": <int>, "feedback": "<brief reasoning>"}}
 ASSISTANT:"""
diff --git a/vlm_evaluation/sample_dataset.json b/vlm_evaluation/sample_dataset.json
index 2e5e0b1..6bcada6 100644
--- a/vlm_evaluation/sample_dataset.json
+++ b/vlm_evaluation/sample_dataset.json
@@ -1,37 +1,46 @@
 [
   {
-    "image_path": "sample_data\\sample_origami.jpg",
+    "image_path": "sample_data/sample_origami.jpg",
     "student_id": "S001",
     "artifact_type": "Origami",
     "rubric": {
       "skill": "creativity",
       "dimension": "originality",
-      "max": 5,
-      "criteria": "1: No recognizable shape, 5: Perfect folds with clean edges and symmetry"
+      "max_score": 5,
+      "descriptions": {
+        "1": "No recognizable shape",
+        "5": "Perfect folds with clean edges and symmetry"
+      }
     },
     "ground_truth_score": 4
   },
   {
-    "image_path": "sample_data\\sample_drawing.jpg",
+    "image_path": "sample_data/sample_drawing.jpg",
     "student_id": "S002",
     "artifact_type": "Drawing",
     "rubric": {
       "skill": "creativity",
       "dimension": "composition",
-      "max": 5,
-      "criteria": "1: No effort, 5: Detailed and creative composition"
+      "max_score": 5,
+      "descriptions": {
+        "1": "No effort",
+        "5": "Detailed and creative composition"
+      }
     },
     "ground_truth_score": 3
   },
   {
-    "image_path": "sample_data\\sample_model.jpg",
+    "image_path": "sample_data/sample_model.jpg",
     "student_id": "S003",
     "artifact_type": "Clay Model",
     "rubric": {
       "skill": "problem_solving",
       "dimension": "execution",
-      "max": 5,
-      "criteria": "1: Unrecognizable, 5: Realistic and well-finished model"
+      "max_score": 5,
+      "descriptions": {
+        "1": "Unrecognizable",
+        "5": "Realistic and well-finished model"
+      }
     },
     "ground_truth_score": 5
   }