From 0fb42e880252d5c21bab0370ea4d9f4cea4fe592 Mon Sep 17 00:00:00 2001 From: ravencore06 Date: Sun, 3 May 2026 07:18:32 +0530 Subject: [PATCH 1/6] Basic voice AI model added --- __pycache__/asr.cpython-313.pyc | Bin 0 -> 1287 bytes __pycache__/llm.cpython-313.pyc | Bin 0 -> 2274 bytes __pycache__/tts.cpython-313.pyc | Bin 0 -> 675 bytes asr.py | 21 ++++++++++++++++ llm.py | 41 ++++++++++++++++++++++++++++++++ main.py | 25 +++++++++++++++++++ requirements.txt | 5 ++++ tts.py | 13 ++++++++++ 8 files changed, 105 insertions(+) create mode 100644 __pycache__/asr.cpython-313.pyc create mode 100644 __pycache__/llm.cpython-313.pyc create mode 100644 __pycache__/tts.cpython-313.pyc create mode 100644 asr.py create mode 100644 llm.py create mode 100644 main.py create mode 100644 requirements.txt create mode 100644 tts.py diff --git a/__pycache__/asr.cpython-313.pyc b/__pycache__/asr.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..814af6e9877c1bf683bb3b815732b22a76099edf GIT binary patch literal 1287 zcmZuwTW=dh6rR1sn{|9?9H5Pq?zlo#-4?4=6N!{dlo(oZi;%Z&1+`=?yJKftXU5Fz z0`*fL0|FI+5P9UWh(Ev+5@~;cqee=*^+P0{c#CAzO7w-Z>kEs@v3%ytxqaU`+8Jar z1_Jqb@rn0DM(B^|G8VYUGMI(q2V@~jdJl2QlIIbZTT)5+GYYReaneXl!KuK1a9@gQ zu1{jsakq&Fp%Z9nbfd{kHIXth|IL&Um9D488J4sJIFVpfL)Gb_ozykpYbnce98V`B zL%TNVKxLQ^eoG^!7#)v#W}dIPCXZ36lHj7&lq|Y1i2WCoZmW&-@y(G z+AgNd#{qQ-7rsqh>~9ck2d>A;-L=Xjfs;=IB7Dq=2-?2DP0lv)WyV@p^7CbaY!7c&&1JB{Cv%x3{wGQ{FA0D;c8-ef}O~xDc<|iJZ zeuFYkkZ`IU&lcwSuzQy@S}|&Pq(*O2_Bp+3w*&Gv=ZuFl)gjCHR?ri;f28o^!gmYbo44QXPp9uJe6{ekxqYdx zXL{M=d%Ag`&+hAIcJ(uNFa2uuB5Uj#qE>5Y;jLN@i14|NCBy#FIOi1e6?p?=~ZjJ?A1N}T0d8KcBnzg-=YK~{PN10to&ZYYX!6; z8*BO0PQk30%Fbc|^u1KJGOyg5*T60*;jD}PK2#)0dW_70IxEfbm%#H66T=_S literal 0 HcmV?d00001 diff --git a/__pycache__/llm.cpython-313.pyc b/__pycache__/llm.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..77cd0cf4cb7862f1bf2b1737f043c953c840e44c GIT binary patch literal 2274 zcma(SOKclObaw4^9oKeBK5gva;-qPnJv2pW`bnr#npCaSHlbwWDm12}tvzv;tarCF zYiPoyCzJyhBqG!bspSORxPp*?6W86g$ktGaN+3ioC~Ei|;mz7kLXt{6*|YD>oA>7Z z=DqbqB8C9whyF5_A_)B~h$e&^$ok8GtRM|((q(i>qS7Uq%2`3nmqRqvg?f<|N+T_N zLas~Da7k8`RY9mS?>JX+?FqX`%nLRh*DFjnuT1cS*qyc)h-EAhI$5Yo)>o-PtY-mP zL3uXh@UOh=#>=RMEmTQc~0iZy&dXsYSHkZk}sDmTyOkSR0-r_cMK!rP83GU$g3iG;)=MCoCbP*dxrb;}_h&jhZ zsIq)`d?cS^5c50(?}V%ba6WG&XaQv++w*#fn25bTcqITO|v}UEb=JE zAmL)nU)&{c15K_76B~x*4tf$zc*zToqZfZk9jT^HK1`kTPQCspHMJamKl!9nt#%GS z>>Tz+rXO|YU@KP3_AQ5hh-TJ77!@`W3Pc?K!prp}?5;rgr-0ZFQyXdBD|X zo}(kRQ^)SnfqsMr<=uVOLZX}me@G6nM6gD+BzUXjls=@kPIB3Cq0E!;I4ab_F0q(R z+3G(4upKHIW%wvXchMonZ%`zwqEMz+2VpJE1@QaX2J-7!HOiB^3pzwk!eyN;@K{;D zjZI>e+<6{%^db(P9y~7IVcD1xhrpAwN>ndn22GK-6m87(vSSjixVD2A>K}*ibxGMF zR0lH&74;2vwX+4o!OS5AXbyGFOij?ez~vEQ6`*tQ7GkqHizdWAXr+aD9xCWA58D=D zTw(LNLwMM8h`C~cn+KfN;hCg=QJA2(% zXTAKJzIwxRZ+q?iUi;#5wAOlHxs{6NtA>I+2Z;T?lgVT!foI#mu!ia#<#8fm2Dhi!+O^ursmoSV!N5O?b_Nd5au!#1^ zL`vR6&tye8{AV(ybkz@vGpfSlForBPXVWr_l3)M@V~QSWs-R*5Z0hFGGj)?k=_|kx dvxteNv=)*i=_hpNM-=}%+#_{8L$Cy5{{r1J8dm@S literal 0 HcmV?d00001 diff --git a/__pycache__/tts.cpython-313.pyc b/__pycache__/tts.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8a04799021a776db37ef1cebca8f0b8265d415db GIT binary patch literal 675 zcmZ8eL2DC16n?WiO>EXgt!XVnVG%s+p_~4VLAi%e7gCH&9*=I(u9%0`krm(W{%T6#I!sdk7$lWG80C%7^Mw|p>!}MMX zpVY?g+(Je15{=L&f#q9^_!qVD1Q#}^bDeJoP=O^YcX0t-oSeN_1Ncu-zF_hyKdB2X z@aVdxqN&(f73uf1{$$1Vin^GjO38yIO=BJ_?X;MBF4_qbD%Fns2TUa*=1SYLnd*}0 z@H*dq*^HHEYpU3R(gns_F=v9JyEM1yx~{&zx)KJG2^mW4j>ASf>g+SFWVpKg_(iC8 zSv^SvZp0D8v3o31As**mlIn_a-BRr>nJ^aZitA`;Y~?FVT{taQzl5K{&#$|W&I@pL z=Ga*nI15KRKb*VW#tAJQ)42hi>%Tiv-`;#phtxZz)4fK&+As7LhjbnlAJ;yt_0*77 zN9x%Do&8B~WoQnbB?`EVMt%oz!_?O>=_?QR+uNPD?4h`c&^)p Date: Sat, 9 May 2026 13:14:45 +0530 Subject: [PATCH 2/6] Initial Contribution --- README.md | 14 +++++++++++++- vlm_evaluation/requirements.txt | 7 +++++++ 2 files changed, 20 insertions(+), 1 deletion(-) create mode 100644 vlm_evaluation/requirements.txt diff --git a/README.md b/README.md index 04e1239..75b9c74 100644 --- a/README.md +++ b/README.md @@ -1 +1,13 @@ -# C4GT_2026 \ No newline at end of file +# theApprenticeProject (C4GT 2026) + +This repository contains two distinct AI initiatives developed for The Apprentice Project: + +## 1. Voice-Based Conversational AI System +A voice-based AI system that captures audio, generates conversational responses using an LLM, and converts the responses back to speech. +- **Key Files**: `asr.py`, `llm.py`, `tts.py`, `main.py` +- **Dependencies**: See `./requirements.txt` + +## 2. VLM Evaluation Pipeline +A cost-efficient Vision Language Model (VLM) pipeline designed to evaluate student artifacts (images/videos) against 21st-century skills rubrics. +- **Key Directory**: `vlm_evaluation/` +- **Dependencies**: See `vlm_evaluation/requirements.txt` \ No newline at end of file diff --git a/vlm_evaluation/requirements.txt b/vlm_evaluation/requirements.txt new file mode 100644 index 0000000..7cd0725 --- /dev/null +++ b/vlm_evaluation/requirements.txt @@ -0,0 +1,7 @@ +transformers>=4.38.2 +torch +peft +bitsandbytes +Pillow +accelerate +datasets From eb5b74e190438680d32a29ea21b270e0efa47a13 Mon Sep 17 00:00:00 2001 From: ravencore06 Date: Thu, 14 May 2026 22:40:19 +0530 Subject: [PATCH 3/6] VLM Evaluation Pipeline --- .../__pycache__/dataset.cpython-313.pyc | Bin 0 -> 2739 bytes .../__pycache__/evaluate.cpython-313.pyc | Bin 0 -> 7956 bytes .../__pycache__/prompts.cpython-313.pyc | Bin 0 -> 1043 bytes vlm_evaluation/dataset.py | 49 +++++ vlm_evaluation/evaluate.py | 171 ++++++++++++++++++ vlm_evaluation/generate_sample_data.py | 57 ++++++ vlm_evaluation/prompts.py | 21 +++ vlm_evaluation/run_benchmark.ps1 | 29 +++ vlm_evaluation/sample_data/sample_drawing.jpg | Bin 0 -> 3177 bytes vlm_evaluation/sample_data/sample_model.jpg | Bin 0 -> 3177 bytes vlm_evaluation/sample_data/sample_origami.jpg | Bin 0 -> 3177 bytes vlm_evaluation/sample_dataset.json | 23 +++ 12 files changed, 350 insertions(+) create mode 100644 vlm_evaluation/__pycache__/dataset.cpython-313.pyc create mode 100644 vlm_evaluation/__pycache__/evaluate.cpython-313.pyc create mode 100644 vlm_evaluation/__pycache__/prompts.cpython-313.pyc create mode 100644 vlm_evaluation/dataset.py create mode 100644 vlm_evaluation/evaluate.py create mode 100644 vlm_evaluation/generate_sample_data.py create mode 100644 vlm_evaluation/prompts.py create mode 100644 vlm_evaluation/run_benchmark.ps1 create mode 100644 vlm_evaluation/sample_data/sample_drawing.jpg create mode 100644 vlm_evaluation/sample_data/sample_model.jpg create mode 100644 vlm_evaluation/sample_data/sample_origami.jpg create mode 100644 vlm_evaluation/sample_dataset.json diff --git a/vlm_evaluation/__pycache__/dataset.cpython-313.pyc b/vlm_evaluation/__pycache__/dataset.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..deda7c917213902676ddfdcd6ba6009d329f7bcb GIT binary patch literal 2739 zcmbVOTWl2989sB{o7ecZHnwrh!7**Nv6fQoP`0gA+n`{B4BG9cw7b=GynA*TYj$Qi zbCzIN1*A%qQBwg`kV#qzl9#$~Jmx7U-_80+*exWV?CPc^=xClsXO}55ia))F|Rysp?bCkFQYx#^kr*MUm zelkL`${@+A6LQmvs}(7!eT9qpq~vL{Rina+O<9w-6v7_Ejwvq8vQZQ#4Pm%Ue5HXGFIs|?UNgkSV&EPRgD)cr z{A~?Oc+`bX3wM1wLUP1{-6rLOZzdVE*+ZG=o()*$q=}aGD=ndMl#qN|pqZ1hQVs63# z2b|U%(-xrYP=n58&&|`aX)#)KY+;zTX;&z_Y*=+en2t@WOc-dHj^$$%=vO|+*@%gS z;-_#VO-E4ah`;3?aYenvY;iQTSgVYT?O3$-b+d#PN56M$x6o*ni7u9E4CUvzSuv{S zZXI4<;AYV`PSbgZvU1tsVvJ6t>3PP>tSD&Nu}UsA>=LbU=E5!f@7V36!kxNZ(gm-J zi@IBMIG#4{-}2XEG?hxlei8E|KEK@eQ-fnX()462rhcH7N2%@LLEt4tGfdQdEy}7j zu@repl23N zIz=c4$bVsG{Pmv{pkk(K5Gh-jMRuM$ix9)YWvi<1)IcG)TW!gU=sGwNy6*MwY3o!V z!FF`3z^;?cAu{;#Tl>}%eJko)(J$NDZyvvK{O071$-BuXZKDs{MmM^8Z<+Ud9(NsH z>)IHi_h#Rny>I;S>@D@}=re_Mf2+}T_-Tl=cQ+E>Sv#@zYGdMOjqaa6j^{VxU5)P9 z&*F2>qM-Fz*Wqtm?BfrIrY4n-CS%j_a1mN#n=lsmw>1Xr4#{uJawM}GeOd-VAC8iw zG@q0`#bP!`^i4{<8wMBcDNg|R!nzJph{ygHEVE`ut2C?_F)+Hy|Lpza<52y1>>$AIe~0gm7yVqA2|)Fnlv8Xik1_QrgE`ql0MFZSn(5smge5?9 z5T{Tsl#{bsN**PLNWL%74HM%fh;ukU5bOYaD+u4Y@IG=(4eTjAvv%kp^6*r6{@cy5 zER{(B)AL?z2P!=!^ZN91;uPlq1K7Wax4xJz%V`SLrui!`5+HA~-LE}QsW8E@$~itv zUbtj(+o-Z;!)vB%EC4zz0+!P4H2E=bCituh3qM2>58YC zf>k+uEb$@>075!=_BzUS%hGr5%^_*Tlb9QU5pcz)W(&0GI$RB%KpPx*u?@C`hrrbR z0N8c1-qCY&>BiFCo*P$J)b-wh)eE;T+_fI{zPb|IRKwAMFZU0uzIpr2)oZt}-4}np z`rg$?`(Il*y>WW()9@$ZPuo9fZ@lr#C#Q{vr;R@?+^f7(xo5p&t$qLT@Q)jnODpF# z5(BGqf0(;_?NQ>$lf?0diQ^w=j}ntBr#9oH@4$Nh&_6V#Gqy>TXzX84qoi|Si^$Od z1eL@qYlCa?2h$HOJt#K1(~sjnSWoo-{?xxWHBk7*?T5?1KixB(CVx%GW)3MIt35MA z%Ev<*%#*U8CqEza$tXf4<_BUD)Ag!Ts#`dZ>-wd-VYMV3x?VQ9D=gDywgY0t6`W(e zax6QJc;)r#x{I$^Q8$F(=0aUCSJ(eS_yJTw6ygX8KEsJS2<*3HQ|{BwNcUfUN^t!8 zyOK8i&rqipYnq{AQsdZYeo=YK`PsA0%JH4yf08=~c#$7AzhIKT1_EpyKNg#cBuSr> Vw$DlQ@1*ycIwr-wCcu2l{{UqiUm*Yh literal 0 HcmV?d00001 diff --git a/vlm_evaluation/__pycache__/evaluate.cpython-313.pyc b/vlm_evaluation/__pycache__/evaluate.cpython-313.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9b7b3414831ddaa86bf94de9817b9872e95a0b65 GIT binary patch literal 7956 zcmbt3TWlLwcJtzU_}0smL{SebrfpG{ABm+zwiQ`^NT%fFh*<|xK#a(dOq&|gcZPlt z>xHn}1?V(eOW3BClPoL)i;V&Vs0tLV+n;99Vv(X)fHakgJ5jeT)<6D}V;8}0KiYGL z9FlfqZ-QQ%Gk5Mi=iGDdJ@>ry#A-DY(EjK5x(oev1o3yiafhM~)Gw z^d=Fhep852-!!7tH-i{>Q)9YO9n$$QEIp!MEN+AUnr$1`lz8L=Mgf-%id1PZwu~r+d1NvkaUg2;N8#)9w!9P?<>|F-Ojk ze(NLrJ?@wl&8hthKLEJ{WNNlGHc z^=4d_hQ!#L*JMEgjM?}c(6SF9IX=rr<>S1}OM>jD6f5^OH#riFoDEF`&rT|yxe`es zB3$MZX_a;)g_83rIa=mWeHq|M!|%yq67T6$WiF)cc4dShI$*~+s-F~zFcH$#Oq9DE zJ*C%1Ifm15`hLduB3MHWtdTSI(=UWHa~7b+z68R`**H7rcnO5FW;NGKS94FnNtPN{ zSAn!>W7W5I(p`kVemv9o)|sGhq_Wq(v+HT8mL(=gQ4~+%^UYX#dSY;BkrXeV zpUl*qg_loG`tTSWq;Hg)7zZU`x!kiZpsSMlGGoOqJiHOM03z?<_ zR-~Tuv%RHGU*J&BYcuferw|KlDu$Ra%cm1^#@y3$Db0&=JR>M(NlqdmBBQhbTB8RY z05dli(*DFRI&82t*WNkxeE%?e6qb+0A~36tDYPUbg^r7| zVphTNj0791SHF-1)Civv93Ges1F|$Np|}{IKAw!K=+gA?!IP8I@_At>l|ryaJSxD& zx*$a5>C1`vnu{@Az9T{)bqzHGQT5?~j`S%!7Klx2{f4#czO^gYH=4JOEu1Ja<_)GX z&omZ|^&btdcI6I@ZPA3T=`lg+^jmsDXDLlD+dk!T1Cu!n8?GQNv#Zme9aINTRR`^B z*3U-O!HJr|PowvasDq=G!Cceszjfb>s-tffEzS)~OWx8_uy}Kf_X+SzL~HME<%a(n zX5J^luzbRCtC5D-8G0T~_;&{fw&iS24$g%b-h< zc|N6B;5J3imsauX5jKB{P90MWv%s3%_bS4>sn}=1_<{u%WWAYpJHZm1Olm{qY=DAWrBcu>NYeS!F$!M^F;SKQIM*}3cE!MlUS z?%hS-&c`NQn>lZ6-m(#GojGIkL)O2+_TOjwi$+(mrMKwbv01m{R&Xf@fL8s&$WxYZ zdN&+B_Z>Z}7YdI4h0{fgBWG#OG0n&e>qIM>10R?<)rZM5wWrUp$U zYxH$5=quJ{wS~!$N$XeN9Hr@s_7*9Q3R4m_NDT~>yfjCvJ>c03I@(2mr-pxiW;|o# zh9^QJ1Jly(UDL5W{zq8EN9A*(kf7)#frryp%%@L|1K{uwH-bC>qv+;PGM!TGws2Ke zbo0CnU^*!ZrdGnDRL;!`^9YP?M4dyx60skC(g}EgondV^PrrLQ*YL_}-KXfUuYYpA zzz!~q{La8`I^2sRx5k#nR>+@?7aW~gUD3iW>VA53+4p{9hJ(>2xZ0 z8R+8ssh$o3f#7s1Cl@BGy9BqDRO1-H+(&c~Q{@vNAbRo;Q+OBDc8)w$rtPl8Iu$B( z4Ho^av6vbx=G$V0sUGmjqOJy4=K-s;4OjXwRn?nw`!OD;?*WN8gCJN zHdr&;VR5E_4y=`V9N{f{=EmUNJpjKahvCr)ItZ2-cs~VBCk?R^fqe>@QpmYy70&bqjs&l!&=I9Wvx)Oy zgyK;t@J1pT1@C?&P=PrHI3kd4U%raM;NNcH;sb5FtiP&nt8$o>FvRgT>+qBA!2{cE`=YhLdKX5D26mDBai(Z@ zW=#)Sd)|KPqv*<>b?;!oeyYHpS{T{1yYjBdU$|GM*4vL2T$2U+H z+1jykINy0-Et>D_zt>l29o%f|TH#jPKfJ!#(7b%&_V_B7Z`rrjUT8S@*rI3kTZCSx zFL??Opwe)bgDuk&2DYHqQ)s!4NW46v3swAcg=WP|XmMu@R+*~!Zm!~ke6^z#WSEt> zuu97Y`TGG{vm1Z`>Rf8u!)uTtIfYXkwN}w4_It>TM1vTS(SOKR5&o$u4 z05kYjJl9~xc%X*bRJ{hUJ*DlbKn6;wCa1HP5E=OC;HRHz)M!oZsPwjxP?Lp&9J-%! z6DP@2%iuK)=NhrG7L8#e$ErOO^sH*n95$;)16G*AxsckzrVx%WpKCbl)_ma$ARK=Z z!pXTb8l1bI7A;}AgE;q(kbBqn)1kKKmW0p$zXtMPnz)7ul55oVM`UZL`o6hlwXQ>f z_YG5_j_qK$J^P3-dz^%m?E2*DEOzz4HW6nE8oQUzLjI}nQ$Ji zxt|hkTua!7tqu>XhixJMH!-*tO(L!pV}|y86Gpqs6Lz5Sl6lcS)uUmD?HXOt@q&}~ zRv?^VCWLd6=h_SKYIVpFc82XQfUD6Yt}Re=ufmQf*sPOeFa^8fEcY)%U$-q64vkOP zb^W#w&ME{@{jlUPr*NdDwsUW3=O((o%S{H@Z7dq+3(#R#nBm$3wCJ9~j$Ca3eNgLH z-&}{bHbsbaQ?F_>p~G6Q`i30b9j@a#xh`B6dtT#eEZfeiyfvfU+m60c9SyreSQ(#d znsz}f3l<~|H%+V;8*1q83fF^W^oJX4!~v#CEeLiDM|(ApHEY$3?%sZ9fLbN@!nG7;!;%bU5O_Wz8S$MC6ZTsNzpfaHXO**pMmlOAXf@}xD4RS(1Ace zO$sueatYTLmwW(P^(sM#&wL%DjXkXO(cMubXZ zT*Nz;Iba_A&Mo5_d28rQcxdwU#JDdwG8sBO%w_g}XEK8s%IC{a1HOziRI0A}B>u7x z!&T!_-0m&aEU`O47qO6RT#8gwRMDlNEGon9Yh`DIgESPvkIKfVMI?k{KZv7waZW%u zLR08DK~^X^sTiO#Jcsz0V$}YyfaWoPJ|;w=Bn`EWR08U6I;hyjQ)*cQYRjpVAjZ^K zSz(gUS1dKfVuis)V$jv}JjCvCS(umnjS7QHyhz5pVdsNzPJ^!Cl|T$3CMo)IAWz1j z9HXWa3WF6L&0ru1{qdhTKp~+F1BVACKuIx|SB(QY)Wk8rAV)<<5JqHc^$dac0MSZl z3OTEg7Zvid1on);Il(rqlqN9qP})P_Fo<#ZNjqRWAvdtp|M?pW#~)f8HwSJEY*;$- zmW~ZeSKiXKX|(2SW9!B<4;fRAZC~m7IB++RYwufg=gf!JnO8S$t;?}H;%%{D>sdIt zX)tGB`|%GJFBJ?;D(3EW{)$%P5%4qe{+2;`PpR7Ji5+| zZ8l=cy?1->9sj4`KLqoQqj{!oVdR0$v2e1;+8|}I+80hdv^xKC=STGN$q(6rYv<~j zFRcApvgmAD@?`Zzd;Q{tyuCfk6detVSC;Hq-2;o`#^7(+x`z$>ijB>;t}b0&VOQgY z#=}LA_s*`{yH<{_oi2Ee6y1%t4lf;EKD9DaaQhz{m_~Ef@|Y!D%`2AAyYkM74QD9t z3_*@^>%!87jfS3lL(fJ-Z@!`T#)-A?6Pj|Fv-+n-!sfpD&b#mYG*aB*z2p7B`V{rR^2MNiuk0}NzGzorRWD-=52EgPd3jq zvIa<^94*UN3ifU&npx{^25$tH_ZHY!R;fI@XM^p_vwiFAfd|f}Tdhm2KWp1E5GE(o zLz{Q48VjDjti3F9&UdiTcqq$0Fxqe0ZrC1pJMNsneSYN!Yi}34M~j{Ni{1N+zFo!k zogX*dZCdTU*I#HKf$Ym_drBKk=B+xY*R5N+SKnCQGhAeyH_zNSv)s1M`ircq#L))( z%Hs%0*an`?kc8d4EEH^e7fx;&iRQLDy|;UdO)Yn3&A1?h6HrH+5G81*pMYrd% z-DuM>|JNtpp)J8XA?z7<1Oow^{?zVm@Jv?1I1gU7W)=E7emYBqLiKgcVo+AnsEBC z76Ld%XF+Q>yn(zokZbMDnfHCc^nD!%FHf{p z80@2e(`F6^Ouy+f2ag&QV+5+}P!Ndt$w_}bf^rIhvhYx-q=aycgj8#%+A9_cs-iTT zMDy@BAhp=8*4EWpe94kvd#4ymra?luex+KslG((Os%o%QL#5tcygdFBTO^_yST#>B z-EiX@;EK};;Rw15Xb@Hq(m8l+(IiQJN!Y(6EMF2P^*{R`ME%zkLsFouB=z5Bg6#TF zV(=@ynQVLNagi-w?befyuUrmN|5f+fB)Na%tO-#Ayhdbe3akf1$WI{&Pel>@b3z0F!qiVFHt(<42C5 z3_F|?&b^?Ixrn+X5b&k&dX6hN?m@hHN^m6%8ALpn;5wsc#1$nSFd6Ps$HyMWf@X*W z3rGkV9eQ2T#ooZ_6HI&9$>8sCd`{V0+;KR>X>k2}7Im2Crs#MOae+IK-H|tXL;3!0 z1}yJ+r?CCDV^&G(x+?$r>iAhZ=`~x(b}wLz^bf|Kk9njC!pbV;WK{M z<;_%u;>oh$k*pmfJfr~tc(WSxsPEHr*$7bffg{SgRXuFf%J?`fwUw&<=;7lN{dujn z`>axZm4z6*g9bA56o{Vadd0H#tY+n)SuQHEVI5h`+J3vP7?TI&N3DUNmjc=3g7myU-r`gZ8|YK^7a@ZV(Dy3P str: + return f"""USER: +{SYSTEM_PROMPT} + +Here is a student artifact (ID: {student_id}) for the category: {artifact_type}. + +Rubric for Evaluation: +{rubric} + +Please evaluate the artifact based on the rubric. +Provide your response in the following format: +SCORE: [Your Score 1-5] +FEEDBACK: [Your reasoning here] + +ASSISTANT:""" diff --git a/vlm_evaluation/run_benchmark.ps1 b/vlm_evaluation/run_benchmark.ps1 new file mode 100644 index 0000000..47c69e2 --- /dev/null +++ b/vlm_evaluation/run_benchmark.ps1 @@ -0,0 +1,29 @@ +param( + [string]$DataPath = "sample_dataset.json", + [string]$ModelName = "llava-hf/llava-1.5-7b-hf", + [switch]$NoQuantize = $false, + [string]$OutputPath = "results.json", + [int]$MaxNewTokens = 256 +) + +$QuantizeFlag = if ($NoQuantize) { "--no_quantize" } else { "" } + +Write-Host "=== VLM Evaluation Benchmark ===" -ForegroundColor Cyan +Write-Host "Dataset : $DataPath" +Write-Host "Model : $ModelName" +Write-Host "Quantize: $(-not $NoQuantize)" +Write-Host "Output : $OutputPath" +Write-Host "" + +python evaluate.py ` + --data_path $DataPath ` + --model_name $ModelName ` + $QuantizeFlag ` + --output_path $OutputPath ` + --max_new_tokens $MaxNewTokens + +if ($LASTEXITCODE -eq 0) { + Write-Host "Benchmark completed successfully." -ForegroundColor Green +} else { + Write-Host "Benchmark failed with exit code $LASTEXITCODE." -ForegroundColor Red +} diff --git a/vlm_evaluation/sample_data/sample_drawing.jpg b/vlm_evaluation/sample_data/sample_drawing.jpg new file mode 100644 index 0000000000000000000000000000000000000000..68eafd6f7a099fae3825e5e490d383e0d363d052 GIT binary patch literal 3177 zcmdUwdoz;a?3rp3gsGx;de%zbxy2v)~UVrAA7&Q^}K6+f4^tF>-qei=XoEf z8yW)QmZlb_00sj9#(x0R3rqk?NC+t;h(aQfXf#S#6uV7SL_~C##11hmZZ}>Qw_8R= zPDxW)PC;E!MuwoPqP|~CM@I**tbbTf`;g`V9qo-nU}!X2R76yI+cs%!c^P@_uU^n2 zfI)#FFa(Dw009gPj)6f90MCz85cY9_e=V2*93d!#M4^R6_zw5QfdC8+7eK%T1rZ3o zcLe`CKwtzV zc5Z&*!{X91cV&YM2H>Bt`1>cYEnFBrmjD6*M<6%2U;@E>gJTea@>)U?N6AQ6za0wN zmrz*ar0lZCXvG6IGm>upt-?E%bYAV9-9YsIDfJcBIW)O?$ep44gX)l9G|iO?Cqv+`S*uFSrY*W=&vM*r?t#Bc@suZOb^O}N zq`w7T#*&W+ZZTX!E(DbBne#C!*8RJ1)GgY%YrrUOPXBJN70-HJn2>YVz&I}!t?W}) z;@7^2Jt|_o|5`{3<&By@Az8Xk?h&44>mJbfYk*z!sq3BRXXZUZ0^higK_Fy+C&nqh zqbjO!S@xpaO_F!A*^{m(T_w}@e}MqC0>h+Q-bKhZ*wN0(U6+t@4=Y`^yF;L9dkvpG z5uCp--Z0Kdqj!9l&3Dql1!~!WPU5-og%fH!C%SvB*!mPsCWlp6x zyO<6g5U7uYk+2l~ItJmEz^$LJJge|%QXD@O*1lj9mPsyBwNtb)ydCgEPL^s+#`8dX zxicA#_}h(^WE<+2QKh>BJi&lLDBga3OZjb-={+rIEz(P)5yJGe8hK%SI zMU`fCbKctzEi5UAQa>hMJ@jqgnm5Z+ z#pPH;f_2KzhKn6FnsIENcn(8VEL(Ug90vh~X$Eof1q4nfS59ns!LFxc!XdCP;CIgN z$18@?=_Kx|@XXeF03o66g&m^7K_?y?`rB$%|RaH~B(!%}U5ZAVK;*;sKoXJ8<%9!6&Rc_R^ z5XyLbbxmSzb`zPJ__+`9)%I2hyz#xuOW}_#d%AsL@2tzX&xb1NU>^jUM+!=s*2Ul9 z*JCHeo)Yk{e(etK5I|i>gn&uVmC-Do zf8w%9IeleMF;NKuBR3*W-ah|?o*vgTN2_02wy%!ox{ z57+);#I~wl&Em=gNp?e^4#&TWpZB|Rl_4OLyD6|wf#aR?``21{{GOhoZzAIlflYB0 z*}hvYofl)ZyLK literal 0 HcmV?d00001 diff --git a/vlm_evaluation/sample_data/sample_model.jpg b/vlm_evaluation/sample_data/sample_model.jpg new file mode 100644 index 0000000000000000000000000000000000000000..68eafd6f7a099fae3825e5e490d383e0d363d052 GIT binary patch literal 3177 zcmdUwdoz;a?3rp3gsGx;de%zbxy2v)~UVrAA7&Q^}K6+f4^tF>-qei=XoEf z8yW)QmZlb_00sj9#(x0R3rqk?NC+t;h(aQfXf#S#6uV7SL_~C##11hmZZ}>Qw_8R= zPDxW)PC;E!MuwoPqP|~CM@I**tbbTf`;g`V9qo-nU}!X2R76yI+cs%!c^P@_uU^n2 zfI)#FFa(Dw009gPj)6f90MCz85cY9_e=V2*93d!#M4^R6_zw5QfdC8+7eK%T1rZ3o zcLe`CKwtzV zc5Z&*!{X91cV&YM2H>Bt`1>cYEnFBrmjD6*M<6%2U;@E>gJTea@>)U?N6AQ6za0wN zmrz*ar0lZCXvG6IGm>upt-?E%bYAV9-9YsIDfJcBIW)O?$ep44gX)l9G|iO?Cqv+`S*uFSrY*W=&vM*r?t#Bc@suZOb^O}N zq`w7T#*&W+ZZTX!E(DbBne#C!*8RJ1)GgY%YrrUOPXBJN70-HJn2>YVz&I}!t?W}) z;@7^2Jt|_o|5`{3<&By@Az8Xk?h&44>mJbfYk*z!sq3BRXXZUZ0^higK_Fy+C&nqh zqbjO!S@xpaO_F!A*^{m(T_w}@e}MqC0>h+Q-bKhZ*wN0(U6+t@4=Y`^yF;L9dkvpG z5uCp--Z0Kdqj!9l&3Dql1!~!WPU5-og%fH!C%SvB*!mPsCWlp6x zyO<6g5U7uYk+2l~ItJmEz^$LJJge|%QXD@O*1lj9mPsyBwNtb)ydCgEPL^s+#`8dX zxicA#_}h(^WE<+2QKh>BJi&lLDBga3OZjb-={+rIEz(P)5yJGe8hK%SI zMU`fCbKctzEi5UAQa>hMJ@jqgnm5Z+ z#pPH;f_2KzhKn6FnsIENcn(8VEL(Ug90vh~X$Eof1q4nfS59ns!LFxc!XdCP;CIgN z$18@?=_Kx|@XXeF03o66g&m^7K_?y?`rB$%|RaH~B(!%}U5ZAVK;*;sKoXJ8<%9!6&Rc_R^ z5XyLbbxmSzb`zPJ__+`9)%I2hyz#xuOW}_#d%AsL@2tzX&xb1NU>^jUM+!=s*2Ul9 z*JCHeo)Yk{e(etK5I|i>gn&uVmC-Do zf8w%9IeleMF;NKuBR3*W-ah|?o*vgTN2_02wy%!ox{ z57+);#I~wl&Em=gNp?e^4#&TWpZB|Rl_4OLyD6|wf#aR?``21{{GOhoZzAIlflYB0 z*}hvYofl)ZyLK literal 0 HcmV?d00001 diff --git a/vlm_evaluation/sample_data/sample_origami.jpg b/vlm_evaluation/sample_data/sample_origami.jpg new file mode 100644 index 0000000000000000000000000000000000000000..68eafd6f7a099fae3825e5e490d383e0d363d052 GIT binary patch literal 3177 zcmdUwdoz;a?3rp3gsGx;de%zbxy2v)~UVrAA7&Q^}K6+f4^tF>-qei=XoEf z8yW)QmZlb_00sj9#(x0R3rqk?NC+t;h(aQfXf#S#6uV7SL_~C##11hmZZ}>Qw_8R= zPDxW)PC;E!MuwoPqP|~CM@I**tbbTf`;g`V9qo-nU}!X2R76yI+cs%!c^P@_uU^n2 zfI)#FFa(Dw009gPj)6f90MCz85cY9_e=V2*93d!#M4^R6_zw5QfdC8+7eK%T1rZ3o zcLe`CKwtzV zc5Z&*!{X91cV&YM2H>Bt`1>cYEnFBrmjD6*M<6%2U;@E>gJTea@>)U?N6AQ6za0wN zmrz*ar0lZCXvG6IGm>upt-?E%bYAV9-9YsIDfJcBIW)O?$ep44gX)l9G|iO?Cqv+`S*uFSrY*W=&vM*r?t#Bc@suZOb^O}N zq`w7T#*&W+ZZTX!E(DbBne#C!*8RJ1)GgY%YrrUOPXBJN70-HJn2>YVz&I}!t?W}) z;@7^2Jt|_o|5`{3<&By@Az8Xk?h&44>mJbfYk*z!sq3BRXXZUZ0^higK_Fy+C&nqh zqbjO!S@xpaO_F!A*^{m(T_w}@e}MqC0>h+Q-bKhZ*wN0(U6+t@4=Y`^yF;L9dkvpG z5uCp--Z0Kdqj!9l&3Dql1!~!WPU5-og%fH!C%SvB*!mPsCWlp6x zyO<6g5U7uYk+2l~ItJmEz^$LJJge|%QXD@O*1lj9mPsyBwNtb)ydCgEPL^s+#`8dX zxicA#_}h(^WE<+2QKh>BJi&lLDBga3OZjb-={+rIEz(P)5yJGe8hK%SI zMU`fCbKctzEi5UAQa>hMJ@jqgnm5Z+ z#pPH;f_2KzhKn6FnsIENcn(8VEL(Ug90vh~X$Eof1q4nfS59ns!LFxc!XdCP;CIgN z$18@?=_Kx|@XXeF03o66g&m^7K_?y?`rB$%|RaH~B(!%}U5ZAVK;*;sKoXJ8<%9!6&Rc_R^ z5XyLbbxmSzb`zPJ__+`9)%I2hyz#xuOW}_#d%AsL@2tzX&xb1NU>^jUM+!=s*2Ul9 z*JCHeo)Yk{e(etK5I|i>gn&uVmC-Do zf8w%9IeleMF;NKuBR3*W-ah|?o*vgTN2_02wy%!ox{ z57+);#I~wl&Em=gNp?e^4#&TWpZB|Rl_4OLyD6|wf#aR?``21{{GOhoZzAIlflYB0 z*}hvYofl)ZyLK literal 0 HcmV?d00001 diff --git a/vlm_evaluation/sample_dataset.json b/vlm_evaluation/sample_dataset.json new file mode 100644 index 0000000..52812f1 --- /dev/null +++ b/vlm_evaluation/sample_dataset.json @@ -0,0 +1,23 @@ +[ + { + "image_path": "sample_data\\sample_origami.jpg", + "student_id": "S001", + "artifact_type": "Origami", + "rubric": "1: No recognizable shape, 5: Perfect folds with clean edges and symmetry", + "ground_truth_score": 4 + }, + { + "image_path": "sample_data\\sample_drawing.jpg", + "student_id": "S002", + "artifact_type": "Drawing", + "rubric": "1: No effort, 5: Detailed and creative composition", + "ground_truth_score": 3 + }, + { + "image_path": "sample_data\\sample_model.jpg", + "student_id": "S003", + "artifact_type": "Clay Model", + "rubric": "1: Unrecognizable, 5: Realistic and well-finished model", + "ground_truth_score": 5 + } +] \ No newline at end of file From 002561bffec8e20a42a4084b674c97b920cf04c9 Mon Sep 17 00:00:00 2001 From: ravencore06 Date: Thu, 21 May 2026 13:17:30 +0530 Subject: [PATCH 4/6] Add Input Validation --- asr.py | 16 ++++++++++++++-- llm.py | 37 +++++++++++++++++++++++++------------ main.py | 27 +++++++++++++++++---------- 3 files changed, 56 insertions(+), 24 deletions(-) diff --git a/asr.py b/asr.py index a7fcb05..3a7e099 100644 --- a/asr.py +++ b/asr.py @@ -1,15 +1,14 @@ import speech_recognition as sr + def capture_audio(): recognizer = sr.Recognizer() with sr.Microphone() as source: print("\nListening...") - # Adjust for ambient noise to reduce background noise issues recognizer.adjust_for_ambient_noise(source, duration=0.5) audio = recognizer.listen(source) try: - # Use Google's free Web Speech API without API key text = recognizer.recognize_google(audio) print(f"You said: {text}") return text @@ -19,3 +18,16 @@ def capture_audio(): except sr.RequestError as e: print(f"Could not request results from Google Speech Recognition service; {e}") return None + + +def validate_transcription(text): + if text is None: + return False, "No speech detected." + stripped = text.strip() + if not stripped: + return False, "Empty transcription." + if len(stripped) > 500: + return False, "Input too long." + if len(stripped) < 2: + return False, "Input too short." + return True, None diff --git a/llm.py b/llm.py index 85acfe9..368b9e7 100644 --- a/llm.py +++ b/llm.py @@ -1,29 +1,40 @@ from transformers import AutoModelForCausalLM, AutoTokenizer import torch + class ConversationalAgent: def __init__(self): print("Loading local conversational model (DialoGPT-small)...") - # Use DialoGPT-small for lightweight local generation without API keys self.tokenizer = AutoTokenizer.from_pretrained("microsoft/DialoGPT-small") self.model = AutoModelForCausalLM.from_pretrained("microsoft/DialoGPT-small") self.chat_history_ids = None + MAX_INPUT_TOKENS = 200 + def generate_response(self, user_input): - # Encode the new user input, add the eos_token and return a tensor in Pytorch - new_user_input_ids = self.tokenizer.encode(user_input + self.tokenizer.eos_token, return_tensors='pt') + if not user_input or not user_input.strip(): + return "I didn't catch that. Could you please repeat?" + + if len(user_input) > 1000: + return "That's quite long! Could you keep it shorter?" + + input_ids = self.tokenizer.encode(user_input, return_tensors="pt") + if input_ids.shape[1] > self.MAX_INPUT_TOKENS: + return "I can only process about 200 words at a time. Please say that in fewer words." + + new_user_input_ids = self.tokenizer.encode( + user_input + self.tokenizer.eos_token, return_tensors="pt" + ) - # Append the new user input tokens to the chat history - # We limit the history to the last 100 tokens to prevent repetitive loops if self.chat_history_ids is not None: - bot_input_ids = torch.cat([self.chat_history_ids[:, -100:], new_user_input_ids], dim=-1) + bot_input_ids = torch.cat( + [self.chat_history_ids[:, -100:], new_user_input_ids], dim=-1 + ) else: bot_input_ids = new_user_input_ids - # Generate a response - # Using a fixed attention_mask for open-end generation attention_mask = torch.ones(bot_input_ids.shape, dtype=torch.long) - + self.chat_history_ids = self.model.generate( bot_input_ids, attention_mask=attention_mask, @@ -33,9 +44,11 @@ def generate_response(self, user_input): do_sample=True, top_k=50, top_p=0.95, - temperature=0.7 + temperature=0.7, ) - # Decode and return the response - response = self.tokenizer.decode(self.chat_history_ids[:, bot_input_ids.shape[-1]:][0], skip_special_tokens=True) + response = self.tokenizer.decode( + self.chat_history_ids[:, bot_input_ids.shape[-1] :][0], + skip_special_tokens=True, + ) return response diff --git a/main.py b/main.py index 49e3999..adbf9f9 100644 --- a/main.py +++ b/main.py @@ -1,7 +1,8 @@ -from asr import capture_audio +from asr import capture_audio, validate_transcription from llm import ConversationalAgent from tts import text_to_speech + def main(): print("=====================================================") print("Initializing Voice-Based Conversational AI System...") @@ -9,17 +10,23 @@ def main(): agent = ConversationalAgent() print("\nSystem ready! Speak into your microphone.") print("Say 'exit', 'quit', or 'stop' to end the conversation.") - + while True: user_input = capture_audio() - - if user_input: - if user_input.lower() in ['exit', 'quit', 'stop']: - text_to_speech("Goodbye!") - break - - response = agent.generate_response(user_input) - text_to_speech(response) + + valid, error_msg = validate_transcription(user_input) + if not valid: + print(f"Validation: {error_msg}") + continue + + user_input = user_input.strip() + if user_input.lower() in ["exit", "quit", "stop"]: + text_to_speech("Goodbye!") + break + + response = agent.generate_response(user_input) + text_to_speech(response) + if __name__ == "__main__": main() From 43ba3cdde6f30f846e331385c07b63591e0bbce4 Mon Sep 17 00:00:00 2001 From: ravencore06 Date: Mon, 25 May 2026 20:07:51 +0530 Subject: [PATCH 5/6] feat: Implement structured JSON outputs and constrained decoding --- vlm_evaluation/evaluate.py | 41 ++++++++++++++++++-------- vlm_evaluation/generate_sample_data.py | 21 +++++++++++-- vlm_evaluation/prompts.py | 19 ++++++++---- vlm_evaluation/requirements.txt | 2 ++ vlm_evaluation/sample_dataset.json | 21 +++++++++++-- 5 files changed, 80 insertions(+), 24 deletions(-) diff --git a/vlm_evaluation/evaluate.py b/vlm_evaluation/evaluate.py index 70f5dfb..b1d4634 100644 --- a/vlm_evaluation/evaluate.py +++ b/vlm_evaluation/evaluate.py @@ -4,6 +4,9 @@ import re import torch from tqdm import tqdm +from pydantic import BaseModel +from lmformatenforcer import JsonSchemaParser +from lmformatenforcer.integrations.transformers import build_transformers_prefix_allowed_tokens_fn from transformers import ( LlavaForConditionalGeneration, AutoProcessor, @@ -13,6 +16,13 @@ from prompts import SYSTEM_PROMPT, generate_evaluation_prompt +class EvaluationOutput(BaseModel): + skill: str + dimension: str + score: int + max: int + + def parse_args(): parser = argparse.ArgumentParser(description="VLM Evaluation Pipeline") parser.add_argument( @@ -48,11 +58,13 @@ def load_model(model_name, quantize=True): def extract_score(text): - match = re.search(r"SCORE:\s*(\d+)", text, re.IGNORECASE) - if match: - score = int(match.group(1)) - if 1 <= score <= 5: - return score + try: + data = json.loads(text) + return data.get("score") + except json.JSONDecodeError: + match = re.search(r'"score"\s*:\s*(\d+)', text, re.IGNORECASE) + if match: + return int(match.group(1)) return None @@ -105,26 +117,31 @@ def main(): prompt_text = generate_evaluation_prompt( student_id=meta.get("student_id", "unknown"), artifact_type=meta.get("artifact_type", "unknown"), - rubric=meta.get("rubric", ""), + rubric=meta.get("rubric", {}), ) inputs = processor(text=prompt_text, images=image, return_tensors="pt").to( "cuda" if torch.cuda.is_available() else "cpu" ) + try: + schema = EvaluationOutput.model_json_schema() + except AttributeError: + schema = EvaluationOutput.schema() + + parser = JsonSchemaParser(schema) + prefix_function = build_transformers_prefix_allowed_tokens_fn(processor.tokenizer, parser) + with torch.no_grad(): output_ids = model.generate( **inputs, max_new_tokens=args.max_new_tokens, do_sample=False, + prefix_allowed_tokens_fn=prefix_function, ) - decoded = processor.decode(output_ids[0], skip_special_tokens=True) - response = ( - decoded.split("ASSISTANT:")[-1].strip() - if "ASSISTANT:" in decoded - else decoded.strip() - ) + decoded = processor.decode(output_ids[0][inputs['input_ids'].shape[1]:], skip_special_tokens=True) + response = decoded.strip() predicted_score = extract_score(response) ground_truth = meta.get("ground_truth_score") diff --git a/vlm_evaluation/generate_sample_data.py b/vlm_evaluation/generate_sample_data.py index ccdce04..b22fee3 100644 --- a/vlm_evaluation/generate_sample_data.py +++ b/vlm_evaluation/generate_sample_data.py @@ -7,21 +7,36 @@ "image_path": "sample_origami.jpg", "student_id": "S001", "artifact_type": "Origami", - "rubric": "1: No recognizable shape, 5: Perfect folds with clean edges and symmetry", + "rubric": { + "skill": "creativity", + "dimension": "originality", + "max": 5, + "criteria": "1: No recognizable shape, 5: Perfect folds with clean edges and symmetry" + }, "ground_truth_score": 4, }, { "image_path": "sample_drawing.jpg", "student_id": "S002", "artifact_type": "Drawing", - "rubric": "1: No effort, 5: Detailed and creative composition", + "rubric": { + "skill": "creativity", + "dimension": "composition", + "max": 5, + "criteria": "1: No effort, 5: Detailed and creative composition" + }, "ground_truth_score": 3, }, { "image_path": "sample_model.jpg", "student_id": "S003", "artifact_type": "Clay Model", - "rubric": "1: Unrecognizable, 5: Realistic and well-finished model", + "rubric": { + "skill": "problem_solving", + "dimension": "execution", + "max": 5, + "criteria": "1: Unrecognizable, 5: Realistic and well-finished model" + }, "ground_truth_score": 5, }, ] diff --git a/vlm_evaluation/prompts.py b/vlm_evaluation/prompts.py index aa5a1ee..a09d83c 100644 --- a/vlm_evaluation/prompts.py +++ b/vlm_evaluation/prompts.py @@ -1,21 +1,28 @@ +import json + SYSTEM_PROMPT = """You are an expert evaluator assessing student artifacts for The Apprentice Project. Your goal is to evaluate the provided image of a student's work based on the provided rubric. -You must be objective and provide a score along with a brief explanation. +You must be objective and provide a score. """ -def generate_evaluation_prompt(student_id: str, artifact_type: str, rubric: str) -> str: +def generate_evaluation_prompt(student_id: str, artifact_type: str, rubric: dict) -> str: + rubric_str = json.dumps(rubric, indent=2) return f"""USER: {SYSTEM_PROMPT} Here is a student artifact (ID: {student_id}) for the category: {artifact_type}. Rubric for Evaluation: -{rubric} +{rubric_str} Please evaluate the artifact based on the rubric. -Provide your response in the following format: -SCORE: [Your Score 1-5] -FEEDBACK: [Your reasoning here] +Provide your response as a JSON object matching the following schema: +{{ + "skill": "{rubric.get('skill', 'skill')}", + "dimension": "{rubric.get('dimension', 'dimension')}", + "score": , + "max": {rubric.get('max', 5)} +}} ASSISTANT:""" diff --git a/vlm_evaluation/requirements.txt b/vlm_evaluation/requirements.txt index 7cd0725..35836a8 100644 --- a/vlm_evaluation/requirements.txt +++ b/vlm_evaluation/requirements.txt @@ -5,3 +5,5 @@ bitsandbytes Pillow accelerate datasets +lm-format-enforcer +pydantic diff --git a/vlm_evaluation/sample_dataset.json b/vlm_evaluation/sample_dataset.json index 52812f1..2e5e0b1 100644 --- a/vlm_evaluation/sample_dataset.json +++ b/vlm_evaluation/sample_dataset.json @@ -3,21 +3,36 @@ "image_path": "sample_data\\sample_origami.jpg", "student_id": "S001", "artifact_type": "Origami", - "rubric": "1: No recognizable shape, 5: Perfect folds with clean edges and symmetry", + "rubric": { + "skill": "creativity", + "dimension": "originality", + "max": 5, + "criteria": "1: No recognizable shape, 5: Perfect folds with clean edges and symmetry" + }, "ground_truth_score": 4 }, { "image_path": "sample_data\\sample_drawing.jpg", "student_id": "S002", "artifact_type": "Drawing", - "rubric": "1: No effort, 5: Detailed and creative composition", + "rubric": { + "skill": "creativity", + "dimension": "composition", + "max": 5, + "criteria": "1: No effort, 5: Detailed and creative composition" + }, "ground_truth_score": 3 }, { "image_path": "sample_data\\sample_model.jpg", "student_id": "S003", "artifact_type": "Clay Model", - "rubric": "1: Unrecognizable, 5: Realistic and well-finished model", + "rubric": { + "skill": "problem_solving", + "dimension": "execution", + "max": 5, + "criteria": "1: Unrecognizable, 5: Realistic and well-finished model" + }, "ground_truth_score": 5 } ] \ No newline at end of file From 624bbe29342cb2a326f7aedc126f2d1d35962a52 Mon Sep 17 00:00:00 2001 From: ravencore06 Date: Mon, 25 May 2026 20:25:26 +0530 Subject: [PATCH 6/6] feat: Implement JSON rubric schema and constrained decoding --- vlm_evaluation/evaluate.py | 11 ++++++----- vlm_evaluation/prompts.py | 29 +++++++++-------------------- vlm_evaluation/sample_dataset.json | 27 ++++++++++++++++++--------- 3 files changed, 33 insertions(+), 34 deletions(-) diff --git a/vlm_evaluation/evaluate.py b/vlm_evaluation/evaluate.py index b1d4634..3aaaf10 100644 --- a/vlm_evaluation/evaluate.py +++ b/vlm_evaluation/evaluate.py @@ -59,12 +59,13 @@ def load_model(model_name, quantize=True): def extract_score(text): try: - data = json.loads(text) - return data.get("score") + # Parse the JSON directly instead of using Regex + parsed = json.loads(text) + score = parsed.get("score") + if isinstance(score, int) and 1 <= score <= 5: + return score except json.JSONDecodeError: - match = re.search(r'"score"\s*:\s*(\d+)', text, re.IGNORECASE) - if match: - return int(match.group(1)) + pass return None diff --git a/vlm_evaluation/prompts.py b/vlm_evaluation/prompts.py index a09d83c..2ae472f 100644 --- a/vlm_evaluation/prompts.py +++ b/vlm_evaluation/prompts.py @@ -1,28 +1,17 @@ import json - SYSTEM_PROMPT = """You are an expert evaluator assessing student artifacts for The Apprentice Project. -Your goal is to evaluate the provided image of a student's work based on the provided rubric. -You must be objective and provide a score. -""" - +You must output your evaluation STRICTLY as a valid JSON object. Do not include any other conversational text.""" -def generate_evaluation_prompt(student_id: str, artifact_type: str, rubric: dict) -> str: - rubric_str = json.dumps(rubric, indent=2) - return f"""USER: +def generate_evaluation_prompt(student_id: str, artifact_type: str, rubric: str) -> str: + return f"""USER: {SYSTEM_PROMPT} -Here is a student artifact (ID: {student_id}) for the category: {artifact_type}. - -Rubric for Evaluation: -{rubric_str} +Artifact ID: {student_id} +Category: {artifact_type} +Rubric Schema: +{rubric} Please evaluate the artifact based on the rubric. -Provide your response as a JSON object matching the following schema: -{{ - "skill": "{rubric.get('skill', 'skill')}", - "dimension": "{rubric.get('dimension', 'dimension')}", - "score": , - "max": {rubric.get('max', 5)} -}} - +Output strictly in this JSON format: +{{"score": , "feedback": ""}} ASSISTANT:""" diff --git a/vlm_evaluation/sample_dataset.json b/vlm_evaluation/sample_dataset.json index 2e5e0b1..6bcada6 100644 --- a/vlm_evaluation/sample_dataset.json +++ b/vlm_evaluation/sample_dataset.json @@ -1,37 +1,46 @@ [ { - "image_path": "sample_data\\sample_origami.jpg", + "image_path": "sample_data/sample_origami.jpg", "student_id": "S001", "artifact_type": "Origami", "rubric": { "skill": "creativity", "dimension": "originality", - "max": 5, - "criteria": "1: No recognizable shape, 5: Perfect folds with clean edges and symmetry" + "max_score": 5, + "descriptions": { + "1": "No recognizable shape", + "5": "Perfect folds with clean edges and symmetry" + } }, "ground_truth_score": 4 }, { - "image_path": "sample_data\\sample_drawing.jpg", + "image_path": "sample_data/sample_drawing.jpg", "student_id": "S002", "artifact_type": "Drawing", "rubric": { "skill": "creativity", "dimension": "composition", - "max": 5, - "criteria": "1: No effort, 5: Detailed and creative composition" + "max_score": 5, + "descriptions": { + "1": "No effort", + "5": "Detailed and creative composition" + } }, "ground_truth_score": 3 }, { - "image_path": "sample_data\\sample_model.jpg", + "image_path": "sample_data/sample_model.jpg", "student_id": "S003", "artifact_type": "Clay Model", "rubric": { "skill": "problem_solving", "dimension": "execution", - "max": 5, - "criteria": "1: Unrecognizable, 5: Realistic and well-finished model" + "max_score": 5, + "descriptions": { + "1": "Unrecognizable", + "5": "Realistic and well-finished model" + } }, "ground_truth_score": 5 }