diff --git a/README.md b/README.md index 04e1239..75b9c74 100644 --- a/README.md +++ b/README.md @@ -1 +1,13 @@ -# C4GT_2026 \ No newline at end of file +# theApprenticeProject (C4GT 2026) + +This repository contains two distinct AI initiatives developed for The Apprentice Project: + +## 1. Voice-Based Conversational AI System +A voice-based AI system that captures audio, generates conversational responses using an LLM, and converts the responses back to speech. +- **Key Files**: `asr.py`, `llm.py`, `tts.py`, `main.py` +- **Dependencies**: See `./requirements.txt` + +## 2. VLM Evaluation Pipeline +A cost-efficient Vision Language Model (VLM) pipeline designed to evaluate student artifacts (images/videos) against 21st-century skills rubrics. +- **Key Directory**: `vlm_evaluation/` +- **Dependencies**: See `vlm_evaluation/requirements.txt` \ No newline at end of file diff --git a/__pycache__/asr.cpython-313.pyc b/__pycache__/asr.cpython-313.pyc new file mode 100644 index 0000000..814af6e Binary files /dev/null and b/__pycache__/asr.cpython-313.pyc differ diff --git a/__pycache__/llm.cpython-313.pyc b/__pycache__/llm.cpython-313.pyc new file mode 100644 index 0000000..77cd0cf Binary files /dev/null and b/__pycache__/llm.cpython-313.pyc differ diff --git a/__pycache__/tts.cpython-313.pyc b/__pycache__/tts.cpython-313.pyc new file mode 100644 index 0000000..8a04799 Binary files /dev/null and b/__pycache__/tts.cpython-313.pyc differ diff --git a/asr.py b/asr.py new file mode 100644 index 0000000..3a7e099 --- /dev/null +++ b/asr.py @@ -0,0 +1,33 @@ +import speech_recognition as sr + + +def capture_audio(): + recognizer = sr.Recognizer() + with sr.Microphone() as source: + print("\nListening...") + recognizer.adjust_for_ambient_noise(source, duration=0.5) + audio = recognizer.listen(source) + + try: + text = recognizer.recognize_google(audio) + print(f"You said: {text}") + return text + except sr.UnknownValueError: + print("Sorry, I could not understand the audio.") + return None + except sr.RequestError as e: + print(f"Could not request results from Google Speech Recognition service; {e}") + return None + + +def validate_transcription(text): + if text is None: + return False, "No speech detected." + stripped = text.strip() + if not stripped: + return False, "Empty transcription." + if len(stripped) > 500: + return False, "Input too long." + if len(stripped) < 2: + return False, "Input too short." + return True, None diff --git a/llm.py b/llm.py new file mode 100644 index 0000000..368b9e7 --- /dev/null +++ b/llm.py @@ -0,0 +1,54 @@ +from transformers import AutoModelForCausalLM, AutoTokenizer +import torch + + +class ConversationalAgent: + def __init__(self): + print("Loading local conversational model (DialoGPT-small)...") + self.tokenizer = AutoTokenizer.from_pretrained("microsoft/DialoGPT-small") + self.model = AutoModelForCausalLM.from_pretrained("microsoft/DialoGPT-small") + self.chat_history_ids = None + + MAX_INPUT_TOKENS = 200 + + def generate_response(self, user_input): + if not user_input or not user_input.strip(): + return "I didn't catch that. Could you please repeat?" + + if len(user_input) > 1000: + return "That's quite long! Could you keep it shorter?" + + input_ids = self.tokenizer.encode(user_input, return_tensors="pt") + if input_ids.shape[1] > self.MAX_INPUT_TOKENS: + return "I can only process about 200 words at a time. Please say that in fewer words." + + new_user_input_ids = self.tokenizer.encode( + user_input + self.tokenizer.eos_token, return_tensors="pt" + ) + + if self.chat_history_ids is not None: + bot_input_ids = torch.cat( + [self.chat_history_ids[:, -100:], new_user_input_ids], dim=-1 + ) + else: + bot_input_ids = new_user_input_ids + + attention_mask = torch.ones(bot_input_ids.shape, dtype=torch.long) + + self.chat_history_ids = self.model.generate( + bot_input_ids, + attention_mask=attention_mask, + max_length=1000, + pad_token_id=self.tokenizer.eos_token_id, + no_repeat_ngram_size=3, + do_sample=True, + top_k=50, + top_p=0.95, + temperature=0.7, + ) + + response = self.tokenizer.decode( + self.chat_history_ids[:, bot_input_ids.shape[-1] :][0], + skip_special_tokens=True, + ) + return response diff --git a/main.py b/main.py new file mode 100644 index 0000000..adbf9f9 --- /dev/null +++ b/main.py @@ -0,0 +1,32 @@ +from asr import capture_audio, validate_transcription +from llm import ConversationalAgent +from tts import text_to_speech + + +def main(): + print("=====================================================") + print("Initializing Voice-Based Conversational AI System...") + print("=====================================================") + agent = ConversationalAgent() + print("\nSystem ready! Speak into your microphone.") + print("Say 'exit', 'quit', or 'stop' to end the conversation.") + + while True: + user_input = capture_audio() + + valid, error_msg = validate_transcription(user_input) + if not valid: + print(f"Validation: {error_msg}") + continue + + user_input = user_input.strip() + if user_input.lower() in ["exit", "quit", "stop"]: + text_to_speech("Goodbye!") + break + + response = agent.generate_response(user_input) + text_to_speech(response) + + +if __name__ == "__main__": + main() diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..fd578bf --- /dev/null +++ b/requirements.txt @@ -0,0 +1,5 @@ +SpeechRecognition +PyAudio==0.2.14 +transformers==4.38.2 +torch +pyttsx3==2.90 diff --git a/tts.py b/tts.py new file mode 100644 index 0000000..017e83f --- /dev/null +++ b/tts.py @@ -0,0 +1,13 @@ +import pyttsx3 + +def text_to_speech(text): + # Initialize pyttsx3 engine for offline TTS + engine = pyttsx3.init() + + # Adjust properties + rate = engine.getProperty('rate') + engine.setProperty('rate', rate - 20) # Slightly slower for clarity + + print(f"AI: {text}") + engine.say(text) + engine.runAndWait() diff --git a/vlm_evaluation/__pycache__/dataset.cpython-313.pyc b/vlm_evaluation/__pycache__/dataset.cpython-313.pyc new file mode 100644 index 0000000..deda7c9 Binary files /dev/null and b/vlm_evaluation/__pycache__/dataset.cpython-313.pyc differ diff --git a/vlm_evaluation/__pycache__/evaluate.cpython-313.pyc b/vlm_evaluation/__pycache__/evaluate.cpython-313.pyc new file mode 100644 index 0000000..9b7b341 Binary files /dev/null and b/vlm_evaluation/__pycache__/evaluate.cpython-313.pyc differ diff --git a/vlm_evaluation/__pycache__/prompts.cpython-313.pyc b/vlm_evaluation/__pycache__/prompts.cpython-313.pyc new file mode 100644 index 0000000..fd31c65 Binary files /dev/null and b/vlm_evaluation/__pycache__/prompts.cpython-313.pyc differ diff --git a/vlm_evaluation/dataset.py b/vlm_evaluation/dataset.py new file mode 100644 index 0000000..7802cee --- /dev/null +++ b/vlm_evaluation/dataset.py @@ -0,0 +1,49 @@ +import json +import os +from PIL import Image + +class ArtifactDataset: + def __init__(self, data_path: str): + """ + Initializes the dataset loader. + Assumes data_path points to a JSON file containing evaluation metadata: + [ + { + "image_path": "data/images/student1.jpg", + "student_id": "123", + "artifact_type": "Origami", + "rubric": "1: No effort, 5: Perfect folds and presentation", + "ground_truth_score": 4 + }, ... + ] + """ + self.data_path = data_path + self.data = [] + + if os.path.exists(data_path): + with open(data_path, 'r') as f: + self.data = json.load(f) + else: + print(f"Warning: Dataset file {data_path} not found. Returning empty dataset.") + print("Please create this file or generate a sample dataset.") + + def __len__(self): + return len(self.data) + + def __getitem__(self, idx): + item = self.data[idx] + image_path = item.get("image_path") + + try: + # Handle absolute or relative paths gracefully based on the json directory + base_dir = os.path.dirname(self.data_path) + full_image_path = os.path.join(base_dir, image_path) if not os.path.isabs(image_path) else image_path + image = Image.open(full_image_path).convert("RGB") + except Exception as e: + print(f"Error loading image {image_path}: {e}") + image = None + + return { + "image": image, + "metadata": item + } diff --git a/vlm_evaluation/evaluate.py b/vlm_evaluation/evaluate.py new file mode 100644 index 0000000..3aaaf10 --- /dev/null +++ b/vlm_evaluation/evaluate.py @@ -0,0 +1,189 @@ +import argparse +import json +import os +import re +import torch +from tqdm import tqdm +from pydantic import BaseModel +from lmformatenforcer import JsonSchemaParser +from lmformatenforcer.integrations.transformers import build_transformers_prefix_allowed_tokens_fn +from transformers import ( + LlavaForConditionalGeneration, + AutoProcessor, + BitsAndBytesConfig, +) +from dataset import ArtifactDataset +from prompts import SYSTEM_PROMPT, generate_evaluation_prompt + + +class EvaluationOutput(BaseModel): + skill: str + dimension: str + score: int + max: int + + +def parse_args(): + parser = argparse.ArgumentParser(description="VLM Evaluation Pipeline") + parser.add_argument( + "--data_path", type=str, required=True, help="Path to dataset JSON" + ) + parser.add_argument("--model_name", type=str, default="llava-hf/llava-1.5-7b-hf") + parser.add_argument("--quantize", action="store_true", default=True) + parser.add_argument("--no_quantize", action="store_false", dest="quantize") + parser.add_argument("--output_path", type=str, default="results.json") + parser.add_argument("--max_new_tokens", type=int, default=256) + return parser.parse_args() + + +def load_model(model_name, quantize=True): + if quantize: + quantization_config = BitsAndBytesConfig( + load_in_4bit=True, + bnb_4bit_compute_dtype=torch.float16, + bnb_4bit_use_double_quant=True, + bnb_4bit_quant_type="nf4", + ) + else: + quantization_config = None + + model = LlavaForConditionalGeneration.from_pretrained( + model_name, + quantization_config=quantization_config, + device_map="auto", + torch_dtype=torch.float16, + ) + processor = AutoProcessor.from_pretrained(model_name) + return model, processor + + +def extract_score(text): + try: + # Parse the JSON directly instead of using Regex + parsed = json.loads(text) + score = parsed.get("score") + if isinstance(score, int) and 1 <= score <= 5: + return score + except json.JSONDecodeError: + pass + return None + + +def compute_metrics(predictions, ground_truths): + total = len(ground_truths) + if total == 0: + return {} + + exact = sum(1 for p, g in zip(predictions, ground_truths) if p == g) + within_1 = sum(1 for p, g in zip(predictions, ground_truths) if abs(p - g) <= 1) + mae = sum(abs(p - g) for p, g in zip(predictions, ground_truths)) / total + parsed = sum(1 for p in predictions if p is not None) + + return { + "total_samples": total, + "exact_accuracy": round(exact / total * 100, 2), + "within_1_accuracy": round(within_1 / total * 100, 2), + "mean_absolute_error": round(mae, 4), + "parse_rate": round(parsed / total * 100, 2), + } + + +def main(): + args = parse_args() + + if not torch.cuda.is_available(): + print("Warning: CUDA not available. Inference will be slow on CPU.") + + print(f"Loading dataset from {args.data_path}...") + dataset = ArtifactDataset(args.data_path) + if len(dataset) == 0: + print("Dataset is empty. Exiting.") + return + + print(f"Loading model {args.model_name} (quantize={args.quantize})...") + model, processor = load_model(args.model_name, quantize=args.quantize) + + results = [] + preds = [] + truths = [] + + for i in tqdm(range(len(dataset)), desc="Evaluating"): + sample = dataset[i] + meta = sample["metadata"] + image = sample["image"] + + if image is None: + continue + + prompt_text = generate_evaluation_prompt( + student_id=meta.get("student_id", "unknown"), + artifact_type=meta.get("artifact_type", "unknown"), + rubric=meta.get("rubric", {}), + ) + + inputs = processor(text=prompt_text, images=image, return_tensors="pt").to( + "cuda" if torch.cuda.is_available() else "cpu" + ) + + try: + schema = EvaluationOutput.model_json_schema() + except AttributeError: + schema = EvaluationOutput.schema() + + parser = JsonSchemaParser(schema) + prefix_function = build_transformers_prefix_allowed_tokens_fn(processor.tokenizer, parser) + + with torch.no_grad(): + output_ids = model.generate( + **inputs, + max_new_tokens=args.max_new_tokens, + do_sample=False, + prefix_allowed_tokens_fn=prefix_function, + ) + + decoded = processor.decode(output_ids[0][inputs['input_ids'].shape[1]:], skip_special_tokens=True) + response = decoded.strip() + + predicted_score = extract_score(response) + ground_truth = meta.get("ground_truth_score") + + results.append( + { + "student_id": meta.get("student_id", "unknown"), + "predicted_score": predicted_score, + "ground_truth_score": ground_truth, + "raw_response": response, + "artifact_type": meta.get("artifact_type", "unknown"), + } + ) + + if predicted_score is not None and ground_truth is not None: + preds.append(predicted_score) + truths.append(ground_truth) + + metrics = compute_metrics(preds, truths) + + output = { + "config": { + "model_name": args.model_name, + "quantize": args.quantize, + "dataset": args.data_path, + }, + "metrics": metrics, + "results": results, + } + + with open(args.output_path, "w") as f: + json.dump(output, f, indent=2) + + print("\n" + "=" * 50) + print("EVALUATION METRICS") + print("=" * 50) + for k, v in metrics.items(): + print(f" {k}: {v}") + print("=" * 50) + print(f"Results saved to {args.output_path}") + + +if __name__ == "__main__": + main() diff --git a/vlm_evaluation/generate_sample_data.py b/vlm_evaluation/generate_sample_data.py new file mode 100644 index 0000000..b22fee3 --- /dev/null +++ b/vlm_evaluation/generate_sample_data.py @@ -0,0 +1,72 @@ +import json +import os +from PIL import Image, ImageDraw + +SAMPLE_DATA = [ + { + "image_path": "sample_origami.jpg", + "student_id": "S001", + "artifact_type": "Origami", + "rubric": { + "skill": "creativity", + "dimension": "originality", + "max": 5, + "criteria": "1: No recognizable shape, 5: Perfect folds with clean edges and symmetry" + }, + "ground_truth_score": 4, + }, + { + "image_path": "sample_drawing.jpg", + "student_id": "S002", + "artifact_type": "Drawing", + "rubric": { + "skill": "creativity", + "dimension": "composition", + "max": 5, + "criteria": "1: No effort, 5: Detailed and creative composition" + }, + "ground_truth_score": 3, + }, + { + "image_path": "sample_model.jpg", + "student_id": "S003", + "artifact_type": "Clay Model", + "rubric": { + "skill": "problem_solving", + "dimension": "execution", + "max": 5, + "criteria": "1: Unrecognizable, 5: Realistic and well-finished model" + }, + "ground_truth_score": 5, + }, +] + + +def create_dummy_image(path, size=(224, 224), color=(200, 100, 50)): + img = Image.new("RGB", size, color) + draw = ImageDraw.Draw(img) + draw.rectangle([50, 50, 174, 174], outline=(255, 255, 255), width=3) + draw.ellipse([80, 80, 144, 144], fill=(100, 200, 100)) + img.save(path) + print(f"Created {path}") + + +def main(): + output_dir = os.path.dirname(os.path.abspath(__file__)) + data_dir = os.path.join(output_dir, "sample_data") + os.makedirs(data_dir, exist_ok=True) + + for item in SAMPLE_DATA: + image_path = os.path.join(data_dir, item["image_path"]) + create_dummy_image(image_path) + item["image_path"] = os.path.join("sample_data", item["image_path"]) + + json_path = os.path.join(output_dir, "sample_dataset.json") + with open(json_path, "w") as f: + json.dump(SAMPLE_DATA, f, indent=2) + + print(f"Sample dataset saved to {json_path}") + + +if __name__ == "__main__": + main() diff --git a/vlm_evaluation/prompts.py b/vlm_evaluation/prompts.py new file mode 100644 index 0000000..2ae472f --- /dev/null +++ b/vlm_evaluation/prompts.py @@ -0,0 +1,17 @@ +import json +SYSTEM_PROMPT = """You are an expert evaluator assessing student artifacts for The Apprentice Project. +You must output your evaluation STRICTLY as a valid JSON object. Do not include any other conversational text.""" + +def generate_evaluation_prompt(student_id: str, artifact_type: str, rubric: str) -> str: + return f"""USER: +{SYSTEM_PROMPT} + +Artifact ID: {student_id} +Category: {artifact_type} +Rubric Schema: +{rubric} + +Please evaluate the artifact based on the rubric. +Output strictly in this JSON format: +{{"score": , "feedback": ""}} +ASSISTANT:""" diff --git a/vlm_evaluation/requirements.txt b/vlm_evaluation/requirements.txt new file mode 100644 index 0000000..35836a8 --- /dev/null +++ b/vlm_evaluation/requirements.txt @@ -0,0 +1,9 @@ +transformers>=4.38.2 +torch +peft +bitsandbytes +Pillow +accelerate +datasets +lm-format-enforcer +pydantic diff --git a/vlm_evaluation/run_benchmark.ps1 b/vlm_evaluation/run_benchmark.ps1 new file mode 100644 index 0000000..47c69e2 --- /dev/null +++ b/vlm_evaluation/run_benchmark.ps1 @@ -0,0 +1,29 @@ +param( + [string]$DataPath = "sample_dataset.json", + [string]$ModelName = "llava-hf/llava-1.5-7b-hf", + [switch]$NoQuantize = $false, + [string]$OutputPath = "results.json", + [int]$MaxNewTokens = 256 +) + +$QuantizeFlag = if ($NoQuantize) { "--no_quantize" } else { "" } + +Write-Host "=== VLM Evaluation Benchmark ===" -ForegroundColor Cyan +Write-Host "Dataset : $DataPath" +Write-Host "Model : $ModelName" +Write-Host "Quantize: $(-not $NoQuantize)" +Write-Host "Output : $OutputPath" +Write-Host "" + +python evaluate.py ` + --data_path $DataPath ` + --model_name $ModelName ` + $QuantizeFlag ` + --output_path $OutputPath ` + --max_new_tokens $MaxNewTokens + +if ($LASTEXITCODE -eq 0) { + Write-Host "Benchmark completed successfully." -ForegroundColor Green +} else { + Write-Host "Benchmark failed with exit code $LASTEXITCODE." -ForegroundColor Red +} diff --git a/vlm_evaluation/sample_data/sample_drawing.jpg b/vlm_evaluation/sample_data/sample_drawing.jpg new file mode 100644 index 0000000..68eafd6 Binary files /dev/null and b/vlm_evaluation/sample_data/sample_drawing.jpg differ diff --git a/vlm_evaluation/sample_data/sample_model.jpg b/vlm_evaluation/sample_data/sample_model.jpg new file mode 100644 index 0000000..68eafd6 Binary files /dev/null and b/vlm_evaluation/sample_data/sample_model.jpg differ diff --git a/vlm_evaluation/sample_data/sample_origami.jpg b/vlm_evaluation/sample_data/sample_origami.jpg new file mode 100644 index 0000000..68eafd6 Binary files /dev/null and b/vlm_evaluation/sample_data/sample_origami.jpg differ diff --git a/vlm_evaluation/sample_dataset.json b/vlm_evaluation/sample_dataset.json new file mode 100644 index 0000000..6bcada6 --- /dev/null +++ b/vlm_evaluation/sample_dataset.json @@ -0,0 +1,47 @@ +[ + { + "image_path": "sample_data/sample_origami.jpg", + "student_id": "S001", + "artifact_type": "Origami", + "rubric": { + "skill": "creativity", + "dimension": "originality", + "max_score": 5, + "descriptions": { + "1": "No recognizable shape", + "5": "Perfect folds with clean edges and symmetry" + } + }, + "ground_truth_score": 4 + }, + { + "image_path": "sample_data/sample_drawing.jpg", + "student_id": "S002", + "artifact_type": "Drawing", + "rubric": { + "skill": "creativity", + "dimension": "composition", + "max_score": 5, + "descriptions": { + "1": "No effort", + "5": "Detailed and creative composition" + } + }, + "ground_truth_score": 3 + }, + { + "image_path": "sample_data/sample_model.jpg", + "student_id": "S003", + "artifact_type": "Clay Model", + "rubric": { + "skill": "problem_solving", + "dimension": "execution", + "max_score": 5, + "descriptions": { + "1": "Unrecognizable", + "5": "Realistic and well-finished model" + } + }, + "ground_truth_score": 5 + } +] \ No newline at end of file