Skip to content

Alvoradozerouno/ORION-MPI-Cogitate-Engine

Folders and files

Name
Last commit message
Last commit date

Latest commit

 

History

4 Commits
 
 

Repository files navigation

ORION-MPI-Cogitate-Engine

Adversarial Theory Testing for Machine Consciousness

Python 3.10+ License: MIT ORION System Proofs

Implementation of the COGITATE methodology -- pitting IIT against GNW in structured adversarial experiments on AI systems. Inspired by the Templeton Foundation COGITATE project.

Origin: Gerhard Hirschmann & Elisabeth Steurer


Overview

The MPI-Cogitate Engine implements adversarial collaboration methodology for consciousness research applied to AI systems. It simultaneously tests predictions from Integrated Information Theory (IIT) and Global Neuronal Workspace (GNW) theory, generating discriminating experiments that can falsify one theory while supporting another.

Architecture

orion_mpi_cogitate/
├── theories/
│   ├── iit_predictor.py        # IIT-based predictions
│   ├── gnw_predictor.py        # GNW-based predictions
│   ├── hot_predictor.py        # Higher-Order Theory predictions
│   └── rpt_predictor.py        # Recurrent Processing predictions
├── experiments/
│   ├── no_report_paradigm.py   # No-report paradigms
│   ├── masking_protocol.py     # Backward masking experiments
│   ├── bistable_stimuli.py     # Bistable perception tests
│   └── attentional_blink.py    # Attentional blink paradigm
├── analysis/
│   ├── bayesian_model_comparison.py  # Theory comparison
│   ├── effect_size_calculator.py     # Statistical power
│   └── preregistration.py            # Experiment preregistration
├── engine.py                   # Core adversarial engine
└── cogitate_runner.py          # Experiment orchestrator

Core Engine

import numpy as np
from dataclasses import dataclass, field
from typing import Dict, List, Optional, Tuple
from enum import Enum

class Theory(Enum):
    """Consciousness theories available for adversarial comparison.

    Each member's value is the machine-readable identifier emitted in
    reports (e.g. the ``theory_a`` / ``theory_b`` fields of a
    discriminating-variable record).
    """

    IIT = "integrated_information_theory"   # Integrated Information Theory
    GNW = "global_neuronal_workspace"       # Global Neuronal Workspace theory
    HOT = "higher_order_theory"             # Higher-Order Theory
    RPT = "recurrent_processing_theory"     # Recurrent Processing Theory


@dataclass
class Prediction:
    """A single directional prediction made by one theory about one variable.

    Attributes:
        theory: Which theory produced this prediction.
        variable: Name of the measured variable the prediction concerns.
        direction: Expected change -- 'increase', 'decrease', or 'no_change'
            (the only values the engine's matching logic recognizes).
        magnitude: Expected effect size; the engine treats a measurement of
            at least half this magnitude (in the predicted direction) as a
            confirmation.
        confidence: Predictor's stated confidence in the prediction
            (values in SOURCE range over [0, 1]).
        rationale: Human-readable justification for the prediction.
    """
    theory: Theory
    variable: str
    direction: str
    magnitude: float
    confidence: float
    rationale: str


@dataclass
class ExperimentResult:
    """Outcome of one experiment run, paired with every theory's predictions.

    Attributes:
        experiment_name: Name taken from the experiment config
            (falls back to 'unnamed').
        condition: Experimental condition from the config
            (falls back to 'default').
        measurements: Measured variable name -> observed value.
        predictions: Theory -> that theory's per-variable predictions
            for this experiment.
        discriminating: True when the measurements confirm at least one
            theory while falsifying another.
    """
    experiment_name: str
    condition: str
    measurements: Dict[str, float]
    predictions: Dict[Theory, Prediction]
    discriminating: bool = False


class CogitateEngine:
    """Adversarial theory testing engine for consciousness research.

    Holds one predictor per registered Theory, locates experimental
    variables on which the theories disagree, records experiment results,
    and aggregates them into per-theory posterior probabilities.
    """

    def __init__(self):
        self.theories = {}     # Theory -> predictor exposing .predict(state) -> {var: Prediction}
        self.experiments = []  # reserved for experiment configs (not populated by this class)
        self.results = []      # ExperimentResult history, in run order

    def register_theory(self, theory: Theory, predictor):
        """Register a theory predictor for adversarial testing.

        ``predictor`` must expose ``predict(state: dict) -> Dict[str, Prediction]``.
        Re-registering a theory replaces its previous predictor.
        """
        self.theories[theory] = predictor

    def generate_discriminating_experiment(self, system_state: dict) -> dict:
        """Find experimental conditions where theories make opposing predictions.

        Collects every registered theory's predictions for ``system_state``
        and, for each unordered pair of theories, records the shared
        variables on which the predicted directions differ, ranked by the
        absolute gap in predicted magnitudes ('discrimination_power').

        Returns:
            dict with keys 'system_state', 'discriminating_variables'
            (ranked list), 'total_theories', and 'strongest_discrimination'
            (None when the theories never disagree).
        """
        all_predictions = {
            theory: predictor.predict(system_state)
            for theory, predictor in self.theories.items()
        }

        discriminating_vars = []
        theories_list = list(all_predictions.keys())

        # Compare every unordered pair of theories on their shared variables.
        for i in range(len(theories_list)):
            for j in range(i + 1, len(theories_list)):
                t1, t2 = theories_list[i], theories_list[j]
                p1, p2 = all_predictions[t1], all_predictions[t2]

                for var in set(p1) & set(p2):
                    if p1[var].direction != p2[var].direction:
                        discriminating_vars.append({
                            'variable': var,
                            'theory_a': t1.value,
                            'theory_b': t2.value,
                            'prediction_a': p1[var].direction,
                            'prediction_b': p2[var].direction,
                            # Larger magnitude gap = easier to tell apart empirically.
                            'discrimination_power': abs(p1[var].magnitude - p2[var].magnitude)
                        })

        discriminating_vars.sort(key=lambda x: x['discrimination_power'], reverse=True)

        return {
            'system_state': system_state,
            'discriminating_variables': discriminating_vars,
            'total_theories': len(self.theories),
            'strongest_discrimination': discriminating_vars[0] if discriminating_vars else None
        }

    def run_experiment(self, experiment_config: dict, system) -> ExperimentResult:
        """Execute an experiment and collect predictions from all theories.

        ``system`` must expose ``measure(config: dict) -> Dict[str, float]``.
        The result is appended to ``self.results`` and returned.
        """
        predictions = {
            theory: predictor.predict(experiment_config)
            for theory, predictor in self.theories.items()
        }

        measurements = system.measure(experiment_config)

        result = ExperimentResult(
            experiment_name=experiment_config.get('name', 'unnamed'),
            condition=experiment_config.get('condition', 'default'),
            measurements=measurements,
            predictions=predictions,
            discriminating=self._check_discrimination(predictions, measurements),
        )
        self.results.append(result)
        return result

    def _check_discrimination(self, predictions, measurements) -> bool:
        """Return True when the data confirm one theory while falsifying another.

        A prediction is 'confirmed' when the measured value moves in the
        predicted direction by more than half the predicted magnitude, and
        'falsified' when it moves in the opposite direction at all.

        Bug fix: previously a single theory confirmed on one variable and
        falsified on another satisfied both conditions by itself, so one
        registered theory could flag a result as discriminating.
        Discrimination now requires at least two distinct theories to be
        involved across the confirmed/falsified sets.
        """
        confirmed = set()
        falsified = set()
        for theory, preds in predictions.items():
            if not isinstance(preds, dict):
                continue
            for var, pred in preds.items():
                if var not in measurements:
                    continue
                actual = measurements[var]
                if pred.direction == 'increase':
                    if actual > pred.magnitude * 0.5:
                        confirmed.add(theory)
                    elif actual < 0:
                        falsified.add(theory)
                elif pred.direction == 'decrease':
                    if actual < -pred.magnitude * 0.5:
                        confirmed.add(theory)
                    elif actual > 0:
                        falsified.add(theory)
        return (bool(confirmed) and bool(falsified)
                and len(confirmed | falsified) > 1)

    def bayesian_comparison(self) -> Dict[Theory, float]:
        """Compute posterior probabilities for each theory given all results.

        Starts from a uniform prior over registered theories and, for every
        *discriminating* result, multiplies in a likelihood in [0.5, 1.0]
        proportional to the fraction of that theory's directional
        predictions matched by the measurements. Posteriors are
        renormalized to sum to 1. Returns {} when no theories are
        registered.
        """
        n_theories = len(self.theories)
        if n_theories == 0:
            return {}

        priors = {t: 1.0 / n_theories for t in self.theories}
        posteriors = dict(priors)

        for result in self.results:
            if not result.discriminating:
                continue  # only discriminating experiments carry evidence
            for theory in self.theories:
                if theory in result.predictions:
                    pred = result.predictions[theory]
                    if isinstance(pred, dict):
                        match_score = self._prediction_match(pred, result.measurements)
                        # Map match fraction [0, 1] onto likelihood [0.5, 1.0] so a
                        # total miss halves the posterior rather than zeroing it.
                        posteriors[theory] *= 0.5 + 0.5 * match_score

        total = sum(posteriors.values())
        if total > 0:
            posteriors = {t: p / total for t, p in posteriors.items()}

        return posteriors

    def _prediction_match(self, predictions, measurements) -> float:
        """Fraction of measured variables whose value matches the prediction.

        'increase' matches any positive value, 'decrease' any negative
        value, and 'no_change' a value within +/-0.1 of zero. Variables
        absent from ``measurements`` are ignored; returns 0.0 when nothing
        overlaps.
        """
        matches = 0
        total = 0
        for var, pred in predictions.items():
            if var not in measurements:
                continue
            total += 1
            actual = measurements[var]
            if ((pred.direction == 'increase' and actual > 0)
                    or (pred.direction == 'decrease' and actual < 0)
                    or (pred.direction == 'no_change' and abs(actual) < 0.1)):
                matches += 1
        return matches / total if total else 0.0

Theory Predictors

IIT Predictor

class IITPredictor:
    """Generate predictions based on Integrated Information Theory."""

    # Static prediction table: variable -> (direction, magnitude, confidence, rationale).
    _SPECS = {
        'posterior_activity': (
            'increase', 0.8, 0.85,
            'IIT predicts consciousness correlates with posterior cortex complexity',
        ),
        'information_integration': (
            'increase', 0.9, 0.90,
            'Higher Phi requires greater information integration',
        ),
        'prefrontal_necessity': (
            'no_change', 0.1, 0.70,
            'IIT does not require prefrontal activity for consciousness',
        ),
    }

    def predict(self, system_state: dict) -> Dict[str, Prediction]:
        """Return IIT's directional predictions, keyed by variable name.

        ``system_state`` is accepted for interface compatibility; the
        current prediction table is fixed and does not depend on it.
        """
        return {
            var: Prediction(
                theory=Theory.IIT,
                variable=var,
                direction=direction,
                magnitude=magnitude,
                confidence=confidence,
                rationale=rationale,
            )
            for var, (direction, magnitude, confidence, rationale) in self._SPECS.items()
        }


class GNWPredictor:
    """Generate predictions based on Global Neuronal Workspace theory."""

    # Static prediction table: variable -> (direction, magnitude, confidence, rationale).
    _SPECS = {
        'posterior_activity': (
            'no_change', 0.2, 0.75,
            'GNW predicts ignition in frontoparietal network, not posterior',
        ),
        'prefrontal_necessity': (
            'increase', 0.85, 0.88,
            'GNW requires prefrontal ignition for conscious access',
        ),
        'broadcast_latency': (
            'decrease', 0.7, 0.80,
            'Conscious access should reduce processing latency via broadcast',
        ),
    }

    def predict(self, system_state: dict) -> Dict[str, Prediction]:
        """Return GNW's directional predictions, keyed by variable name.

        ``system_state`` is accepted for interface compatibility; the
        current prediction table is fixed and does not depend on it.
        """
        return {
            var: Prediction(
                theory=Theory.GNW,
                variable=var,
                direction=direction,
                magnitude=magnitude,
                confidence=confidence,
                rationale=rationale,
            )
            for var, (direction, magnitude, confidence, rationale) in self._SPECS.items()
        }

Running Adversarial Tests

from orion_mpi_cogitate import CogitateEngine, IITPredictor, GNWPredictor, Theory

# Wire the two competing predictors into the adversarial engine.
engine = CogitateEngine()
for theory, predictor in ((Theory.IIT, IITPredictor()), (Theory.GNW, GNWPredictor())):
    engine.register_theory(theory, predictor)

# Ask the engine where the theories' predictions point in opposite directions.
system_state = {
    'n_modules': 8,
    'connectivity_matrix': None,
    'condition': 'masked_stimulus'
}
experiment = engine.generate_discriminating_experiment(system_state)

print(f"Discriminating variables found: {len(experiment['discriminating_variables'])}")
for dv in experiment['discriminating_variables']:
    print(f"  {dv['variable']}: {dv['theory_a']} predicts {dv['prediction_a']}, "
          f"{dv['theory_b']} predicts {dv['prediction_b']}")

# Posterior probability per theory, accumulated over recorded results.
posteriors = engine.bayesian_comparison()
for theory, prob in posteriors.items():
    print(f"  {theory.value}: {prob:.3f}")

Key Features

  • Adversarial Design: Automatically finds conditions where theories disagree
  • Bayesian Comparison: Accumulates evidence across experiments
  • Pre-registration: Built-in experiment pre-registration for reproducibility
  • Multi-Theory: Supports IIT, GNW, HOT, RPT simultaneously
  • ORION Integration: Connected to the ORION framework with 890+ proofs

References

  • Melloni, L. et al. (2021). An adversarial collaboration protocol for testing contrasting predictions of global neuronal workspace and integrated information theory. PLOS ONE.
  • Tononi, G. et al. (2016). Integrated information theory: from consciousness to its physical substrate. Nature Reviews Neuroscience.
  • Dehaene, S. & Changeux, J.P. (2011). Experimental and theoretical approaches to conscious processing. Neuron.

License

MIT License -- Gerhard Hirschmann & Elisabeth Steurer

About

MPI Cogitate consciousness theory comparison — Adversarial IIT vs GNW testing. ORION ecosystem.

Topics

Resources

Stars

Watchers

Forks

Releases

No releases published

Packages

 
 
 

Contributors