Implementation of the COGITATE methodology -- pitting IIT against GNW in structured adversarial experiments on AI systems. Inspired by the Templeton Foundation COGITATE project.
Origin: Gerhard Hirschmann & Elisabeth Steurer
The MPI-Cogitate Engine implements adversarial collaboration methodology for consciousness research applied to AI systems. It simultaneously tests predictions from Integrated Information Theory (IIT) and Global Neuronal Workspace (GNW) theory, generating discriminating experiments that can falsify one theory while supporting another.
orion_mpi_cogitate/
├── theories/
│ ├── iit_predictor.py # IIT-based predictions
│ ├── gnw_predictor.py # GNW-based predictions
│ ├── hot_predictor.py # Higher-Order Theory predictions
│ └── rpt_predictor.py # Recurrent Processing predictions
├── experiments/
│ ├── no_report_paradigm.py # No-report paradigms
│ ├── masking_protocol.py # Backward masking experiments
│ ├── bistable_stimuli.py # Bistable perception tests
│ └── attentional_blink.py # Attentional blink paradigm
├── analysis/
│ ├── bayesian_model_comparison.py # Theory comparison
│ ├── effect_size_calculator.py # Statistical power
│ └── preregistration.py # Experiment preregistration
├── engine.py # Core adversarial engine
└── cogitate_runner.py # Experiment orchestrator
import numpy as np
from dataclasses import dataclass, field
from typing import Dict, List, Optional, Tuple
from enum import Enum
class Theory(Enum):
    """Theories of consciousness that can be registered with the engine.

    Each member's value is the snake_case long name of the theory; the
    engine reports that value (``theory.value``) in its output dictionaries.
    """

    IIT = "integrated_information_theory"
    GNW = "global_neuronal_workspace"
    HOT = "higher_order_theory"
    RPT = "recurrent_processing_theory"
@dataclass
class Prediction:
    """A single directional prediction made by one theory about one variable."""

    # Theory that issued the prediction.
    theory: Theory
    # Name of the measured variable the prediction refers to.
    variable: str
    # Expected direction of the effect. The engine tests for the literal
    # strings 'increase', 'decrease' and 'no_change'.
    direction: str
    # Expected effect size, given as a non-negative number in the bundled
    # predictors; the sign is carried by ``direction``.
    magnitude: float
    # Confidence attached to the prediction by the theory's predictor.
    # NOTE(review): not read by the engine code shown alongside this class.
    confidence: float
    # Human-readable justification for the prediction.
    rationale: str
@dataclass
class ExperimentResult:
    """Outcome of one experiment: what was measured and what was predicted."""

    # Name of the experiment that produced this result.
    experiment_name: str
    # Experimental condition under which the measurements were taken.
    condition: str
    # Measured value per variable name.
    measurements: Dict[str, float]
    # Output of each registered theory's predictor for this experiment.
    predictions: Dict[Theory, Prediction]
    # Whether the result was judged to discriminate between theories;
    # defaults to False until the engine sets it.
    discriminating: bool = False
class CogitateEngine:
    """Adversarial theory testing engine for consciousness research.

    Theory predictors are registered under a ``Theory`` key. Each predictor
    must expose ``predict(config) -> Dict[str, Prediction]``. The engine can
    then look for variables on which theories disagree, run experiments
    against a system, and accumulate evidence across discriminating results.
    """

    def __init__(self):
        # Registered predictors, keyed by Theory.
        self.theories = {}
        # Reserved list of experiments; not populated by the methods below.
        self.experiments = []
        # Every ExperimentResult produced by run_experiment, in call order.
        self.results = []

    def register_theory(self, theory: Theory, predictor):
        """Register a theory predictor for adversarial testing.

        Registering the same theory twice replaces the earlier predictor.
        """
        self.theories[theory] = predictor

    @staticmethod
    def _signed_effect(pred) -> float:
        """Return the prediction's magnitude, negated for 'decrease'.

        Magnitudes are stored unsigned, so the direction has to be folded in
        before two predictions can be compared; every other direction keeps
        its magnitude unchanged.
        """
        if pred.direction == 'decrease':
            return -pred.magnitude
        return pred.magnitude

    def generate_discriminating_experiment(self, system_state: dict) -> dict:
        """Find experimental conditions where theories make opposing predictions.

        Every registered predictor is queried with ``system_state``. For each
        pair of theories, every variable predicted by both with different
        directions is reported.

        Returns:
            A dict with the keys 'system_state', 'discriminating_variables'
            (sorted by descending 'discrimination_power'), 'total_theories'
            and 'strongest_discrimination' (None when nothing discriminates).
        """
        all_predictions = {
            theory: predictor.predict(system_state)
            for theory, predictor in self.theories.items()
        }

        discriminating_vars = []
        theories_list = list(all_predictions)
        for i, t1 in enumerate(theories_list):
            p1 = all_predictions[t1]
            for t2 in theories_list[i + 1:]:
                p2 = all_predictions[t2]
                # Walk p1 in insertion order (instead of a set intersection)
                # so that equally powerful variables come out in a stable order.
                for var, pred_a in p1.items():
                    if var not in p2:
                        continue
                    pred_b = p2[var]
                    if pred_a.direction == pred_b.direction:
                        continue
                    discriminating_vars.append({
                        'variable': var,
                        'theory_a': t1.value,
                        'theory_b': t2.value,
                        'prediction_a': pred_a.direction,
                        'prediction_b': pred_b.direction,
                        # Compare signed effects: an 'increase' and a
                        # 'decrease' of equal size are maximally far apart,
                        # not identical.
                        'discrimination_power': abs(
                            self._signed_effect(pred_a)
                            - self._signed_effect(pred_b)
                        ),
                    })

        discriminating_vars.sort(
            key=lambda x: x['discrimination_power'], reverse=True
        )
        return {
            'system_state': system_state,
            'discriminating_variables': discriminating_vars,
            'total_theories': len(self.theories),
            'strongest_discrimination': (
                discriminating_vars[0] if discriminating_vars else None
            ),
        }

    def run_experiment(self, experiment_config: dict, system) -> ExperimentResult:
        """Execute an experiment and collect predictions from all theories.

        Args:
            experiment_config: Passed to every predictor and to
                ``system.measure``; its optional 'name' and 'condition' keys
                label the result ('unnamed' / 'default' when absent).
            system: Object exposing ``measure(experiment_config)`` that
                returns a mapping of variable name to measured value.

        Returns:
            The ExperimentResult, which is also appended to ``self.results``.
        """
        predictions = {
            theory: predictor.predict(experiment_config)
            for theory, predictor in self.theories.items()
        }
        measurements = system.measure(experiment_config)
        discriminating = self._check_discrimination(predictions, measurements)
        result = ExperimentResult(
            experiment_name=experiment_config.get('name', 'unnamed'),
            condition=experiment_config.get('condition', 'default'),
            measurements=measurements,
            predictions=predictions,
            discriminating=discriminating
        )
        self.results.append(result)
        return result

    def _check_discrimination(self, predictions, measurements) -> bool:
        """Return True when the data support one theory and refute another.

        A directional prediction is confirmed when the measurement exceeds
        half the predicted magnitude in the predicted direction, and
        falsified when the measurement has the opposite sign. Only
        'increase' and 'decrease' predictions are evaluated here.
        """
        confirmed = set()
        falsified = set()
        for theory, preds in predictions.items():
            if not isinstance(preds, dict):
                continue
            for var, pred in preds.items():
                if var not in measurements:
                    continue
                actual = measurements[var]
                threshold = pred.magnitude * 0.5
                if pred.direction == 'increase':
                    if actual > threshold:
                        confirmed.add(theory)
                    elif actual < 0:
                        falsified.add(theory)
                elif pred.direction == 'decrease':
                    if actual < -threshold:
                        confirmed.add(theory)
                    elif actual > 0:
                        falsified.add(theory)
        # One theory that is both confirmed and falsified on different
        # variables does not separate theories from each other, so at least
        # two distinct theories must be involved.
        return (
            bool(confirmed)
            and bool(falsified)
            and len(confirmed | falsified) > 1
        )

    def bayesian_comparison(self) -> Dict[Theory, float]:
        """Compute posterior probabilities for each theory given all results.

        Starts from a uniform prior over the registered theories. For every
        discriminating result, each theory's weight is multiplied by
        ``0.5 + 0.5 * match_score`` and the weights are normalised at the end.
        Returns an empty dict when no theory is registered.
        """
        n_theories = len(self.theories)
        if n_theories == 0:
            return {}

        posteriors = {t: 1.0 / n_theories for t in self.theories}
        for result in self.results:
            if not result.discriminating:
                continue
            for theory in self.theories:
                pred = result.predictions.get(theory)
                # Theories without dict-shaped predictions for this result
                # keep their current weight.
                if not isinstance(pred, dict):
                    continue
                match_score = self._prediction_match(pred, result.measurements)
                posteriors[theory] *= 0.5 + 0.5 * match_score

        total = sum(posteriors.values())
        if total > 0:
            posteriors = {t: p / total for t, p in posteriors.items()}
        return posteriors

    def _prediction_match(self, predictions, measurements) -> float:
        """Return the fraction of measured predictions whose direction held.

        Only variables present in ``measurements`` count. 'increase' matches
        a positive value, 'decrease' a negative one, and 'no_change' an
        absolute value below 0.1. Returns 0.0 when nothing was measured.
        """
        matches = 0
        total = 0
        for var, pred in predictions.items():
            if var not in measurements:
                continue
            total += 1
            actual = measurements[var]
            if pred.direction == 'increase' and actual > 0:
                matches += 1
            elif pred.direction == 'decrease' and actual < 0:
                matches += 1
            elif pred.direction == 'no_change' and abs(actual) < 0.1:
                matches += 1
        return matches / total if total > 0 else 0.0


class IITPredictor:
    """Generate predictions based on Integrated Information Theory."""

    def predict(self, system_state: dict) -> Dict[str, Prediction]:
        """Return the fixed set of IIT predictions, keyed by variable name.

        ``system_state`` is accepted for interface compatibility with the
        engine but is not read; the same predictions are returned every call.
        """
        predictions = {}
        predictions['posterior_activity'] = Prediction(
            theory=Theory.IIT,
            variable='posterior_activity',
            direction='increase',
            magnitude=0.8,
            confidence=0.85,
            rationale='IIT predicts consciousness correlates with posterior cortex complexity'
        )
        predictions['information_integration'] = Prediction(
            theory=Theory.IIT,
            variable='information_integration',
            direction='increase',
            magnitude=0.9,
            confidence=0.90,
            rationale='Higher Phi requires greater information integration'
        )
        predictions['prefrontal_necessity'] = Prediction(
            theory=Theory.IIT,
            variable='prefrontal_necessity',
            direction='no_change',
            magnitude=0.1,
            confidence=0.70,
            rationale='IIT does not require prefrontal activity for consciousness'
        )
        return predictions
class GNWPredictor:
    """Generate predictions based on Global Neuronal Workspace theory."""

    def predict(self, system_state: dict) -> Dict[str, Prediction]:
        """Return the fixed set of GNW predictions, keyed by variable name.

        ``system_state`` is accepted for interface compatibility with the
        engine but is not read; the same predictions are returned every call.
        """
        predictions = {}
        predictions['posterior_activity'] = Prediction(
            theory=Theory.GNW,
            variable='posterior_activity',
            direction='no_change',
            magnitude=0.2,
            confidence=0.75,
            rationale='GNW predicts ignition in frontoparietal network, not posterior'
        )
        predictions['prefrontal_necessity'] = Prediction(
            theory=Theory.GNW,
            variable='prefrontal_necessity',
            direction='increase',
            magnitude=0.85,
            confidence=0.88,
            rationale='GNW requires prefrontal ignition for conscious access'
        )
        predictions['broadcast_latency'] = Prediction(
            theory=Theory.GNW,
            variable='broadcast_latency',
            direction='decrease',
            magnitude=0.7,
            confidence=0.80,
            rationale='Conscious access should reduce processing latency via broadcast'
        )
        return predictions

from orion_mpi_cogitate import CogitateEngine, IITPredictor, GNWPredictor, Theory
engine = CogitateEngine()
engine.register_theory(Theory.IIT, IITPredictor())
engine.register_theory(Theory.GNW, GNWPredictor())
experiment = engine.generate_discriminating_experiment({
'n_modules': 8,
'connectivity_matrix': None,
'condition': 'masked_stimulus'
})
print(f"Discriminating variables found: {len(experiment['discriminating_variables'])}")
for dv in experiment['discriminating_variables']:
print(f" {dv['variable']}: {dv['theory_a']} predicts {dv['prediction_a']}, "
f"{dv['theory_b']} predicts {dv['prediction_b']}")
posteriors = engine.bayesian_comparison()
for theory, prob in posteriors.items():
print(f" {theory.value}: {prob:.3f}")- Adversarial Design: Automatically finds conditions where theories disagree
- Bayesian Comparison: Accumulates evidence across experiments
- Pre-registration: Built-in experiment pre-registration for reproducibility
- Multi-Theory: Supports IIT, GNW, HOT, RPT simultaneously
- ORION Integration: Connected to the ORION framework with 890+ proofs
- Melloni, L. et al. (2021). An adversarial collaboration protocol for testing contrasting predictions of global neuronal workspace and integrated information theory. PLOS ONE.
- Tononi, G. et al. (2016). Integrated information theory: from consciousness to its physical substrate. Nature Reviews Neuroscience.
- Dehaene, S. & Changeux, J.P. (2011). Experimental and theoretical approaches to conscious processing. Neuron.
MIT License -- Gerhard Hirschmann & Elisabeth Steurer