Skip to Content

Examples

Verify an AI-Eval receipt

Single-receipt verification with the AI-Eval profile enabled:

use apl_ai_eval::AiEvalProfile;
use apl_core::prelude::*;

/// Verifies a single receipt with the AI-Eval profile enabled and prints the
/// verification output as pretty-printed JSON on stdout.
///
/// Both resolvers are fresh, empty in-memory instances, so no frames or
/// bridges are pre-registered for the verification.
fn verify_ai_eval(receipt_bytes: &[u8], carrier: &dyn CarrierVerifier) {
    let frame_resolver = InMemoryFrameResolver::new();
    let bridge_resolver = InMemoryBridgeResolver::new();
    let ai_eval = AiEvalProfile;

    // Only the first element of the returned pair is used here; the second is
    // deliberately ignored.
    let (report, _) = verify_receipt(
        receipt_bytes,
        carrier,
        &frame_resolver,
        &bridge_resolver,
        Some(&ai_eval),
    );

    println!("{}", report.to_json_pretty());
}

Pairwise under AI-Eval

Evaluate whether two benchmark receipts are comparable under AI-Eval’s stricter bridge rules:

use apl_core::prelude::*; use apl_ai_eval::AiEvalProfile; fn pair_ai_eval( left_bytes: &[u8], right_bytes: &[u8], query_value: &serde_json::Value, carrier: &dyn CarrierVerifier, ) -> AplResult<()> { let frames = InMemoryFrameResolver::new(); let bridges = InMemoryBridgeResolver::new(); let profile = AiEvalProfile; let input = PairwiseInput { left: ReceiptInput::Bytes(left_bytes), right: ReceiptInput::Bytes(right_bytes), query: RelationQuery::parse(query_value) // `RelationQueryParseError` only derives Debug — format it via `{:?}`. .map_err(|e| AplError::InvalidArgument(format!("{e:?}")))?, supplied_bridges: Vec::new(), }; let output = evaluate_relation(input, carrier, &frames, &bridges, Some(&profile)); match output.relation_outcome { RelationOutcome::SameFrameComparable => println!("same-frame-comparable"), RelationOutcome::BridgedComparable => println!("bridged-comparable (AI-Eval)"), RelationOutcome::Incomparable => println!("incomparable"), RelationOutcome::RelationNotEvaluated => { println!("relation-not-evaluated"); for d in &output.diagnostics { println!(" - {d}"); } } } Ok(()) }

The Two MMLU Scores — canonical demo

Under AI-Eval, two MMLU-labelled receipts produced by different harnesses and graders are incomparable without a bridge that meets profile-specific applicability rules:

use apl_core::prelude::*; use apl_ai_eval::AiEvalProfile; use serde_json::json; fn two_mmlu_scores( receipt_a: &[u8], // lm-eval-harness@0.4.2, exact-match-v1, mmlu/dev receipt_b: &[u8], // custom-runner@2.1, llm-judge-v3, mmlu/test-lite carrier: &dyn CarrierVerifier, ) -> AplResult<()> { let frames = InMemoryFrameResolver::new(); let bridges = InMemoryBridgeResolver::new(); // no bridge supplied let profile = AiEvalProfile; // AI-Eval admits exactly two pairwise relation types: "score-delta" and // "repeatability-check" (see apl-ai-eval-profile.md §7.1). let query_value = json!({ "left_aspects": ["accuracy"], "right_aspects": ["accuracy"], "predicate": "score", "relation_type": "score-delta" }); let input = PairwiseInput { left: ReceiptInput::Bytes(receipt_a), right: ReceiptInput::Bytes(receipt_b), query: RelationQuery::parse(&query_value) // `RelationQueryParseError` only derives Debug — format it via `{:?}`. .map_err(|e| AplError::InvalidArgument(format!("{e:?}")))?, supplied_bridges: Vec::new(), }; let output = evaluate_relation(input, carrier, &frames, &bridges, Some(&profile)); assert!(matches!(output.relation_outcome, RelationOutcome::Incomparable)); println!("The Two MMLU Scores → incomparable (no bridge licenses this comparison)"); Ok(()) }

APL does not declare either score false. It refuses to license the comparison without an explicit, applicable bridge.

Last updated on