Examples
Verify an AI-Eval receipt
Single-receipt verification with the AI-Eval profile enabled:
use apl_core::prelude::*;
use apl_ai_eval::AiEvalProfile;
/// Runs `verify_receipt` on a single receipt with the AI-Eval profile enabled
/// and prints the output's `to_json_pretty()` rendering to stdout.
///
/// * `receipt_bytes` – raw receipt bytes, forwarded to `verify_receipt` as-is.
/// * `carrier` – carrier verifier, forwarded to `verify_receipt` as-is.
///
/// The frame and bridge resolvers are freshly constructed in-memory instances.
/// The function returns nothing; its only visible effect is the printed output.
fn verify_ai_eval(
    receipt_bytes: &[u8],
    carrier: &dyn CarrierVerifier,
) {
    let frame_resolver = InMemoryFrameResolver::new();
    let bridge_resolver = InMemoryBridgeResolver::new();
    let ai_eval = AiEvalProfile;

    // `verify_receipt` returns a pair; this example only uses the first element
    // and ignores the second.
    let (report, _) = verify_receipt(
        receipt_bytes,
        carrier,
        &frame_resolver,
        &bridge_resolver,
        Some(&ai_eval),
    );

    let rendered = report.to_json_pretty();
    println!("{rendered}");
}
Pairwise under AI-Eval
Evaluate whether two benchmark receipts are comparable under AI-Eval’s stricter bridge rules:
use apl_core::prelude::*;
use apl_ai_eval::AiEvalProfile;
/// Evaluates a pairwise relation between two receipts with the AI-Eval profile
/// enabled and prints the resulting relation outcome to stdout.
///
/// * `left_bytes` / `right_bytes` – raw bytes of the two receipts.
/// * `query_value` – JSON value handed to `RelationQuery::parse`.
/// * `carrier` – carrier verifier, forwarded to `evaluate_relation` as-is.
///
/// # Errors
///
/// Returns `AplError::InvalidArgument` carrying the `Debug` rendering of the
/// parse error when `RelationQuery::parse` rejects `query_value`.
fn pair_ai_eval(
    left_bytes: &[u8],
    right_bytes: &[u8],
    query_value: &serde_json::Value,
    carrier: &dyn CarrierVerifier,
) -> AplResult<()> {
    let frame_resolver = InMemoryFrameResolver::new();
    let bridge_resolver = InMemoryBridgeResolver::new();
    let ai_eval = AiEvalProfile;

    // `RelationQueryParseError` only derives Debug, so it is rendered with `{:?}`.
    let query = RelationQuery::parse(query_value)
        .map_err(|err| AplError::InvalidArgument(format!("{err:?}")))?;

    let request = PairwiseInput {
        left: ReceiptInput::Bytes(left_bytes),
        right: ReceiptInput::Bytes(right_bytes),
        query,
        supplied_bridges: Vec::new(),
    };

    let verdict = evaluate_relation(
        request,
        carrier,
        &frame_resolver,
        &bridge_resolver,
        Some(&ai_eval),
    );

    // Every outcome prints exactly one label line.
    let label = match verdict.relation_outcome {
        RelationOutcome::SameFrameComparable => "same-frame-comparable",
        RelationOutcome::BridgedComparable => "bridged-comparable (AI-Eval)",
        RelationOutcome::Incomparable => "incomparable",
        RelationOutcome::RelationNotEvaluated => "relation-not-evaluated",
    };
    println!("{label}");

    // Only the not-evaluated outcome additionally lists the diagnostics.
    if matches!(verdict.relation_outcome, RelationOutcome::RelationNotEvaluated) {
        for diagnostic in &verdict.diagnostics {
            println!(" - {diagnostic}");
        }
    }

    Ok(())
}
The Two MMLU Scores — canonical demo
Under AI-Eval, two MMLU-labelled receipts produced by different harnesses and graders are incomparable without a bridge that meets profile-specific applicability rules:
use apl_core::prelude::*;
use apl_ai_eval::AiEvalProfile;
use serde_json::json;
/// Canonical "Two MMLU Scores" demo: evaluates a `score-delta` relation between
/// two receipts with the AI-Eval profile enabled and no supplied bridges, then
/// asserts that the outcome is `RelationOutcome::Incomparable`.
///
/// * `receipt_a` / `receipt_b` – raw bytes of the two receipts being compared.
/// * `carrier` – carrier verifier, forwarded to `evaluate_relation` as-is.
///
/// # Errors
///
/// Returns `AplError::InvalidArgument` carrying the `Debug` rendering of the
/// parse error when `RelationQuery::parse` rejects the built-in query.
///
/// # Panics
///
/// Panics if the relation outcome is anything other than `Incomparable`.
fn two_mmlu_scores(
    receipt_a: &[u8], // lm-eval-harness@0.4.2, exact-match-v1, mmlu/dev
    receipt_b: &[u8], // custom-runner@2.1, llm-judge-v3, mmlu/test-lite
    carrier: &dyn CarrierVerifier,
) -> AplResult<()> {
    let frame_resolver = InMemoryFrameResolver::new();
    let bridge_resolver = InMemoryBridgeResolver::new(); // no bridge supplied
    let ai_eval = AiEvalProfile;

    // AI-Eval admits exactly two pairwise relation types: "score-delta" and
    // "repeatability-check" (see apl-ai-eval-profile.md §7.1).
    let query_json = json!({
        "left_aspects": ["accuracy"],
        "right_aspects": ["accuracy"],
        "predicate": "score",
        "relation_type": "score-delta"
    });

    // `RelationQueryParseError` only derives Debug, so it is rendered with `{:?}`.
    let query = RelationQuery::parse(&query_json)
        .map_err(|err| AplError::InvalidArgument(format!("{err:?}")))?;

    let request = PairwiseInput {
        left: ReceiptInput::Bytes(receipt_a),
        right: ReceiptInput::Bytes(receipt_b),
        query,
        supplied_bridges: Vec::new(),
    };

    let verdict = evaluate_relation(
        request,
        carrier,
        &frame_resolver,
        &bridge_resolver,
        Some(&ai_eval),
    );

    // The demo's point: without a bridge, the comparison must not be licensed.
    assert!(matches!(
        verdict.relation_outcome,
        RelationOutcome::Incomparable
    ));
    println!("The Two MMLU Scores → incomparable (no bridge licenses this comparison)");

    Ok(())
}
APL does not declare either score false. It refuses to license the comparison without an explicit, applicable bridge.
Last updated on