<?xml version="1.0" encoding="UTF-8"?>
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
  <url>
    <loc>https://www.aievals.co/</loc>
    <lastmod>2026-05-29T16:18:38.209Z</lastmod>
    <changefreq>weekly</changefreq>
    <priority>1.0</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/learn</loc>
    <lastmod>2026-05-29T16:18:38.209Z</lastmod>
    <changefreq>weekly</changefreq>
    <priority>0.9</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/techniques</loc>
    <lastmod>2026-05-29T16:18:38.209Z</lastmod>
    <changefreq>weekly</changefreq>
    <priority>0.9</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/tools</loc>
    <lastmod>2026-05-29T16:18:38.209Z</lastmod>
    <changefreq>weekly</changefreq>
    <priority>0.9</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/papers</loc>
    <lastmod>2026-05-29T16:18:38.209Z</lastmod>
    <changefreq>weekly</changefreq>
    <priority>0.9</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/cookbook</loc>
    <lastmod>2026-05-29T16:18:38.209Z</lastmod>
    <changefreq>weekly</changefreq>
    <priority>0.9</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/task-types</loc>
    <lastmod>2026-05-29T16:18:38.209Z</lastmod>
    <changefreq>weekly</changefreq>
    <priority>0.9</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/start</loc>
    <lastmod>2026-05-29T16:18:38.209Z</lastmod>
    <changefreq>weekly</changefreq>
    <priority>0.9</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/glossary</loc>
    <lastmod>2026-05-29T16:18:38.209Z</lastmod>
    <changefreq>weekly</changefreq>
    <priority>0.9</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/about</loc>
    <lastmod>2026-05-29T16:18:38.209Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.5</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/contact</loc>
    <lastmod>2026-05-29T16:18:38.209Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.5</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/contributing</loc>
    <lastmod>2026-05-29T16:18:38.209Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.5</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/how-to-cite</loc>
    <lastmod>2026-05-29T16:18:38.209Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.5</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/privacy</loc>
    <lastmod>2026-05-29T16:18:38.209Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.5</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/learn/foundations</loc>
    <lastmod>2026-05-29T16:18:38.209Z</lastmod>
    <changefreq>weekly</changefreq>
    <priority>0.8</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/learn/foundations/why-evals-matter</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/learn/foundations/eval-maturity-model</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/learn/foundations/scientific-method-for-ai</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/learn/foundations/non-determinism-and-subjectivity</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/learn/foundations/eval-vs-monitoring-vs-guardrails</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/learn/error-analysis</loc>
    <lastmod>2026-05-29T16:18:38.209Z</lastmod>
    <changefreq>weekly</changefreq>
    <priority>0.8</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/learn/error-analysis/the-60-80-rule</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/learn/error-analysis/open-coding-workflow</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/learn/error-analysis/dimensional-sampling</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/learn/error-analysis/failure-mode-taxonomies</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/learn/error-analysis/the-data-viewer</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/learn/error-analysis/case-study-nurtureboss</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/learn/llm-as-judge</loc>
    <lastmod>2026-05-29T16:18:38.209Z</lastmod>
    <changefreq>weekly</changefreq>
    <priority>0.8</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/learn/llm-as-judge/introduction</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/learn/llm-as-judge/judge-prompt-structure</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/learn/llm-as-judge/calibration-to-humans</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/learn/llm-as-judge/biases-and-mitigations</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/learn/llm-as-judge/judgebench-honest-take</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/learn/llm-as-judge/pairwise-vs-direct</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/learn/llm-as-judge/distilled-judges</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/learn/datasets</loc>
    <lastmod>2026-05-29T16:18:38.209Z</lastmod>
    <changefreq>weekly</changefreq>
    <priority>0.8</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/learn/datasets/construction</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/learn/datasets/synthetic-vs-human</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/learn/datasets/versioning-lineage</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/learn/datasets/regression-sets</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/learn/datasets/dimensional-coverage</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/learn/datasets/dataset-platforms</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/learn/rag-evals</loc>
    <lastmod>2026-05-29T16:18:38.209Z</lastmod>
    <changefreq>weekly</changefreq>
    <priority>0.8</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/learn/rag-evals/retrieval-vs-generation</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/learn/rag-evals/retrieval-metrics</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/learn/rag-evals/generation-faithfulness</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/learn/rag-evals/context-precision-recall</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/learn/rag-evals/synthetic-query-generation</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/learn/rag-evals/long-context-rag</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/learn/rag-evals/non-english-rag</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/learn/agentic-evals</loc>
    <lastmod>2026-05-29T16:18:38.209Z</lastmod>
    <changefreq>weekly</changefreq>
    <priority>0.8</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/learn/agentic-evals/trajectory-vs-outcome</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/learn/agentic-evals/tool-call-evaluation</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/learn/agentic-evals/sub-goal-decomposition</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/learn/agentic-evals/pass-k-and-consistency</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/learn/agentic-evals/agent-as-judge</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/learn/agentic-evals/reward-hacking</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/learn/agentic-evals/environmental-drift</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/learn/agentic-evals/regression-suites</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/learn/production</loc>
    <lastmod>2026-05-29T16:18:38.209Z</lastmod>
    <changefreq>weekly</changefreq>
    <priority>0.8</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/learn/production/online-vs-offline</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/learn/production/trace-schema</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/learn/production/sampling-strategies</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/learn/production/drift-detection</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/learn/production/cost-latency-eval</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/learn/production/ci-integration</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/learn/production/release-gates</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/learn/production/incident-response</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/learn/statistics</loc>
    <lastmod>2026-05-29T16:18:38.209Z</lastmod>
    <changefreq>weekly</changefreq>
    <priority>0.8</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/learn/statistics/confidence-intervals</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/learn/statistics/bootstrap-and-paired-tests</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/learn/statistics/statistical-power</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/learn/statistics/clustered-standard-errors</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/learn/statistics/inter-rater-agreement</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/learn/statistics/multiple-comparisons</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/learn/statistics/effect-size</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/learn/safety-and-red-team</loc>
    <lastmod>2026-05-29T16:18:38.209Z</lastmod>
    <changefreq>weekly</changefreq>
    <priority>0.8</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/learn/safety-and-red-team/owasp-llm-top-10</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/learn/safety-and-red-team/red-team-program-design</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/learn/safety-and-red-team/harmbench-walkthrough</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/learn/safety-and-red-team/ailuminate-walkthrough</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/learn/safety-and-red-team/over-refusal-and-balance</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/learn/safety-and-red-team/jailbreaks-and-defenses</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/learn/governance</loc>
    <lastmod>2026-05-29T16:18:38.209Z</lastmod>
    <changefreq>weekly</changefreq>
    <priority>0.8</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/learn/governance/eu-ai-act-cheatsheet</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/learn/governance/nist-ai-rmf-mapped</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/learn/governance/anthropic-rsp</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/learn/governance/openai-preparedness</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/learn/governance/ai-risk-register</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/learn/governance/customer-trust-artifacts</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/learn/governance/board-readout-templates</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/techniques/agent-trajectory-eval</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/techniques/contamination-detection</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/techniques/embedding-similarity</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/techniques/error-analysis</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/techniques/golden-datasets</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/techniques/guardrails-vs-evals</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/techniques/llm-as-judge</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/techniques/online-evaluation</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/techniques/pairwise-and-elo</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/techniques/reference-based-metrics</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/techniques/robustness-testing</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/techniques/rubric-design</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/techniques/statistical-methods</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/tools/arize-phoenix</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/tools/braintrust</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/tools/comet-opik</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/tools/confident-ai-deepeval</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/tools/galileo</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/tools/giskard</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/tools/helm</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/tools/honeyhive</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/tools/inspect-ai</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/tools/langfuse</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/tools/langsmith</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/tools/llamaindex-evals</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/tools/lm-evaluation-harness</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/tools/openai-evals</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/tools/promptfoo</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/tools/ragas</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/tools/trulens</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/tools/w-and-b-weave</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/papers/agentic</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/papers/foundational</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/papers/llm-as-judge</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/papers/long-context</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/papers/methodology-production</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/papers/multimodal</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/papers/rag-factuality</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/papers/reasoning-and-math</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/papers/safety-red-team</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/papers/tool-use</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/cookbook/adding-error-bars</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/cookbook/agent-eval-with-tau-bench</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/cookbook/contamination-detection</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/cookbook/harmbench-red-team</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/cookbook/llm-judge-calibration</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/cookbook/long-context-eval-with-ruler</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/cookbook/multi-turn-conversation-eval</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/cookbook/online-eval-with-langfuse</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/cookbook/online-eval-with-phoenix</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/cookbook/rag-eval-with-ragas</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/cookbook/swe-bench-verified-walkthrough</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/cookbook/synthetic-query-generation</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/cookbook/text-to-sql-eval-with-execution</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/cookbook/typescript-eval-with-vercel-ai-sdk</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/cookbook/your-first-eval-30-minutes</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/task-types/autonomous-coding</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/task-types/classification</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/task-types/code-review</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/task-types/computer-browser-use</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/task-types/conversational-quality</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/task-types/document-understanding</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/task-types/embeddings-retrieval</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/task-types/image-generation</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/task-types/long-context-rag</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/task-types/long-horizon-planning</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/task-types/native-multimodal</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/task-types/reasoning-depth</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/task-types/recommendation</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/task-types/safety-jailbreak</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/task-types/structured-output-json</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/task-types/summarization</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/task-types/text-to-sql</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/task-types/tool-use-function-calling</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/task-types/translation</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/task-types/voice</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/start/cto</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/start/data-engineer</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/start/eng-manager</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/start/principal-ai-engineer</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/start/product-manager</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/start/research-engineer</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
  <url>
    <loc>https://www.aievals.co/start/sdet-qa</loc>
    <lastmod>2026-05-29T00:00:00.000Z</lastmod>
    <changefreq>monthly</changefreq>
    <priority>0.7</priority>
  </url>
</urlset>
