{"entries":[{"model_name":"Test Model pytest","factual_recall":0.75,"socratic_dialogue":0.68,"misconception_trap":0.6,"overall":0.677,"timestamp":"2026-04-25 18:36 UTC"},{"model_name":"Llama 3.1 8B (baseline)","factual_recall":0.71,"socratic_dialogue":0.68,"misconception_trap":0.58,"overall":0.657,"timestamp":"2026-04-06 17:10 UTC"},{"model_name":"Random agent","factual_recall":0.18,"socratic_dialogue":0.22,"misconception_trap":0.1,"overall":0.167,"timestamp":"2026-04-06 17:10 UTC"}],"total":3}