{"data":{"full_name":"DavidShableski/llm-evaluation-framework","name":"llm-evaluation-framework","description":"A production-grade platform to evaluate and compare the performance of Large Language Models (LLMs) like OpenAI, Anthropic, and Google’s PaLM. It features real time analytics, hallucination detection, and cost performance benchmarking using standardized datasets (e.g., GSM8K).","stars":0,"forks":0,"language":"TypeScript","license":"MIT","archived":false,"subcategory":"evaluation-frameworks-metrics","last_pushed_at":"2025-09-11T15:11:44+00:00","pypi_package":null,"npm_package":null,"downloads_monthly":0,"dependency_count":0,"commits_30d":null,"reverse_dep_count":0,"maintenance_score":2.0,"adoption_score":0.0,"maturity_score":9.0,"community_score":0.0,"quality_score":11.0,"quality_tier":"experimental","risk_flags":["stale_6m","no_package","no_dependents"],"meta":{"timestamp":"2026-04-08T12:14:11.490639+00:00"}}