{"data":{"full_name":"abaheti95/LoL-RL","name":"LoL-RL","description":"Advantage Leftover Lunch Reinforcement Learning (A-LoL RL): Improving Language Models with Advantage-based Offline Policy Gradients","stars":26.0,"forks":8.0,"language":"Python","license":"MIT","archived":0.0,"subcategory":"variational-autoencoders-nlp","last_pushed_at":"2024-09-10T07:56:24+00:00","pypi_package":null,"npm_package":null,"downloads_monthly":0.0,"dependency_count":0.0,"commits_30d":null,"reverse_dep_count":0.0,"maintenance_score":0.0,"adoption_score":7.0,"maturity_score":9.0,"community_score":17.0,"quality_score":33.0,"quality_tier":"emerging","risk_flags":"['stale_6m', 'no_package', 'no_dependents']"},"meta":{"timestamp":"2026-04-05T22:05:45.041252+00:00"}}