{"data":{"full_name":"nanowell/Differential-Transformer-PyTorch","name":"Differential-Transformer-PyTorch","description":"PyTorch implementation of the Differential-Transformer architecture for sequence modeling, specifically tailored as a decoder-only model similar to large language models (LLMs). The architecture incorporates a novel Differential Attention mechanism, Multi-Head structure, RMSNorm, and SwiGLU.","stars":86.0,"forks":6.0,"language":"Python","license":"MIT","archived":0.0,"subcategory":"transformer-architecture-education","last_pushed_at":"2024-10-27T19:43:03+00:00","pypi_package":null,"npm_package":null,"downloads_monthly":0.0,"dependency_count":0.0,"commits_30d":null,"reverse_dep_count":0.0,"maintenance_score":0.0,"adoption_score":9.0,"maturity_score":16.0,"community_score":9.0,"quality_score":34.0,"quality_tier":"emerging","risk_flags":"['stale_6m', 'no_package', 'no_dependents']"},"meta":{"timestamp":"2026-04-06T18:46:21.092402+00:00"}}