[
  {
    "name": "Transformer",
    "family": null,
    "org": "Google Brain / Google Research",
    "release_date": "2017-06-12",
    "paper_arxiv_id": "1706.03762",
    "paper_url": "https://arxiv.org/abs/1706.03762",
    "params": "65M (base) / 213M (big)",
    "weights": "open-research-only",
    "architecture": "encoder-decoder transformer",
    "notes": "The 'Attention Is All You Need' paper by Vaswani et al.; introduced multi-head self-attention and the encoder-decoder transformer architecture that became the substrate of every subsequent LLM."
  },
  {
    "name": "ULMFiT",
    "family": null,
    "org": "fast.ai / Aylien",
    "release_date": "2018-01-18",
    "paper_arxiv_id": "1801.06146",
    "paper_url": "https://arxiv.org/abs/1801.06146",
    "params": "~24M (AWD-LSTM)",
    "weights": "open",
    "architecture": "AWD-LSTM (3-layer)",
    "notes": "Howard & Ruder popularized the pretrain-then-fine-tune recipe for NLP; pre-transformer but methodologically foundational for the LLM era."
  },
  {
    "name": "ELMo",
    "family": null,
    "org": "Allen Institute for AI / U. Washington",
    "release_date": "2018-02-15",
    "paper_arxiv_id": "1802.05365",
    "paper_url": "https://arxiv.org/abs/1802.05365",
    "params": "~94M",
    "weights": "open",
    "architecture": "bidirectional LSTM",
    "notes": "Deep contextualized word representations from a biLSTM language model; first to popularize contextual embeddings that varied with usage."
  },
  {
    "name": "GPT-1",
    "family": "GPT",
    "org": "OpenAI",
    "release_date": "2018-06-11",
    "paper_arxiv_id": null,
    "paper_url": "https://openai.com/index/language-unsupervised/",
    "params": "117M",
    "weights": "open",
    "architecture": "decoder-only transformer (12 layers)",
    "notes": "'Improving Language Understanding by Generative Pre-Training' (Radford et al.); first generative-pretrained transformer LM; published as OpenAI tech report, never on arXiv."
  },
  {
    "name": "BERT",
    "family": "BERT",
    "org": "Google AI Language",
    "release_date": "2018-10-11",
    "paper_arxiv_id": "1810.04805",
    "paper_url": "https://arxiv.org/abs/1810.04805",
    "params": "110M (base) / 340M (large)",
    "weights": "open",
    "architecture": "encoder-only transformer",
    "notes": "Bidirectional Encoder Representations from Transformers; masked-LM + next-sentence-prediction pretraining; the inflection paper that made pretrain-then-fine-tune NLP's dominant paradigm."
  },
  {
    "name": "GPT-2",
    "family": "GPT",
    "org": "OpenAI",
    "release_date": "2019-02-14",
    "paper_arxiv_id": null,
    "paper_url": "https://openai.com/index/better-language-models/",
    "params": "1.5B (full); 117M/345M/762M (smaller)",
    "weights": "open",
    "architecture": "decoder-only transformer",
    "notes": "Initial announcement Feb 14 2019 with staged release; full 1.5B model released Nov 5 2019. First high-profile 'too dangerous to release' moment; never an arXiv submission, only OpenAI's blog + tech report."
  },
  {
    "name": "XLNet",
    "family": null,
    "org": "Google Brain / CMU",
    "release_date": "2019-06-19",
    "paper_arxiv_id": "1906.08237",
    "paper_url": "https://arxiv.org/abs/1906.08237",
    "params": "340M (large)",
    "weights": "open",
    "architecture": "permutation-LM transformer (Transformer-XL backbone)",
    "notes": "Permutation language modeling objective; briefly displaced BERT atop GLUE in mid-2019."
  },
  {
    "name": "RoBERTa",
    "family": "BERT",
    "org": "Facebook AI Research (Meta)",
    "release_date": "2019-07-26",
    "paper_arxiv_id": "1907.11692",
    "paper_url": "https://arxiv.org/abs/1907.11692",
    "params": "125M (base) / 355M (large)",
    "weights": "open",
    "architecture": "encoder-only transformer",
    "notes": "'Robustly Optimized BERT'; showed BERT was undertrained; longer, larger-batch pretraining without next-sentence prediction; established that scale and data trump objective tweaks."
  },
  {
    "name": "T5",
    "family": "T5",
    "org": "Google Research",
    "release_date": "2019-10-23",
    "paper_arxiv_id": "1910.10683",
    "paper_url": "https://arxiv.org/abs/1910.10683",
    "params": "60M / 220M / 770M / 3B / 11B",
    "weights": "open",
    "architecture": "encoder-decoder transformer",
    "notes": "Text-to-Text Transfer Transformer; cast every NLP task as text-to-text; the 11B version was, at release, the largest open-released encoder-decoder LM."
  },
  {
    "name": "BART",
    "family": null,
    "org": "Facebook AI Research (Meta)",
    "release_date": "2019-10-29",
    "paper_arxiv_id": "1910.13461",
    "paper_url": "https://arxiv.org/abs/1910.13461",
    "params": "140M (base) / 400M (large)",
    "weights": "open",
    "architecture": "encoder-decoder transformer (denoising autoencoder)",
    "notes": "Bidirectional encoder + autoregressive decoder pretrained as a denoising autoencoder; strong on summarization and generation."
  },
  {
    "name": "GPT-3",
    "family": "GPT",
    "org": "OpenAI",
    "release_date": "2020-05-28",
    "paper_arxiv_id": "2005.14165",
    "paper_url": "https://arxiv.org/abs/2005.14165",
    "params": "175B (plus 13 smaller sizes)",
    "weights": "closed-api",
    "architecture": "decoder-only transformer (96 layers)",
    "notes": "'Language Models are Few-Shot Learners'; the defining 175B model that introduced in-context learning at scale and launched the modern LLM era."
  },
  {
    "name": "mT5",
    "family": "T5",
    "org": "Google Research",
    "release_date": "2020-10-22",
    "paper_arxiv_id": "2010.11934",
    "paper_url": "https://arxiv.org/abs/2010.11934",
    "params": "300M to 13B",
    "weights": "open",
    "architecture": "encoder-decoder transformer",
    "notes": "Multilingual T5 trained on mC4 covering 101 languages; benchmark multilingual pretrained model of the era."
  },
  {
    "name": "WuDao 2.0",
    "family": "WuDao",
    "org": "Beijing Academy of Artificial Intelligence (BAAI)",
    "release_date": "2021-06-01",
    "paper_arxiv_id": null,
    "paper_url": "https://en.wikipedia.org/wiki/Wu_Dao",
    "params": "1.75T",
    "weights": "unknown",
    "architecture": "MoE multimodal transformer",
    "notes": "Chinese state-supported trillion-parameter multimodal model announced at the 2021 BAAI conference; weights never broadly released; primarily a scaling milestone."
  },
  {
    "name": "Codex",
    "family": "GPT",
    "org": "OpenAI",
    "release_date": "2021-07-07",
    "paper_arxiv_id": "2107.03374",
    "paper_url": "https://arxiv.org/abs/2107.03374",
    "params": "12B (largest evaluated); GPT-3 family",
    "weights": "closed-api",
    "architecture": "decoder-only transformer",
    "notes": "GPT-3 fine-tuned on GitHub; powered the first GitHub Copilot; introduced the HumanEval benchmark."
  },
  {
    "name": "Megatron-Turing NLG 530B",
    "family": null,
    "org": "Microsoft / NVIDIA",
    "release_date": "2021-10-11",
    "paper_arxiv_id": "2201.11990",
    "paper_url": "https://arxiv.org/abs/2201.11990",
    "params": "530B",
    "weights": "closed-api",
    "architecture": "decoder-only transformer (dense)",
    "notes": "Joint Microsoft/NVIDIA model; largest dense LM at announcement; 3x GPT-3 by params. Paper Jan 2022 (arxiv 2201.11990); model announced Oct 11 2021."
  },
  {
    "name": "Gopher",
    "family": null,
    "org": "DeepMind",
    "release_date": "2021-12-08",
    "paper_arxiv_id": "2112.11446",
    "paper_url": "https://arxiv.org/abs/2112.11446",
    "params": "280B (also 44M, 117M, 417M, 1.4B, 7.1B)",
    "weights": "closed-api",
    "architecture": "decoder-only transformer",
    "notes": "DeepMind's first frontier-scale LLM; evaluated across 152 tasks; main vehicle for studying scaling effects pre-Chinchilla."
  },
  {
    "name": "LaMDA",
    "family": null,
    "org": "Google Research",
    "release_date": "2022-01-20",
    "paper_arxiv_id": "2201.08239",
    "paper_url": "https://arxiv.org/abs/2201.08239",
    "params": "137B",
    "weights": "closed-api",
    "architecture": "decoder-only transformer",
    "notes": "Dialog-specialized LM; foundation of early Bard. Initially announced May 2021 at Google I/O; paper landed Jan 2022."
  },
  {
    "name": "Chinchilla",
    "family": null,
    "org": "DeepMind",
    "release_date": "2022-03-29",
    "paper_arxiv_id": "2203.15556",
    "paper_url": "https://arxiv.org/abs/2203.15556",
    "params": "70B",
    "weights": "closed-api",
    "architecture": "decoder-only transformer",
    "notes": "Compute-optimal scaling laws: model and tokens should scale 1:1; 70B model with 4x more data outperformed 280B Gopher; reshaped how the field budgets compute."
  },
  {
    "name": "PaLM",
    "family": "PaLM",
    "org": "Google Research",
    "release_date": "2022-04-05",
    "paper_arxiv_id": "2204.02311",
    "paper_url": "https://arxiv.org/abs/2204.02311",
    "params": "540B (also 8B, 62B)",
    "weights": "closed-api",
    "architecture": "decoder-only transformer (dense)",
    "notes": "Pathways Language Model; densely activated 540B trained on TPU v4 pods; first dense LM to convincingly outperform humans on BIG-Bench."
  },
  {
    "name": "OPT-175B",
    "family": "OPT",
    "org": "Meta AI",
    "release_date": "2022-05-02",
    "paper_arxiv_id": "2205.01068",
    "paper_url": "https://arxiv.org/abs/2205.01068",
    "params": "125M to 175B",
    "weights": "open-research-only",
    "architecture": "decoder-only transformer",
    "notes": "First GPT-3-scale open model from a major lab; trained at 1/7 the GPT-3 carbon footprint; released with a full training logbook."
  },
  {
    "name": "BLOOM",
    "family": null,
    "org": "BigScience (Hugging Face-led collaboration)",
    "release_date": "2022-07-11",
    "paper_arxiv_id": "2211.05100",
    "paper_url": "https://arxiv.org/abs/2211.05100",
    "params": "176B",
    "weights": "open",
    "architecture": "decoder-only transformer",
    "notes": "First fully open 175B-scale multilingual LM (46 natural + 13 programming languages); developed by ~1000 researchers; released under RAIL license. Weights released July 11 2022; arXiv paper Nov 2022."
  },
  {
    "name": "Flan-T5 / Flan-PaLM",
    "family": "T5 / PaLM",
    "org": "Google Research",
    "release_date": "2022-10-20",
    "paper_arxiv_id": "2210.11416",
    "paper_url": "https://arxiv.org/abs/2210.11416",
    "params": "T5: 80M-11B; PaLM: 540B",
    "weights": "open (Flan-T5) / closed-api (Flan-PaLM)",
    "architecture": "encoder-decoder / decoder-only transformer",
    "notes": "'Scaling Instruction-Finetuned Language Models'; finetuned on 1.8K tasks with chain-of-thought; the prototype instruction-tuning recipe that everyone copied."
  },
  {
    "name": "Galactica",
    "family": null,
    "org": "Meta AI",
    "release_date": "2022-11-16",
    "paper_arxiv_id": "2211.09085",
    "paper_url": "https://arxiv.org/abs/2211.09085",
    "params": "125M / 1.3B / 6.7B / 30B / 120B",
    "weights": "open-research-only",
    "architecture": "decoder-only transformer",
    "notes": "Trained on 106B tokens of scientific text (papers, knowledge bases); demo pulled within 3 days after hallucinated-citations backlash, marking the era when public demos started getting scrutinized."
  },
  {
    "name": "ChatGPT",
    "family": "GPT",
    "org": "OpenAI",
    "release_date": "2022-11-30",
    "paper_arxiv_id": null,
    "paper_url": "https://openai.com/index/chatgpt/",
    "params": "GPT-3.5 lineage (unknown exact)",
    "weights": "closed-api",
    "architecture": "decoder-only transformer (RLHF fine-tune of text-davinci-002/003)",
    "notes": "Consumer-product breakout; 1M users in 5 days, 100M in 2 months; fastest-growing consumer technology product in history at the time; the moment LLMs became mainstream."
  },
  {
    "name": "LLaMA-1",
    "family": "LLaMA",
    "org": "Meta AI / FAIR",
    "release_date": "2023-02-27",
    "paper_arxiv_id": "2302.13971",
    "paper_url": "https://arxiv.org/abs/2302.13971",
    "params": "7B / 13B / 33B / 65B",
    "weights": "open-research-only",
    "architecture": "decoder-only transformer (RoPE, SwiGLU, RMSNorm)",
    "notes": "Compute-optimal trained on 1-1.4T tokens; weights leaked to 4chan March 2023, kick-starting the open-weight LLaMA era; established the modern open-LLM blueprint."
  },
  {
    "name": "GPT-4",
    "family": "GPT",
    "org": "OpenAI",
    "release_date": "2023-03-14",
    "paper_arxiv_id": "2303.08774",
    "paper_url": "https://arxiv.org/abs/2303.08774",
    "params": "unknown (rumored ~1.8T MoE)",
    "weights": "closed-api",
    "architecture": "rumored MoE multimodal transformer",
    "notes": "GPT-4 Technical Report; first model with human-expert performance on professional/academic benchmarks; OpenAI declined to disclose architecture or training details."
  },
  {
    "name": "Claude 1",
    "family": "Claude",
    "org": "Anthropic",
    "release_date": "2023-03-14",
    "paper_arxiv_id": null,
    "paper_url": "https://www.anthropic.com/news/introducing-claude",
    "params": "unknown",
    "weights": "closed-api",
    "architecture": "decoder-only transformer (Constitutional AI-trained)",
    "notes": "First Claude release; available only via API to selected partners; introduced Constitutional AI alignment approach."
  },
  {
    "name": "PaLM 2",
    "family": "PaLM",
    "org": "Google",
    "release_date": "2023-05-10",
    "paper_arxiv_id": "2305.10403",
    "paper_url": "https://arxiv.org/abs/2305.10403",
    "params": "unknown (340B rumored)",
    "weights": "closed-api",
    "architecture": "decoder-only transformer",
    "notes": "Backbone of Bard and Google's Duet AI; emphasized multilingual + reasoning capability with smaller compute; announced at Google I/O 2023."
  },
  {
    "name": "Falcon-40B",
    "family": "Falcon",
    "org": "Technology Innovation Institute (TII), Abu Dhabi",
    "release_date": "2023-05-25",
    "paper_arxiv_id": "2311.16867",
    "paper_url": "https://arxiv.org/abs/2311.16867",
    "params": "40B (also 7B)",
    "weights": "open",
    "architecture": "decoder-only transformer (multi-query attention)",
    "notes": "First major UAE-origin open LLM; trained on the RefinedWeb dataset; topped HuggingFace's Open LLM Leaderboard for several months in mid-2023."
  },
  {
    "name": "Claude 2",
    "family": "Claude",
    "org": "Anthropic",
    "release_date": "2023-07-11",
    "paper_arxiv_id": null,
    "paper_url": "https://www.anthropic.com/news/claude-2",
    "params": "unknown",
    "weights": "closed-api",
    "architecture": "decoder-only transformer",
    "notes": "First Anthropic model available to the general public via claude.ai; 100K-token context window; surge of long-document use cases."
  },
  {
    "name": "LLaMA-2",
    "family": "LLaMA",
    "org": "Meta AI",
    "release_date": "2023-07-18",
    "paper_arxiv_id": "2307.09288",
    "paper_url": "https://arxiv.org/abs/2307.09288",
    "params": "7B / 13B / 70B",
    "weights": "open",
    "architecture": "decoder-only transformer (GQA on 70B)",
    "notes": "First Meta LLM with a permissive commercial license; pretrained on 2T tokens; Chat variants RLHF-fine-tuned. The release that made open LLMs commercially viable."
  },
  {
    "name": "Falcon-180B",
    "family": "Falcon",
    "org": "Technology Innovation Institute (TII), Abu Dhabi",
    "release_date": "2023-09-06",
    "paper_arxiv_id": null,
    "paper_url": "https://www.tii.ae/news/technology-innovation-institute-introduces-worlds-most-powerful-open-llm-falcon-180b",
    "params": "180B",
    "weights": "open",
    "architecture": "decoder-only transformer (multi-query attention)",
    "notes": "Largest open LLM at release; trained on 3.5T tokens; briefly top of the HF leaderboard before LLaMA-2 + fine-tunes overtook it."
  },
  {
    "name": "Mistral 7B",
    "family": "Mistral",
    "org": "Mistral AI",
    "release_date": "2023-09-27",
    "paper_arxiv_id": "2310.06825",
    "paper_url": "https://arxiv.org/abs/2310.06825",
    "params": "7.3B",
    "weights": "open",
    "architecture": "decoder-only transformer (GQA + sliding-window attention)",
    "notes": "Apache 2.0 7B model that beat LLaMA-2 13B across the board; effectively redefined what was possible at 7B and made Mistral AI a major European player."
  },
  {
    "name": "Qwen-1.0",
    "family": "Qwen",
    "org": "Alibaba Cloud",
    "release_date": "2023-09-28",
    "paper_arxiv_id": "2309.16609",
    "paper_url": "https://arxiv.org/abs/2309.16609",
    "params": "1.8B / 7B / 14B / 72B",
    "weights": "open",
    "architecture": "decoder-only transformer",
    "notes": "First Alibaba open LLM family (rebranded from Tongyi Qianwen); bilingual EN/CN, pretrained on up to 3T tokens; start of the Qwen series that became China's flagship open-weight lineage."
  },
  {
    "name": "Yi-34B",
    "family": "Yi",
    "org": "01.AI",
    "release_date": "2023-11-02",
    "paper_arxiv_id": "2403.04652",
    "paper_url": "https://arxiv.org/abs/2403.04652",
    "params": "6B / 34B",
    "weights": "open",
    "architecture": "decoder-only transformer",
    "notes": "Bilingual EN/CN models trained on 3.1T tokens; founded by Kai-Fu Lee; Yi-34B briefly held the top spot on HF's leaderboard."
  },
  {
    "name": "Grok-1",
    "family": "Grok",
    "org": "xAI",
    "release_date": "2023-11-04",
    "paper_arxiv_id": null,
    "paper_url": "https://x.ai/news/grok-os",
    "params": "314B (MoE; ~86B active)",
    "weights": "open (weights released March 17 2024)",
    "architecture": "MoE transformer",
    "notes": "xAI's debut chatbot; trained in 2 months; weights open-sourced under Apache 2.0 in March 2024; integrated with X (Twitter)."
  },
  {
    "name": "DeepSeek LLM",
    "family": "DeepSeek",
    "org": "DeepSeek AI",
    "release_date": "2023-11-29",
    "paper_arxiv_id": "2401.02954",
    "paper_url": "https://arxiv.org/abs/2401.02954",
    "params": "7B / 67B",
    "weights": "open",
    "architecture": "decoder-only transformer (LLaMA-like)",
    "notes": "DeepSeek's debut LLM; 67B variant surpassed LLaMA-2 70B on code, math, reasoning; presented its own scaling-laws study. Models released Nov 29 2023; paper Jan 2024."
  },
  {
    "name": "Gemini 1.0",
    "family": "Gemini",
    "org": "Google DeepMind",
    "release_date": "2023-12-06",
    "paper_arxiv_id": "2312.11805",
    "paper_url": "https://arxiv.org/abs/2312.11805",
    "params": "Ultra / Pro / Nano (sizes undisclosed)",
    "weights": "closed-api",
    "architecture": "natively multimodal transformer",
    "notes": "Google DeepMind's first natively multimodal frontier family; Ultra was the first model to surpass human experts on MMLU at 90.0%."
  },
  {
    "name": "Mixtral 8x7B",
    "family": "Mistral",
    "org": "Mistral AI",
    "release_date": "2023-12-11",
    "paper_arxiv_id": "2401.04088",
    "paper_url": "https://arxiv.org/abs/2401.04088",
    "params": "47B total / 13B active (8 experts, top-2 routing)",
    "weights": "open",
    "architecture": "Sparse Mixture-of-Experts transformer",
    "notes": "First commercially viable open MoE LLM; matched or beat LLaMA-2 70B and GPT-3.5 across benchmarks; weights released via magnet link Dec 8 2023, paper Jan 8 2024."
  },
  {
    "name": "Gemini 1.5 Pro",
    "family": "Gemini",
    "org": "Google DeepMind",
    "release_date": "2024-02-15",
    "paper_arxiv_id": "2403.05530",
    "paper_url": "https://arxiv.org/abs/2403.05530",
    "params": "unknown (MoE)",
    "weights": "closed-api",
    "architecture": "MoE multimodal transformer",
    "notes": "Introduced 1M-token context (later 2M); first frontier model with truly long-context multimodal understanding (video, audio, full codebases)."
  },
  {
    "name": "Gemma 1",
    "family": "Gemma",
    "org": "Google DeepMind",
    "release_date": "2024-02-21",
    "paper_arxiv_id": "2403.08295",
    "paper_url": "https://arxiv.org/abs/2403.08295",
    "params": "2B / 7B",
    "weights": "open",
    "architecture": "decoder-only transformer (Gemini-derived)",
    "notes": "Google's first open-weight model family from the Gemini program; outperformed Mistral 7B and LLaMA-2 across many benchmarks."
  },
  {
    "name": "Claude 3 (Opus, Sonnet, Haiku)",
    "family": "Claude",
    "org": "Anthropic",
    "release_date": "2024-03-04",
    "paper_arxiv_id": null,
    "paper_url": "https://www.anthropic.com/news/claude-3-family",
    "params": "unknown",
    "weights": "closed-api",
    "architecture": "multimodal transformer",
    "notes": "First Claude family with vision and three-tier size structure (Opus/Sonnet/Haiku); Opus briefly matched or beat GPT-4 on most benchmarks."
  },
  {
    "name": "InternLM 2",
    "family": "InternLM",
    "org": "Shanghai AI Lab",
    "release_date": "2024-03-26",
    "paper_arxiv_id": "2403.17297",
    "paper_url": "https://arxiv.org/abs/2403.17297",
    "params": "1.8B / 7B / 20B",
    "weights": "open",
    "architecture": "decoder-only transformer",
    "notes": "Major Chinese open-weight LM with strong long-context evaluation; from Shanghai AI Lab (SenseTime + universities)."
  },
  {
    "name": "DBRX",
    "family": null,
    "org": "Databricks (Mosaic AI)",
    "release_date": "2024-03-27",
    "paper_arxiv_id": null,
    "paper_url": "https://www.databricks.com/blog/introducing-dbrx-new-state-art-open-llm",
    "params": "132B total / 36B active",
    "weights": "open",
    "architecture": "fine-grained MoE transformer (16 experts, top-4 routing)",
    "notes": "Databricks' open LLM; finer-grained MoE than Mixtral; pretrained on 12T tokens; up to 2x more compute-efficient at inference than LLaMA-2 70B."
  },
  {
    "name": "Llama 3",
    "family": "LLaMA",
    "org": "Meta AI",
    "release_date": "2024-04-18",
    "paper_arxiv_id": "2407.21783",
    "paper_url": "https://arxiv.org/abs/2407.21783",
    "params": "8B / 70B",
    "weights": "open",
    "architecture": "decoder-only transformer (GQA, 128K-token tokenizer)",
    "notes": "Trained on 15T tokens (5x Llama 2); new 128K vocabulary tokenizer; the 70B briefly matched GPT-4 on several benchmarks."
  },
  {
    "name": "Phi-3",
    "family": "Phi",
    "org": "Microsoft Research",
    "release_date": "2024-04-22",
    "paper_arxiv_id": "2404.14219",
    "paper_url": "https://arxiv.org/abs/2404.14219",
    "params": "3.8B (mini) / 7B (small) / 14B (medium)",
    "weights": "open",
    "architecture": "decoder-only transformer",
    "notes": "Phi-3-mini at 3.8B rivaled Mixtral 8x7B and GPT-3.5; built on the 'Textbooks Are All You Need' data-quality philosophy; first phone-deployable frontier-ish SLM."
  },
  {
    "name": "DeepSeek-V2",
    "family": "DeepSeek",
    "org": "DeepSeek AI",
    "release_date": "2024-05-07",
    "paper_arxiv_id": "2405.04434",
    "paper_url": "https://arxiv.org/abs/2405.04434",
    "params": "236B total / 21B active",
    "weights": "open",
    "architecture": "MoE transformer (Multi-head Latent Attention, DeepSeekMoE)",
    "notes": "Introduced MLA to slash KV-cache by 93% and triggered an industry-wide API price war (priced at ~1 yuan per million tokens)."
  },
  {
    "name": "GPT-4o",
    "family": "GPT",
    "org": "OpenAI",
    "release_date": "2024-05-13",
    "paper_arxiv_id": null,
    "paper_url": "https://openai.com/index/hello-gpt-4o/",
    "params": "unknown",
    "weights": "closed-api",
    "architecture": "natively multimodal transformer",
    "notes": "First natively-multimodal GPT (text/audio/image/video in & out); 2x faster and half the price of GPT-4 Turbo; responds to audio in ~232ms (human-conversational latency)."
  },
  {
    "name": "Falcon 2",
    "family": "Falcon",
    "org": "TII Abu Dhabi",
    "release_date": "2024-05-14",
    "paper_arxiv_id": "2407.14885",
    "paper_url": "https://arxiv.org/abs/2407.14885",
    "params": "11B",
    "weights": "open",
    "architecture": "decoder-only transformer (multimodal Falcon 2 VLM variant)",
    "notes": "Second-generation TII Falcon; 11 languages; 5.5T training tokens; also offered as a vision-language model variant."
  },
  {
    "name": "Qwen2",
    "family": "Qwen",
    "org": "Alibaba Cloud",
    "release_date": "2024-06-07",
    "paper_arxiv_id": "2407.10671",
    "paper_url": "https://arxiv.org/abs/2407.10671",
    "params": "0.5B / 1.5B / 7B / 57B-A14B (MoE) / 72B",
    "weights": "open",
    "architecture": "decoder-only transformer (+ MoE variant)",
    "notes": "Qwen2-72B matched or beat LLaMA-3 70B and Mistral Large on most benchmarks; 30-language support; weights released first week of June, paper Jul 15."
  },
  {
    "name": "Claude 3.5 Sonnet",
    "family": "Claude",
    "org": "Anthropic",
    "release_date": "2024-06-20",
    "paper_arxiv_id": null,
    "paper_url": "https://www.anthropic.com/news/claude-3-5-sonnet",
    "params": "unknown",
    "weights": "closed-api",
    "architecture": "multimodal transformer",
    "notes": "Mid-tier model outperformed flagship Claude 3 Opus at 1/5 the cost; introduced Artifacts UI; widely regarded as the best coding model of mid-2024."
  },
  {
    "name": "Gemma 2",
    "family": "Gemma",
    "org": "Google DeepMind",
    "release_date": "2024-06-27",
    "paper_arxiv_id": "2408.00118",
    "paper_url": "https://arxiv.org/abs/2408.00118",
    "params": "2B / 9B / 27B",
    "weights": "open",
    "architecture": "decoder-only transformer (interleaved local/global attention, knowledge distillation)",
    "notes": "27B variant became one of the highest-ranking open LMs on LMSYS Chatbot Arena, outperforming much larger models."
  },
  {
    "name": "Llama 3.1",
    "family": "LLaMA",
    "org": "Meta AI",
    "release_date": "2024-07-23",
    "paper_arxiv_id": "2407.21783",
    "paper_url": "https://arxiv.org/abs/2407.21783",
    "params": "8B / 70B / 405B",
    "weights": "open",
    "architecture": "decoder-only transformer (GQA, 128K context)",
    "notes": "First frontier-class open model; the 405B matched GPT-4 / Claude 3.5 Sonnet on many benchmarks; 'The Llama 3 Herd of Models' paper introduced the full family."
  },
  {
    "name": "Mistral Large 2",
    "family": "Mistral",
    "org": "Mistral AI",
    "release_date": "2024-07-24",
    "paper_arxiv_id": null,
    "paper_url": "https://mistral.ai/news/mistral-large-2407/",
    "params": "123B",
    "weights": "open-research-only",
    "architecture": "decoder-only transformer (dense)",
    "notes": "Dense 123B with 128K context; Mistral Research License (commercial use requires paid license); strong on multilingual + code generation."
  },
  {
    "name": "o1 (preview & mini)",
    "family": "GPT / o-series",
    "org": "OpenAI",
    "release_date": "2024-09-12",
    "paper_arxiv_id": null,
    "paper_url": "https://openai.com/index/learning-to-reason-with-llms/",
    "params": "unknown",
    "weights": "closed-api",
    "architecture": "decoder-only transformer (reinforcement-learning chain-of-thought)",
    "notes": "First major 'reasoning model'; spent inference compute on hidden chain-of-thought; PhD-level performance on physics/chem/bio; solved 83% of AIME problems vs GPT-4o's 13%."
  },
  {
    "name": "Qwen2.5",
    "family": "Qwen",
    "org": "Alibaba Cloud",
    "release_date": "2024-09-19",
    "paper_arxiv_id": "2412.15115",
    "paper_url": "https://arxiv.org/abs/2412.15115",
    "params": "0.5B / 1.5B / 3B / 7B / 14B / 32B / 72B (+ Coder + Math + MoE)",
    "weights": "open",
    "architecture": "decoder-only transformer (+ MoE variants Turbo/Plus)",
    "notes": "Pretrained on 18T tokens (up from 7T); Qwen2.5-72B-Instruct rivaled Llama-3-405B at ~1/5 the parameters; paper landed Dec 19 2024."
  },
  {
    "name": "Llama 3.2",
    "family": "LLaMA",
    "org": "Meta AI",
    "release_date": "2024-09-25",
    "paper_arxiv_id": null,
    "paper_url": "https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/",
    "params": "1B / 3B (text); 11B / 90B (vision)",
    "weights": "open",
    "architecture": "decoder-only transformer + vision adapters",
    "notes": "First multimodal Llama; 1B/3B for edge devices, 11B/90B with vision; not released in EU due to regulatory issues."
  },
  {
    "name": "Claude 3.5 Sonnet (new) + Haiku + Computer Use",
    "family": "Claude",
    "org": "Anthropic",
    "release_date": "2024-10-22",
    "paper_arxiv_id": null,
    "paper_url": "https://www.anthropic.com/news/3-5-models-and-computer-use",
    "params": "unknown",
    "weights": "closed-api",
    "architecture": "multimodal transformer",
    "notes": "Updated 3.5 Sonnet; first frontier model with 'Computer Use' beta (screen-perception + cursor + keyboard); SWE-bench Verified jumped from 33.4% to 49.0%."
  },
  {
    "name": "Llama 3.3 70B",
    "family": "LLaMA",
    "org": "Meta AI",
    "release_date": "2024-12-06",
    "paper_arxiv_id": null,
    "paper_url": "https://ai.meta.com/blog/meta-llama-3-3/",
    "params": "70B",
    "weights": "open",
    "architecture": "decoder-only transformer",
    "notes": "Instruction-tuned-only release that matched Llama 3.1 405B at 4-5x lower serving cost; closing 2024's Llama cadence."
  },
  {
    "name": "Gemini 2.0 Flash (Experimental)",
    "family": "Gemini",
    "org": "Google DeepMind",
    "release_date": "2024-12-11",
    "paper_arxiv_id": null,
    "paper_url": "https://blog.google/technology/google-deepmind/google-gemini-ai-update-december-2024/",
    "params": "unknown",
    "weights": "closed-api",
    "architecture": "multimodal transformer",
    "notes": "First Gemini 2 model; positioned for 'the agentic era'; native tool use; image and audio output baked in; GA on Feb 5 2025."
  },
  {
    "name": "Phi-4",
    "family": "Phi",
    "org": "Microsoft Research",
    "release_date": "2024-12-12",
    "paper_arxiv_id": "2412.08905",
    "paper_url": "https://arxiv.org/abs/2412.08905",
    "params": "14B",
    "weights": "open",
    "architecture": "decoder-only transformer",
    "notes": "Synthetic-data-heavy training; beat GPT-4 on MATH and GPQA; weights MIT-licensed in early 2025."
  },
  {
    "name": "DeepSeek-V3",
    "family": "DeepSeek",
    "org": "DeepSeek AI",
    "release_date": "2024-12-26",
    "paper_arxiv_id": "2412.19437",
    "paper_url": "https://arxiv.org/abs/2412.19437",
    "params": "671B total / 37B active",
    "weights": "open",
    "architecture": "MoE transformer (MLA + auxiliary-loss-free routing + multi-token prediction)",
    "notes": "Trained in only 2.788M H800 GPU-hours (~$5.6M compute); matched closed frontier models on many benchmarks; the model that triggered the 'DeepSeek moment' in January 2025."
  },
  {
    "name": "DeepSeek-R1",
    "family": "DeepSeek",
    "org": "DeepSeek AI",
    "release_date": "2025-01-20",
    "paper_arxiv_id": "2501.12948",
    "paper_url": "https://arxiv.org/abs/2501.12948",
    "params": "671B total / 37B active (+ 1.5B-70B distilled variants)",
    "weights": "open",
    "architecture": "MoE transformer (pure-RL trained reasoning)",
    "notes": "First open reasoning model comparable to o1; demonstrated emergent CoT from RL alone (R1-Zero); the geopolitical shock-release that wiped >$1T from tech-stock market caps in late January 2025."
  },
  {
    "name": "o3-mini",
    "family": "GPT / o-series",
    "org": "OpenAI",
    "release_date": "2025-01-31",
    "paper_arxiv_id": null,
    "paper_url": "https://openai.com/index/openai-o3-mini/",
    "params": "unknown",
    "weights": "closed-api",
    "architecture": "decoder-only transformer (reasoning RL)",
    "notes": "Faster, cheaper reasoning model; available to free ChatGPT users (the first OpenAI reasoning model free-tier could access)."
  },
  {
    "name": "Grok-3",
    "family": "Grok",
    "org": "xAI",
    "release_date": "2025-02-17",
    "paper_arxiv_id": null,
    "paper_url": "https://x.ai/news/grok-3",
    "params": "unknown (multi-trillion MoE rumored)",
    "weights": "closed-api",
    "architecture": "MoE transformer",
    "notes": "Trained on the Colossus supercluster (200K+ GPUs, 10x Grok-2 compute); introduced 'Think' and 'DeepSearch' reasoning modes."
  },
  {
    "name": "Claude 3.7 Sonnet",
    "family": "Claude",
    "org": "Anthropic",
    "release_date": "2025-02-24",
    "paper_arxiv_id": null,
    "paper_url": "https://www.anthropic.com/news/claude-3-7-sonnet",
    "params": "unknown",
    "weights": "closed-api",
    "architecture": "multimodal transformer (hybrid reasoning)",
    "notes": "First hybrid reasoning model: same weights toggle between fast answers and extended thinking with a developer-controlled budget; first model to break 50% on SWE-bench Verified (62.3%)."
  },
  {
    "name": "GPT-4.5 (Orion)",
    "family": "GPT",
    "org": "OpenAI",
    "release_date": "2025-02-27",
    "paper_arxiv_id": null,
    "paper_url": "https://openai.com/index/introducing-gpt-4-5/",
    "params": "unknown (largest OpenAI model at release)",
    "weights": "closed-api",
    "architecture": "decoder-only transformer (dense)",
    "notes": "Internally code-named Orion; OpenAI's largest pretrained (non-reasoning) model; $75/$150 per million tokens \u2014 premium-priced even by frontier standards. Quietly deprecated mid-2025."
  },
  {
    "name": "Gemini 2.5 Pro",
    "family": "Gemini",
    "org": "Google DeepMind",
    "release_date": "2025-03-25",
    "paper_arxiv_id": null,
    "paper_url": "https://blog.google/technology/google-deepmind/gemini-model-thinking-updates-march-2025/",
    "params": "unknown",
    "weights": "closed-api",
    "architecture": "multimodal transformer (thinking model)",
    "notes": "Google's first Pro-tier 'thinking model' (preceded only by the experimental Gemini 2.0 Flash Thinking); reasons via chain-of-thought before responding; took #1 on LMSYS Chatbot Arena at launch."
  },
  {
    "name": "Llama 4 (Scout + Maverick)",
    "family": "LLaMA",
    "org": "Meta AI",
    "release_date": "2025-04-05",
    "paper_arxiv_id": null,
    "paper_url": "https://ai.meta.com/blog/llama-4-multimodal-intelligence/",
    "params": "Scout: 109B (17B active, 16 experts); Maverick: 400B (17B active, 128 experts)",
    "weights": "open",
    "architecture": "MoE multimodal transformer",
    "notes": "First Meta family to use MoE and to be natively multimodal; Scout has a 10M-token context window (industry-leading at release); Behemoth (~2T params) remained in training."
  },
  {
    "name": "o3 (full) + o4-mini",
    "family": "GPT / o-series",
    "org": "OpenAI",
    "release_date": "2025-04-16",
    "paper_arxiv_id": null,
    "paper_url": "https://openai.com/index/introducing-o3-and-o4-mini/",
    "params": "unknown",
    "weights": "closed-api",
    "architecture": "decoder-only transformer (RL-trained reasoning + tool use)",
    "notes": "First OpenAI models with full agentic tool use in chain-of-thought (browser, Python, vision); o3 became the default 'paid frontier' reasoning model."
  },
  {
    "name": "Qwen3",
    "family": "Qwen",
    "org": "Alibaba Cloud",
    "release_date": "2025-04-29",
    "paper_arxiv_id": "2505.09388",
    "paper_url": "https://arxiv.org/abs/2505.09388",
    "params": "0.6B / 1.7B / 4B / 8B / 14B / 32B (dense); 30B-A3B / 235B-A22B (MoE)",
    "weights": "open",
    "architecture": "decoder-only transformer (+ MoE variants; hybrid thinking)",
    "notes": "Hybrid thinking/non-thinking modes in a single model; trained on 36T tokens; the 235B-A22B MoE matched many closed frontier models on reasoning and coding."
  },
  {
    "name": "Mistral Medium 3",
    "family": "Mistral",
    "org": "Mistral AI",
    "release_date": "2025-05-07",
    "paper_arxiv_id": null,
    "paper_url": "https://mistral.ai/news/mistral-medium-3",
    "params": "unknown",
    "weights": "closed-api",
    "architecture": "decoder-only transformer",
    "notes": "Targeted enterprise tier; 8x cheaper than comparable models at near-frontier quality; 128K context."
  },
  {
    "name": "Claude Opus 4 + Sonnet 4",
    "family": "Claude",
    "org": "Anthropic",
    "release_date": "2025-05-22",
    "paper_arxiv_id": null,
    "paper_url": "https://www.anthropic.com/news/claude-4",
    "params": "unknown",
    "weights": "closed-api",
    "architecture": "multimodal hybrid-reasoning transformer",
    "notes": "Opus 4 was the first Claude model deployed under ASL-3 safety measures (Sonnet 4 remained under ASL-2); Opus 4 scored 72.5% on SWE-bench Verified; enabled background Claude Code runs."
  },
  {
    "name": "DeepSeek-R1-0528",
    "family": "DeepSeek",
    "org": "DeepSeek AI",
    "release_date": "2025-05-28",
    "paper_arxiv_id": null,
    "paper_url": "https://api-docs.deepseek.com/news/news250528",
    "params": "671B total / 37B active",
    "weights": "open",
    "architecture": "MoE transformer (RL-trained reasoning)",
    "notes": "R1 minor-version refresh; AIME 2025 accuracy 70% -> 87.5%, hallucinations cut 45-50%; reasoning token budget effectively doubled."
  },
  {
    "name": "Grok-4",
    "family": "Grok",
    "org": "xAI",
    "release_date": "2025-07-09",
    "paper_arxiv_id": null,
    "paper_url": "https://x.ai/news/grok-4",
    "params": "unknown",
    "weights": "closed-api",
    "architecture": "MoE transformer (multi-agent reasoning variant 'Heavy')",
    "notes": "Released after a livestreamed event; introduced 'Grok 4 Heavy' multi-agent reasoning variant; xAI's frontier push."
  },
  {
    "name": "Kimi K2",
    "family": "Kimi",
    "org": "Moonshot AI",
    "release_date": "2025-07-11",
    "paper_arxiv_id": "2507.20534",
    "paper_url": "https://arxiv.org/abs/2507.20534",
    "params": "1T total / 32B active",
    "weights": "open",
    "architecture": "MoE transformer (Muon optimizer)",
    "notes": "First trillion-parameter open MoE optimized for agentic use; modified MIT license; trained with the Muon optimizer at unusual scale."
  },
  {
    "name": "gpt-oss-120b / gpt-oss-20b",
    "family": "GPT",
    "org": "OpenAI",
    "release_date": "2025-08-05",
    "paper_arxiv_id": "2508.10925",
    "paper_url": "https://arxiv.org/abs/2508.10925",
    "params": "117B total / 5.1B active; 21B total / 3.6B active",
    "weights": "open",
    "architecture": "MoE transformer",
    "notes": "First OpenAI open-weight language-model release since GPT-2 (6.5 years); Apache 2.0; 120B near-parity with o4-mini, runs on a single 80GB GPU."
  },
  {
    "name": "GPT-5",
    "family": "GPT",
    "org": "OpenAI",
    "release_date": "2025-08-07",
    "paper_arxiv_id": null,
    "paper_url": "https://openai.com/index/introducing-gpt-5/",
    "params": "unknown",
    "weights": "closed-api",
    "architecture": "router + multimodal transformer with reasoning",
    "notes": "Unified frontier model that auto-routes between 'thinking' and 'instant' modes; replaced the separate o-series + GPT-4o split in ChatGPT."
  },
  {
    "name": "Claude Sonnet 4.5",
    "family": "Claude",
    "org": "Anthropic",
    "release_date": "2025-09-29",
    "paper_arxiv_id": null,
    "paper_url": "https://www.anthropic.com/news/claude-sonnet-4-5",
    "params": "unknown",
    "weights": "closed-api",
    "architecture": "multimodal hybrid-reasoning transformer",
    "notes": "Pitched as the best coding model in the world at release; capable of multi-hour autonomous coding sessions ('more of a colleague')."
  },
  {
    "name": "Claude Haiku 4.5",
    "family": "Claude",
    "org": "Anthropic",
    "release_date": "2025-10-15",
    "paper_arxiv_id": null,
    "paper_url": "https://www.anthropic.com/news/claude-haiku-4-5",
    "params": "unknown",
    "weights": "closed-api",
    "architecture": "multimodal hybrid-reasoning transformer",
    "notes": "Hit Sonnet 4-level coding performance at 1/3 the price and >2x the speed; the small-tier model in the Claude 4 generation."
  },
  {
    "name": "Kimi K2 Thinking",
    "family": "Kimi",
    "org": "Moonshot AI",
    "release_date": "2025-11-06",
    "paper_arxiv_id": null,
    "paper_url": "https://moonshot.cn/blog/kimi-k2-thinking",
    "params": "1T total / 32B active",
    "weights": "open",
    "architecture": "MoE transformer (chain-of-thought reasoning)",
    "notes": "Open reasoning variant of Kimi K2; positioned by Moonshot as the most capable open thinking model at release."
  },
  {
    "name": "GPT-5.1",
    "family": "GPT",
    "org": "OpenAI",
    "release_date": "2025-11-12",
    "paper_arxiv_id": null,
    "paper_url": "https://openai.com/index/gpt-5-1/",
    "params": "unknown",
    "weights": "closed-api",
    "architecture": "router + multimodal transformer (adaptive reasoning)",
    "notes": "Introduced Instant + Thinking modes that adaptively allocate inference compute; 'warmer' default personality, customizable across 8 personas."
  },
  {
    "name": "Gemini 3 Pro",
    "family": "Gemini",
    "org": "Google DeepMind",
    "release_date": "2025-11-18",
    "paper_arxiv_id": null,
    "paper_url": "https://blog.google/technology/google-deepmind/gemini-3/",
    "params": "unknown",
    "weights": "closed-api",
    "architecture": "multimodal transformer + Deep Think reasoning",
    "notes": "Top of LMArena (formerly LMSYS Chatbot Arena) at launch (1501 Elo); Deep Think mode hit 41% on Humanity's Last Exam; the model whose dominance triggered OpenAI's 'Code Red' that accelerated GPT-5.2."
  },
  {
    "name": "Claude Opus 4.5",
    "family": "Claude",
    "org": "Anthropic",
    "release_date": "2025-11-24",
    "paper_arxiv_id": null,
    "paper_url": "https://www.anthropic.com/news/claude-opus-4-5",
    "params": "unknown",
    "weights": "closed-api",
    "architecture": "multimodal hybrid-reasoning transformer",
    "notes": "First model to break 80% on SWE-bench Verified (80.9%); price cut ~67% vs Opus 4.1 to $5/$25 per million tokens."
  },
  {
    "name": "DeepSeek-V3.2",
    "family": "DeepSeek",
    "org": "DeepSeek AI",
    "release_date": "2025-12-01",
    "paper_arxiv_id": null,
    "paper_url": "https://api-docs.deepseek.com/news/news251201",
    "params": "~685B total / 37B active",
    "weights": "open",
    "architecture": "MoE transformer (Thinking-in-Tool-Use)",
    "notes": "Introduced native Thinking-in-Tool-Use; trained on a synthesized agentic-task pipeline covering 1800+ environments; the anticipated R2 release was still deferred, per founder Liang Wenfeng."
  },
  {
    "name": "Mistral Large 3",
    "family": "Mistral",
    "org": "Mistral AI",
    "release_date": "2025-12-02",
    "paper_arxiv_id": null,
    "paper_url": "https://mistral.ai/news/mistral-large-3",
    "params": "675B total / 41B active",
    "weights": "open",
    "architecture": "sparse MoE transformer",
    "notes": "Apache 2.0; Mistral's most capable open release; 256K context; competitive with closed frontier models."
  },
  {
    "name": "GPT-5.2",
    "family": "GPT",
    "org": "OpenAI",
    "release_date": "2025-12-11",
    "paper_arxiv_id": null,
    "paper_url": "https://openai.com/index/introducing-gpt-5-2/",
    "params": "unknown",
    "weights": "closed-api",
    "architecture": "router + multimodal transformer (adaptive reasoning)",
    "notes": "Released roughly three weeks after Gemini 3 Pro to reclaim frontier benchmarks; triggered by the internal 'Code Red' memo over Gemini 3's dominance."
  },
  {
    "name": "Kimi K2.5",
    "family": "Kimi",
    "org": "Moonshot AI",
    "release_date": "2026-01-15",
    "paper_arxiv_id": null,
    "paper_url": "https://moonshot.cn/blog/kimi-k2-5",
    "params": "1T total / 32B active",
    "weights": "open",
    "architecture": "MoE multimodal transformer (agent swarm)",
    "notes": "Added native multimodal input and an agent-swarm paradigm; positioned by Moonshot as the open frontier of agentic AI."
  },
  {
    "name": "Claude Opus 4.6",
    "family": "Claude",
    "org": "Anthropic",
    "release_date": "2026-02-04",
    "paper_arxiv_id": null,
    "paper_url": "https://www.anthropic.com/news/claude-opus-4-6",
    "params": "unknown",
    "weights": "closed-api",
    "architecture": "multimodal hybrid-reasoning transformer",
    "notes": "Incremental Opus refresh focused on agentic robustness and tool use; same $5/$25 pricing as 4.5."
  },
  {
    "name": "GPT-5.3-Codex",
    "family": "GPT",
    "org": "OpenAI",
    "release_date": "2026-02-05",
    "paper_arxiv_id": null,
    "paper_url": "https://openai.com/index/gpt-5-3-codex/",
    "params": "unknown",
    "weights": "closed-api",
    "architecture": "router + multimodal transformer (specialized coder)",
    "notes": "Coding-optimized 5.3 variant powering Codex Cloud and Codex CLI."
  },
  {
    "name": "Claude Sonnet 4.6",
    "family": "Claude",
    "org": "Anthropic",
    "release_date": "2026-02-17",
    "paper_arxiv_id": null,
    "paper_url": "https://www.anthropic.com/news/claude-sonnet-4-6",
    "params": "unknown",
    "weights": "closed-api",
    "architecture": "multimodal hybrid-reasoning transformer",
    "notes": "Beta release shipped with a 1M-token context window \u2014 twice the previous Sonnet ceiling; aimed at Opus-level coding at Sonnet pricing ($3/$15)."
  },
  {
    "name": "GPT-5.4",
    "family": "GPT",
    "org": "OpenAI",
    "release_date": "2026-03-05",
    "paper_arxiv_id": null,
    "paper_url": "https://openai.com/index/gpt-5-4/",
    "params": "unknown",
    "weights": "closed-api",
    "architecture": "router + multimodal transformer",
    "notes": "Routine cadence frontier refresh; positioned between 5.2 and 5.5."
  },
  {
    "name": "Claude Opus 4.7 (1M context)",
    "family": "Claude",
    "org": "Anthropic",
    "release_date": "2026-04-16",
    "paper_arxiv_id": null,
    "paper_url": "https://www.anthropic.com/news/claude-opus-4-7",
    "params": "unknown",
    "weights": "closed-api",
    "architecture": "multimodal hybrid-reasoning transformer",
    "notes": "1M-token context at standard pricing (no long-context premium); first Claude with high-resolution image support up to 2576px; 87.6% SWE-bench Verified, 94.2% GPQA."
  },
  {
    "name": "Kimi K2.6",
    "family": "Kimi",
    "org": "Moonshot AI",
    "release_date": "2026-04-20",
    "paper_arxiv_id": null,
    "paper_url": "https://moonshot.cn/blog/kimi-k2-6",
    "params": "1T total / 32B active",
    "weights": "open",
    "architecture": "MoE multimodal transformer (long-horizon agent swarm)",
    "notes": "Agent swarm scaled to 300 sub-agents and 4000 coordinated steps; long-horizon coding focus."
  },
  {
    "name": "GPT-5.5",
    "family": "GPT",
    "org": "OpenAI",
    "release_date": "2026-04-23",
    "paper_arxiv_id": null,
    "paper_url": "https://openai.com/index/introducing-gpt-5-5/",
    "params": "unknown",
    "weights": "closed-api",
    "architecture": "router + multimodal transformer",
    "notes": "Bridge toward an OpenAI 'super app'; Instant variant became the default ChatGPT model on May 5, 2026."
  },
  {
    "name": "Gemini 3.5 Flash",
    "family": "Gemini",
    "org": "Google DeepMind",
    "release_date": "2026-05-19",
    "paper_arxiv_id": null,
    "paper_url": "https://blog.google/innovation-and-ai/models-and-research/gemini-models/gemini-3-5/",
    "params": "unknown",
    "weights": "closed-api",
    "architecture": "multimodal transformer (agentic specialization)",
    "notes": "Launched at Google I/O 2026; outperforms Gemini 3.1 Pro on agentic + multimodal benchmarks at ~4x faster inference and <50% the cost ($1.50/$9.00 per million tokens)."
  }
]