2913 lines
146 KiB
JSON
2913 lines
146 KiB
JSON
|
|
{
|
|||
|
|
"free": 25,
|
|||
|
|
"generated_at": "2026-05-13T09:42:02+08:00",
|
|||
|
|
"models": [
|
|||
|
|
{
|
|||
|
|
"id": "anthropic/claude-opus-4.7-fast",
|
|||
|
|
"name": "Anthropic: Claude Opus 4.7 (Fast)",
|
|||
|
|
"created": 1778613011,
|
|||
|
|
"description": "Fast-mode variant of [Opus 4.7](/anthropic/claude-opus-4.7) - identical capabilities with higher output speed at premium 6x pricing.\n\nLearn more in Anthropic's docs: https://platform.claude.com/docs/en/build-with-claude/fast-mode",
|
|||
|
|
"context_length": 1000000,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "perceptron/perceptron-mk1",
|
|||
|
|
"name": "Perceptron: Perceptron Mk1",
|
|||
|
|
"created": 1778597029,
|
|||
|
|
"description": "Perceptron Mk1 (Mark One) is Perceptron's highest-quality vision-language model for video and embodied reasoning. It accepts image and video inputs paired with natural language queries, and produces detailed visual understanding...",
|
|||
|
|
"context_length": 32768,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "inclusionai/ring-2.6-1t:free",
|
|||
|
|
"name": "inclusionAI: Ring-2.6-1T (free)",
|
|||
|
|
"created": 1778247440,
|
|||
|
|
"description": "Ring-2.6-1T is a 1T-parameter-scale thinking model with 63B active parameters, built for real-world agent workflows that require both strong capability and operational efficiency. It is optimized for coding agents, tool...",
|
|||
|
|
"context_length": 262144,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "google/gemini-3.1-flash-lite",
|
|||
|
|
"name": "Google: Gemini 3.1 Flash Lite",
|
|||
|
|
"created": 1778168828,
|
|||
|
|
"description": "Gemini 3.1 Flash Lite is Google’s GA high-efficiency multimodal model optimized for low-latency, high-volume workloads. It supports text, image, video, audio, and PDF inputs, and is designed for lightweight agentic...",
|
|||
|
|
"context_length": 1048576,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "baidu/cobuddy:free",
|
|||
|
|
"name": "Baidu Qianfan: CoBuddy (free)",
|
|||
|
|
"created": 1778035480,
|
|||
|
|
"description": "CoBuddy is a code generation model from Baidu, optimized for coding tasks and AI Agent workflows. It features high inference throughput and low end-to-end latency, with native support for tool...",
|
|||
|
|
"context_length": 131072,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "openai/gpt-chat-latest",
|
|||
|
|
"name": "OpenAI: GPT Chat Latest",
|
|||
|
|
"created": 1778000212,
|
|||
|
|
"description": "GPT Chat Latest points to OpenAI's stable API alias `chat-latest` that always resolves to the latest Instant chat model used in ChatGPT. As OpenAI rolls out new Instant model updates...",
|
|||
|
|
"context_length": 400000,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "x-ai/grok-4.3",
|
|||
|
|
"name": "xAI: Grok 4.3",
|
|||
|
|
"created": 1777591821,
|
|||
|
|
"description": "Grok 4.3 is a reasoning model from xAI. It accepts text and image inputs with text output, and is suited for agentic workflows, instruction-following tasks, and applications requiring high factual...",
|
|||
|
|
"context_length": 1000000,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "ibm-granite/granite-4.1-8b",
|
|||
|
|
"name": "IBM: Granite 4.1 8B",
|
|||
|
|
"created": 1777577071,
|
|||
|
|
"description": "Granite 4.1 8B is a dense, decoder-only 8-billion-parameter language model from IBM, part of the Granite 4.1 family. It supports a 131K-token context window and is designed for enterprise tasks...",
|
|||
|
|
"context_length": 131072,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "mistralai/mistral-medium-3-5",
|
|||
|
|
"name": "Mistral: Mistral Medium 3.5",
|
|||
|
|
"created": 1777570439,
|
|||
|
|
"description": "Mistral Medium 3.5 is a dense 128B instruction-following model from Mistral AI. It supports text and image inputs with text output, and is designed for agentic workflows, coding, and complex...",
|
|||
|
|
"context_length": 262144,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "openrouter/owl-alpha",
|
|||
|
|
"name": "Owl Alpha",
|
|||
|
|
"created": 1777398589,
|
|||
|
|
"description": "Owl Alpha is a high-performance foundation model designed for agentic workloads. It natively supports tool use and long-context tasks, with strong performance in code generation, automated workflows, and complex instruction execution....",
|
|||
|
|
"context_length": 1048756,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "nvidia/nemotron-3-nano-omni-30b-a3b-reasoning:free",
|
|||
|
|
"name": "NVIDIA: Nemotron 3 Nano Omni (free)",
|
|||
|
|
"created": 1777393095,
|
|||
|
|
"description": "NVIDIA Nemotron™ 3 Nano Omni is a 30B-A3B open multimodal model designed to function as a perception and context sub-agent in enterprise agent systems. It accepts text, image, video, and...",
|
|||
|
|
"context_length": 256000,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "poolside/laguna-xs.2:free",
|
|||
|
|
"name": "Poolside: Laguna XS.2 (free)",
|
|||
|
|
"created": 1777389604,
|
|||
|
|
"description": "Laguna XS.2 is the second-generation model in the XS size class from [Poolside](https://poolside.ai), their efficient coding agent series. It combines tool calling and reasoning capabilities with a compact footprint, offering...",
|
|||
|
|
"context_length": 131072,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "poolside/laguna-m.1:free",
|
|||
|
|
"name": "Poolside: Laguna M.1 (free)",
|
|||
|
|
"created": 1777388504,
|
|||
|
|
"description": "Laguna M.1 is the flagship coding agent model from [Poolside](https://poolside.ai), optimized for complex software engineering tasks. Designed for agentic coding workflows, it supports tool calling and reasoning, with a 128K...",
|
|||
|
|
"context_length": 131072,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "~anthropic/claude-haiku-latest",
|
|||
|
|
"name": "Anthropic Claude Haiku Latest",
|
|||
|
|
"created": 1777318492,
|
|||
|
|
"description": "This model always redirects to the latest model in the Anthropic Claude Haiku family.",
|
|||
|
|
"context_length": 200000,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "~openai/gpt-mini-latest",
|
|||
|
|
"name": "OpenAI GPT Mini Latest",
|
|||
|
|
"created": 1777318471,
|
|||
|
|
"description": "This model always redirects to the latest model in the OpenAI GPT Mini family.",
|
|||
|
|
"context_length": 400000,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "~google/gemini-pro-latest",
|
|||
|
|
"name": "Google Gemini Pro Latest",
|
|||
|
|
"created": 1777318451,
|
|||
|
|
"description": "This model always redirects to the latest model in the Google Gemini Pro family.",
|
|||
|
|
"context_length": 1048576,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "~moonshotai/kimi-latest",
|
|||
|
|
"name": "MoonshotAI Kimi Latest",
|
|||
|
|
"created": 1777318428,
|
|||
|
|
"description": "This model always redirects to the latest model in the MoonshotAI Kimi family.",
|
|||
|
|
"context_length": 262142,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "~google/gemini-flash-latest",
|
|||
|
|
"name": "Google Gemini Flash Latest",
|
|||
|
|
"created": 1777318398,
|
|||
|
|
"description": "This model always redirects to the latest model in the Google Gemini Flash family.",
|
|||
|
|
"context_length": 1048576,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "~anthropic/claude-sonnet-latest",
|
|||
|
|
"name": "Anthropic Claude Sonnet Latest",
|
|||
|
|
"created": 1777318368,
|
|||
|
|
"description": "This model always redirects to the latest model in the Anthropic Claude Sonnet family.",
|
|||
|
|
"context_length": 1000000,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "~openai/gpt-latest",
|
|||
|
|
"name": "OpenAI GPT Latest",
|
|||
|
|
"created": 1777318334,
|
|||
|
|
"description": "This model always redirects to the latest model in the OpenAI GPT family.",
|
|||
|
|
"context_length": 1050000,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "qwen/qwen3.5-plus-20260420",
|
|||
|
|
"name": "Qwen: Qwen3.5 Plus 2026-04-20",
|
|||
|
|
"created": 1777261368,
|
|||
|
|
"description": "Qwen3.5 Plus (April 2026) is a large-scale multimodal language model from Alibaba. It accepts text, image, and video input and produces text output, with a 1M token context window. This...",
|
|||
|
|
"context_length": 1000000,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "qwen/qwen3.6-flash",
|
|||
|
|
"name": "Qwen: Qwen3.6 Flash",
|
|||
|
|
"created": 1777261362,
|
|||
|
|
"description": "Qwen3.6 Flash is a fast, efficient language model from Alibaba's Qwen 3.6 series. It supports text, image, and video input with a 1M token context window. Tiered pricing kicks in...",
|
|||
|
|
"context_length": 1000000,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "qwen/qwen3.6-35b-a3b",
|
|||
|
|
"name": "Qwen: Qwen3.6 35B A3B",
|
|||
|
|
"created": 1777260255,
|
|||
|
|
"description": "Qwen3.6-35B-A3B is an open-weight multimodal model from Alibaba Cloud with 35 billion total parameters and 3 billion active parameters per token. It uses a hybrid sparse mixture-of-experts architecture combining Gated...",
|
|||
|
|
"context_length": 262144,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "qwen/qwen3.6-max-preview",
|
|||
|
|
"name": "Qwen: Qwen3.6 Max Preview",
|
|||
|
|
"created": 1777260242,
|
|||
|
|
"description": "Qwen3.6-Max-Preview is a proprietary frontier model from Alibaba Cloud built on a sparse mixture-of-experts architecture with approximately 1 trillion total parameters. It is optimized for agentic coding, tool use, and...",
|
|||
|
|
"context_length": 262144,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "qwen/qwen3.6-27b",
|
|||
|
|
"name": "Qwen: Qwen3.6 27B",
|
|||
|
|
"created": 1777255064,
|
|||
|
|
"description": "Qwen3.6 27B is a dense 27-billion-parameter language model from the Qwen Team at Alibaba, released in April 2026. It features hybrid multimodal capabilities — accepting text, image, and video inputs...",
|
|||
|
|
"context_length": 262144,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "openai/gpt-5.5-pro",
|
|||
|
|
"name": "OpenAI: GPT-5.5 Pro",
|
|||
|
|
"created": 1777051896,
|
|||
|
|
"description": "GPT-5.5 Pro is OpenAI’s high-capability model optimized for deep reasoning and accuracy on complex, high-stakes workloads. It features a 1M+ token context window (922K input, 128K output) with support for...",
|
|||
|
|
"context_length": 1050000,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "openai/gpt-5.5",
|
|||
|
|
"name": "OpenAI: GPT-5.5",
|
|||
|
|
"created": 1777051893,
|
|||
|
|
"description": "GPT-5.5 is OpenAI’s frontier model designed for complex professional workloads, building on GPT-5.4 with stronger reasoning, higher reliability, and improved token efficiency on hard tasks. It features a 1M+ token...",
|
|||
|
|
"context_length": 1050000,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "deepseek/deepseek-v4-pro",
|
|||
|
|
"name": "DeepSeek: DeepSeek V4 Pro",
|
|||
|
|
"created": 1777000679,
|
|||
|
|
"description": "DeepSeek V4 Pro is a large-scale Mixture-of-Experts model from DeepSeek with 1.6T total parameters and 49B activated parameters, supporting a 1M-token context window. It is designed for advanced reasoning, coding,...",
|
|||
|
|
"context_length": 1048576,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "deepseek/deepseek-v4-flash",
|
|||
|
|
"name": "DeepSeek: DeepSeek V4 Flash",
|
|||
|
|
"created": 1777000666,
|
|||
|
|
"description": "DeepSeek V4 Flash is an efficiency-optimized Mixture-of-Experts model from DeepSeek with 284B total parameters and 13B activated parameters, supporting a 1M-token context window. It is designed for fast inference and...",
|
|||
|
|
"context_length": 1048576,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "inclusionai/ling-2.6-1t",
|
|||
|
|
"name": "inclusionAI: Ling-2.6-1T",
|
|||
|
|
"created": 1776948238,
|
|||
|
|
"description": "Ling-2.6-1T is an instant (instruct) model from inclusionAI and the company’s trillion-parameter flagship, designed for real-world agents that require fast execution and high efficiency at scale. It uses a “fast...",
|
|||
|
|
"context_length": 262144,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "tencent/hy3-preview",
|
|||
|
|
"name": "Tencent: Hy3 preview",
|
|||
|
|
"created": 1776878150,
|
|||
|
|
"description": "Hy3 preview is a high-efficiency Mixture-of-Experts model from Tencent designed for agentic workflows and production use. It supports configurable reasoning levels across disabled, low, and high modes, allowing it to...",
|
|||
|
|
"context_length": 262144,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "xiaomi/mimo-v2.5-pro",
|
|||
|
|
"name": "Xiaomi: MiMo-V2.5-Pro",
|
|||
|
|
"created": 1776874273,
|
|||
|
|
"description": "MiMo-V2.5-Pro is Xiaomi’s flagship model, delivering strong performance in general agentic capabilities, complex software engineering, and long-horizon tasks, with top rankings on benchmarks such as ClawEval, GDPVal, and SWE-bench Pro....",
|
|||
|
|
"context_length": 1048576,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "xiaomi/mimo-v2.5",
|
|||
|
|
"name": "Xiaomi: MiMo-V2.5",
|
|||
|
|
"created": 1776874269,
|
|||
|
|
"description": "MiMo-V2.5 is a native omnimodal model by Xiaomi. It delivers Pro-level agentic performance at roughly half the inference cost, while surpassing MiMo-V2-Omni in multimodal perception across image and video understanding...",
|
|||
|
|
"context_length": 1048576,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "openai/gpt-5.4-image-2",
|
|||
|
|
"name": "OpenAI: GPT-5.4 Image 2",
|
|||
|
|
"created": 1776797528,
|
|||
|
|
"description": "[GPT-5.4](https://openrouter.ai/openai/gpt-5.4) Image 2 combines OpenAI's GPT-5.4 model with state-of-the-art image generation capabilities from GPT Image 2. It enables rich multimodal workflows, allowing users to seamlessly move between reasoning, coding, and...",
|
|||
|
|
"context_length": 272000,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "inclusionai/ling-2.6-flash",
|
|||
|
|
"name": "inclusionAI: Ling-2.6-flash",
|
|||
|
|
"created": 1776795886,
|
|||
|
|
"description": "Ling-2.6-flash is an instant (instruct) model from inclusionAI with 104B total parameters and 7.4B active parameters, designed for real-world agents that require fast responses, strong execution, and high token efficiency....",
|
|||
|
|
"context_length": 262144,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "~anthropic/claude-opus-latest",
|
|||
|
|
"name": "Anthropic: Claude Opus Latest",
|
|||
|
|
"created": 1776795361,
|
|||
|
|
"description": "This model always redirects to the latest model in the Claude Opus family.",
|
|||
|
|
"context_length": 1000000,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "openrouter/pareto-code",
|
|||
|
|
"name": "Pareto Code Router",
|
|||
|
|
"created": 1776747900,
|
|||
|
|
"description": "The Pareto Router maintains a tiered shortlist of strong coding models, ranked by [Artificial Analysis](https://artificialanalysis.ai/) coding percentiles. Set min_coding_score between 0 and 1 on the [pareto-router plugin](https://openrouter.ai/docs/guides/routing/routers/pareto-router#the-min_coding_score-parameter) to control how...",
|
|||
|
|
"context_length": 2000000,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "baidu/qianfan-ocr-fast:free",
|
|||
|
|
"name": "Baidu: Qianfan-OCR-Fast (free)",
|
|||
|
|
"created": 1776707472,
|
|||
|
|
"description": "Qianfan-OCR-Fast is a domain-specific multimodal large model purpose-built for OCR. By leveraging specialized OCR training data while preserving versatile multimodal intelligence, it provides a powerful performance upgrade over Qianfan-OCR.",
|
|||
|
|
"context_length": 65536,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "moonshotai/kimi-k2.6",
|
|||
|
|
"name": "MoonshotAI: Kimi K2.6",
|
|||
|
|
"created": 1776699402,
|
|||
|
|
"description": "Kimi K2.6 is Moonshot AI's next-generation multimodal model, designed for long-horizon coding, coding-driven UI/UX generation, and multi-agent orchestration. It handles complex end-to-end coding tasks across Python, Rust, and Go, and...",
|
|||
|
|
"context_length": 262142,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "anthropic/claude-opus-4.7",
|
|||
|
|
"name": "Anthropic: Claude Opus 4.7",
|
|||
|
|
"created": 1776351100,
|
|||
|
|
"description": "Opus 4.7 is the next generation of Anthropic's Opus family, built for long-running, asynchronous agents. Building on the coding and agentic strengths of Opus 4.6, it delivers stronger performance on...",
|
|||
|
|
"context_length": 1000000,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "anthropic/claude-opus-4.6-fast",
|
|||
|
|
"name": "Anthropic: Claude Opus 4.6 (Fast)",
|
|||
|
|
"created": 1775592472,
|
|||
|
|
"description": "Fast-mode variant of [Opus 4.6](/anthropic/claude-opus-4.6) - identical capabilities with higher output speed at premium 6x pricing.\n\nLearn more in Anthropic's docs: https://platform.claude.com/docs/en/build-with-claude/fast-mode",
|
|||
|
|
"context_length": 1000000,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "z-ai/glm-5.1",
|
|||
|
|
"name": "Z.ai: GLM 5.1",
|
|||
|
|
"created": 1775578025,
|
|||
|
|
"description": "GLM-5.1 delivers a major leap in coding capability, with particularly significant gains in handling long-horizon tasks. Unlike previous models built around minute-level interactions, GLM-5.1 can work independently and continuously on...",
|
|||
|
|
"context_length": 202752,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "google/gemma-4-26b-a4b-it:free",
|
|||
|
|
"name": "Google: Gemma 4 26B A4B (free)",
|
|||
|
|
"created": 1775227989,
|
|||
|
|
"description": "Gemma 4 26B A4B IT is an instruction-tuned Mixture-of-Experts (MoE) model from Google DeepMind. Despite 25.2B total parameters, only 3.8B activate per token during inference — delivering near-31B quality at...",
|
|||
|
|
"context_length": 262144,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "google/gemma-4-26b-a4b-it",
|
|||
|
|
"name": "Google: Gemma 4 26B A4B",
|
|||
|
|
"created": 1775227989,
|
|||
|
|
"description": "Gemma 4 26B A4B IT is an instruction-tuned Mixture-of-Experts (MoE) model from Google DeepMind. Despite 25.2B total parameters, only 3.8B activate per token during inference — delivering near-31B quality at...",
|
|||
|
|
"context_length": 262144,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "google/gemma-4-31b-it:free",
|
|||
|
|
"name": "Google: Gemma 4 31B (free)",
|
|||
|
|
"created": 1775148486,
|
|||
|
|
"description": "Gemma 4 31B Instruct is Google DeepMind's 30.7B dense multimodal model supporting text and image input with text output. Features a 256K token context window, configurable thinking/reasoning mode, native function...",
|
|||
|
|
"context_length": 262144,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "google/gemma-4-31b-it",
|
|||
|
|
"name": "Google: Gemma 4 31B",
|
|||
|
|
"created": 1775148486,
|
|||
|
|
"description": "Gemma 4 31B Instruct is Google DeepMind's 30.7B dense multimodal model supporting text and image input with text output. Features a 256K token context window, configurable thinking/reasoning mode, native function...",
|
|||
|
|
"context_length": 262144,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "qwen/qwen3.6-plus",
|
|||
|
|
"name": "Qwen: Qwen3.6 Plus",
|
|||
|
|
"created": 1775133557,
|
|||
|
|
"description": "Qwen 3.6 Plus builds on a hybrid architecture that combines efficient linear attention with sparse mixture-of-experts routing, enabling strong scalability and high-performance inference. Compared to the 3.5 series, it delivers...",
|
|||
|
|
"context_length": 1000000,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "z-ai/glm-5v-turbo",
|
|||
|
|
"name": "Z.ai: GLM 5V Turbo",
|
|||
|
|
"created": 1775061458,
|
|||
|
|
"description": "GLM-5V-Turbo is Z.ai’s first native multimodal agent foundation model, built for vision-based coding and agent-driven tasks. It natively handles image, video, and text inputs, excels at long-horizon planning, complex coding,...",
|
|||
|
|
"context_length": 202752,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "arcee-ai/trinity-large-thinking:free",
|
|||
|
|
"name": "Arcee AI: Trinity Large Thinking (free)",
|
|||
|
|
"created": 1775058318,
|
|||
|
|
"description": "Trinity Large Thinking is a powerful open source reasoning model from the team at Arcee AI. It shows strong performance in PinchBench, agentic workloads, and reasoning tasks. Launch video: https://youtu.be/Gc82AXLa0Rg?si=4RLn6WBz33qT--B7...",
|
|||
|
|
"context_length": 262144,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "arcee-ai/trinity-large-thinking",
|
|||
|
|
"name": "Arcee AI: Trinity Large Thinking",
|
|||
|
|
"created": 1775058318,
|
|||
|
|
"description": "Trinity Large Thinking is a powerful open source reasoning model from the team at Arcee AI. It shows strong performance in PinchBench, agentic workloads, and reasoning tasks. Launch video: https://youtu.be/Gc82AXLa0Rg?si=4RLn6WBz33qT--B7...",
|
|||
|
|
"context_length": 262144,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "x-ai/grok-4.20-multi-agent",
|
|||
|
|
"name": "xAI: Grok 4.20 Multi-Agent",
|
|||
|
|
"created": 1774979158,
|
|||
|
|
"description": "Grok 4.20 Multi-Agent is a variant of xAI’s Grok 4.20 designed for collaborative, agent-based workflows. Multiple agents operate in parallel to conduct deep research, coordinate tool use, and synthesize information...",
|
|||
|
|
"context_length": 2000000,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "x-ai/grok-4.20",
|
|||
|
|
"name": "xAI: Grok 4.20",
|
|||
|
|
"created": 1774979019,
|
|||
|
|
"description": "Grok 4.20 is a reasoning model from xAI with industry-leading speed and agentic tool calling capabilities. It combines the lowest hallucination rate on the market with strict prompt adherence, delivering...",
|
|||
|
|
"context_length": 2000000,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "google/lyria-3-pro-preview",
|
|||
|
|
"name": "Google: Lyria 3 Pro Preview",
|
|||
|
|
"created": 1774907286,
|
|||
|
|
"description": "Full-length songs are priced at $0.08 per song. Lyria 3 is Google's family of music generation models, available through the Gemini API. With Lyria 3, you can generate high-quality, 48kHz...",
|
|||
|
|
"context_length": 1048576,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "google/lyria-3-clip-preview",
|
|||
|
|
"name": "Google: Lyria 3 Clip Preview",
|
|||
|
|
"created": 1774907255,
|
|||
|
|
"description": "30 second duration clips are priced at $0.04 per clip. Lyria 3 is Google's family of music generation models, available through the Gemini API. With Lyria 3, you can generate...",
|
|||
|
|
"context_length": 1048576,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "kwaipilot/kat-coder-pro-v2",
|
|||
|
|
"name": "Kwaipilot: KAT-Coder-Pro V2",
|
|||
|
|
"created": 1774649310,
|
|||
|
|
"description": "KAT-Coder-Pro V2 is the latest high-performance model in KwaiKAT’s KAT-Coder series, designed for complex enterprise-grade software engineering and SaaS integration. It builds on the agentic coding strengths of earlier versions,...",
|
|||
|
|
"context_length": 256000,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "rekaai/reka-edge",
|
|||
|
|
"name": "Reka Edge",
|
|||
|
|
"created": 1774026965,
|
|||
|
|
"description": "Reka Edge is an extremely efficient 7B multimodal vision-language model that accepts image/video+text inputs and generates text outputs. This model is optimized specifically to deliver industry-leading performance in image understanding,...",
|
|||
|
|
"context_length": 16384,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "xiaomi/mimo-v2-omni",
|
|||
|
|
"name": "Xiaomi: MiMo-V2-Omni",
|
|||
|
|
"created": 1773863703,
|
|||
|
|
"description": "MiMo-V2-Omni is a frontier omni-modal model that natively processes image, video, and audio inputs within a unified architecture. It combines strong multimodal perception with agentic capability - visual grounding, multi-step...",
|
|||
|
|
"context_length": 262144,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "xiaomi/mimo-v2-pro",
|
|||
|
|
"name": "Xiaomi: MiMo-V2-Pro",
|
|||
|
|
"created": 1773863643,
|
|||
|
|
"description": "MiMo-V2-Pro is Xiaomi's flagship foundation model, featuring over 1T total parameters and a 1M context length, deeply optimized for agentic scenarios. It is highly adaptable to general agent frameworks like...",
|
|||
|
|
"context_length": 1048576,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "minimax/minimax-m2.7",
|
|||
|
|
"name": "MiniMax: MiniMax M2.7",
|
|||
|
|
"created": 1773836697,
|
|||
|
|
"description": "MiniMax-M2.7 is a next-generation large language model designed for autonomous, real-world productivity and continuous improvement. Built to actively participate in its own evolution, M2.7 integrates advanced agentic capabilities through multi-agent...",
|
|||
|
|
"context_length": 196608,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "openai/gpt-5.4-nano",
|
|||
|
|
"name": "OpenAI: GPT-5.4 Nano",
|
|||
|
|
"created": 1773748187,
|
|||
|
|
"description": "GPT-5.4 nano is the most lightweight and cost-efficient variant of the GPT-5.4 family, optimized for speed-critical and high-volume tasks. It supports text and image inputs and is designed for low-latency...",
|
|||
|
|
"context_length": 400000,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "openai/gpt-5.4-mini",
|
|||
|
|
"name": "OpenAI: GPT-5.4 Mini",
|
|||
|
|
"created": 1773748178,
|
|||
|
|
"description": "GPT-5.4 mini brings the core capabilities of GPT-5.4 to a faster, more efficient model optimized for high-throughput workloads. It supports text and image inputs with strong performance across reasoning, coding,...",
|
|||
|
|
"context_length": 400000,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "mistralai/mistral-small-2603",
|
|||
|
|
"name": "Mistral: Mistral Small 4",
|
|||
|
|
"created": 1773695685,
|
|||
|
|
"description": "Mistral Small 4 is the next major release in the Mistral Small family, unifying the capabilities of several flagship Mistral models into a single system. It combines strong reasoning from...",
|
|||
|
|
"context_length": 262144,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "z-ai/glm-5-turbo",
|
|||
|
|
"name": "Z.ai: GLM 5 Turbo",
|
|||
|
|
"created": 1773583573,
|
|||
|
|
"description": "GLM-5 Turbo is a new model from Z.ai designed for fast inference and strong performance in agent-driven environments such as OpenClaw scenarios. It is deeply optimized for real-world agent workflows...",
|
|||
|
|
"context_length": 202752,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "nvidia/nemotron-3-super-120b-a12b:free",
|
|||
|
|
"name": "NVIDIA: Nemotron 3 Super (free)",
|
|||
|
|
"created": 1773245239,
|
|||
|
|
"description": "NVIDIA Nemotron 3 Super is a 120B-parameter open hybrid MoE model, activating just 12B parameters for maximum compute efficiency and accuracy in complex multi-agent applications. Built on a hybrid Mamba-Transformer...",
|
|||
|
|
"context_length": 262144,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "nvidia/nemotron-3-super-120b-a12b",
|
|||
|
|
"name": "NVIDIA: Nemotron 3 Super",
|
|||
|
|
"created": 1773245239,
|
|||
|
|
"description": "NVIDIA Nemotron 3 Super is a 120B-parameter open hybrid MoE model, activating just 12B parameters for maximum compute efficiency and accuracy in complex multi-agent applications. Built on a hybrid Mamba-Transformer...",
|
|||
|
|
"context_length": 262144,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "bytedance-seed/seed-2.0-lite",
|
|||
|
|
"name": "ByteDance Seed: Seed-2.0-Lite",
|
|||
|
|
"created": 1773157231,
|
|||
|
|
"description": "Seed-2.0-Lite is a versatile, cost‑efficient enterprise workhorse that delivers strong multimodal and agent capabilities while offering noticeably lower latency, making it a practical default choice for most production workloads across...",
|
|||
|
|
"context_length": 262144,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "qwen/qwen3.5-9b",
|
|||
|
|
"name": "Qwen: Qwen3.5-9B",
|
|||
|
|
"created": 1773152396,
|
|||
|
|
"description": "Qwen3.5-9B is a multimodal foundation model from the Qwen3.5 family, designed to deliver strong reasoning, coding, and visual understanding in an efficient 9B-parameter architecture. It uses a unified vision-language design...",
|
|||
|
|
"context_length": 262144,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "openai/gpt-5.4-pro",
|
|||
|
|
"name": "OpenAI: GPT-5.4 Pro",
|
|||
|
|
"created": 1772734366,
|
|||
|
|
"description": "GPT-5.4 Pro is OpenAI's most advanced model, building on GPT-5.4's unified architecture with enhanced reasoning capabilities for complex, high-stakes tasks. It features a 1M+ token context window (922K input, 128K...",
|
|||
|
|
"context_length": 1050000,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "openai/gpt-5.4",
|
|||
|
|
"name": "OpenAI: GPT-5.4",
|
|||
|
|
"created": 1772734352,
|
|||
|
|
"description": "GPT-5.4 is OpenAI’s latest frontier model, unifying the Codex and GPT lines into a single system. It features a 1M+ token context window (922K input, 128K output) with support for...",
|
|||
|
|
"context_length": 1050000,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "inception/mercury-2",
|
|||
|
|
"name": "Inception: Mercury 2",
|
|||
|
|
"created": 1772636275,
|
|||
|
|
"description": "Mercury 2 is an extremely fast reasoning LLM, and the first reasoning diffusion LLM (dLLM). Instead of generating tokens sequentially, Mercury 2 produces and refines multiple tokens in parallel, achieving...",
|
|||
|
|
"context_length": 128000,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "openai/gpt-5.3-chat",
|
|||
|
|
"name": "OpenAI: GPT-5.3 Chat",
|
|||
|
|
"created": 1772564061,
|
|||
|
|
"description": "GPT-5.3 Chat is an update to ChatGPT's most-used model that makes everyday conversations smoother, more useful, and more directly helpful. It delivers more accurate answers with better contextualization and significantly...",
|
|||
|
|
"context_length": 128000,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "google/gemini-3.1-flash-lite-preview",
|
|||
|
|
"name": "Google: Gemini 3.1 Flash Lite Preview",
|
|||
|
|
"created": 1772512673,
|
|||
|
|
"description": "Gemini 3.1 Flash Lite Preview is Google's high-efficiency model optimized for high-volume use cases. It outperforms Gemini 2.5 Flash Lite on overall quality and approaches Gemini 2.5 Flash performance across...",
|
|||
|
|
"context_length": 1048576,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "bytedance-seed/seed-2.0-mini",
|
|||
|
|
"name": "ByteDance Seed: Seed-2.0-Mini",
|
|||
|
|
"created": 1772131107,
|
|||
|
|
"description": "Seed-2.0-mini targets latency-sensitive, high-concurrency, and cost-sensitive scenarios, emphasizing fast response and flexible inference deployment. It delivers performance comparable to ByteDance-Seed-1.6, supports 256k context, four reasoning effort modes (minimal/low/medium/high), multimodal understanding,...",
|
|||
|
|
"context_length": 262144,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "google/gemini-3.1-flash-image-preview",
|
|||
|
|
"name": "Google: Nano Banana 2 (Gemini 3.1 Flash Image Preview)",
|
|||
|
|
"created": 1772119558,
|
|||
|
|
"description": "Gemini 3.1 Flash Image Preview, a.k.a. \"Nano Banana 2,\" is Google’s latest state of the art image generation and editing model, delivering Pro-level visual quality at Flash speed. It combines...",
|
|||
|
|
"context_length": 65536,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "qwen/qwen3.5-35b-a3b",
|
|||
|
|
"name": "Qwen: Qwen3.5-35B-A3B",
|
|||
|
|
"created": 1772053822,
|
|||
|
|
"description": "The Qwen3.5 Series 35B-A3B is a native vision-language model designed with a hybrid architecture that integrates linear attention mechanisms and a sparse mixture-of-experts model, achieving higher inference efficiency. Its overall...",
|
|||
|
|
"context_length": 262144,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "qwen/qwen3.5-27b",
|
|||
|
|
"name": "Qwen: Qwen3.5-27B",
|
|||
|
|
"created": 1772053810,
|
|||
|
|
"description": "The Qwen3.5 27B native vision-language Dense model incorporates a linear attention mechanism, delivering fast response times while balancing inference speed and performance. Its overall capabilities are comparable to those of...",
|
|||
|
|
"context_length": 262144,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "qwen/qwen3.5-122b-a10b",
|
|||
|
|
"name": "Qwen: Qwen3.5-122B-A10B",
|
|||
|
|
"created": 1772053789,
|
|||
|
|
"description": "The Qwen3.5 122B-A10B native vision-language model is built on a hybrid architecture that integrates a linear attention mechanism with a sparse mixture-of-experts model, achieving higher inference efficiency. In terms of...",
|
|||
|
|
"context_length": 262144,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "qwen/qwen3.5-flash-02-23",
|
|||
|
|
"name": "Qwen: Qwen3.5-Flash",
|
|||
|
|
"created": 1772053776,
|
|||
|
|
"description": "The Qwen3.5 native vision-language Flash models are built on a hybrid architecture that integrates a linear attention mechanism with a sparse mixture-of-experts model, achieving higher inference efficiency. Compared to the...",
|
|||
|
|
"context_length": 1000000,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "liquid/lfm-2-24b-a2b",
|
|||
|
|
"name": "LiquidAI: LFM2-24B-A2B",
|
|||
|
|
"created": 1772048711,
|
|||
|
|
"description": "LFM2-24B-A2B is the largest model in the LFM2 family of hybrid architectures designed for efficient on-device deployment. Built as a 24B parameter Mixture-of-Experts model with only 2B active parameters per...",
|
|||
|
|
"context_length": 32768,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "google/gemini-3.1-pro-preview-customtools",
|
|||
|
|
"name": "Google: Gemini 3.1 Pro Preview Custom Tools",
|
|||
|
|
"created": 1772045923,
|
|||
|
|
"description": "Gemini 3.1 Pro Preview Custom Tools is a variant of Gemini 3.1 Pro that improves tool selection behavior by preventing overuse of a general bash tool when more efficient third-party...",
|
|||
|
|
"context_length": 1048576,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "openai/gpt-5.3-codex",
|
|||
|
|
"name": "OpenAI: GPT-5.3-Codex",
|
|||
|
|
"created": 1771959164,
|
|||
|
|
"description": "GPT-5.3-Codex is OpenAI’s most advanced agentic coding model, combining the frontier software engineering performance of GPT-5.2-Codex with the broader reasoning and professional knowledge capabilities of GPT-5.2. It achieves state-of-the-art results...",
|
|||
|
|
"context_length": 400000,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "aion-labs/aion-2.0",
|
|||
|
|
"name": "AionLabs: Aion-2.0",
|
|||
|
|
"created": 1771881306,
|
|||
|
|
"description": "Aion-2.0 is a variant of DeepSeek V3.2 optimized for immersive roleplaying and storytelling. It is particularly strong at introducing tension, crises, and conflict into stories, making narratives feel more engaging....",
|
|||
|
|
"context_length": 131072,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "google/gemini-3.1-pro-preview",
|
|||
|
|
"name": "Google: Gemini 3.1 Pro Preview",
|
|||
|
|
"created": 1771509627,
|
|||
|
|
"description": "Gemini 3.1 Pro Preview is Google’s frontier reasoning model, delivering enhanced software engineering performance, improved agentic reliability, and more efficient token usage across complex workflows. Building on the multimodal foundation...",
|
|||
|
|
"context_length": 1048576,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "anthropic/claude-sonnet-4.6",
|
|||
|
|
"name": "Anthropic: Claude Sonnet 4.6",
|
|||
|
|
"created": 1771342990,
|
|||
|
|
"description": "Sonnet 4.6 is Anthropic's most capable Sonnet-class model yet, with frontier performance across coding, agents, and professional work. It excels at iterative development, complex codebase navigation, end-to-end project management with...",
|
|||
|
|
"context_length": 1000000,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "qwen/qwen3.5-plus-02-15",
|
|||
|
|
"name": "Qwen: Qwen3.5 Plus 2026-02-15",
|
|||
|
|
"created": 1771229416,
|
|||
|
|
"description": "The Qwen3.5 native vision-language series Plus models are built on a hybrid architecture that integrates linear attention mechanisms with sparse mixture-of-experts models, achieving higher inference efficiency. In a variety of...",
|
|||
|
|
"context_length": 1000000,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "qwen/qwen3.5-397b-a17b",
|
|||
|
|
"name": "Qwen: Qwen3.5 397B A17B",
|
|||
|
|
"created": 1771223018,
|
|||
|
|
"description": "The Qwen3.5 series 397B-A17B native vision-language model is built on a hybrid architecture that integrates a linear attention mechanism with a sparse mixture-of-experts model, achieving higher inference efficiency. It delivers...",
|
|||
|
|
"context_length": 262144,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "minimax/minimax-m2.5:free",
|
|||
|
|
"name": "MiniMax: MiniMax M2.5 (free)",
|
|||
|
|
"created": 1770908502,
|
|||
|
|
"description": "MiniMax-M2.5 is a SOTA large language model designed for real-world productivity. Trained in a diverse range of complex real-world digital working environments, M2.5 builds upon the coding expertise of M2.1...",
|
|||
|
|
"context_length": 196608,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "minimax/minimax-m2.5",
|
|||
|
|
"name": "MiniMax: MiniMax M2.5",
|
|||
|
|
"created": 1770908502,
|
|||
|
|
"description": "MiniMax-M2.5 is a SOTA large language model designed for real-world productivity. Trained in a diverse range of complex real-world digital working environments, M2.5 builds upon the coding expertise of M2.1...",
|
|||
|
|
"context_length": 196608,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "z-ai/glm-5",
|
|||
|
|
"name": "Z.ai: GLM 5",
|
|||
|
|
"created": 1770829182,
|
|||
|
|
"description": "GLM-5 is Z.ai’s flagship open-source foundation model engineered for complex systems design and long-horizon agent workflows. Built for expert developers, it delivers production-grade performance on large-scale programming tasks, rivaling leading...",
|
|||
|
|
"context_length": 202752,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "qwen/qwen3-max-thinking",
|
|||
|
|
"name": "Qwen: Qwen3 Max Thinking",
|
|||
|
|
"created": 1770671901,
|
|||
|
|
"description": "Qwen3-Max-Thinking is the flagship reasoning model in the Qwen3 series, designed for high-stakes cognitive tasks that require deep, multi-step reasoning. By significantly scaling model capacity and reinforcement learning compute, it...",
|
|||
|
|
"context_length": 262144,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "anthropic/claude-opus-4.6",
|
|||
|
|
"name": "Anthropic: Claude Opus 4.6",
|
|||
|
|
"created": 1770219050,
|
|||
|
|
"description": "Opus 4.6 is Anthropic’s strongest model for coding and long-running professional tasks. It is built for agents that operate across entire workflows rather than single prompts, making it especially effective...",
|
|||
|
|
"context_length": 1000000,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "qwen/qwen3-coder-next",
|
|||
|
|
"name": "Qwen: Qwen3 Coder Next",
|
|||
|
|
"created": 1770164101,
|
|||
|
|
"description": "Qwen3-Coder-Next is an open-weight causal language model optimized for coding agents and local development workflows. It uses a sparse MoE design with 80B total parameters and only 3B activated per...",
|
|||
|
|
"context_length": 262144,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "openrouter/free",
|
|||
|
|
"name": "Free Models Router",
|
|||
|
|
"created": 1769917427,
|
|||
|
|
"description": "The simplest way to get free inference. openrouter/free is a router that selects free models at random from the models available on OpenRouter. The router smartly filters for models that...",
|
|||
|
|
"context_length": 200000,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "stepfun/step-3.5-flash",
|
|||
|
|
"name": "StepFun: Step 3.5 Flash",
|
|||
|
|
"created": 1769728337,
|
|||
|
|
"description": "Step 3.5 Flash is StepFun's most capable open-source foundation model. Built on a sparse Mixture of Experts (MoE) architecture, it selectively activates only 11B of its 196B parameters per token....",
|
|||
|
|
"context_length": 262144,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "arcee-ai/trinity-large-preview",
|
|||
|
|
"name": "Arcee AI: Trinity Large Preview",
|
|||
|
|
"created": 1769552670,
|
|||
|
|
"description": "Trinity-Large-Preview is a frontier-scale open-weight language model from Arcee, built as a 400B-parameter sparse Mixture-of-Experts with 13B active parameters per token using 4-of-256 expert routing. It excels in creative writing,...",
|
|||
|
|
"context_length": 131000,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "moonshotai/kimi-k2.5",
|
|||
|
|
"name": "MoonshotAI: Kimi K2.5",
|
|||
|
|
"created": 1769487076,
|
|||
|
|
"description": "Kimi K2.5 is Moonshot AI's native multimodal model, delivering state-of-the-art visual coding capability and a self-directed agent swarm paradigm. Built on Kimi K2 with continued pretraining over approximately 15T mixed...",
|
|||
|
|
"context_length": 262144,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "upstage/solar-pro-3",
|
|||
|
|
"name": "Upstage: Solar Pro 3",
|
|||
|
|
"created": 1769481200,
|
|||
|
|
"description": "Solar Pro 3 is Upstage's powerful Mixture-of-Experts (MoE) language model. With 102B total parameters and 12B active parameters per forward pass, it delivers exceptional performance while maintaining computational efficiency. Optimized...",
|
|||
|
|
"context_length": 128000,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "minimax/minimax-m2-her",
|
|||
|
|
"name": "MiniMax: MiniMax M2-her",
|
|||
|
|
"created": 1769177239,
|
|||
|
|
"description": "MiniMax M2-her is a dialogue-first large language model built for immersive roleplay, character-driven chat, and expressive multi-turn conversations. Designed to stay consistent in tone and personality, it supports rich message...",
|
|||
|
|
"context_length": 65536,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "writer/palmyra-x5",
|
|||
|
|
"name": "Writer: Palmyra X5",
|
|||
|
|
"created": 1769003823,
|
|||
|
|
"description": "Palmyra X5 is Writer's most advanced model, purpose-built for building and scaling AI agents across the enterprise. It delivers industry-leading speed and efficiency on context windows up to 1 million...",
|
|||
|
|
"context_length": 1040000,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "liquid/lfm-2.5-1.2b-thinking:free",
|
|||
|
|
"name": "LiquidAI: LFM2.5-1.2B-Thinking (free)",
|
|||
|
|
"created": 1768927527,
|
|||
|
|
"description": "LFM2.5-1.2B-Thinking is a lightweight reasoning-focused model optimized for agentic tasks, data extraction, and RAG—while still running comfortably on edge devices. It supports long context (up to 32K tokens) and is...",
|
|||
|
|
"context_length": 32768,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "liquid/lfm-2.5-1.2b-instruct:free",
|
|||
|
|
"name": "LiquidAI: LFM2.5-1.2B-Instruct (free)",
|
|||
|
|
"created": 1768927521,
|
|||
|
|
"description": "LFM2.5-1.2B-Instruct is a compact, high-performance instruction-tuned model built for fast on-device AI. It delivers strong chat quality in a 1.2B parameter footprint, with efficient edge inference and broad runtime support.",
|
|||
|
|
"context_length": 32768,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "openai/gpt-audio",
|
|||
|
|
"name": "OpenAI: GPT Audio",
|
|||
|
|
"created": 1768862569,
|
|||
|
|
"description": "The gpt-audio model is OpenAI's first generally available audio model. The new snapshot features an upgraded decoder for more natural sounding voices and maintains better voice consistency. Audio is priced...",
|
|||
|
|
"context_length": 128000,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "openai/gpt-audio-mini",
|
|||
|
|
"name": "OpenAI: GPT Audio Mini",
|
|||
|
|
"created": 1768859419,
|
|||
|
|
"description": "A cost-efficient version of GPT Audio. The new snapshot features an upgraded decoder for more natural sounding voices and maintains better voice consistency. Input is priced at $0.60 per million...",
|
|||
|
|
"context_length": 128000,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "z-ai/glm-4.7-flash",
|
|||
|
|
"name": "Z.ai: GLM 4.7 Flash",
|
|||
|
|
"created": 1768833913,
|
|||
|
|
"description": "As a 30B-class SOTA model, GLM-4.7-Flash offers a new option that balances performance and efficiency. It is further optimized for agentic coding use cases, strengthening coding capabilities, long-horizon task planning,...",
|
|||
|
|
"context_length": 202752,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "openai/gpt-5.2-codex",
|
|||
|
|
"name": "OpenAI: GPT-5.2-Codex",
|
|||
|
|
"created": 1768409315,
|
|||
|
|
"description": "GPT-5.2-Codex is an upgraded version of GPT-5.1-Codex optimized for software engineering and coding workflows. It is designed for both interactive development sessions and long, independent execution of complex engineering tasks....",
|
|||
|
|
"context_length": 400000,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "bytedance-seed/seed-1.6-flash",
|
|||
|
|
"name": "ByteDance Seed: Seed 1.6 Flash",
|
|||
|
|
"created": 1766505011,
|
|||
|
|
"description": "Seed 1.6 Flash is an ultra-fast multimodal deep thinking model by ByteDance Seed, supporting both text and visual understanding. It features a 256k context window and can generate outputs of...",
|
|||
|
|
"context_length": 262144,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "bytedance-seed/seed-1.6",
|
|||
|
|
"name": "ByteDance Seed: Seed 1.6",
|
|||
|
|
"created": 1766504997,
|
|||
|
|
"description": "Seed 1.6 is a general-purpose model released by the ByteDance Seed team. It incorporates multimodal capabilities and adaptive deep thinking with a 256K context window.",
|
|||
|
|
"context_length": 262144,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "minimax/minimax-m2.1",
|
|||
|
|
"name": "MiniMax: MiniMax M2.1",
|
|||
|
|
"created": 1766454997,
|
|||
|
|
"description": "MiniMax-M2.1 is a lightweight, state-of-the-art large language model optimized for coding, agentic workflows, and modern application development. With only 10 billion activated parameters, it delivers a major jump in real-world...",
|
|||
|
|
"context_length": 196608,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "z-ai/glm-4.7",
|
|||
|
|
"name": "Z.ai: GLM 4.7",
|
|||
|
|
"created": 1766378014,
|
|||
|
|
"description": "GLM-4.7 is Z.ai’s latest flagship model, featuring upgrades in two key areas: enhanced programming capabilities and more stable multi-step reasoning/execution. It demonstrates significant improvements in executing complex agent tasks while...",
|
|||
|
|
"context_length": 202752,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "google/gemini-3-flash-preview",
|
|||
|
|
"name": "Google: Gemini 3 Flash Preview",
|
|||
|
|
"created": 1765987078,
|
|||
|
|
"description": "Gemini 3 Flash Preview is a high speed, high value thinking model designed for agentic workflows, multi turn chat, and coding assistance. It delivers near Pro level reasoning and tool...",
|
|||
|
|
"context_length": 1048576,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "xiaomi/mimo-v2-flash",
|
|||
|
|
"name": "Xiaomi: MiMo-V2-Flash",
|
|||
|
|
"created": 1765731308,
|
|||
|
|
"description": "MiMo-V2-Flash is an open-source foundation language model developed by Xiaomi. It is a Mixture-of-Experts model with 309B total parameters and 15B active parameters, adopting hybrid attention architecture. MiMo-V2-Flash supports a...",
|
|||
|
|
"context_length": 262144,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "nvidia/nemotron-3-nano-30b-a3b:free",
|
|||
|
|
"name": "NVIDIA: Nemotron 3 Nano 30B A3B (free)",
|
|||
|
|
"created": 1765731275,
|
|||
|
|
"description": "NVIDIA Nemotron 3 Nano 30B A3B is a small language MoE model with highest compute efficiency and accuracy for developers to build specialized agentic AI systems. The model is fully...",
|
|||
|
|
"context_length": 256000,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "nvidia/nemotron-3-nano-30b-a3b",
|
|||
|
|
"name": "NVIDIA: Nemotron 3 Nano 30B A3B",
|
|||
|
|
"created": 1765731275,
|
|||
|
|
"description": "NVIDIA Nemotron 3 Nano 30B A3B is a small language MoE model with highest compute efficiency and accuracy for developers to build specialized agentic AI systems. The model is fully...",
|
|||
|
|
"context_length": 262144,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "openai/gpt-5.2-chat",
|
|||
|
|
"name": "OpenAI: GPT-5.2 Chat",
|
|||
|
|
"created": 1765389783,
|
|||
|
|
"description": "GPT-5.2 Chat (AKA Instant) is the fast, lightweight member of the 5.2 family, optimized for low-latency chat while retaining strong general intelligence. It uses adaptive reasoning to selectively “think” on...",
|
|||
|
|
"context_length": 128000,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "openai/gpt-5.2-pro",
|
|||
|
|
"name": "OpenAI: GPT-5.2 Pro",
|
|||
|
|
"created": 1765389780,
|
|||
|
|
"description": "GPT-5.2 Pro is OpenAI’s most advanced model, offering major improvements in agentic coding and long context performance over GPT-5 Pro. It is optimized for complex tasks that require step-by-step reasoning,...",
|
|||
|
|
"context_length": 400000,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "openai/gpt-5.2",
|
|||
|
|
"name": "OpenAI: GPT-5.2",
|
|||
|
|
"created": 1765389775,
|
|||
|
|
"description": "GPT-5.2 is the latest frontier-grade model in the GPT-5 series, offering stronger agentic and long context perfomance compared to GPT-5.1. It uses adaptive reasoning to allocate computation dynamically, responding quickly...",
|
|||
|
|
"context_length": 400000,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "mistralai/devstral-2512",
|
|||
|
|
"name": "Mistral: Devstral 2 2512",
|
|||
|
|
"created": 1765285419,
|
|||
|
|
"description": "Devstral 2 is a state-of-the-art open-source model by Mistral AI specializing in agentic coding. It is a 123B-parameter dense transformer model supporting a 256K context window. Devstral 2 supports exploring...",
|
|||
|
|
"context_length": 262144,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "relace/relace-search",
|
|||
|
|
"name": "Relace: Relace Search",
|
|||
|
|
"created": 1765213560,
|
|||
|
|
"description": "The relace-search model uses 4-12 `view_file` and `grep` tools in parallel to explore a codebase and return relevant files to the user request. In contrast to RAG, relace-search performs agentic...",
|
|||
|
|
"context_length": 256000,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "z-ai/glm-4.6v",
|
|||
|
|
"name": "Z.ai: GLM 4.6V",
|
|||
|
|
"created": 1765207462,
|
|||
|
|
"description": "GLM-4.6V is a large multimodal model designed for high-fidelity visual understanding and long-context reasoning across images, documents, and mixed media. It supports up to 128K tokens, processes complex page layouts...",
|
|||
|
|
"context_length": 131072,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "nex-agi/deepseek-v3.1-nex-n1",
|
|||
|
|
"name": "Nex AGI: DeepSeek V3.1 Nex N1",
|
|||
|
|
"created": 1765204393,
|
|||
|
|
"description": "DeepSeek V3.1 Nex-N1 is the flagship release of the Nex-N1 series — a post-trained model designed to highlight agent autonomy, tool use, and real-world productivity. Nex-N1 demonstrates competitive performance across...",
|
|||
|
|
"context_length": 131072,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "essentialai/rnj-1-instruct",
|
|||
|
|
"name": "EssentialAI: Rnj 1 Instruct",
|
|||
|
|
"created": 1765094847,
|
|||
|
|
"description": "Rnj-1 is an 8B-parameter, dense, open-weight model family developed by Essential AI and trained from scratch with a focus on programming, math, and scientific reasoning. The model demonstrates strong performance...",
|
|||
|
|
"context_length": 32768,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "openrouter/bodybuilder",
|
|||
|
|
"name": "Body Builder (beta)",
|
|||
|
|
"created": 1764903653,
|
|||
|
|
"description": "Transform your natural language requests into structured OpenRouter API request objects. Describe what you want to accomplish with AI models, and Body Builder will construct the appropriate API calls. Example:...",
|
|||
|
|
"context_length": 128000,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "openai/gpt-5.1-codex-max",
|
|||
|
|
"name": "OpenAI: GPT-5.1-Codex-Max",
|
|||
|
|
"created": 1764878934,
|
|||
|
|
"description": "GPT-5.1-Codex-Max is OpenAI’s latest agentic coding model, designed for long-running, high-context software development tasks. It is based on an updated version of the 5.1 reasoning stack and trained on agentic...",
|
|||
|
|
"context_length": 400000,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "amazon/nova-2-lite-v1",
|
|||
|
|
"name": "Amazon: Nova 2 Lite",
|
|||
|
|
"created": 1764696672,
|
|||
|
|
"description": "Nova 2 Lite is a fast, cost-effective reasoning model for everyday workloads that can process text, images, and videos to generate text. Nova 2 Lite demonstrates standout capabilities in processing...",
|
|||
|
|
"context_length": 1000000,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "mistralai/ministral-14b-2512",
|
|||
|
|
"name": "Mistral: Ministral 3 14B 2512",
|
|||
|
|
"created": 1764681735,
|
|||
|
|
"description": "The largest model in the Ministral 3 family, Ministral 3 14B offers frontier capabilities and performance comparable to its larger Mistral Small 3.2 24B counterpart. A powerful and efficient language...",
|
|||
|
|
"context_length": 262144,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "mistralai/ministral-8b-2512",
|
|||
|
|
"name": "Mistral: Ministral 3 8B 2512",
|
|||
|
|
"created": 1764681654,
|
|||
|
|
"description": "A balanced model in the Ministral 3 family, Ministral 3 8B is a powerful, efficient tiny language model with vision capabilities.",
|
|||
|
|
"context_length": 262144,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "mistralai/ministral-3b-2512",
|
|||
|
|
"name": "Mistral: Ministral 3 3B 2512",
|
|||
|
|
"created": 1764681560,
|
|||
|
|
"description": "The smallest model in the Ministral 3 family, Ministral 3 3B is a powerful, efficient tiny language model with vision capabilities.",
|
|||
|
|
"context_length": 131072,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "mistralai/mistral-large-2512",
|
|||
|
|
"name": "Mistral: Mistral Large 3 2512",
|
|||
|
|
"created": 1764624472,
|
|||
|
|
"description": "Mistral Large 3 2512 is Mistral’s most capable model to date, featuring a sparse mixture-of-experts architecture with 41B active parameters (675B total), and released under the Apache 2.0 license.",
|
|||
|
|
"context_length": 262144,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "arcee-ai/trinity-mini",
|
|||
|
|
"name": "Arcee AI: Trinity Mini",
|
|||
|
|
"created": 1764601720,
|
|||
|
|
"description": "Trinity Mini is a 26B-parameter (3B active) sparse mixture-of-experts language model featuring 128 experts with 8 active per token. Engineered for efficient reasoning over long contexts (131k) with robust function...",
|
|||
|
|
"context_length": 131072,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "deepseek/deepseek-v3.2-speciale",
|
|||
|
|
"name": "DeepSeek: DeepSeek V3.2 Speciale",
|
|||
|
|
"created": 1764594837,
|
|||
|
|
"description": "DeepSeek-V3.2-Speciale is a high-compute variant of DeepSeek-V3.2 optimized for maximum reasoning and agentic performance. It builds on DeepSeek Sparse Attention (DSA) for efficient long-context processing, then scales post-training reinforcement learning...",
|
|||
|
|
"context_length": 163840,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "deepseek/deepseek-v3.2",
|
|||
|
|
"name": "DeepSeek: DeepSeek V3.2",
|
|||
|
|
"created": 1764594642,
|
|||
|
|
"description": "DeepSeek-V3.2 is a large language model designed to harmonize high computational efficiency with strong reasoning and agentic tool-use performance. It introduces DeepSeek Sparse Attention (DSA), a fine-grained sparse attention mechanism...",
|
|||
|
|
"context_length": 131072,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "prime-intellect/intellect-3",
|
|||
|
|
"name": "Prime Intellect: INTELLECT-3",
|
|||
|
|
"created": 1764212534,
|
|||
|
|
"description": "INTELLECT-3 is a 106B-parameter Mixture-of-Experts model (12B active) post-trained from GLM-4.5-Air-Base using supervised fine-tuning (SFT) followed by large-scale reinforcement learning (RL). It offers state-of-the-art performance for its size across math,...",
|
|||
|
|
"context_length": 131072,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "anthropic/claude-opus-4.5",
|
|||
|
|
"name": "Anthropic: Claude Opus 4.5",
|
|||
|
|
"created": 1764010580,
|
|||
|
|
"description": "Claude Opus 4.5 is Anthropic’s frontier reasoning model optimized for complex software engineering, agentic workflows, and long-horizon computer use. It offers strong multimodal capabilities, competitive performance across real-world coding and...",
|
|||
|
|
"context_length": 200000,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "allenai/olmo-3-32b-think",
|
|||
|
|
"name": "AllenAI: Olmo 3 32B Think",
|
|||
|
|
"created": 1763758276,
|
|||
|
|
"description": "Olmo 3 32B Think is a large-scale, 32-billion-parameter model purpose-built for deep reasoning, complex logic chains and advanced instruction-following scenarios. Its capacity enables strong performance on demanding evaluation tasks and...",
|
|||
|
|
"context_length": 65536,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "google/gemini-3-pro-image-preview",
|
|||
|
|
"name": "Google: Nano Banana Pro (Gemini 3 Pro Image Preview)",
|
|||
|
|
"created": 1763653797,
|
|||
|
|
"description": "Nano Banana Pro is Google’s most advanced image-generation and editing model, built on Gemini 3 Pro. It extends the original Nano Banana with significantly improved multimodal reasoning, real-world grounding, and...",
|
|||
|
|
"context_length": 65536,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "x-ai/grok-4.1-fast",
|
|||
|
|
"name": "xAI: Grok 4.1 Fast",
|
|||
|
|
"created": 1763587502,
|
|||
|
|
"description": "Grok 4.1 Fast is xAI's best agentic tool calling model that shines in real-world use cases like customer support and deep research. 2M context window. Reasoning can be enabled/disabled using...",
|
|||
|
|
"context_length": 2000000,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "deepcogito/cogito-v2.1-671b",
|
|||
|
|
"name": "Deep Cogito: Cogito v2.1 671B",
|
|||
|
|
"created": 1763071233,
|
|||
|
|
"description": "Cogito v2.1 671B MoE represents one of the strongest open models globally, matching performance of frontier closed and open models. This model is trained using self play with reinforcement learning...",
|
|||
|
|
"context_length": 128000,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "openai/gpt-5.1",
|
|||
|
|
"name": "OpenAI: GPT-5.1",
|
|||
|
|
"created": 1763060305,
|
|||
|
|
"description": "GPT-5.1 is the latest frontier-grade model in the GPT-5 series, offering stronger general-purpose reasoning, improved instruction adherence, and a more natural conversational style compared to GPT-5. It uses adaptive reasoning...",
|
|||
|
|
"context_length": 400000,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "openai/gpt-5.1-chat",
|
|||
|
|
"name": "OpenAI: GPT-5.1 Chat",
|
|||
|
|
"created": 1763060302,
|
|||
|
|
"description": "GPT-5.1 Chat (AKA Instant is the fast, lightweight member of the 5.1 family, optimized for low-latency chat while retaining strong general intelligence. It uses adaptive reasoning to selectively “think” on...",
|
|||
|
|
"context_length": 128000,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "openai/gpt-5.1-codex",
|
|||
|
|
"name": "OpenAI: GPT-5.1-Codex",
|
|||
|
|
"created": 1763060298,
|
|||
|
|
"description": "GPT-5.1-Codex is a specialized version of GPT-5.1 optimized for software engineering and coding workflows. It is designed for both interactive development sessions and long, independent execution of complex engineering tasks....",
|
|||
|
|
"context_length": 400000,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "openai/gpt-5.1-codex-mini",
|
|||
|
|
"name": "OpenAI: GPT-5.1-Codex-Mini",
|
|||
|
|
"created": 1763057820,
|
|||
|
|
"description": "GPT-5.1-Codex-Mini is a smaller and faster version of GPT-5.1-Codex",
|
|||
|
|
"context_length": 400000,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "moonshotai/kimi-k2-thinking",
|
|||
|
|
"name": "MoonshotAI: Kimi K2 Thinking",
|
|||
|
|
"created": 1762440622,
|
|||
|
|
"description": "Kimi K2 Thinking is Moonshot AI’s most advanced open reasoning model to date, extending the K2 series into agentic, long-horizon reasoning. Built on the trillion-parameter Mixture-of-Experts (MoE) architecture introduced in...",
|
|||
|
|
"context_length": 262144,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "amazon/nova-premier-v1",
|
|||
|
|
"name": "Amazon: Nova Premier 1.0",
|
|||
|
|
"created": 1761950332,
|
|||
|
|
"description": "Amazon Nova Premier is the most capable of Amazon’s multimodal models for complex reasoning tasks and for use as the best teacher for distilling custom models.",
|
|||
|
|
"context_length": 1000000,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "perplexity/sonar-pro-search",
|
|||
|
|
"name": "Perplexity: Sonar Pro Search",
|
|||
|
|
"created": 1761854366,
|
|||
|
|
"description": "Exclusively available on the OpenRouter API, Sonar Pro's new Pro Search mode is Perplexity's most advanced agentic search system. It is designed for deeper reasoning and analysis. Pricing is based...",
|
|||
|
|
"context_length": 200000,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "mistralai/voxtral-small-24b-2507",
|
|||
|
|
"name": "Mistral: Voxtral Small 24B 2507",
|
|||
|
|
"created": 1761835144,
|
|||
|
|
"description": "Voxtral Small is an enhancement of Mistral Small 3, incorporating state-of-the-art audio input capabilities while retaining best-in-class text performance. It excels at speech transcription, translation and audio understanding. Input audio...",
|
|||
|
|
"context_length": 32000,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "openai/gpt-oss-safeguard-20b",
|
|||
|
|
"name": "OpenAI: gpt-oss-safeguard-20b",
|
|||
|
|
"created": 1761752836,
|
|||
|
|
"description": "gpt-oss-safeguard-20b is a safety reasoning model from OpenAI built upon gpt-oss-20b. This open-weight, 21B-parameter Mixture-of-Experts (MoE) model offers lower latency for safety tasks like content classification, LLM filtering, and trust...",
|
|||
|
|
"context_length": 131072,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "nvidia/nemotron-nano-12b-v2-vl:free",
|
|||
|
|
"name": "NVIDIA: Nemotron Nano 12B 2 VL (free)",
|
|||
|
|
"created": 1761675565,
|
|||
|
|
"description": "NVIDIA Nemotron Nano 2 VL is a 12-billion-parameter open multimodal reasoning model designed for video understanding and document intelligence. It introduces a hybrid Transformer-Mamba architecture, combining transformer-level accuracy with Mamba’s...",
|
|||
|
|
"context_length": 128000,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "minimax/minimax-m2",
|
|||
|
|
"name": "MiniMax: MiniMax M2",
|
|||
|
|
"created": 1761252093,
|
|||
|
|
"description": "MiniMax-M2 is a compact, high-efficiency large language model optimized for end-to-end coding and agentic workflows. With 10 billion activated parameters (230 billion total), it delivers near-frontier intelligence across general reasoning,...",
|
|||
|
|
"context_length": 196608,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "qwen/qwen3-vl-32b-instruct",
|
|||
|
|
"name": "Qwen: Qwen3 VL 32B Instruct",
|
|||
|
|
"created": 1761231332,
|
|||
|
|
"description": "Qwen3-VL-32B-Instruct is a large-scale multimodal vision-language model designed for high-precision understanding and reasoning across text, images, and video. With 32 billion parameters, it combines deep visual perception with advanced text...",
|
|||
|
|
"context_length": 131072,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "ibm-granite/granite-4.0-h-micro",
|
|||
|
|
"name": "IBM: Granite 4.0 Micro",
|
|||
|
|
"created": 1760927695,
|
|||
|
|
"description": "Granite-4.0-H-Micro is a 3B parameter from the Granite 4 family of models. These models are the latest in a series of models released by IBM. They are fine-tuned for long...",
|
|||
|
|
"context_length": 131000,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "microsoft/phi-4-mini-instruct",
|
|||
|
|
"name": "Microsoft: Phi 4 Mini Instruct",
|
|||
|
|
"created": 1760726049,
|
|||
|
|
"description": "Phi-4-mini-instruct is a lightweight open model built upon synthetic data and filtered publicly available websites - with a focus on high-quality, reasoning dense data. The model belongs to the Phi-4...",
|
|||
|
|
"context_length": 128000,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "openai/gpt-5-image-mini",
|
|||
|
|
"name": "OpenAI: GPT-5 Image Mini",
|
|||
|
|
"created": 1760624583,
|
|||
|
|
"description": "GPT-5 Image Mini combines OpenAI's advanced language capabilities, powered by [GPT-5 Mini](https://openrouter.ai/openai/gpt-5-mini), with GPT Image 1 Mini for efficient image generation. This natively multimodal model features superior instruction following, text...",
|
|||
|
|
"context_length": 400000,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "anthropic/claude-haiku-4.5",
|
|||
|
|
"name": "Anthropic: Claude Haiku 4.5",
|
|||
|
|
"created": 1760547638,
|
|||
|
|
"description": "Claude Haiku 4.5 is Anthropic’s fastest and most efficient model, delivering near-frontier intelligence at a fraction of the cost and latency of larger Claude models. Matching Claude Sonnet 4’s performance...",
|
|||
|
|
"context_length": 200000,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "qwen/qwen3-vl-8b-thinking",
|
|||
|
|
"name": "Qwen: Qwen3 VL 8B Thinking",
|
|||
|
|
"created": 1760463746,
|
|||
|
|
"description": "Qwen3-VL-8B-Thinking is the reasoning-optimized variant of the Qwen3-VL-8B multimodal model, designed for advanced visual and textual reasoning across complex scenes, documents, and temporal sequences. It integrates enhanced multimodal alignment and...",
|
|||
|
|
"context_length": 131072,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "qwen/qwen3-vl-8b-instruct",
|
|||
|
|
"name": "Qwen: Qwen3 VL 8B Instruct",
|
|||
|
|
"created": 1760463308,
|
|||
|
|
"description": "Qwen3-VL-8B-Instruct is a multimodal vision-language model from the Qwen3-VL series, built for high-fidelity understanding and reasoning across text, images, and video. It features improved multimodal fusion with Interleaved-MRoPE for long-horizon...",
|
|||
|
|
"context_length": 131072,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "openai/gpt-5-image",
|
|||
|
|
"name": "OpenAI: GPT-5 Image",
|
|||
|
|
"created": 1760447986,
|
|||
|
|
"description": "[GPT-5](https://openrouter.ai/openai/gpt-5) Image combines OpenAI's GPT-5 model with state-of-the-art image generation capabilities. It offers major improvements in reasoning, code quality, and user experience while incorporating GPT Image 1's superior instruction following,...",
|
|||
|
|
"context_length": 400000,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "openai/o3-deep-research",
|
|||
|
|
"name": "OpenAI: o3 Deep Research",
|
|||
|
|
"created": 1760129661,
|
|||
|
|
"description": "o3-deep-research is OpenAI's advanced model for deep research, designed to tackle complex, multi-step research tasks.\n\nNote: This model always uses the 'web_search' tool which adds additional cost.",
|
|||
|
|
"context_length": 200000,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "openai/o4-mini-deep-research",
|
|||
|
|
"name": "OpenAI: o4 Mini Deep Research",
|
|||
|
|
"created": 1760129642,
|
|||
|
|
"description": "o4-mini-deep-research is OpenAI's faster, more affordable deep research model—ideal for tackling complex, multi-step research tasks.\n\nNote: This model always uses the 'web_search' tool which adds additional cost.",
|
|||
|
|
"context_length": 200000,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "nvidia/llama-3.3-nemotron-super-49b-v1.5",
|
|||
|
|
"name": "NVIDIA: Llama 3.3 Nemotron Super 49B V1.5",
|
|||
|
|
"created": 1760101395,
|
|||
|
|
"description": "Llama-3.3-Nemotron-Super-49B-v1.5 is a 49B-parameter, English-centric reasoning/chat model derived from Meta’s Llama-3.3-70B-Instruct with a 128K context. It’s post-trained for agentic workflows (RAG, tool calling) via SFT across math, code, science, and...",
|
|||
|
|
"context_length": 131072,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "baidu/ernie-4.5-21b-a3b-thinking",
|
|||
|
|
"name": "Baidu: ERNIE 4.5 21B A3B Thinking",
|
|||
|
|
"created": 1760048887,
|
|||
|
|
"description": "ERNIE-4.5-21B-A3B-Thinking is Baidu's upgraded lightweight MoE model, refined to boost reasoning depth and quality for top-tier performance in logical puzzles, math, science, coding, text generation, and expert-level academic benchmarks.",
|
|||
|
|
"context_length": 131072,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "google/gemini-2.5-flash-image",
|
|||
|
|
"name": "Google: Nano Banana (Gemini 2.5 Flash Image)",
|
|||
|
|
"created": 1759870431,
|
|||
|
|
"description": "Gemini 2.5 Flash Image, a.k.a. \"Nano Banana,\" is now generally available. It is a state of the art image generation model with contextual understanding. It is capable of image generation,...",
|
|||
|
|
"context_length": 32768,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "qwen/qwen3-vl-30b-a3b-thinking",
|
|||
|
|
"name": "Qwen: Qwen3 VL 30B A3B Thinking",
|
|||
|
|
"created": 1759794479,
|
|||
|
|
"description": "Qwen3-VL-30B-A3B-Thinking is a multimodal model that unifies strong text generation with visual understanding for images and videos. Its Thinking variant enhances reasoning in STEM, math, and complex tasks. It excels...",
|
|||
|
|
"context_length": 131072,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "qwen/qwen3-vl-30b-a3b-instruct",
|
|||
|
|
"name": "Qwen: Qwen3 VL 30B A3B Instruct",
|
|||
|
|
"created": 1759794476,
|
|||
|
|
"description": "Qwen3-VL-30B-A3B-Instruct is a multimodal model that unifies strong text generation with visual understanding for images and videos. Its Instruct variant optimizes instruction-following for general multimodal tasks. It excels in perception...",
|
|||
|
|
"context_length": 131072,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "openai/gpt-5-pro",
|
|||
|
|
"name": "OpenAI: GPT-5 Pro",
|
|||
|
|
"created": 1759776663,
|
|||
|
|
"description": "GPT-5 Pro is OpenAI’s most advanced model, offering major improvements in reasoning, code quality, and user experience. It is optimized for complex tasks that require step-by-step reasoning, instruction following, and...",
|
|||
|
|
"context_length": 400000,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "z-ai/glm-4.6",
|
|||
|
|
"name": "Z.ai: GLM 4.6",
|
|||
|
|
"created": 1759235576,
|
|||
|
|
"description": "Compared with GLM-4.5, this generation brings several key improvements: Longer context window: The context window has been expanded from 128K to 200K tokens, enabling the model to handle more complex...",
|
|||
|
|
"context_length": 204800,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "anthropic/claude-sonnet-4.5",
|
|||
|
|
"name": "Anthropic: Claude Sonnet 4.5",
|
|||
|
|
"created": 1759161676,
|
|||
|
|
"description": "Claude Sonnet 4.5 is Anthropic’s most advanced Sonnet model to date, optimized for real-world agents and coding workflows. It delivers state-of-the-art performance on coding benchmarks such as SWE-bench Verified, with...",
|
|||
|
|
"context_length": 1000000,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "deepseek/deepseek-v3.2-exp",
|
|||
|
|
"name": "DeepSeek: DeepSeek V3.2 Exp",
|
|||
|
|
"created": 1759150481,
|
|||
|
|
"description": "DeepSeek-V3.2-Exp is an experimental large language model released by DeepSeek as an intermediate step between V3.1 and future architectures. It introduces DeepSeek Sparse Attention (DSA), a fine-grained sparse attention mechanism...",
|
|||
|
|
"context_length": 163840,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "thedrummer/cydonia-24b-v4.1",
|
|||
|
|
"name": "TheDrummer: Cydonia 24B V4.1",
|
|||
|
|
"created": 1758931878,
|
|||
|
|
"description": "Uncensored and creative writing model based on Mistral Small 3.2 24B with good recall, prompt adherence, and intelligence.",
|
|||
|
|
"context_length": 131072,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "relace/relace-apply-3",
|
|||
|
|
"name": "Relace: Relace Apply 3",
|
|||
|
|
"created": 1758891572,
|
|||
|
|
"description": "Relace Apply 3 is a specialized code-patching LLM that merges AI-suggested edits straight into your source files. It can apply updates from GPT-4o, Claude, and others into your files at...",
|
|||
|
|
"context_length": 256000,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "google/gemini-2.5-flash-lite-preview-09-2025",
|
|||
|
|
"name": "Google: Gemini 2.5 Flash Lite Preview 09-2025",
|
|||
|
|
"created": 1758819686,
|
|||
|
|
"description": "Gemini 2.5 Flash-Lite is a lightweight reasoning model in the Gemini 2.5 family, optimized for ultra-low latency and cost efficiency. It offers improved throughput, faster token generation, and better performance...",
|
|||
|
|
"context_length": 1048576,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "qwen/qwen3-vl-235b-a22b-thinking",
|
|||
|
|
"name": "Qwen: Qwen3 VL 235B A22B Thinking",
|
|||
|
|
"created": 1758668690,
|
|||
|
|
"description": "Qwen3-VL-235B-A22B Thinking is a multimodal model that unifies strong text generation with visual understanding across images and video. The Thinking model is optimized for multimodal reasoning in STEM and math....",
|
|||
|
|
"context_length": 131072,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "qwen/qwen3-vl-235b-a22b-instruct",
|
|||
|
|
"name": "Qwen: Qwen3 VL 235B A22B Instruct",
|
|||
|
|
"created": 1758668687,
|
|||
|
|
"description": "Qwen3-VL-235B-A22B Instruct is an open-weight multimodal model that unifies strong text generation with visual understanding across images and video. The Instruct model targets general vision-language use (VQA, document parsing, chart/table...",
|
|||
|
|
"context_length": 262144,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "qwen/qwen3-max",
|
|||
|
|
"name": "Qwen: Qwen3 Max",
|
|||
|
|
"created": 1758662808,
|
|||
|
|
"description": "Qwen3-Max is an updated release built on the Qwen3 series, offering major improvements in reasoning, instruction following, multilingual support, and long-tail knowledge coverage compared to the January 2025 version. It...",
|
|||
|
|
"context_length": 262144,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "qwen/qwen3-coder-plus",
|
|||
|
|
"name": "Qwen: Qwen3 Coder Plus",
|
|||
|
|
"created": 1758662707,
|
|||
|
|
"description": "Qwen3 Coder Plus is Alibaba's proprietary version of the Open Source Qwen3 Coder 480B A35B. It is a powerful coding agent model specializing in autonomous programming via tool calling and...",
|
|||
|
|
"context_length": 1000000,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "openai/gpt-5-codex",
|
|||
|
|
"name": "OpenAI: GPT-5 Codex",
|
|||
|
|
"created": 1758643403,
|
|||
|
|
"description": "GPT-5-Codex is a specialized version of GPT-5 optimized for software engineering and coding workflows. It is designed for both interactive development sessions and long, independent execution of complex engineering tasks....",
|
|||
|
|
"context_length": 400000,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "deepseek/deepseek-v3.1-terminus",
|
|||
|
|
"name": "DeepSeek: DeepSeek V3.1 Terminus",
|
|||
|
|
"created": 1758548275,
|
|||
|
|
"description": "DeepSeek-V3.1 Terminus is an update to [DeepSeek V3.1](/deepseek/deepseek-chat-v3.1) that maintains the model's original capabilities while addressing issues reported by users, including language consistency and agent capabilities, further optimizing the model's...",
|
|||
|
|
"context_length": 163840,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "x-ai/grok-4-fast",
|
|||
|
|
"name": "xAI: Grok 4 Fast",
|
|||
|
|
"created": 1758240090,
|
|||
|
|
"description": "Grok 4 Fast is xAI's latest multimodal model with SOTA cost-efficiency and a 2M token context window. It comes in two flavors: non-reasoning and reasoning. Read more about the model...",
|
|||
|
|
"context_length": 2000000,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "alibaba/tongyi-deepresearch-30b-a3b",
|
|||
|
|
"name": "Tongyi DeepResearch 30B A3B",
|
|||
|
|
"created": 1758210804,
|
|||
|
|
"description": "Tongyi DeepResearch is an agentic large language model developed by Tongyi Lab, with 30 billion total parameters activating only 3 billion per token. It's optimized for long-horizon, deep information-seeking tasks...",
|
|||
|
|
"context_length": 131072,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "qwen/qwen3-coder-flash",
|
|||
|
|
"name": "Qwen: Qwen3 Coder Flash",
|
|||
|
|
"created": 1758115536,
|
|||
|
|
"description": "Qwen3 Coder Flash is Alibaba's fast and cost efficient version of their proprietary Qwen3 Coder Plus. It is a powerful coding agent model specializing in autonomous programming via tool calling...",
|
|||
|
|
"context_length": 1000000,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "qwen/qwen3-next-80b-a3b-thinking",
|
|||
|
|
"name": "Qwen: Qwen3 Next 80B A3B Thinking",
|
|||
|
|
"created": 1757612284,
|
|||
|
|
"description": "Qwen3-Next-80B-A3B-Thinking is a reasoning-first chat model in the Qwen3-Next line that outputs structured “thinking” traces by default. It’s designed for hard multi-step problems; math proofs, code synthesis/debugging, logic, and agentic...",
|
|||
|
|
"context_length": 131072,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "qwen/qwen3-next-80b-a3b-instruct:free",
|
|||
|
|
"name": "Qwen: Qwen3 Next 80B A3B Instruct (free)",
|
|||
|
|
"created": 1757612213,
|
|||
|
|
"description": "Qwen3-Next-80B-A3B-Instruct is an instruction-tuned chat model in the Qwen3-Next series optimized for fast, stable responses without “thinking” traces. It targets complex tasks across reasoning, code generation, knowledge QA, and multilingual...",
|
|||
|
|
"context_length": 262144,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "qwen/qwen3-next-80b-a3b-instruct",
|
|||
|
|
"name": "Qwen: Qwen3 Next 80B A3B Instruct",
|
|||
|
|
"created": 1757612213,
|
|||
|
|
"description": "Qwen3-Next-80B-A3B-Instruct is an instruction-tuned chat model in the Qwen3-Next series optimized for fast, stable responses without “thinking” traces. It targets complex tasks across reasoning, code generation, knowledge QA, and multilingual...",
|
|||
|
|
"context_length": 262144,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "qwen/qwen-plus-2025-07-28:thinking",
|
|||
|
|
"name": "Qwen: Qwen Plus 0728 (thinking)",
|
|||
|
|
"created": 1757347599,
|
|||
|
|
"description": "Qwen Plus 0728, based on the Qwen3 foundation model, is a 1 million context hybrid reasoning model with a balanced performance, speed, and cost combination.",
|
|||
|
|
"context_length": 1000000,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "qwen/qwen-plus-2025-07-28",
|
|||
|
|
"name": "Qwen: Qwen Plus 0728",
|
|||
|
|
"created": 1757347599,
|
|||
|
|
"description": "Qwen Plus 0728, based on the Qwen3 foundation model, is a 1 million context hybrid reasoning model with a balanced performance, speed, and cost combination.",
|
|||
|
|
"context_length": 1000000,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "nvidia/nemotron-nano-9b-v2:free",
|
|||
|
|
"name": "NVIDIA: Nemotron Nano 9B V2 (free)",
|
|||
|
|
"created": 1757106807,
|
|||
|
|
"description": "NVIDIA-Nemotron-Nano-9B-v2 is a large language model (LLM) trained from scratch by NVIDIA, and designed as a unified model for both reasoning and non-reasoning tasks. It responds to user queries and...",
|
|||
|
|
"context_length": 128000,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "nvidia/nemotron-nano-9b-v2",
|
|||
|
|
"name": "NVIDIA: Nemotron Nano 9B V2",
|
|||
|
|
"created": 1757106807,
|
|||
|
|
"description": "NVIDIA-Nemotron-Nano-9B-v2 is a large language model (LLM) trained from scratch by NVIDIA, and designed as a unified model for both reasoning and non-reasoning tasks. It responds to user queries and...",
|
|||
|
|
"context_length": 131072,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "moonshotai/kimi-k2-0905",
|
|||
|
|
"name": "MoonshotAI: Kimi K2 0905",
|
|||
|
|
"created": 1757021147,
|
|||
|
|
"description": "Kimi K2 0905 is the September update of [Kimi K2 0711](moonshotai/kimi-k2). It is a large-scale Mixture-of-Experts (MoE) language model developed by Moonshot AI, featuring 1 trillion total parameters with 32...",
|
|||
|
|
"context_length": 262144,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "qwen/qwen3-30b-a3b-thinking-2507",
|
|||
|
|
"name": "Qwen: Qwen3 30B A3B Thinking 2507",
|
|||
|
|
"created": 1756399192,
|
|||
|
|
"description": "Qwen3-30B-A3B-Thinking-2507 is a 30B parameter Mixture-of-Experts reasoning model optimized for complex tasks requiring extended multi-step thinking. The model is designed specifically for “thinking mode,” where internal reasoning traces are separated...",
|
|||
|
|
"context_length": 131072,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "x-ai/grok-code-fast-1",
|
|||
|
|
"name": "xAI: Grok Code Fast 1",
|
|||
|
|
"created": 1756238927,
|
|||
|
|
"description": "Grok Code Fast 1 is a speedy and economical reasoning model that excels at agentic coding. With reasoning traces visible in the response, developers can steer Grok Code for high-quality...",
|
|||
|
|
"context_length": 256000,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "nousresearch/hermes-4-70b",
|
|||
|
|
"name": "Nous: Hermes 4 70B",
|
|||
|
|
"created": 1756236182,
|
|||
|
|
"description": "Hermes 4 70B is a hybrid reasoning model from Nous Research, built on Meta-Llama-3.1-70B. It introduces the same hybrid mode as the larger 405B release, allowing the model to either...",
|
|||
|
|
"context_length": 131072,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "nousresearch/hermes-4-405b",
|
|||
|
|
"name": "Nous: Hermes 4 405B",
|
|||
|
|
"created": 1756235463,
|
|||
|
|
"description": "Hermes 4 is a large-scale reasoning model built on Meta-Llama-3.1-405B and released by Nous Research. It introduces a hybrid reasoning mode, where the model can choose to deliberate internally with...",
|
|||
|
|
"context_length": 131072,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "deepseek/deepseek-chat-v3.1",
|
|||
|
|
"name": "DeepSeek: DeepSeek V3.1",
|
|||
|
|
"created": 1755779628,
|
|||
|
|
"description": "DeepSeek-V3.1 is a large hybrid reasoning model (671B parameters, 37B active) that supports both thinking and non-thinking modes via prompt templates. It extends the DeepSeek-V3 base with a two-phase long-context...",
|
|||
|
|
"context_length": 163840,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "openai/gpt-4o-audio-preview",
|
|||
|
|
"name": "OpenAI: GPT-4o Audio",
|
|||
|
|
"created": 1755233061,
|
|||
|
|
"description": "The gpt-4o-audio-preview model adds support for audio inputs as prompts. This enhancement allows the model to detect nuances within audio recordings and add depth to generated user experiences. Audio outputs...",
|
|||
|
|
"context_length": 128000,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "mistralai/mistral-medium-3.1",
|
|||
|
|
"name": "Mistral: Mistral Medium 3.1",
|
|||
|
|
"created": 1755095639,
|
|||
|
|
"description": "Mistral Medium 3.1 is an updated version of Mistral Medium 3, which is a high-performance enterprise-grade language model designed to deliver frontier-level capabilities at significantly reduced operational cost. It balances...",
|
|||
|
|
"context_length": 131072,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "baidu/ernie-4.5-21b-a3b",
|
|||
|
|
"name": "Baidu: ERNIE 4.5 21B A3B",
|
|||
|
|
"created": 1755034167,
|
|||
|
|
"description": "A sophisticated text-based Mixture-of-Experts (MoE) model featuring 21B total parameters with 3B activated per token, delivering exceptional multimodal understanding and generation through heterogeneous MoE structures and modality-isolated routing. Supporting an...",
|
|||
|
|
"context_length": 120000,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "baidu/ernie-4.5-vl-28b-a3b",
|
|||
|
|
"name": "Baidu: ERNIE 4.5 VL 28B A3B",
|
|||
|
|
"created": 1755032836,
|
|||
|
|
"description": "A powerful multimodal Mixture-of-Experts chat model featuring 28B total parameters with 3B activated per token, delivering exceptional text and vision understanding through its innovative heterogeneous MoE structure with modality-isolated routing....",
|
|||
|
|
"context_length": 30000,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "z-ai/glm-4.5v",
|
|||
|
|
"name": "Z.ai: GLM 4.5V",
|
|||
|
|
"created": 1754922288,
|
|||
|
|
"description": "GLM-4.5V is a vision-language foundation model for multimodal agent applications. Built on a Mixture-of-Experts (MoE) architecture with 106B parameters and 12B activated parameters, it achieves state-of-the-art results in video understanding,...",
|
|||
|
|
"context_length": 65536,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "ai21/jamba-large-1.7",
|
|||
|
|
"name": "AI21: Jamba Large 1.7",
|
|||
|
|
"created": 1754669020,
|
|||
|
|
"description": "Jamba Large 1.7 is the latest model in the Jamba open family, offering improvements in grounding, instruction-following, and overall efficiency. Built on a hybrid SSM-Transformer architecture with a 256K context...",
|
|||
|
|
"context_length": 256000,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "openai/gpt-5-chat",
|
|||
|
|
"name": "OpenAI: GPT-5 Chat",
|
|||
|
|
"created": 1754587837,
|
|||
|
|
"description": "GPT-5 Chat is designed for advanced, natural, multimodal, and context-aware conversations for enterprise applications.",
|
|||
|
|
"context_length": 128000,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "openai/gpt-5",
|
|||
|
|
"name": "OpenAI: GPT-5",
|
|||
|
|
"created": 1754587413,
|
|||
|
|
"description": "GPT-5 is OpenAI’s most advanced model, offering major improvements in reasoning, code quality, and user experience. It is optimized for complex tasks that require step-by-step reasoning, instruction following, and accuracy...",
|
|||
|
|
"context_length": 400000,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "openai/gpt-5-mini",
|
|||
|
|
"name": "OpenAI: GPT-5 Mini",
|
|||
|
|
"created": 1754587407,
|
|||
|
|
"description": "GPT-5 Mini is a compact version of GPT-5, designed to handle lighter-weight reasoning tasks. It provides the same instruction-following and safety-tuning benefits as GPT-5, but with reduced latency and cost....",
|
|||
|
|
"context_length": 400000,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "openai/gpt-5-nano",
|
|||
|
|
"name": "OpenAI: GPT-5 Nano",
|
|||
|
|
"created": 1754587402,
|
|||
|
|
"description": "GPT-5-Nano is the smallest and fastest variant in the GPT-5 system, optimized for developer tools, rapid interactions, and ultra-low latency environments. While limited in reasoning depth compared to its larger...",
|
|||
|
|
"context_length": 400000,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "openai/gpt-oss-120b:free",
|
|||
|
|
"name": "OpenAI: gpt-oss-120b (free)",
|
|||
|
|
"created": 1754414231,
|
|||
|
|
"description": "gpt-oss-120b is an open-weight, 117B-parameter Mixture-of-Experts (MoE) language model from OpenAI designed for high-reasoning, agentic, and general-purpose production use cases. It activates 5.1B parameters per forward pass and is optimized...",
|
|||
|
|
"context_length": 131072,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "openai/gpt-oss-120b",
|
|||
|
|
"name": "OpenAI: gpt-oss-120b",
|
|||
|
|
"created": 1754414231,
|
|||
|
|
"description": "gpt-oss-120b is an open-weight, 117B-parameter Mixture-of-Experts (MoE) language model from OpenAI designed for high-reasoning, agentic, and general-purpose production use cases. It activates 5.1B parameters per forward pass and is optimized...",
|
|||
|
|
"context_length": 131072,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "openai/gpt-oss-20b:free",
|
|||
|
|
"name": "OpenAI: gpt-oss-20b (free)",
|
|||
|
|
"created": 1754414229,
|
|||
|
|
"description": "gpt-oss-20b is an open-weight 21B parameter model released by OpenAI under the Apache 2.0 license. It uses a Mixture-of-Experts (MoE) architecture with 3.6B active parameters per forward pass, optimized for...",
|
|||
|
|
"context_length": 131072,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "openai/gpt-oss-20b",
|
|||
|
|
"name": "OpenAI: gpt-oss-20b",
|
|||
|
|
"created": 1754414229,
|
|||
|
|
"description": "gpt-oss-20b is an open-weight 21B parameter model released by OpenAI under the Apache 2.0 license. It uses a Mixture-of-Experts (MoE) architecture with 3.6B active parameters per forward pass, optimized for...",
|
|||
|
|
"context_length": 131072,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "anthropic/claude-opus-4.1",
|
|||
|
|
"name": "Anthropic: Claude Opus 4.1",
|
|||
|
|
"created": 1754411591,
|
|||
|
|
"description": "Claude Opus 4.1 is an updated version of Anthropic’s flagship model, offering improved performance in coding, reasoning, and agentic tasks. It achieves 74.5% on SWE-bench Verified and shows notable gains...",
|
|||
|
|
"context_length": 200000,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "mistralai/codestral-2508",
|
|||
|
|
"name": "Mistral: Codestral 2508",
|
|||
|
|
"created": 1754079630,
|
|||
|
|
"description": "Mistral's cutting-edge language model for coding released end of July 2025. Codestral specializes in low-latency, high-frequency tasks such as fill-in-the-middle (FIM), code correction and test generation.\n\n[Blog Post](https://mistral.ai/news/codestral-25-08)",
|
|||
|
|
"context_length": 256000,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "qwen/qwen3-coder-30b-a3b-instruct",
|
|||
|
|
"name": "Qwen: Qwen3 Coder 30B A3B Instruct",
|
|||
|
|
"created": 1753972379,
|
|||
|
|
"description": "Qwen3-Coder-30B-A3B-Instruct is a 30.5B parameter Mixture-of-Experts (MoE) model with 128 experts (8 active per forward pass), designed for advanced code generation, repository-scale understanding, and agentic tool use. Built on the...",
|
|||
|
|
"context_length": 160000,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "qwen/qwen3-30b-a3b-instruct-2507",
|
|||
|
|
"name": "Qwen: Qwen3 30B A3B Instruct 2507",
|
|||
|
|
"created": 1753806965,
|
|||
|
|
"description": "Qwen3-30B-A3B-Instruct-2507 is a 30.5B-parameter mixture-of-experts language model from Qwen, with 3.3B active parameters per inference. It operates in non-thinking mode and is designed for high-quality instruction following, multilingual understanding, and...",
|
|||
|
|
"context_length": 262144,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "z-ai/glm-4.5",
|
|||
|
|
"name": "Z.ai: GLM 4.5",
|
|||
|
|
"created": 1753471347,
|
|||
|
|
"description": "GLM-4.5 is our latest flagship foundation model, purpose-built for agent-based applications. It leverages a Mixture-of-Experts (MoE) architecture and supports a context length of up to 128k tokens. GLM-4.5 delivers significantly...",
|
|||
|
|
"context_length": 131072,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "z-ai/glm-4.5-air:free",
|
|||
|
|
"name": "Z.ai: GLM 4.5 Air (free)",
|
|||
|
|
"created": 1753471258,
|
|||
|
|
"description": "GLM-4.5-Air is the lightweight variant of our latest flagship model family, also purpose-built for agent-centric applications. Like GLM-4.5, it adopts the Mixture-of-Experts (MoE) architecture but with a more compact parameter...",
|
|||
|
|
"context_length": 131072,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "z-ai/glm-4.5-air",
|
|||
|
|
"name": "Z.ai: GLM 4.5 Air",
|
|||
|
|
"created": 1753471258,
|
|||
|
|
"description": "GLM-4.5-Air is the lightweight variant of our latest flagship model family, also purpose-built for agent-centric applications. Like GLM-4.5, it adopts the Mixture-of-Experts (MoE) architecture but with a more compact parameter...",
|
|||
|
|
"context_length": 131072,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "qwen/qwen3-235b-a22b-thinking-2507",
|
|||
|
|
"name": "Qwen: Qwen3 235B A22B Thinking 2507",
|
|||
|
|
"created": 1753449557,
|
|||
|
|
"description": "Qwen3-235B-A22B-Thinking-2507 is a high-performance, open-weight Mixture-of-Experts (MoE) language model optimized for complex reasoning tasks. It activates 22B of its 235B parameters per forward pass and natively supports up to 262,144...",
|
|||
|
|
"context_length": 131072,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "z-ai/glm-4-32b",
|
|||
|
|
"name": "Z.ai: GLM 4 32B ",
|
|||
|
|
"created": 1753376617,
|
|||
|
|
"description": "GLM 4 32B is a cost-effective foundation language model. It can efficiently perform complex tasks and has significantly enhanced capabilities in tool use, online search, and code-related intelligent tasks. It...",
|
|||
|
|
"context_length": 128000,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "qwen/qwen3-coder:free",
|
|||
|
|
"name": "Qwen: Qwen3 Coder 480B A35B (free)",
|
|||
|
|
"created": 1753230546,
|
|||
|
|
"description": "Qwen3-Coder-480B-A35B-Instruct is a Mixture-of-Experts (MoE) code generation model developed by the Qwen team. It is optimized for agentic coding tasks such as function calling, tool use, and long-context reasoning over...",
|
|||
|
|
"context_length": 262000,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "qwen/qwen3-coder",
|
|||
|
|
"name": "Qwen: Qwen3 Coder 480B A35B",
|
|||
|
|
"created": 1753230546,
|
|||
|
|
"description": "Qwen3-Coder-480B-A35B-Instruct is a Mixture-of-Experts (MoE) code generation model developed by the Qwen team. It is optimized for agentic coding tasks such as function calling, tool use, and long-context reasoning over...",
|
|||
|
|
"context_length": 262144,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "bytedance/ui-tars-1.5-7b",
|
|||
|
|
"name": "ByteDance: UI-TARS 7B ",
|
|||
|
|
"created": 1753205056,
|
|||
|
|
"description": "UI-TARS-1.5 is a multimodal vision-language agent optimized for GUI-based environments, including desktop interfaces, web browsers, mobile systems, and games. Built by ByteDance, it builds upon the UI-TARS framework with reinforcement...",
|
|||
|
|
"context_length": 128000,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "google/gemini-2.5-flash-lite",
|
|||
|
|
"name": "Google: Gemini 2.5 Flash Lite",
|
|||
|
|
"created": 1753200276,
|
|||
|
|
"description": "Gemini 2.5 Flash-Lite is a lightweight reasoning model in the Gemini 2.5 family, optimized for ultra-low latency and cost efficiency. It offers improved throughput, faster token generation, and better performance...",
|
|||
|
|
"context_length": 1048576,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "qwen/qwen3-235b-a22b-2507",
|
|||
|
|
"name": "Qwen: Qwen3 235B A22B Instruct 2507",
|
|||
|
|
"created": 1753119555,
|
|||
|
|
"description": "Qwen3-235B-A22B-Instruct-2507 is a multilingual, instruction-tuned mixture-of-experts language model based on the Qwen3-235B architecture, with 22B active parameters per forward pass. It is optimized for general-purpose text generation, including instruction following,...",
|
|||
|
|
"context_length": 262144,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "switchpoint/router",
|
|||
|
|
"name": "Switchpoint Router",
|
|||
|
|
"created": 1752272899,
|
|||
|
|
"description": "Switchpoint AI's router instantly analyzes your request and directs it to the optimal AI from an ever-evolving library. As the world of LLMs advances, our router gets smarter, ensuring you...",
|
|||
|
|
"context_length": 131072,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "moonshotai/kimi-k2",
|
|||
|
|
"name": "MoonshotAI: Kimi K2 0711",
|
|||
|
|
"created": 1752263252,
|
|||
|
|
"description": "Kimi K2 Instruct is a large-scale Mixture-of-Experts (MoE) language model developed by Moonshot AI, featuring 1 trillion total parameters with 32 billion active per forward pass. It is optimized for...",
|
|||
|
|
"context_length": 131072,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "mistralai/devstral-medium",
|
|||
|
|
"name": "Mistral: Devstral Medium",
|
|||
|
|
"created": 1752161321,
|
|||
|
|
"description": "Devstral Medium is a high-performance code generation and agentic reasoning model developed jointly by Mistral AI and All Hands AI. Positioned as a step up from Devstral Small, it achieves...",
|
|||
|
|
"context_length": 131072,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "mistralai/devstral-small",
|
|||
|
|
"name": "Mistral: Devstral Small 1.1",
|
|||
|
|
"created": 1752160751,
|
|||
|
|
"description": "Devstral Small 1.1 is a 24B parameter open-weight language model for software engineering agents, developed by Mistral AI in collaboration with All Hands AI. Finetuned from Mistral Small 3.1 and...",
|
|||
|
|
"context_length": 131072,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "cognitivecomputations/dolphin-mistral-24b-venice-edition:free",
|
|||
|
|
"name": "Venice: Uncensored (free)",
|
|||
|
|
"created": 1752094966,
|
|||
|
|
"description": "Venice Uncensored Dolphin Mistral 24B Venice Edition is a fine-tuned variant of Mistral-Small-24B-Instruct-2501, developed by dphn.ai in collaboration with Venice.ai. This model is designed as an “uncensored” instruct-tuned LLM, preserving...",
|
|||
|
|
"context_length": 32768,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "x-ai/grok-4",
|
|||
|
|
"name": "xAI: Grok 4",
|
|||
|
|
"created": 1752087689,
|
|||
|
|
"description": "Grok 4 is xAI's latest reasoning model with a 256k context window. It supports parallel tool calling, structured outputs, and both image and text inputs. Note that reasoning is not...",
|
|||
|
|
"context_length": 256000,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "tencent/hunyuan-a13b-instruct",
|
|||
|
|
"name": "Tencent: Hunyuan A13B Instruct",
|
|||
|
|
"created": 1751987664,
|
|||
|
|
"description": "Hunyuan-A13B is a 13B active parameter Mixture-of-Experts (MoE) language model developed by Tencent, with a total parameter count of 80B and support for reasoning via Chain-of-Thought. It offers competitive benchmark...",
|
|||
|
|
"context_length": 131072,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "morph/morph-v3-large",
|
|||
|
|
"name": "Morph: Morph V3 Large",
|
|||
|
|
"created": 1751910858,
|
|||
|
|
"description": "Morph's high-accuracy apply model for complex code edits. ~4,500 tokens/sec with 98% accuracy for precise code transformations. The model requires the prompt to be in the following format: \u003cinstruction\u003e{instruction}\u003c/instruction\u003e \u003ccode\u003e{initial_code}\u003c/code\u003e...",
|
|||
|
|
"context_length": 262144,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "morph/morph-v3-fast",
|
|||
|
|
"name": "Morph: Morph V3 Fast",
|
|||
|
|
"created": 1751910002,
|
|||
|
|
"description": "Morph's fastest apply model for code edits. ~10,500 tokens/sec with 96% accuracy for rapid code transformations. The model requires the prompt to be in the following format: \u003cinstruction\u003e{instruction}\u003c/instruction\u003e \u003ccode\u003e{initial_code}\u003c/code\u003e \u003cupdate\u003e{edit_snippet}\u003c/update\u003e...",
|
|||
|
|
"context_length": 81920,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "baidu/ernie-4.5-vl-424b-a47b",
|
|||
|
|
"name": "Baidu: ERNIE 4.5 VL 424B A47B ",
|
|||
|
|
"created": 1751300903,
|
|||
|
|
"description": "ERNIE-4.5-VL-424B-A47B is a multimodal Mixture-of-Experts (MoE) model from Baidu’s ERNIE 4.5 series, featuring 424B total parameters with 47B active per token. It is trained jointly on text and image data...",
|
|||
|
|
"context_length": 123000,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "baidu/ernie-4.5-300b-a47b",
|
|||
|
|
"name": "Baidu: ERNIE 4.5 300B A47B ",
|
|||
|
|
"created": 1751300139,
|
|||
|
|
"description": "ERNIE-4.5-300B-A47B is a 300B parameter Mixture-of-Experts (MoE) language model developed by Baidu as part of the ERNIE 4.5 series. It activates 47B parameters per token and supports text generation in...",
|
|||
|
|
"context_length": 123000,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "mistralai/mistral-small-3.2-24b-instruct",
|
|||
|
|
"name": "Mistral: Mistral Small 3.2 24B",
|
|||
|
|
"created": 1750443016,
|
|||
|
|
"description": "Mistral-Small-3.2-24B-Instruct-2506 is an updated 24B parameter model from Mistral optimized for instruction following, repetition reduction, and improved function calling. Compared to the 3.1 release, version 3.2 significantly improves accuracy on...",
|
|||
|
|
"context_length": 128000,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "minimax/minimax-m1",
|
|||
|
|
"name": "MiniMax: MiniMax M1",
|
|||
|
|
"created": 1750200414,
|
|||
|
|
"description": "MiniMax-M1 is a large-scale, open-weight reasoning model designed for extended context and high-efficiency inference. It leverages a hybrid Mixture-of-Experts (MoE) architecture paired with a custom \"lightning attention\" mechanism, allowing it...",
|
|||
|
|
"context_length": 1000000,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "google/gemini-2.5-flash",
|
|||
|
|
"name": "Google: Gemini 2.5 Flash",
|
|||
|
|
"created": 1750172488,
|
|||
|
|
"description": "Gemini 2.5 Flash is Google's state-of-the-art workhorse model, specifically designed for advanced reasoning, coding, mathematics, and scientific tasks. It includes built-in \"thinking\" capabilities, enabling it to provide responses with greater...",
|
|||
|
|
"context_length": 1048576,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "google/gemini-2.5-pro",
|
|||
|
|
"name": "Google: Gemini 2.5 Pro",
|
|||
|
|
"created": 1750169544,
|
|||
|
|
"description": "Gemini 2.5 Pro is Google’s state-of-the-art AI model designed for advanced reasoning, coding, mathematics, and scientific tasks. It employs “thinking” capabilities, enabling it to reason through responses with enhanced accuracy...",
|
|||
|
|
"context_length": 1048576,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "openai/o3-pro",
|
|||
|
|
"name": "OpenAI: o3 Pro",
|
|||
|
|
"created": 1749598352,
|
|||
|
|
"description": "The o-series of models are trained with reinforcement learning to think before they answer and perform complex reasoning. The o3-pro model uses more compute to think harder and provide consistently...",
|
|||
|
|
"context_length": 200000,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "x-ai/grok-3-mini",
|
|||
|
|
"name": "xAI: Grok 3 Mini",
|
|||
|
|
"created": 1749583245,
|
|||
|
|
"description": "A lightweight model that thinks before responding. Fast, smart, and great for logic-based tasks that do not require deep domain knowledge. The raw thinking traces are accessible.",
|
|||
|
|
"context_length": 131072,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "x-ai/grok-3",
|
|||
|
|
"name": "xAI: Grok 3",
|
|||
|
|
"created": 1749582908,
|
|||
|
|
"description": "Grok 3 is the latest model from xAI. It's their flagship model that excels at enterprise use cases like data extraction, coding, and text summarization. Possesses deep domain knowledge in...",
|
|||
|
|
"context_length": 131072,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "google/gemini-2.5-pro-preview",
|
|||
|
|
"name": "Google: Gemini 2.5 Pro Preview 06-05",
|
|||
|
|
"created": 1749137257,
|
|||
|
|
"description": "Gemini 2.5 Pro is Google’s state-of-the-art AI model designed for advanced reasoning, coding, mathematics, and scientific tasks. It employs “thinking” capabilities, enabling it to reason through responses with enhanced accuracy...",
|
|||
|
|
"context_length": 1048576,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "deepseek/deepseek-r1-0528",
|
|||
|
|
"name": "DeepSeek: R1 0528",
|
|||
|
|
"created": 1748455170,
|
|||
|
|
"description": "May 28th update to the [original DeepSeek R1](/deepseek/deepseek-r1) Performance on par with [OpenAI o1](/openai/o1), but open-sourced and with fully open reasoning tokens. It's 671B parameters in size, with 37B active...",
|
|||
|
|
"context_length": 163840,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "anthropic/claude-opus-4",
|
|||
|
|
"name": "Anthropic: Claude Opus 4",
|
|||
|
|
"created": 1747931245,
|
|||
|
|
"description": "Claude Opus 4 is benchmarked as the world’s best coding model, at time of release, bringing sustained performance on complex, long-running tasks and agent workflows. It sets new benchmarks in...",
|
|||
|
|
"context_length": 200000,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "anthropic/claude-sonnet-4",
|
|||
|
|
"name": "Anthropic: Claude Sonnet 4",
|
|||
|
|
"created": 1747930371,
|
|||
|
|
"description": "Claude Sonnet 4 significantly enhances the capabilities of its predecessor, Sonnet 3.7, excelling in both coding and reasoning tasks with improved precision and controllability. Achieving state-of-the-art performance on SWE-bench (72.7%),...",
|
|||
|
|
"context_length": 1000000,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "google/gemma-3n-e4b-it",
|
|||
|
|
"name": "Google: Gemma 3n 4B",
|
|||
|
|
"created": 1747776824,
|
|||
|
|
"description": "Gemma 3n E4B-it is optimized for efficient execution on mobile and low-resource devices, such as phones, laptops, and tablets. It supports multimodal inputs—including text, visual data, and audio—enabling diverse tasks...",
|
|||
|
|
"context_length": 32768,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "mistralai/mistral-medium-3",
|
|||
|
|
"name": "Mistral: Mistral Medium 3",
|
|||
|
|
"created": 1746627341,
|
|||
|
|
"description": "Mistral Medium 3 is a high-performance enterprise-grade language model designed to deliver frontier-level capabilities at significantly reduced operational cost. It balances state-of-the-art reasoning and multimodal performance with 8× lower cost...",
|
|||
|
|
"context_length": 131072,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "google/gemini-2.5-pro-preview-05-06",
|
|||
|
|
"name": "Google: Gemini 2.5 Pro Preview 05-06",
|
|||
|
|
"created": 1746578513,
|
|||
|
|
"description": "Gemini 2.5 Pro is Google’s state-of-the-art AI model designed for advanced reasoning, coding, mathematics, and scientific tasks. It employs “thinking” capabilities, enabling it to reason through responses with enhanced accuracy...",
|
|||
|
|
"context_length": 1048576,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "arcee-ai/spotlight",
|
|||
|
|
"name": "Arcee AI: Spotlight",
|
|||
|
|
"created": 1746481552,
|
|||
|
|
"description": "Spotlight is a 7‑billion‑parameter vision‑language model derived from Qwen 2.5‑VL and fine‑tuned by Arcee AI for tight image‑text grounding tasks. It offers a 32 k‑token context window, enabling rich multimodal...",
|
|||
|
|
"context_length": 131072,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "arcee-ai/maestro-reasoning",
|
|||
|
|
"name": "Arcee AI: Maestro Reasoning",
|
|||
|
|
"created": 1746481269,
|
|||
|
|
"description": "Maestro Reasoning is Arcee's flagship analysis model: a 32 B‑parameter derivative of Qwen 2.5‑32 B tuned with DPO and chain‑of‑thought RL for step‑by‑step logic. Compared to the earlier 7 B...",
|
|||
|
|
"context_length": 131072,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "arcee-ai/virtuoso-large",
|
|||
|
|
"name": "Arcee AI: Virtuoso Large",
|
|||
|
|
"created": 1746478885,
|
|||
|
|
"description": "Virtuoso‑Large is Arcee's top‑tier general‑purpose LLM at 72 B parameters, tuned to tackle cross‑domain reasoning, creative writing and enterprise QA. Unlike many 70 B peers, it retains the 128 k...",
|
|||
|
|
"context_length": 131072,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "arcee-ai/coder-large",
|
|||
|
|
"name": "Arcee AI: Coder Large",
|
|||
|
|
"created": 1746478663,
|
|||
|
|
"description": "Coder‑Large is a 32 B‑parameter offspring of Qwen 2.5‑Instruct that has been further trained on permissively‑licensed GitHub, CodeSearchNet and synthetic bug‑fix corpora. It supports a 32k context window, enabling multi‑file...",
|
|||
|
|
"context_length": 32768,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "meta-llama/llama-guard-4-12b",
|
|||
|
|
"name": "Meta: Llama Guard 4 12B",
|
|||
|
|
"created": 1745975193,
|
|||
|
|
"description": "Llama Guard 4 is a Llama 4 Scout-derived multimodal pretrained model, fine-tuned for content safety classification. Similar to previous versions, it can be used to classify content in both LLM...",
|
|||
|
|
"context_length": 163840,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "qwen/qwen3-30b-a3b",
|
|||
|
|
"name": "Qwen: Qwen3 30B A3B",
|
|||
|
|
"created": 1745878604,
|
|||
|
|
"description": "Qwen3, the latest generation in the Qwen large language model series, features both dense and mixture-of-experts (MoE) architectures to excel in reasoning, multilingual support, and advanced agent tasks. Its unique...",
|
|||
|
|
"context_length": 40960,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "qwen/qwen3-8b",
|
|||
|
|
"name": "Qwen: Qwen3 8B",
|
|||
|
|
"created": 1745876632,
|
|||
|
|
"description": "Qwen3-8B is a dense 8.2B parameter causal language model from the Qwen3 series, designed for both reasoning-heavy tasks and efficient dialogue. It supports seamless switching between \"thinking\" mode for math,...",
|
|||
|
|
"context_length": 40960,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "qwen/qwen3-14b",
|
|||
|
|
"name": "Qwen: Qwen3 14B",
|
|||
|
|
"created": 1745876478,
|
|||
|
|
"description": "Qwen3-14B is a dense 14.8B parameter causal language model from the Qwen3 series, designed for both complex reasoning and efficient dialogue. It supports seamless switching between a \"thinking\" mode for...",
|
|||
|
|
"context_length": 40960,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "qwen/qwen3-32b",
|
|||
|
|
"name": "Qwen: Qwen3 32B",
|
|||
|
|
"created": 1745875945,
|
|||
|
|
"description": "Qwen3-32B is a dense 32.8B parameter causal language model from the Qwen3 series, optimized for both complex reasoning and efficient dialogue. It supports seamless switching between a \"thinking\" mode for...",
|
|||
|
|
"context_length": 40960,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "qwen/qwen3-235b-a22b",
|
|||
|
|
"name": "Qwen: Qwen3 235B A22B",
|
|||
|
|
"created": 1745875757,
|
|||
|
|
"description": "Qwen3-235B-A22B is a 235B parameter mixture-of-experts (MoE) model developed by Qwen, activating 22B parameters per forward pass. It supports seamless switching between a \"thinking\" mode for complex reasoning, math, and...",
|
|||
|
|
"context_length": 131072,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "openai/o4-mini-high",
|
|||
|
|
"name": "OpenAI: o4 Mini High",
|
|||
|
|
"created": 1744824212,
|
|||
|
|
"description": "OpenAI o4-mini-high is the same model as [o4-mini](/openai/o4-mini) with reasoning_effort set to high. OpenAI o4-mini is a compact reasoning model in the o-series, optimized for fast, cost-efficient performance while retaining...",
|
|||
|
|
"context_length": 200000,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "openai/o3",
|
|||
|
|
"name": "OpenAI: o3",
|
|||
|
|
"created": 1744823457,
|
|||
|
|
"description": "o3 is a well-rounded and powerful model across domains. It sets a new standard for math, science, coding, and visual reasoning tasks. It also excels at technical writing and instruction-following....",
|
|||
|
|
"context_length": 200000,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "openai/o4-mini",
|
|||
|
|
"name": "OpenAI: o4 Mini",
|
|||
|
|
"created": 1744820942,
|
|||
|
|
"description": "OpenAI o4-mini is a compact reasoning model in the o-series, optimized for fast, cost-efficient performance while retaining strong multimodal and agentic capabilities. It supports tool use and demonstrates competitive reasoning...",
|
|||
|
|
"context_length": 200000,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "openai/gpt-4.1",
|
|||
|
|
"name": "OpenAI: GPT-4.1",
|
|||
|
|
"created": 1744651385,
|
|||
|
|
"description": "GPT-4.1 is a flagship large language model optimized for advanced instruction following, real-world software engineering, and long-context reasoning. It supports a 1 million token context window and outperforms GPT-4o and...",
|
|||
|
|
"context_length": 1047576,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "openai/gpt-4.1-mini",
|
|||
|
|
"name": "OpenAI: GPT-4.1 Mini",
|
|||
|
|
"created": 1744651381,
|
|||
|
|
"description": "GPT-4.1 Mini is a mid-sized model delivering performance competitive with GPT-4o at substantially lower latency and cost. It retains a 1 million token context window and scores 45.1% on hard...",
|
|||
|
|
"context_length": 1047576,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "openai/gpt-4.1-nano",
|
|||
|
|
"name": "OpenAI: GPT-4.1 Nano",
|
|||
|
|
"created": 1744651369,
|
|||
|
|
"description": "For tasks that demand low latency, GPT‑4.1 nano is the fastest and cheapest model in the GPT-4.1 series. It delivers exceptional performance at a small size with its 1 million...",
|
|||
|
|
"context_length": 1047576,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "alfredpros/codellama-7b-instruct-solidity",
|
|||
|
|
"name": "AlfredPros: CodeLLaMa 7B Instruct Solidity",
|
|||
|
|
"created": 1744641874,
|
|||
|
|
"description": "A finetuned 7 billion parameters Code LLaMA - Instruct model to generate Solidity smart contract using 4-bit QLoRA finetuning provided by PEFT library.",
|
|||
|
|
"context_length": 4096,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "x-ai/grok-3-mini-beta",
|
|||
|
|
"name": "xAI: Grok 3 Mini Beta",
|
|||
|
|
"created": 1744240195,
|
|||
|
|
"description": "Grok 3 Mini is a lightweight, smaller thinking model. Unlike traditional models that generate answers immediately, Grok 3 Mini thinks before responding. It’s ideal for reasoning-heavy tasks that don’t demand...",
|
|||
|
|
"context_length": 131072,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "x-ai/grok-3-beta",
|
|||
|
|
"name": "xAI: Grok 3 Beta",
|
|||
|
|
"created": 1744240068,
|
|||
|
|
"description": "Grok 3 is the latest model from xAI. It's their flagship model that excels at enterprise use cases like data extraction, coding, and text summarization. Possesses deep domain knowledge in...",
|
|||
|
|
"context_length": 131072,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "meta-llama/llama-4-maverick",
|
|||
|
|
"name": "Meta: Llama 4 Maverick",
|
|||
|
|
"created": 1743881822,
|
|||
|
|
"description": "Llama 4 Maverick 17B Instruct (128E) is a high-capacity multimodal language model from Meta, built on a mixture-of-experts (MoE) architecture with 128 experts and 17 billion active parameters per forward...",
|
|||
|
|
"context_length": 1048576,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "meta-llama/llama-4-scout",
|
|||
|
|
"name": "Meta: Llama 4 Scout",
|
|||
|
|
"created": 1743881519,
|
|||
|
|
"description": "Llama 4 Scout 17B Instruct (16E) is a mixture-of-experts (MoE) language model developed by Meta, activating 17 billion parameters out of a total of 109B. It supports native multimodal input...",
|
|||
|
|
"context_length": 327680,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "deepseek/deepseek-chat-v3-0324",
|
|||
|
|
"name": "DeepSeek: DeepSeek V3 0324",
|
|||
|
|
"created": 1742824755,
|
|||
|
|
"description": "DeepSeek V3, a 685B-parameter, mixture-of-experts model, is the latest iteration of the flagship chat model family from the DeepSeek team. It succeeds the [DeepSeek V3](/deepseek/deepseek-chat-v3) model and performs really well...",
|
|||
|
|
"context_length": 163840,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "openai/o1-pro",
|
|||
|
|
"name": "OpenAI: o1-pro",
|
|||
|
|
"created": 1742423211,
|
|||
|
|
"description": "The o1 series of models are trained with reinforcement learning to think before they answer and perform complex reasoning. The o1-pro model uses more compute to think harder and provide...",
|
|||
|
|
"context_length": 200000,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "mistralai/mistral-small-3.1-24b-instruct",
|
|||
|
|
"name": "Mistral: Mistral Small 3.1 24B",
|
|||
|
|
"created": 1742238937,
|
|||
|
|
"description": "Mistral Small 3.1 24B Instruct is an upgraded variant of Mistral Small 3 (2501), featuring 24 billion parameters with advanced multimodal capabilities. It provides state-of-the-art performance in text-based reasoning and...",
|
|||
|
|
"context_length": 128000,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "google/gemma-3-4b-it",
|
|||
|
|
"name": "Google: Gemma 3 4B",
|
|||
|
|
"created": 1741905510,
|
|||
|
|
"description": "Gemma 3 introduces multimodality, supporting vision-language input and text outputs. It handles context windows up to 128k tokens, understands over 140 languages, and offers improved math, reasoning, and chat capabilities,...",
|
|||
|
|
"context_length": 131072,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "google/gemma-3-12b-it",
|
|||
|
|
"name": "Google: Gemma 3 12B",
|
|||
|
|
"created": 1741902625,
|
|||
|
|
"description": "Gemma 3 introduces multimodality, supporting vision-language input and text outputs. It handles context windows up to 128k tokens, understands over 140 languages, and offers improved math, reasoning, and chat capabilities,...",
|
|||
|
|
"context_length": 131072,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "cohere/command-a",
|
|||
|
|
"name": "Cohere: Command A",
|
|||
|
|
"created": 1741894342,
|
|||
|
|
"description": "Command A is an open-weights 111B parameter model with a 256k context window focused on delivering great performance across agentic, multilingual, and coding use cases. Compared to other leading proprietary...",
|
|||
|
|
"context_length": 256000,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "openai/gpt-4o-mini-search-preview",
|
|||
|
|
"name": "OpenAI: GPT-4o-mini Search Preview",
|
|||
|
|
"created": 1741818122,
|
|||
|
|
"description": "GPT-4o mini Search Preview is a specialized model for web search in Chat Completions. It is trained to understand and execute web search queries.",
|
|||
|
|
"context_length": 128000,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "openai/gpt-4o-search-preview",
|
|||
|
|
"name": "OpenAI: GPT-4o Search Preview",
|
|||
|
|
"created": 1741817949,
|
|||
|
|
"description": "GPT-4o Search Previewis a specialized model for web search in Chat Completions. It is trained to understand and execute web search queries.",
|
|||
|
|
"context_length": 128000,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "rekaai/reka-flash-3",
|
|||
|
|
"name": "Reka Flash 3",
|
|||
|
|
"created": 1741812813,
|
|||
|
|
"description": "Reka Flash 3 is a general-purpose, instruction-tuned large language model with 21 billion parameters, developed by Reka. It excels at general chat, coding tasks, instruction-following, and function calling. Featuring a...",
|
|||
|
|
"context_length": 65536,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "google/gemma-3-27b-it",
|
|||
|
|
"name": "Google: Gemma 3 27B",
|
|||
|
|
"created": 1741756359,
|
|||
|
|
"description": "Gemma 3 introduces multimodality, supporting vision-language input and text outputs. It handles context windows up to 128k tokens, understands over 140 languages, and offers improved math, reasoning, and chat capabilities,...",
|
|||
|
|
"context_length": 131072,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "thedrummer/skyfall-36b-v2",
|
|||
|
|
"name": "TheDrummer: Skyfall 36B V2",
|
|||
|
|
"created": 1741636566,
|
|||
|
|
"description": "Skyfall 36B v2 is an enhanced iteration of Mistral Small 2501, specifically fine-tuned for improved creativity, nuanced writing, role-playing, and coherent storytelling.",
|
|||
|
|
"context_length": 32768,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "perplexity/sonar-reasoning-pro",
|
|||
|
|
"name": "Perplexity: Sonar Reasoning Pro",
|
|||
|
|
"created": 1741313308,
|
|||
|
|
"description": "Note: Sonar Pro pricing includes Perplexity search pricing. See [details here](https://docs.perplexity.ai/guides/pricing#detailed-pricing-breakdown-for-sonar-reasoning-pro-and-sonar-pro) Sonar Reasoning Pro is a premier reasoning model powered by DeepSeek R1 with Chain of Thought (CoT). Designed for...",
|
|||
|
|
"context_length": 128000,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "perplexity/sonar-pro",
|
|||
|
|
"name": "Perplexity: Sonar Pro",
|
|||
|
|
"created": 1741312423,
|
|||
|
|
"description": "Note: Sonar Pro pricing includes Perplexity search pricing. See [details here](https://docs.perplexity.ai/guides/pricing#detailed-pricing-breakdown-for-sonar-reasoning-pro-and-sonar-pro) For enterprises seeking more advanced capabilities, the Sonar Pro API can handle in-depth, multi-step queries with added extensibility, like...",
|
|||
|
|
"context_length": 200000,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "perplexity/sonar-deep-research",
|
|||
|
|
"name": "Perplexity: Sonar Deep Research",
|
|||
|
|
"created": 1741311246,
|
|||
|
|
"description": "Sonar Deep Research is a research-focused model designed for multi-step retrieval, synthesis, and reasoning across complex topics. It autonomously searches, reads, and evaluates sources, refining its approach as it gathers...",
|
|||
|
|
"context_length": 128000,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "google/gemini-2.0-flash-lite-001",
|
|||
|
|
"name": "Google: Gemini 2.0 Flash Lite",
|
|||
|
|
"created": 1740506212,
|
|||
|
|
"description": "Gemini 2.0 Flash Lite offers a significantly faster time to first token (TTFT) compared to [Gemini Flash 1.5](/google/gemini-flash-1.5), while maintaining quality on par with larger models like [Gemini Pro 1.5](/google/gemini-pro-1.5),...",
|
|||
|
|
"context_length": 1048576,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "mistralai/mistral-saba",
|
|||
|
|
"name": "Mistral: Saba",
|
|||
|
|
"created": 1739803239,
|
|||
|
|
"description": "Mistral Saba is a 24B-parameter language model specifically designed for the Middle East and South Asia, delivering accurate and contextually relevant responses while maintaining efficient performance. Trained on curated regional...",
|
|||
|
|
"context_length": 32768,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "meta-llama/llama-guard-3-8b",
|
|||
|
|
"name": "Llama Guard 3 8B",
|
|||
|
|
"created": 1739401318,
|
|||
|
|
"description": "Llama Guard 3 is a Llama-3.1-8B pretrained model, fine-tuned for content safety classification. Similar to previous versions, it can be used to classify content in both LLM inputs (prompt classification)...",
|
|||
|
|
"context_length": 131072,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "openai/o3-mini-high",
|
|||
|
|
"name": "OpenAI: o3 Mini High",
|
|||
|
|
"created": 1739372611,
|
|||
|
|
"description": "OpenAI o3-mini-high is the same model as [o3-mini](/openai/o3-mini) with reasoning_effort set to high. o3-mini is a cost-efficient language model optimized for STEM reasoning tasks, particularly excelling in science, mathematics, and...",
|
|||
|
|
"context_length": 200000,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "google/gemini-2.0-flash-001",
|
|||
|
|
"name": "Google: Gemini 2.0 Flash",
|
|||
|
|
"created": 1738769413,
|
|||
|
|
"description": "Gemini Flash 2.0 offers a significantly faster time to first token (TTFT) compared to [Gemini Flash 1.5](/google/gemini-flash-1.5), while maintaining quality on par with larger models like [Gemini Pro 1.5](/google/gemini-pro-1.5). It...",
|
|||
|
|
"context_length": 1048576,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "aion-labs/aion-1.0",
|
|||
|
|
"name": "AionLabs: Aion-1.0",
|
|||
|
|
"created": 1738697557,
|
|||
|
|
"description": "Aion-1.0 is a multi-model system designed for high performance across various tasks, including reasoning and coding. It is built on DeepSeek-R1, augmented with additional models and techniques such as Tree...",
|
|||
|
|
"context_length": 131072,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "aion-labs/aion-1.0-mini",
|
|||
|
|
"name": "AionLabs: Aion-1.0-Mini",
|
|||
|
|
"created": 1738697107,
|
|||
|
|
"description": "Aion-1.0-Mini 32B parameter model is a distilled version of the DeepSeek-R1 model, designed for strong performance in reasoning domains such as mathematics, coding, and logic. It is a modified variant...",
|
|||
|
|
"context_length": 131072,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "aion-labs/aion-rp-llama-3.1-8b",
|
|||
|
|
"name": "AionLabs: Aion-RP 1.0 (8B)",
|
|||
|
|
"created": 1738696718,
|
|||
|
|
"description": "Aion-RP-Llama-3.1-8B ranks the highest in the character evaluation portion of the RPBench-Auto benchmark, a roleplaying-specific variant of Arena-Hard-Auto, where LLMs evaluate each other’s responses. It is a fine-tuned base model...",
|
|||
|
|
"context_length": 32768,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "qwen/qwen2.5-vl-72b-instruct",
|
|||
|
|
"name": "Qwen: Qwen2.5 VL 72B Instruct",
|
|||
|
|
"created": 1738410311,
|
|||
|
|
"description": "Qwen2.5-VL is proficient in recognizing common objects such as flowers, birds, fish, and insects. It is also highly capable of analyzing texts, charts, icons, graphics, and layouts within images.",
|
|||
|
|
"context_length": 32000,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "qwen/qwen-plus",
|
|||
|
|
"name": "Qwen: Qwen-Plus",
|
|||
|
|
"created": 1738409840,
|
|||
|
|
"description": "Qwen-Plus, based on the Qwen2.5 foundation model, is a 131K context model with a balanced performance, speed, and cost combination.",
|
|||
|
|
"context_length": 1000000,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "openai/o3-mini",
|
|||
|
|
"name": "OpenAI: o3 Mini",
|
|||
|
|
"created": 1738351721,
|
|||
|
|
"description": "OpenAI o3-mini is a cost-efficient language model optimized for STEM reasoning tasks, particularly excelling in science, mathematics, and coding. This model supports the `reasoning_effort` parameter, which can be set to...",
|
|||
|
|
"context_length": 200000,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "mistralai/mistral-small-24b-instruct-2501",
|
|||
|
|
"name": "Mistral: Mistral Small 3",
|
|||
|
|
"created": 1738255409,
|
|||
|
|
"description": "Mistral Small 3 is a 24B-parameter language model optimized for low-latency performance across common AI tasks. Released under the Apache 2.0 license, it features both pre-trained and instruction-tuned versions designed...",
|
|||
|
|
"context_length": 32768,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "deepseek/deepseek-r1-distill-qwen-32b",
|
|||
|
|
"name": "DeepSeek: R1 Distill Qwen 32B",
|
|||
|
|
"created": 1738194830,
|
|||
|
|
"description": "DeepSeek R1 Distill Qwen 32B is a distilled large language model based on [Qwen 2.5 32B](https://huggingface.co/Qwen/Qwen2.5-32B), using outputs from [DeepSeek R1](/deepseek/deepseek-r1). It outperforms OpenAI's o1-mini across various benchmarks, achieving new...",
|
|||
|
|
"context_length": 32768,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "perplexity/sonar",
|
|||
|
|
"name": "Perplexity: Sonar",
|
|||
|
|
"created": 1738013808,
|
|||
|
|
"description": "Sonar is lightweight, affordable, fast, and simple to use — now featuring citations and the ability to customize sources. It is designed for companies seeking to integrate lightweight question-and-answer features...",
|
|||
|
|
"context_length": 127072,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "deepseek/deepseek-r1-distill-llama-70b",
|
|||
|
|
"name": "DeepSeek: R1 Distill Llama 70B",
|
|||
|
|
"created": 1737663169,
|
|||
|
|
"description": "DeepSeek R1 Distill Llama 70B is a distilled large language model based on [Llama-3.3-70B-Instruct](/meta-llama/llama-3.3-70b-instruct), using outputs from [DeepSeek R1](/deepseek/deepseek-r1). The model combines advanced distillation techniques to achieve high performance across...",
|
|||
|
|
"context_length": 131072,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "deepseek/deepseek-r1",
|
|||
|
|
"name": "DeepSeek: R1",
|
|||
|
|
"created": 1737381095,
|
|||
|
|
"description": "DeepSeek R1 is here: Performance on par with [OpenAI o1](/openai/o1), but open-sourced and with fully open reasoning tokens. It's 671B parameters in size, with 37B active in an inference pass....",
|
|||
|
|
"context_length": 64000,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "minimax/minimax-01",
|
|||
|
|
"name": "MiniMax: MiniMax-01",
|
|||
|
|
"created": 1736915462,
|
|||
|
|
"description": "MiniMax-01 is a combines MiniMax-Text-01 for text generation and MiniMax-VL-01 for image understanding. It has 456 billion parameters, with 45.9 billion parameters activated per inference, and can handle a context...",
|
|||
|
|
"context_length": 1000192,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "microsoft/phi-4",
|
|||
|
|
"name": "Microsoft: Phi 4",
|
|||
|
|
"created": 1736489872,
|
|||
|
|
"description": "[Microsoft Research](/microsoft) Phi-4 is designed to perform well in complex reasoning tasks and can operate efficiently in situations with limited memory or where quick responses are needed. At 14 billion...",
|
|||
|
|
"context_length": 16384,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "sao10k/l3.1-70b-hanami-x1",
|
|||
|
|
"name": "Sao10K: Llama 3.1 70B Hanami x1",
|
|||
|
|
"created": 1736302854,
|
|||
|
|
"description": "This is [Sao10K](/sao10k)'s experiment over [Euryale v2.2](/sao10k/l3.1-euryale-70b).",
|
|||
|
|
"context_length": 16000,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "deepseek/deepseek-chat",
|
|||
|
|
"name": "DeepSeek: DeepSeek V3",
|
|||
|
|
"created": 1735241320,
|
|||
|
|
"description": "DeepSeek-V3 is the latest model from the DeepSeek team, building upon the instruction following and coding abilities of the previous versions. Pre-trained on nearly 15 trillion tokens, the reported evaluations...",
|
|||
|
|
"context_length": 163840,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "sao10k/l3.3-euryale-70b",
|
|||
|
|
"name": "Sao10K: Llama 3.3 Euryale 70B",
|
|||
|
|
"created": 1734535928,
|
|||
|
|
"description": "Euryale L3.3 70B is a model focused on creative roleplay from [Sao10k](https://ko-fi.com/sao10k). It is the successor of [Euryale L3 70B v2.2](/models/sao10k/l3-euryale-70b).",
|
|||
|
|
"context_length": 131072,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "openai/o1",
|
|||
|
|
"name": "OpenAI: o1",
|
|||
|
|
"created": 1734459999,
|
|||
|
|
"description": "The latest and strongest model family from OpenAI, o1 is designed to spend more time thinking before responding. The o1 model series is trained with large-scale reinforcement learning to reason...",
|
|||
|
|
"context_length": 200000,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "cohere/command-r7b-12-2024",
|
|||
|
|
"name": "Cohere: Command R7B (12-2024)",
|
|||
|
|
"created": 1734158152,
|
|||
|
|
"description": "Command R7B (12-2024) is a small, fast update of the Command R+ model, delivered in December 2024. It excels at RAG, tool use, agents, and similar tasks requiring complex reasoning...",
|
|||
|
|
"context_length": 128000,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "meta-llama/llama-3.3-70b-instruct:free",
|
|||
|
|
"name": "Meta: Llama 3.3 70B Instruct (free)",
|
|||
|
|
"created": 1733506137,
|
|||
|
|
"description": "The Meta Llama 3.3 multilingual large language model (LLM) is a pretrained and instruction tuned generative model in 70B (text in/text out). The Llama 3.3 instruction tuned text only model...",
|
|||
|
|
"context_length": 65536,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "meta-llama/llama-3.3-70b-instruct",
|
|||
|
|
"name": "Meta: Llama 3.3 70B Instruct",
|
|||
|
|
"created": 1733506137,
|
|||
|
|
"description": "The Meta Llama 3.3 multilingual large language model (LLM) is a pretrained and instruction tuned generative model in 70B (text in/text out). The Llama 3.3 instruction tuned text only model...",
|
|||
|
|
"context_length": 131072,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "amazon/nova-lite-v1",
|
|||
|
|
"name": "Amazon: Nova Lite 1.0",
|
|||
|
|
"created": 1733437363,
|
|||
|
|
"description": "Amazon Nova Lite 1.0 is a very low-cost multimodal model from Amazon that focused on fast processing of image, video, and text inputs to generate text output. Amazon Nova Lite...",
|
|||
|
|
"context_length": 300000,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "amazon/nova-micro-v1",
|
|||
|
|
"name": "Amazon: Nova Micro 1.0",
|
|||
|
|
"created": 1733437237,
|
|||
|
|
"description": "Amazon Nova Micro 1.0 is a text-only model that delivers the lowest latency responses in the Amazon Nova family of models at a very low cost. With a context length...",
|
|||
|
|
"context_length": 128000,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "amazon/nova-pro-v1",
|
|||
|
|
"name": "Amazon: Nova Pro 1.0",
|
|||
|
|
"created": 1733436303,
|
|||
|
|
"description": "Amazon Nova Pro 1.0 is a capable multimodal model from Amazon focused on providing a combination of accuracy, speed, and cost for a wide range of tasks. As of December...",
|
|||
|
|
"context_length": 300000,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "openai/gpt-4o-2024-11-20",
|
|||
|
|
"name": "OpenAI: GPT-4o (2024-11-20)",
|
|||
|
|
"created": 1732127594,
|
|||
|
|
"description": "The 2024-11-20 version of GPT-4o offers a leveled-up creative writing ability with more natural, engaging, and tailored writing to improve relevance \u0026 readability. It’s also better at working with uploaded...",
|
|||
|
|
"context_length": 128000,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "mistralai/mistral-large-2411",
|
|||
|
|
"name": "Mistral Large 2411",
|
|||
|
|
"created": 1731978685,
|
|||
|
|
"description": "Mistral Large 2 2411 is an update of [Mistral Large 2](/mistralai/mistral-large) released together with [Pixtral Large 2411](/mistralai/pixtral-large-2411) It provides a significant upgrade on the previous [Mistral Large 24.07](/mistralai/mistral-large-2407), with notable...",
|
|||
|
|
"context_length": 131072,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "mistralai/mistral-large-2407",
|
|||
|
|
"name": "Mistral Large 2407",
|
|||
|
|
"created": 1731978415,
|
|||
|
|
"description": "This is Mistral AI's flagship model, Mistral Large 2 (version mistral-large-2407). It's a proprietary weights-available model and excels at reasoning, code, JSON, chat, and more. Read the launch announcement [here](https://mistral.ai/news/mistral-large-2407/)....",
|
|||
|
|
"context_length": 131072,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "mistralai/pixtral-large-2411",
|
|||
|
|
"name": "Mistral: Pixtral Large 2411",
|
|||
|
|
"created": 1731977388,
|
|||
|
|
"description": "Pixtral Large is a 124B parameter, open-weight, multimodal model built on top of [Mistral Large 2](/mistralai/mistral-large-2411). The model is able to understand documents, charts and natural images. The model is...",
|
|||
|
|
"context_length": 131072,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "qwen/qwen-2.5-coder-32b-instruct",
|
|||
|
|
"name": "Qwen2.5 Coder 32B Instruct",
|
|||
|
|
"created": 1731368400,
|
|||
|
|
"description": "Qwen2.5-Coder is the latest series of Code-Specific Qwen large language models (formerly known as CodeQwen). Qwen2.5-Coder brings the following improvements upon CodeQwen1.5: - Significantly improvements in **code generation**, **code reasoning**...",
|
|||
|
|
"context_length": 32768,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "thedrummer/unslopnemo-12b",
|
|||
|
|
"name": "TheDrummer: UnslopNemo 12B",
|
|||
|
|
"created": 1731103448,
|
|||
|
|
"description": "UnslopNemo v4.1 is the latest addition from the creator of Rocinante, designed for adventure writing and role-play scenarios.",
|
|||
|
|
"context_length": 32768,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "anthropic/claude-3.5-haiku",
|
|||
|
|
"name": "Anthropic: Claude 3.5 Haiku",
|
|||
|
|
"created": 1730678400,
|
|||
|
|
"description": "Claude 3.5 Haiku features offers enhanced capabilities in speed, coding accuracy, and tool use. Engineered to excel in real-time applications, it delivers quick response times that are essential for dynamic...",
|
|||
|
|
"context_length": 200000,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "anthracite-org/magnum-v4-72b",
|
|||
|
|
"name": "Magnum v4 72B",
|
|||
|
|
"created": 1729555200,
|
|||
|
|
"description": "This is a series of models designed to replicate the prose quality of the Claude 3 models, specifically Sonnet(https://openrouter.ai/anthropic/claude-3.5-sonnet) and Opus(https://openrouter.ai/anthropic/claude-3-opus).\n\nThe model is fine-tuned on top of [Qwen2.5 72B](https://openrouter.ai/qwen/qwen-2.5-72b-instruct).",
|
|||
|
|
"context_length": 16384,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "qwen/qwen-2.5-7b-instruct",
|
|||
|
|
"name": "Qwen: Qwen2.5 7B Instruct",
|
|||
|
|
"created": 1729036800,
|
|||
|
|
"description": "Qwen2.5 7B is the latest series of Qwen large language models. Qwen2.5 brings the following improvements upon Qwen2: - Significantly more knowledge and has greatly improved capabilities in coding and...",
|
|||
|
|
"context_length": 32768,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "inflection/inflection-3-pi",
|
|||
|
|
"name": "Inflection: Inflection 3 Pi",
|
|||
|
|
"created": 1728604800,
|
|||
|
|
"description": "Inflection 3 Pi powers Inflection's [Pi](https://pi.ai) chatbot, including backstory, emotional intelligence, productivity, and safety. It has access to recent news, and excels in scenarios like customer support and roleplay. Pi...",
|
|||
|
|
"context_length": 8000,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "inflection/inflection-3-productivity",
|
|||
|
|
"name": "Inflection: Inflection 3 Productivity",
|
|||
|
|
"created": 1728604800,
|
|||
|
|
"description": "Inflection 3 Productivity is optimized for following instructions. It is better for tasks requiring JSON output or precise adherence to provided guidelines. It has access to recent news. For emotional...",
|
|||
|
|
"context_length": 8000,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "thedrummer/rocinante-12b",
|
|||
|
|
"name": "TheDrummer: Rocinante 12B",
|
|||
|
|
"created": 1727654400,
|
|||
|
|
"description": "Rocinante 12B is designed for engaging storytelling and rich prose. Early testers have reported: - Expanded vocabulary with unique and expressive word choices - Enhanced creativity for vivid narratives -...",
|
|||
|
|
"context_length": 32768,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "meta-llama/llama-3.2-1b-instruct",
|
|||
|
|
"name": "Meta: Llama 3.2 1B Instruct",
|
|||
|
|
"created": 1727222400,
|
|||
|
|
"description": "Llama 3.2 1B is a 1-billion-parameter language model focused on efficiently performing natural language tasks, such as summarization, dialogue, and multilingual text analysis. Its smaller size allows it to operate...",
|
|||
|
|
"context_length": 60000,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "meta-llama/llama-3.2-3b-instruct:free",
|
|||
|
|
"name": "Meta: Llama 3.2 3B Instruct (free)",
|
|||
|
|
"created": 1727222400,
|
|||
|
|
"description": "Llama 3.2 3B is a 3-billion-parameter multilingual large language model, optimized for advanced natural language processing tasks like dialogue generation, reasoning, and summarization. Designed with the latest transformer architecture, it...",
|
|||
|
|
"context_length": 131072,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "meta-llama/llama-3.2-3b-instruct",
|
|||
|
|
"name": "Meta: Llama 3.2 3B Instruct",
|
|||
|
|
"created": 1727222400,
|
|||
|
|
"description": "Llama 3.2 3B is a 3-billion-parameter multilingual large language model, optimized for advanced natural language processing tasks like dialogue generation, reasoning, and summarization. Designed with the latest transformer architecture, it...",
|
|||
|
|
"context_length": 80000,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "meta-llama/llama-3.2-11b-vision-instruct",
|
|||
|
|
"name": "Meta: Llama 3.2 11B Vision Instruct",
|
|||
|
|
"created": 1727222400,
|
|||
|
|
"description": "Llama 3.2 11B Vision is a multimodal model with 11 billion parameters, designed to handle tasks combining visual and textual data. It excels in tasks such as image captioning and...",
|
|||
|
|
"context_length": 131072,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "qwen/qwen-2.5-72b-instruct",
|
|||
|
|
"name": "Qwen2.5 72B Instruct",
|
|||
|
|
"created": 1726704000,
|
|||
|
|
"description": "Qwen2.5 72B is the latest series of Qwen large language models. Qwen2.5 brings the following improvements upon Qwen2: - Significantly more knowledge and has greatly improved capabilities in coding and...",
|
|||
|
|
"context_length": 32768,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "cohere/command-r-plus-08-2024",
|
|||
|
|
"name": "Cohere: Command R+ (08-2024)",
|
|||
|
|
"created": 1724976000,
|
|||
|
|
"description": "command-r-plus-08-2024 is an update of the [Command R+](/models/cohere/command-r-plus) with roughly 50% higher throughput and 25% lower latencies as compared to the previous Command R+ version, while keeping the hardware footprint...",
|
|||
|
|
"context_length": 128000,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "cohere/command-r-08-2024",
|
|||
|
|
"name": "Cohere: Command R (08-2024)",
|
|||
|
|
"created": 1724976000,
|
|||
|
|
"description": "command-r-08-2024 is an update of the [Command R](/models/cohere/command-r) with improved performance for multilingual retrieval-augmented generation (RAG) and tool use. More broadly, it is better at math, code and reasoning and...",
|
|||
|
|
"context_length": 128000,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "sao10k/l3.1-euryale-70b",
|
|||
|
|
"name": "Sao10K: Llama 3.1 Euryale 70B v2.2",
|
|||
|
|
"created": 1724803200,
|
|||
|
|
"description": "Euryale L3.1 70B v2.2 is a model focused on creative roleplay from [Sao10k](https://ko-fi.com/sao10k). It is the successor of [Euryale L3 70B v2.1](/models/sao10k/l3-euryale-70b).",
|
|||
|
|
"context_length": 131072,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "nousresearch/hermes-3-llama-3.1-70b",
|
|||
|
|
"name": "Nous: Hermes 3 70B Instruct",
|
|||
|
|
"created": 1723939200,
|
|||
|
|
"description": "Hermes 3 is a generalist language model with many improvements over [Hermes 2](/models/nousresearch/nous-hermes-2-mistral-7b-dpo), including advanced agentic capabilities, much better roleplaying, reasoning, multi-turn conversation, long context coherence, and improvements across the...",
|
|||
|
|
"context_length": 131072,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "nousresearch/hermes-3-llama-3.1-405b:free",
|
|||
|
|
"name": "Nous: Hermes 3 405B Instruct (free)",
|
|||
|
|
"created": 1723766400,
|
|||
|
|
"description": "Hermes 3 is a generalist language model with many improvements over Hermes 2, including advanced agentic capabilities, much better roleplaying, reasoning, multi-turn conversation, long context coherence, and improvements across the...",
|
|||
|
|
"context_length": 131072,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "nousresearch/hermes-3-llama-3.1-405b",
|
|||
|
|
"name": "Nous: Hermes 3 405B Instruct",
|
|||
|
|
"created": 1723766400,
|
|||
|
|
"description": "Hermes 3 is a generalist language model with many improvements over Hermes 2, including advanced agentic capabilities, much better roleplaying, reasoning, multi-turn conversation, long context coherence, and improvements across the...",
|
|||
|
|
"context_length": 131072,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "sao10k/l3-lunaris-8b",
|
|||
|
|
"name": "Sao10K: Llama 3 8B Lunaris",
|
|||
|
|
"created": 1723507200,
|
|||
|
|
"description": "Lunaris 8B is a versatile generalist and roleplaying model based on Llama 3. It's a strategic merge of multiple models, designed to balance creativity with improved logic and general knowledge....",
|
|||
|
|
"context_length": 8192,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "openai/gpt-4o-2024-08-06",
|
|||
|
|
"name": "OpenAI: GPT-4o (2024-08-06)",
|
|||
|
|
"created": 1722902400,
|
|||
|
|
"description": "The 2024-08-06 version of GPT-4o offers improved performance in structured outputs, with the ability to supply a JSON schema in the respone_format. Read more [here](https://openai.com/index/introducing-structured-outputs-in-the-api/). GPT-4o (\"o\" for \"omni\") is...",
|
|||
|
|
"context_length": 128000,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "meta-llama/llama-3.1-8b-instruct",
|
|||
|
|
"name": "Meta: Llama 3.1 8B Instruct",
|
|||
|
|
"created": 1721692800,
|
|||
|
|
"description": "Meta's latest class of model (Llama 3.1) launched with a variety of sizes \u0026 flavors. This 8B instruct-tuned version is fast and efficient. It has demonstrated strong performance compared to...",
|
|||
|
|
"context_length": 16384,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "meta-llama/llama-3.1-70b-instruct",
|
|||
|
|
"name": "Meta: Llama 3.1 70B Instruct",
|
|||
|
|
"created": 1721692800,
|
|||
|
|
"description": "Meta's latest class of model (Llama 3.1) launched with a variety of sizes \u0026 flavors. This 70B instruct-tuned version is optimized for high quality dialogue usecases. It has demonstrated strong...",
|
|||
|
|
"context_length": 131072,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "mistralai/mistral-nemo",
|
|||
|
|
"name": "Mistral: Mistral Nemo",
|
|||
|
|
"created": 1721347200,
|
|||
|
|
"description": "A 12B parameter model with a 128k token context length built by Mistral in collaboration with NVIDIA. The model is multilingual, supporting English, French, German, Spanish, Italian, Portuguese, Chinese, Japanese,...",
|
|||
|
|
"context_length": 131072,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "openai/gpt-4o-mini",
|
|||
|
|
"name": "OpenAI: GPT-4o-mini",
|
|||
|
|
"created": 1721260800,
|
|||
|
|
"description": "GPT-4o mini is OpenAI's newest model after [GPT-4 Omni](/models/openai/gpt-4o), supporting both text and image inputs with text outputs. As their most advanced small model, it is many multiples more affordable...",
|
|||
|
|
"context_length": 128000,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "openai/gpt-4o-mini-2024-07-18",
|
|||
|
|
"name": "OpenAI: GPT-4o-mini (2024-07-18)",
|
|||
|
|
"created": 1721260800,
|
|||
|
|
"description": "GPT-4o mini is OpenAI's newest model after [GPT-4 Omni](/models/openai/gpt-4o), supporting both text and image inputs with text outputs. As their most advanced small model, it is many multiples more affordable...",
|
|||
|
|
"context_length": 128000,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "google/gemma-2-27b-it",
|
|||
|
|
"name": "Google: Gemma 2 27B",
|
|||
|
|
"created": 1720828800,
|
|||
|
|
"description": "Gemma 2 27B by Google is an open model built from the same research and technology used to create the [Gemini models](/models?q=gemini). Gemma models are well-suited for a variety of...",
|
|||
|
|
"context_length": 8192,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "sao10k/l3-euryale-70b",
|
|||
|
|
"name": "Sao10k: Llama 3 Euryale 70B v2.1",
|
|||
|
|
"created": 1718668800,
|
|||
|
|
"description": "Euryale 70B v2.1 is a model focused on creative roleplay from [Sao10k](https://ko-fi.com/sao10k). - Better prompt adherence. - Better anatomy / spatial awareness. - Adapts much better to unique and custom...",
|
|||
|
|
"context_length": 8192,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "nousresearch/hermes-2-pro-llama-3-8b",
|
|||
|
|
"name": "NousResearch: Hermes 2 Pro - Llama-3 8B",
|
|||
|
|
"created": 1716768000,
|
|||
|
|
"description": "Hermes 2 Pro is an upgraded, retrained version of Nous Hermes 2, consisting of an updated and cleaned version of the OpenHermes 2.5 Dataset, as well as a newly introduced...",
|
|||
|
|
"context_length": 8192,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "openai/gpt-4o",
|
|||
|
|
"name": "OpenAI: GPT-4o",
|
|||
|
|
"created": 1715558400,
|
|||
|
|
"description": "GPT-4o (\"o\" for \"omni\") is OpenAI's latest AI model, supporting both text and image inputs with text outputs. It maintains the intelligence level of [GPT-4 Turbo](/models/openai/gpt-4-turbo) while being twice as...",
|
|||
|
|
"context_length": 128000,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "openai/gpt-4o-2024-05-13",
|
|||
|
|
"name": "OpenAI: GPT-4o (2024-05-13)",
|
|||
|
|
"created": 1715558400,
|
|||
|
|
"description": "GPT-4o (\"o\" for \"omni\") is OpenAI's latest AI model, supporting both text and image inputs with text outputs. It maintains the intelligence level of [GPT-4 Turbo](/models/openai/gpt-4-turbo) while being twice as...",
|
|||
|
|
"context_length": 128000,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "meta-llama/llama-3-8b-instruct",
|
|||
|
|
"name": "Meta: Llama 3 8B Instruct",
|
|||
|
|
"created": 1713398400,
|
|||
|
|
"description": "Meta's latest class of model (Llama 3) launched with a variety of sizes \u0026 flavors. This 8B instruct-tuned version was optimized for high quality dialogue usecases. It has demonstrated strong...",
|
|||
|
|
"context_length": 8192,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "meta-llama/llama-3-70b-instruct",
|
|||
|
|
"name": "Meta: Llama 3 70B Instruct",
|
|||
|
|
"created": 1713398400,
|
|||
|
|
"description": "Meta's latest class of model (Llama 3) launched with a variety of sizes \u0026 flavors. This 70B instruct-tuned version was optimized for high quality dialogue usecases. It has demonstrated strong...",
|
|||
|
|
"context_length": 8192,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "mistralai/mixtral-8x22b-instruct",
|
|||
|
|
"name": "Mistral: Mixtral 8x22B Instruct",
|
|||
|
|
"created": 1713312000,
|
|||
|
|
"description": "Mistral's official instruct fine-tuned version of [Mixtral 8x22B](/models/mistralai/mixtral-8x22b). It uses 39B active parameters out of 141B, offering unparalleled cost efficiency for its size. Its strengths include: - strong math, coding,...",
|
|||
|
|
"context_length": 65536,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "microsoft/wizardlm-2-8x22b",
|
|||
|
|
"name": "WizardLM-2 8x22B",
|
|||
|
|
"created": 1713225600,
|
|||
|
|
"description": "WizardLM-2 8x22B is Microsoft AI's most advanced Wizard model. It demonstrates highly competitive performance compared to leading proprietary models, and it consistently outperforms all existing state-of-the-art opensource models. It is...",
|
|||
|
|
"context_length": 65535,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "openai/gpt-4-turbo",
|
|||
|
|
"name": "OpenAI: GPT-4 Turbo",
|
|||
|
|
"created": 1712620800,
|
|||
|
|
"description": "The latest GPT-4 Turbo model with vision capabilities. Vision requests can now use JSON mode and function calling.\n\nTraining data: up to December 2023.",
|
|||
|
|
"context_length": 128000,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "anthropic/claude-3-haiku",
|
|||
|
|
"name": "Anthropic: Claude 3 Haiku",
|
|||
|
|
"created": 1710288000,
|
|||
|
|
"description": "Claude 3 Haiku is Anthropic's fastest and most compact model for\nnear-instant responsiveness. Quick and accurate targeted performance.\n\nSee the launch announcement and benchmark results [here](https://www.anthropic.com/news/claude-3-haiku)\n\n#multimodal",
|
|||
|
|
"context_length": 200000,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "mistralai/mistral-large",
|
|||
|
|
"name": "Mistral Large",
|
|||
|
|
"created": 1708905600,
|
|||
|
|
"description": "This is Mistral AI's flagship model, Mistral Large 2 (version `mistral-large-2407`). It's a proprietary weights-available model and excels at reasoning, code, JSON, chat, and more. Read the launch announcement [here](https://mistral.ai/news/mistral-large-2407/)....",
|
|||
|
|
"context_length": 128000,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "openai/gpt-3.5-turbo-0613",
|
|||
|
|
"name": "OpenAI: GPT-3.5 Turbo (older v0613)",
|
|||
|
|
"created": 1706140800,
|
|||
|
|
"description": "GPT-3.5 Turbo is OpenAI's fastest model. It can understand and generate natural language or code, and is optimized for chat and traditional completion tasks.\n\nTraining data up to Sep 2021.",
|
|||
|
|
"context_length": 4095,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "openai/gpt-4-turbo-preview",
|
|||
|
|
"name": "OpenAI: GPT-4 Turbo Preview",
|
|||
|
|
"created": 1706140800,
|
|||
|
|
"description": "The preview GPT-4 model with improved instruction following, JSON mode, reproducible outputs, parallel function calling, and more. Training data: up to Dec 2023. **Note:** heavily rate limited by OpenAI while...",
|
|||
|
|
"context_length": 128000,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "openrouter/auto",
|
|||
|
|
"name": "Auto Router",
|
|||
|
|
"created": 1699401600,
|
|||
|
|
"description": "Your prompt will be processed by a meta-model and routed to one of dozens of models (see below), optimizing for the best possible output. To see which model was used,...",
|
|||
|
|
"context_length": 2000000,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "openai/gpt-4-1106-preview",
|
|||
|
|
"name": "OpenAI: GPT-4 Turbo (older v1106)",
|
|||
|
|
"created": 1699228800,
|
|||
|
|
"description": "The latest GPT-4 Turbo model with vision capabilities. Vision requests can now use JSON mode and function calling.\n\nTraining data: up to April 2023.",
|
|||
|
|
"context_length": 128000,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "mistralai/mistral-7b-instruct-v0.1",
|
|||
|
|
"name": "Mistral: Mistral 7B Instruct v0.1",
|
|||
|
|
"created": 1695859200,
|
|||
|
|
"description": "A 7.3B parameter model that outperforms Llama 2 13B on all benchmarks, with optimizations for speed and context length.",
|
|||
|
|
"context_length": 2824,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "openai/gpt-3.5-turbo-instruct",
|
|||
|
|
"name": "OpenAI: GPT-3.5 Turbo Instruct",
|
|||
|
|
"created": 1695859200,
|
|||
|
|
"description": "This model is a variant of GPT-3.5 Turbo tuned for instructional prompts and omitting chat-related optimizations. Training data: up to Sep 2021.",
|
|||
|
|
"context_length": 4095,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "openai/gpt-3.5-turbo-16k",
|
|||
|
|
"name": "OpenAI: GPT-3.5 Turbo 16k",
|
|||
|
|
"created": 1693180800,
|
|||
|
|
"description": "This model offers four times the context length of gpt-3.5-turbo, allowing it to support approximately 20 pages of text in a single request at a higher cost. Training data: up...",
|
|||
|
|
"context_length": 16385,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "mancer/weaver",
|
|||
|
|
"name": "Mancer: Weaver (alpha)",
|
|||
|
|
"created": 1690934400,
|
|||
|
|
"description": "An attempt to recreate Claude-style verbosity, but don't expect the same level of coherence or memory. Meant for use in roleplay/narrative situations.",
|
|||
|
|
"context_length": 8000,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "undi95/remm-slerp-l2-13b",
|
|||
|
|
"name": "ReMM SLERP 13B",
|
|||
|
|
"created": 1689984000,
|
|||
|
|
"description": "A recreation trial of the original MythoMax-L2-B13 but with updated models. #merge",
|
|||
|
|
"context_length": 6144,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "gryphe/mythomax-l2-13b",
|
|||
|
|
"name": "MythoMax 13B",
|
|||
|
|
"created": 1688256000,
|
|||
|
|
"description": "One of the highest performing and most popular fine-tunes of Llama 2 13B, with rich descriptions and roleplay. #merge",
|
|||
|
|
"context_length": 4096,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "openai/gpt-4",
|
|||
|
|
"name": "OpenAI: GPT-4",
|
|||
|
|
"created": 1685232000,
|
|||
|
|
"description": "OpenAI's flagship model, GPT-4 is a large-scale multimodal language model capable of solving difficult problems with greater accuracy than previous models due to its broader general knowledge and advanced reasoning...",
|
|||
|
|
"context_length": 8191,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "openai/gpt-4-0314",
|
|||
|
|
"name": "OpenAI: GPT-4 (older v0314)",
|
|||
|
|
"created": 1685232000,
|
|||
|
|
"description": "GPT-4-0314 is the first version of GPT-4 released, with a context length of 8,192 tokens, and was supported until June 14. Training data: up to Sep 2021.",
|
|||
|
|
"context_length": 8191,
|
|||
|
|
"pricing": {}
|
|||
|
|
},
|
|||
|
|
{
|
|||
|
|
"id": "openai/gpt-3.5-turbo",
|
|||
|
|
"name": "OpenAI: GPT-3.5 Turbo",
|
|||
|
|
"created": 1685232000,
|
|||
|
|
"description": "GPT-3.5 Turbo is OpenAI's fastest model. It can understand and generate natural language or code, and is optimized for chat and traditional completion tasks.\n\nTraining data up to Sep 2021.",
|
|||
|
|
"context_length": 16385,
|
|||
|
|
"pricing": {}
|
|||
|
|
}
|
|||
|
|
],
|
|||
|
|
"paid": 0,
|
|||
|
|
"total": 363
|
|||
|
|
}
|