{"object":"list","data":[{"id":"anthropic/claude-haiku-4-5","object":"model","created":1778671374,"owned_by":"anthropic","name":"Claude Haiku 4.5","pricing":{"input":1.0,"output":5.0,"prompt":"0.000001","completion":"0.000005","image":"0","request":"0","input_cache_read":"0.0000001"},"context_length":200000,"max_output_length":8192,"architecture":{"inputModalities":["text","image"],"outputModalities":["text"]},"input_modalities":["text","image"],"output_modalities":["text"],"supported_sampling_parameters":["temperature","top_p","top_k","max_tokens","stop"],"supported_features":["tools","structured_outputs"],"description":"Anthropic's fastest model with near-frontier intelligence. Extended thinking support with 200K context window. Best for high-throughput, cost-sensitive workloads.","top_provider":{"context_length":200000,"max_completion_tokens":8192,"is_moderated":false}},{"id":"anthropic/claude-opus-4-6","object":"model","created":1770414679,"owned_by":"anthropic","name":"Claude Opus 4.6","pricing":{"input":5.0,"output":25.0,"prompt":"0.000005","completion":"0.000025","image":"0","request":"0","input_cache_read":"0"},"context_length":200000,"max_output_length":32768,"architecture":{"inputModalities":["text"],"outputModalities":["text"]},"input_modalities":["text"],"output_modalities":["text"],"supported_sampling_parameters":["temperature","top_p","top_k","max_tokens","stop"],"supported_features":["tools","structured_outputs","reasoning"],"description":"Anthropic's most intelligent model for building agents and coding","top_provider":{"context_length":200000,"max_completion_tokens":32768,"is_moderated":false}},{"id":"anthropic/claude-opus-4-7","object":"model","created":1778671374,"owned_by":"anthropic","name":"Claude Opus 4.7","pricing":{"input":5.0,"output":25.0,"prompt":"0.000005","completion":"0.000025","image":"0","request":"0","input_cache_read":"0.0000005"},"context_length":1000000,"max_output_length":32768,"architecture":{"inputModalities":["text","image"],"outputModalities":["text"]},"input_modalities":["text","image"],"output_modalities":["text"],"supported_sampling_parameters":["temperature","top_p","top_k","max_tokens","stop"],"supported_features":["tools","structured_outputs","reasoning"],"description":"Anthropic's most capable model. Next-generation built for long-running agents and complex coding tasks. 1M token context window with 128K max output.","top_provider":{"context_length":1000000,"max_completion_tokens":32768,"is_moderated":false}},{"id":"anthropic/claude-sonnet-4-5","object":"model","created":1769639067,"owned_by":"anthropic","name":"Claude Sonnet 4.5","pricing":{"input":3.0,"output":15.5,"prompt":"0.000003","completion":"0.0000155","image":"0","request":"0","input_cache_read":"0"},"context_length":200000,"max_output_length":16384,"supported_sampling_parameters":["temperature","top_p","top_k","max_tokens","stop"],"supported_features":["tools","structured_outputs"],"description":"Anthropic's Claude Sonnet 4.5 - a powerful, efficient model balancing intelligence and speed. Excels at complex reasoning, coding, and creative tasks with 200K context window. Anonymized, not TEE-protected.","top_provider":{"context_length":200000,"max_completion_tokens":16384,"is_moderated":false}},{"id":"anthropic/claude-sonnet-4-6","object":"model","created":1778671374,"owned_by":"anthropic","name":"Claude Sonnet 4.6","pricing":{"input":3.0,"output":15.0,"prompt":"0.000003","completion":"0.000015","image":"0","request":"0","input_cache_read":"0.0000003"},"context_length":1000000,"max_output_length":16384,"architecture":{"inputModalities":["text","image"],"outputModalities":["text"]},"input_modalities":["text","image"],"output_modalities":["text"],"supported_sampling_parameters":["temperature","top_p","top_k","max_tokens","stop"],"supported_features":["tools","structured_outputs","reasoning"],"description":"Anthropic's best balance of speed and intelligence. Extended thinking support with 1M token context window and 64K max output. Ideal for most production workloads.","top_provider":{"context_length":1000000,"max_completion_tokens":16384,"is_moderated":false}},{"id":"black-forest-labs/FLUX.2-klein-4B","object":"model","created":1769640850,"owned_by":"nearai","name":"FLUX.2-klein-4B","hugging_face_id":"black-forest-labs/FLUX.2-klein-4B","quantization":"bf16","pricing":{"input":1.0,"output":1.0,"prompt":"0.000001","completion":"0.000001","image":"0.012","request":"0","input_cache_read":"0"},"context_length":128000,"max_output_length":1,"architecture":{"inputModalities":["text"],"outputModalities":["image"]},"input_modalities":["text"],"output_modalities":["image"],"supported_sampling_parameters":["seed"],"supported_features":[],"description":"The FLUX.2 [klein] model family are our fastest image models to date. FLUX.2 [klein] unifies generation and editing in a single compact architecture, delivering state-of-the-art quality with end-to-end inference in as low as under a second. Built for applications that require real-time image generation without sacrificing quality.","top_provider":{"context_length":128000,"max_completion_tokens":1,"is_moderated":false}},{"id":"google/gemini-2.5-flash","object":"model","created":1778671374,"owned_by":"google","name":"Gemini 2.5 Flash","pricing":{"input":0.3,"output":2.5,"prompt":"0.0000003","completion":"0.0000025","image":"0","request":"0","input_cache_read":"0"},"context_length":1000000,"max_output_length":8192,"architecture":{"inputModalities":["text","image"],"outputModalities":["text"]},"input_modalities":["text","image"],"output_modalities":["text"],"supported_sampling_parameters":["temperature","top_p","top_k","max_tokens","stop"],"supported_features":["tools","structured_outputs","json_mode"],"description":"Google's fast hybrid reasoning model with 1M token context window. Optimized for speed and cost while maintaining strong performance across tasks.","top_provider":{"context_length":1000000,"max_completion_tokens":8192,"is_moderated":false}},{"id":"google/gemini-2.5-flash-lite","object":"model","created":1778671374,"owned_by":"google","name":"Gemini 2.5 Flash Lite","pricing":{"input":0.1,"output":0.4,"prompt":"0.0000001","completion":"0.0000004","image":"0","request":"0","input_cache_read":"0"},"context_length":1048576,"max_output_length":8192,"architecture":{"inputModalities":["text"],"outputModalities":["text"]},"input_modalities":["text"],"output_modalities":["text"],"supported_sampling_parameters":["temperature","top_p","top_k","max_tokens","stop"],"supported_features":["tools","structured_outputs","json_mode"],"description":"Gemini 2.5 Flash-Lite is a lightweight reasoning model in the Gemini 2.5 family, optimized for ultra-low latency and cost efficiency. It offers improved throughput, faster token generation, and better performance...","top_provider":{"context_length":1048576,"max_completion_tokens":8192,"is_moderated":false}},{"id":"google/gemini-2.5-pro","object":"model","created":1778671374,"owned_by":"google","name":"Gemini 2.5 Pro","pricing":{"input":1.25,"output":10.0,"prompt":"0.00000125","completion":"0.00001","image":"0","request":"0","input_cache_read":"0"},"context_length":1000000,"max_output_length":16384,"architecture":{"inputModalities":["text","image"],"outputModalities":["text"]},"input_modalities":["text","image"],"output_modalities":["text"],"supported_sampling_parameters":["temperature","top_p","top_k","max_tokens","stop"],"supported_features":["tools","structured_outputs","json_mode","reasoning"],"description":"Google's strongest reasoning model. Excels at coding, math, and complex analysis with 1M token context window. Supports text and image input.","top_provider":{"context_length":1000000,"max_completion_tokens":16384,"is_moderated":false}},{"id":"google/gemini-3.1-flash-lite","object":"model","created":1778671374,"owned_by":"google","name":"Gemini 3.1 Flash Lite","pricing":{"input":0.25,"output":1.5,"prompt":"0.00000025","completion":"0.0000015","image":"0","request":"0","input_cache_read":"0"},"context_length":1048576,"max_output_length":8192,"architecture":{"inputModalities":["text"],"outputModalities":["text"]},"input_modalities":["text"],"output_modalities":["text"],"supported_sampling_parameters":["temperature","top_p","top_k","max_tokens","stop"],"supported_features":["tools","structured_outputs","json_mode"],"description":"Gemini 3.1 Flash Lite is Google’s GA high-efficiency multimodal model optimized for low-latency, high-volume workloads. It supports text, image, video, audio, and PDF inputs, and is designed for lightweight agentic...","top_provider":{"context_length":1048576,"max_completion_tokens":8192,"is_moderated":false}},{"id":"google/gemini-3.5-flash","object":"model","created":1779214705,"owned_by":"google","name":"Gemini 3.5 Flash","pricing":{"input":1.5,"output":9.0,"prompt":"0.0000015","completion":"0.000009","image":"0","request":"0","input_cache_read":"0.00000015"},"context_length":1000000,"max_output_length":8192,"architecture":{"inputModalities":["text","image"],"outputModalities":["text"]},"input_modalities":["text","image"],"output_modalities":["text"],"supported_sampling_parameters":["temperature","top_p","top_k","max_tokens","stop"],"supported_features":["tools","structured_outputs","json_mode"],"description":"Google's high-efficiency multimodal model with 1M token context. Strong agentic and coding performance, rivaling larger flagship models on many tasks.","top_provider":{"context_length":1000000,"max_completion_tokens":8192,"is_moderated":false}},{"id":"google/gemma-4-31B-it","object":"model","created":1778743163,"owned_by":"nearai","name":"Gemma 4 31B Instruct","hugging_face_id":"google/gemma-4-31B-it","quantization":"bf16","pricing":{"input":0.13,"output":0.4,"prompt":"0.00000013","completion":"0.0000004","image":"0","request":"0","input_cache_read":"0.000000026"},"context_length":262144,"max_output_length":8192,"architecture":{"inputModalities":["text"],"outputModalities":["text"]},"input_modalities":["text"],"output_modalities":["text"],"supported_sampling_parameters":["temperature","top_p","top_k","frequency_penalty","presence_penalty","max_tokens","stop","seed"],"supported_features":["tools"],"description":"Gemma 4 31B Instruct is Google's open-weight 31B-parameter language model, tuned for instruction following and dialogue. Strong general-purpose performance with 32K context window.","top_provider":{"context_length":262144,"max_completion_tokens":8192,"is_moderated":false}},{"id":"moonshotai/kimi-k2.6","object":"model","created":1779449813,"owned_by":"moonshotai","name":"Kimi K2.6","hugging_face_id":"moonshotai/Kimi-K2.6","pricing":{"input":0.8,"output":3.5,"prompt":"0.0000008","completion":"0.0000035","image":"0","request":"0","input_cache_read":"0.0000003"},"context_length":262144,"max_output_length":8192,"architecture":{"inputModalities":["text","image"],"outputModalities":["text"]},"input_modalities":["text","image"],"output_modalities":["text"],"supported_sampling_parameters":["temperature","top_p","top_k","frequency_penalty","presence_penalty","max_tokens","stop","seed"],"supported_features":["tools","structured_outputs"],"description":"Moonshot AI's frontier MoE model with 256K context window. Excels at complex reasoning, math, coding, and multilingual tasks with native vision support.","top_provider":{"context_length":262144,"max_completion_tokens":8192,"is_moderated":false}},{"id":"openai/gpt-4.1","object":"model","created":1778671374,"owned_by":"openai","name":"OpenAI GPT-4.1","pricing":{"input":2.0,"output":8.0,"prompt":"0.000002","completion":"0.000008","image":"0","request":"0","input_cache_read":"0.0000005"},"context_length":1000000,"max_output_length":16384,"architecture":{"inputModalities":["text","image"],"outputModalities":["text"]},"input_modalities":["text","image"],"output_modalities":["text"],"supported_sampling_parameters":["temperature","top_p","frequency_penalty","presence_penalty","max_tokens","stop","seed"],"supported_features":["tools","structured_outputs","json_mode"],"description":"OpenAI's flagship production model with 1M token context window. Excels at instruction following, coding, and long-context tasks. 75% cheaper cached input reads.","top_provider":{"context_length":1000000,"max_completion_tokens":16384,"is_moderated":false}},{"id":"openai/gpt-4.1-mini","object":"model","created":1778671374,"owned_by":"openai","name":"OpenAI GPT-4.1 Mini","pricing":{"input":0.4,"output":1.6,"prompt":"0.0000004","completion":"0.0000016","image":"0","request":"0","input_cache_read":"0.0000001"},"context_length":1000000,"max_output_length":16384,"architecture":{"inputModalities":["text","image"],"outputModalities":["text"]},"input_modalities":["text","image"],"output_modalities":["text"],"supported_sampling_parameters":["temperature","top_p","frequency_penalty","presence_penalty","max_tokens","stop","seed"],"supported_features":["tools","structured_outputs","json_mode"],"description":"Cost-effective version of GPT-4.1 with the same 1M token context window. Great balance of capability and cost for production workloads.","top_provider":{"context_length":1000000,"max_completion_tokens":16384,"is_moderated":false}},{"id":"openai/gpt-4.1-nano","object":"model","created":1778671374,"owned_by":"openai","name":"OpenAI GPT-4.1 Nano","pricing":{"input":0.1,"output":0.4,"prompt":"0.0000001","completion":"0.0000004","image":"0","request":"0","input_cache_read":"0.000000025"},"context_length":1000000,"max_output_length":16384,"architecture":{"inputModalities":["text","image"],"outputModalities":["text"]},"input_modalities":["text","image"],"output_modalities":["text"],"supported_sampling_parameters":["temperature","top_p","frequency_penalty","presence_penalty","max_tokens","stop","seed"],"supported_features":["tools","structured_outputs","json_mode"],"description":"OpenAI's most cost-efficient model with 1M token context. Ideal for classification, extraction, and high-volume tasks where cost matters most.","top_provider":{"context_length":1000000,"max_completion_tokens":16384,"is_moderated":false}},{"id":"openai/gpt-5","object":"model","created":1778671374,"owned_by":"openai","name":"OpenAI GPT-5","pricing":{"input":1.25,"output":10.0,"prompt":"0.00000125","completion":"0.00001","image":"0","request":"0","input_cache_read":"0.000000125"},"context_length":400000,"max_output_length":16384,"architecture":{"inputModalities":["text","image"],"outputModalities":["text"]},"input_modalities":["text","image"],"output_modalities":["text"],"supported_sampling_parameters":["temperature","top_p","frequency_penalty","presence_penalty","max_tokens","stop","seed"],"supported_features":["tools","structured_outputs","json_mode","reasoning"],"description":"OpenAI's next-generation model with enhanced reasoning and 400K context window. Strong performance across coding, math, and creative tasks.","top_provider":{"context_length":400000,"max_completion_tokens":16384,"is_moderated":false}},{"id":"openai/gpt-5.1","object":"model","created":1778671374,"owned_by":"openai","name":"GPT-5.1","pricing":{"input":1.25,"output":10.0,"prompt":"0.00000125","completion":"0.00001","image":"0","request":"0","input_cache_read":"0"},"context_length":400000,"max_output_length":16384,"architecture":{"inputModalities":["text"],"outputModalities":["text"]},"input_modalities":["text"],"output_modalities":["text"],"supported_sampling_parameters":["temperature","top_p","frequency_penalty","presence_penalty","max_tokens","stop","seed"],"supported_features":["tools","structured_outputs","json_mode","reasoning"],"description":"GPT-5.1 is the latest frontier-grade model in the GPT-5 series, offering stronger general-purpose reasoning, improved instruction adherence, and a more natural conversational style compared to GPT-5. It uses adaptive reasoning...","top_provider":{"context_length":400000,"max_completion_tokens":16384,"is_moderated":false}},{"id":"openai/gpt-5.2","object":"model","created":1769638083,"owned_by":"openai","name":"OpenAI GPT-5.2","pricing":{"input":1.8,"output":15.5,"prompt":"0.0000018","completion":"0.0000155","image":"0","request":"0","input_cache_read":"0.00000018"},"context_length":400000,"max_output_length":16384,"supported_sampling_parameters":["temperature","top_p","frequency_penalty","presence_penalty","max_tokens","stop","seed"],"supported_features":["tools","structured_outputs","json_mode","reasoning"],"description":"OpenAI GPT-5.2 with 400k context window. Anonymized endpoint optimized for deep reasoning and large-context workflows.","top_provider":{"context_length":400000,"max_completion_tokens":16384,"is_moderated":false}},{"id":"openai/gpt-5.4","object":"model","created":1778671374,"owned_by":"openai","name":"GPT-5.4","pricing":{"input":2.5,"output":15.0,"prompt":"0.0000025","completion":"0.000015","image":"0","request":"0","input_cache_read":"0"},"context_length":1050000,"max_output_length":16384,"architecture":{"inputModalities":["text"],"outputModalities":["text"]},"input_modalities":["text"],"output_modalities":["text"],"supported_sampling_parameters":["temperature","top_p","frequency_penalty","presence_penalty","max_tokens","stop","seed"],"supported_features":["tools","structured_outputs","json_mode","reasoning"],"description":"GPT-5.4 is OpenAI’s latest frontier model, unifying the Codex and GPT lines into a single system. It features a 1M+ token context window (922K input, 128K output) with support for...","top_provider":{"context_length":1050000,"max_completion_tokens":16384,"is_moderated":false}},{"id":"openai/gpt-5.4-mini","object":"model","created":1778671374,"owned_by":"openai","name":"GPT-5.4 Mini","pricing":{"input":0.75,"output":4.5,"prompt":"0.00000075","completion":"0.0000045","image":"0","request":"0","input_cache_read":"0"},"context_length":400000,"max_output_length":16384,"architecture":{"inputModalities":["text"],"outputModalities":["text"]},"input_modalities":["text"],"output_modalities":["text"],"supported_sampling_parameters":["temperature","top_p","frequency_penalty","presence_penalty","max_tokens","stop","seed"],"supported_features":["tools","structured_outputs","json_mode","reasoning"],"description":"GPT-5.4 mini brings the core capabilities of GPT-5.4 to a faster, more efficient model optimized for high-throughput workloads. It supports text and image inputs with strong performance across reasoning, coding,...","top_provider":{"context_length":400000,"max_completion_tokens":16384,"is_moderated":false}},{"id":"openai/gpt-5.4-nano","object":"model","created":1778671374,"owned_by":"openai","name":"GPT-5.4 Nano","pricing":{"input":0.2,"output":1.25,"prompt":"0.0000002","completion":"0.00000125","image":"0","request":"0","input_cache_read":"0"},"context_length":400000,"max_output_length":16384,"architecture":{"inputModalities":["text"],"outputModalities":["text"]},"input_modalities":["text"],"output_modalities":["text"],"supported_sampling_parameters":["temperature","top_p","frequency_penalty","presence_penalty","max_tokens","stop","seed"],"supported_features":["tools","structured_outputs","json_mode","reasoning"],"description":"GPT-5.4 nano is the most lightweight and cost-efficient variant of the GPT-5.4 family, optimized for speed-critical and high-volume tasks. It supports text and image inputs and is designed for low-latency...","top_provider":{"context_length":400000,"max_completion_tokens":16384,"is_moderated":false}},{"id":"openai/gpt-5.5","object":"model","created":1778671374,"owned_by":"openai","name":"GPT-5.5","pricing":{"input":5.0,"output":30.0,"prompt":"0.000005","completion":"0.00003","image":"0","request":"0","input_cache_read":"0"},"context_length":1050000,"max_output_length":16384,"architecture":{"inputModalities":["text"],"outputModalities":["text"]},"input_modalities":["text"],"output_modalities":["text"],"supported_sampling_parameters":["temperature","top_p","frequency_penalty","presence_penalty","max_tokens","stop","seed"],"supported_features":["tools","structured_outputs","json_mode","reasoning"],"description":"GPT-5.5 is OpenAI’s frontier model designed for complex professional workloads, building on GPT-5.4 with stronger reasoning, higher reliability, and improved token efficiency on hard tasks. It features a 1M+ token...","top_provider":{"context_length":1050000,"max_completion_tokens":16384,"is_moderated":false}},{"id":"openai/gpt-5-mini","object":"model","created":1778671374,"owned_by":"openai","name":"GPT-5 Mini","pricing":{"input":0.25,"output":2.0,"prompt":"0.00000025","completion":"0.000002","image":"0","request":"0","input_cache_read":"0"},"context_length":400000,"max_output_length":16384,"architecture":{"inputModalities":["text"],"outputModalities":["text"]},"input_modalities":["text"],"output_modalities":["text"],"supported_sampling_parameters":["temperature","top_p","frequency_penalty","presence_penalty","max_tokens","stop","seed"],"supported_features":["tools","structured_outputs","json_mode","reasoning"],"description":"GPT-5 Mini is a compact version of GPT-5, designed to handle lighter-weight reasoning tasks. It provides the same instruction-following and safety-tuning benefits as GPT-5, but with reduced latency and cost....","top_provider":{"context_length":400000,"max_completion_tokens":16384,"is_moderated":false}},{"id":"openai/gpt-5-nano","object":"model","created":1778671374,"owned_by":"openai","name":"GPT-5 Nano","pricing":{"input":0.05,"output":0.4,"prompt":"0.00000005","completion":"0.0000004","image":"0","request":"0","input_cache_read":"0"},"context_length":400000,"max_output_length":16384,"architecture":{"inputModalities":["text"],"outputModalities":["text"]},"input_modalities":["text"],"output_modalities":["text"],"supported_sampling_parameters":["temperature","top_p","frequency_penalty","presence_penalty","max_tokens","stop","seed"],"supported_features":["tools","structured_outputs","json_mode","reasoning"],"description":"GPT-5-Nano is the smallest and fastest variant in the GPT-5 system, optimized for developer tools, rapid interactions, and ultra-low latency environments. While limited in reasoning depth compared to its larger...","top_provider":{"context_length":400000,"max_completion_tokens":16384,"is_moderated":false}},{"id":"openai/gpt-oss-120b","object":"model","created":1761354458,"owned_by":"nearai","name":"GPT OSS 120B","hugging_face_id":"openai/gpt-oss-120b","quantization":"bf16","pricing":{"input":0.15,"output":0.55,"prompt":"0.00000015","completion":"0.00000055","image":"0","request":"0","input_cache_read":"0.00000003"},"context_length":131000,"max_output_length":16384,"supported_sampling_parameters":["temperature","top_p","frequency_penalty","presence_penalty","max_tokens","stop","seed"],"supported_features":["tools","structured_outputs","reasoning"],"description":"gpt-oss-120b is an open-weight, 117B-parameter Mixture-of-Experts (MoE) language model from OpenAI designed for high-reasoning, agentic, and general-purpose production use cases. It activates 5.1B parameters per forward pass and is optimized to run on a single H100 GPU with native MXFP4 quantization. The model supports configurable reasoning depth, full chain-of-thought access, and native tool use, including function calling, browsing, and structured output generation.","top_provider":{"context_length":131000,"max_completion_tokens":16384,"is_moderated":false}},{"id":"openai/o3","object":"model","created":1778671374,"owned_by":"openai","name":"OpenAI o3","pricing":{"input":2.0,"output":8.0,"prompt":"0.000002","completion":"0.000008","image":"0","request":"0","input_cache_read":"0.000001"},"context_length":200000,"max_output_length":32768,"architecture":{"inputModalities":["text","image"],"outputModalities":["text"]},"input_modalities":["text","image"],"output_modalities":["text"],"supported_sampling_parameters":["max_tokens","stop","seed"],"supported_features":["tools","structured_outputs","reasoning"],"description":"OpenAI's flagship reasoning model. Uses chain-of-thought to solve complex math, coding, and logic problems. 200K context window.","top_provider":{"context_length":200000,"max_completion_tokens":32768,"is_moderated":false}},{"id":"openai/o3-mini","object":"model","created":1778671374,"owned_by":"openai","name":"o3 Mini","pricing":{"input":1.1,"output":4.4,"prompt":"0.0000011","completion":"0.0000044","image":"0","request":"0","input_cache_read":"0"},"context_length":200000,"max_output_length":32768,"architecture":{"inputModalities":["text"],"outputModalities":["text"]},"input_modalities":["text"],"output_modalities":["text"],"supported_sampling_parameters":["max_tokens","stop","seed"],"supported_features":["tools","structured_outputs","reasoning"],"description":"OpenAI o3-mini is a cost-efficient language model optimized for STEM reasoning tasks, particularly excelling in science, mathematics, and coding. This model supports the `reasoning_effort` parameter, which can be set to...","top_provider":{"context_length":200000,"max_completion_tokens":32768,"is_moderated":false}},{"id":"openai/o4-mini","object":"model","created":1778671374,"owned_by":"openai","name":"OpenAI o4 Mini","pricing":{"input":1.1,"output":4.4,"prompt":"0.0000011","completion":"0.0000044","image":"0","request":"0","input_cache_read":"0.00000055"},"context_length":200000,"max_output_length":32768,"architecture":{"inputModalities":["text","image"],"outputModalities":["text"]},"input_modalities":["text","image"],"output_modalities":["text"],"supported_sampling_parameters":["max_tokens","stop","seed"],"supported_features":["tools","structured_outputs","reasoning"],"description":"OpenAI's cost-effective reasoning model. Strong performance on math, coding, and scientific reasoning at a fraction of o3's cost. 200K context window.","top_provider":{"context_length":200000,"max_completion_tokens":32768,"is_moderated":false}},{"id":"openai/privacy-filter","object":"model","created":1779129892,"owned_by":"nearai","name":"Privacy Filter","pricing":{"input":0.01,"output":0.0,"prompt":"0.00000001","completion":"0","image":"0","request":"0","input_cache_read":"0"},"context_length":512,"max_output_length":1024,"architecture":{"inputModalities":["text"],"outputModalities":["text"]},"input_modalities":["text"],"output_modalities":["text"],"supported_sampling_parameters":[],"supported_features":[],"description":"PII detection (token classification) — returns spans for emails, phones, addresses, names, account numbers, secrets. NEAR AI runs this model in a TEE; prompts are not anonymized by the model itself, the cloud-api wraps it to do redaction.","top_provider":{"context_length":512,"max_completion_tokens":1024,"is_moderated":false}},{"id":"openai/whisper-large-v3","object":"model","created":1774015272,"owned_by":"nearai","name":"Whisper Large v3","hugging_face_id":"openai/whisper-large-v3","pricing":{"input":0.01,"output":0.01,"prompt":"0.00000001","completion":"0.00000001","image":"0","request":"0","input_cache_read":"0"},"context_length":448,"max_output_length":1024,"architecture":{"inputModalities":["audio"],"outputModalities":["text"]},"input_modalities":["audio"],"output_modalities":["text"],"supported_sampling_parameters":["temperature"],"supported_features":[],"description":"Whisper is a state-of-the-art model for automatic speech recognition (ASR) and speech translation.","top_provider":{"context_length":448,"max_completion_tokens":1024,"is_moderated":false}},{"id":"Qwen/Qwen3-30B-A3B-Instruct-2507","object":"model","created":1761354458,"owned_by":"nearai","name":"Qwen3 30B A3B Instruct 2507","hugging_face_id":"Qwen/Qwen3-30B-A3B-Instruct-2507","quantization":"bf16","pricing":{"input":0.15,"output":0.55,"prompt":"0.00000015","completion":"0.00000055","image":"0","request":"0","input_cache_read":"0.00000003"},"context_length":262144,"max_output_length":8192,"supported_sampling_parameters":["temperature","top_p","top_k","frequency_penalty","presence_penalty","max_tokens","stop","seed"],"supported_features":["tools","structured_outputs"],"description":"Qwen3-30B-A3B-Instruct-2507 is a mixture-of-experts (MoE) causal language model featuring 30.5 billion total parameters and 3.3 billion activated parameters per inference. It supports ultra-long context up to 262 K tokens and operates exclusively in non-thinking mode, delivering strong enhancements in instruction following, reasoning, logical comprehension, mathematics, coding, multilingual understanding, and alignment with user preferences.","top_provider":{"context_length":262144,"max_completion_tokens":8192,"is_moderated":false}},{"id":"Qwen/Qwen3.5-122B-A10B","object":"model","created":1772549671,"owned_by":"nearai","name":"Qwen3.5 122B A10B","hugging_face_id":"Qwen/Qwen3.5-122B-A10B","quantization":"bf16","pricing":{"input":0.4,"output":3.2,"prompt":"0.0000004","completion":"0.0000032","image":"0","request":"0","input_cache_read":"0.00000008"},"context_length":131072,"max_output_length":16384,"architecture":{"inputModalities":["text"],"outputModalities":["text"]},"input_modalities":["text"],"output_modalities":["text"],"supported_sampling_parameters":["temperature","top_p","top_k","frequency_penalty","presence_penalty","max_tokens","stop","seed"],"supported_features":["tools","structured_outputs"],"description":"Qwen3.5 122B MoE model with 10B active parameters, supporting reasoning and tool calling","top_provider":{"context_length":131072,"max_completion_tokens":16384,"is_moderated":false}},{"id":"Qwen/Qwen3.6-35B-A3B-FP8","object":"model","created":1778743164,"owned_by":"nearai","name":"Qwen 3.6 35B A3B FP8","hugging_face_id":"Qwen/Qwen3.6-35B-A3B-FP8","quantization":"fp8","pricing":{"input":0.17,"output":1.1,"prompt":"0.00000017","completion":"0.0000011","image":"0","request":"0","input_cache_read":"0.000000056"},"context_length":262144,"max_output_length":8192,"architecture":{"inputModalities":["text"],"outputModalities":["text"]},"input_modalities":["text"],"output_modalities":["text"],"supported_sampling_parameters":["temperature","top_p","top_k","max_tokens","stop","seed"],"supported_features":["tools","structured_outputs","reasoning"],"description":"Qwen 3.6 35B is a fast mixture-of-experts language model with ~3B active parameters per token. Strong at reasoning, coding, and multilingual tasks with 32K context window.","top_provider":{"context_length":262144,"max_completion_tokens":8192,"is_moderated":false}},{"id":"qwen/qwen3.7-max","object":"model","created":1779449813,"owned_by":"qwen","name":"Qwen3.7 Max","pricing":{"input":2.8000000000000003,"output":7.5,"prompt":"0.0000028","completion":"0.0000075","image":"0","request":"0","input_cache_read":"0"},"context_length":1000000,"max_output_length":16384,"architecture":{"inputModalities":["text"],"outputModalities":["text"]},"input_modalities":["text"],"output_modalities":["text"],"supported_sampling_parameters":["temperature","top_p","top_k","frequency_penalty","presence_penalty","max_tokens","stop","seed"],"supported_features":["tools","structured_outputs","json_mode"],"description":"Qwen's most capable proprietary model with 1M context window. Strong at reasoning, coding, math, and multilingual tasks.","top_provider":{"context_length":1000000,"max_completion_tokens":16384,"is_moderated":false}},{"id":"Qwen/Qwen3-Embedding-0.6B","object":"model","created":1774015272,"owned_by":"nearai","name":"Qwen3-Embedding-0.6B","hugging_face_id":"Qwen/Qwen3-Embedding-0.6B","quantization":"bf16","pricing":{"input":0.01,"output":0.01,"prompt":"0.00000001","completion":"0.00000001","image":"0","request":"0","input_cache_read":"0"},"context_length":40960,"max_output_length":1024,"architecture":{"inputModalities":["text"],"outputModalities":["embedding"]},"input_modalities":["text"],"output_modalities":["embedding"],"supported_sampling_parameters":[],"supported_features":[],"description":"The Qwen3 Embedding model series is the latest proprietary model of the Qwen family, specifically designed for text embedding tasks.","top_provider":{"context_length":40960,"max_completion_tokens":1024,"is_moderated":false}},{"id":"Qwen/Qwen3-Reranker-0.6B","object":"model","created":1774015272,"owned_by":"nearai","name":"Qwen3-Reranker-0.6B","hugging_face_id":"Qwen/Qwen3-Reranker-0.6B","quantization":"bf16","pricing":{"input":0.01,"output":0.01,"prompt":"0.00000001","completion":"0.00000001","image":"0","request":"0","input_cache_read":"0"},"context_length":40960,"max_output_length":1024,"architecture":{"inputModalities":["text"],"outputModalities":["text"]},"input_modalities":["text"],"output_modalities":["text"],"supported_sampling_parameters":[],"supported_features":[],"description":"The Qwen3 Embedding model series is the latest proprietary model of the Qwen family, specifically designed for text embedding and ranking tasks.","top_provider":{"context_length":40960,"max_completion_tokens":1024,"is_moderated":false}},{"id":"Qwen/Qwen3-VL-30B-A3B-Instruct","object":"model","created":1774015272,"owned_by":"nearai","name":"Qwen3-VL-30B-A3B-Instruct","hugging_face_id":"Qwen/Qwen3-VL-30B-A3B-Instruct","quantization":"bf16","pricing":{"input":0.15,"output":0.55,"prompt":"0.00000015","completion":"0.00000055","image":"0","request":"0","input_cache_read":"0.00000003"},"context_length":256000,"max_output_length":8192,"architecture":{"inputModalities":["text","image"],"outputModalities":["text"]},"input_modalities":["text","image"],"output_modalities":["text"],"supported_sampling_parameters":["temperature","top_p","top_k","frequency_penalty","presence_penalty","max_tokens","stop","seed"],"supported_features":["tools","structured_outputs"],"description":"Qwen3-VL-30B-A3B-Instruct is a vision-language model supporting text and image inputs.","top_provider":{"context_length":256000,"max_completion_tokens":8192,"is_moderated":false}},{"id":"zai-org/GLM-5.1-FP8","object":"model","created":1776190834,"owned_by":"nearai","name":"GLM 5.1","hugging_face_id":"zai-org/GLM-5.1-FP8","quantization":"fp8","pricing":{"input":0.85,"output":3.3000000000000003,"prompt":"0.00000085","completion":"0.0000033","image":"0","request":"0","input_cache_read":"0.00000017"},"context_length":202752,"max_output_length":16384,"architecture":{"inputModalities":["text"],"outputModalities":["text"]},"input_modalities":["text"],"output_modalities":["text"],"supported_sampling_parameters":["temperature","top_p","top_k","frequency_penalty","presence_penalty","max_tokens","stop","seed"],"supported_features":["tools","structured_outputs","reasoning"],"description":"GLM-5.1 is an open-source foundation model built for complex systems engineering and long-horizon agent workflows. It delivers production-grade productivity for large-scale programming tasks, with performance aligned to top closed-source models, and is designed for expert developers building at the system level.","top_provider":{"context_length":202752,"max_completion_tokens":16384,"is_moderated":false}}]}