{"models":[{"modelId":"anthropic/claude-opus-4-6","inputCostPerToken":{"amount":5000,"scale":9,"currency":"USD"},"outputCostPerToken":{"amount":25000,"scale":9,"currency":"USD"},"costPerImage":{"amount":0,"scale":9,"currency":"USD"},"cacheReadCostPerToken":{"amount":0,"scale":9,"currency":"USD"},"metadata":{"verifiable":false,"contextLength":200000,"modelDisplayName":"Claude Opus 4.6","modelDescription":"Anthropic's most intelligent model for building agents and coding","modelIcon":"https://upload.wikimedia.org/wikipedia/commons/b/b0/Claude_AI_symbol.svg","ownedBy":"anthropic","aliases":["claude-opus-4-6","claude-opus-latest","opus-4-6"],"providerType":"external","providerConfig":{"backend":"anthropic","base_url":"https://api.anthropic.com/v1","model_name":"claude-opus-4-6"},"attestationSupported":false,"architecture":{"inputModalities":["text"],"outputModalities":["text"]}}},{"modelId":"anthropic/claude-sonnet-4-5","inputCostPerToken":{"amount":3000,"scale":9,"currency":"USD"},"outputCostPerToken":{"amount":15500,"scale":9,"currency":"USD"},"costPerImage":{"amount":0,"scale":9,"currency":"USD"},"cacheReadCostPerToken":{"amount":0,"scale":9,"currency":"USD"},"metadata":{"verifiable":false,"contextLength":200000,"modelDisplayName":"Claude Sonnet 4.5","modelDescription":"Anthropic's Claude Sonnet 4.5 - a powerful, efficient model balancing intelligence and speed. Excels at complex reasoning, coding, and creative tasks with 200K context window. \nAnonymized, not TEE-protected.","modelIcon":"https://upload.wikimedia.org/wikipedia/commons/b/b0/Claude_AI_symbol.svg","ownedBy":"anthropic","providerType":"external","providerConfig":{"backend":"anthropic","base_url":"https://api.anthropic.com/v1","model_name":"claude-sonnet-4-5-20250929"},"attestationSupported":false}},{"modelId":"black-forest-labs/FLUX.2-klein-4B","inputCostPerToken":{"amount":1000,"scale":9,"currency":"USD"},"outputCostPerToken":{"amount":1000,"scale":9,"currency":"USD"},"costPerImage":{"amount":12000000,"scale":9,"currency":"USD"},"cacheReadCostPerToken":{"amount":0,"scale":9,"currency":"USD"},"metadata":{"verifiable":true,"contextLength":128000,"modelDisplayName":"FLUX.2-klein-4B","modelDescription":"The FLUX.2 [klein] model family are our fastest image models to date. FLUX.2 [klein] unifies generation and editing in a single compact architecture, delivering state-of-the-art quality with end-to-end inference in as low as under a second. Built for applications that require real-time image generation without sacrificing quality.","modelIcon":"https://cdn-avatars.huggingface.co/v1/production/uploads/633f7a8f4be90e06da248e0f/m5YoF33abJ09vcwFxt1Mj.png","ownedBy":"nearai","providerType":"vllm","attestationSupported":true,"architecture":{"inputModalities":["text"],"outputModalities":["image"]}}},{"modelId":"google/gemini-3-pro","inputCostPerToken":{"amount":1250,"scale":9,"currency":"USD"},"outputCostPerToken":{"amount":15000,"scale":9,"currency":"USD"},"costPerImage":{"amount":0,"scale":9,"currency":"USD"},"cacheReadCostPerToken":{"amount":0,"scale":9,"currency":"USD"},"metadata":{"verifiable":false,"contextLength":1000000,"modelDisplayName":"Gemini 3 Pro Preview","modelDescription":"Google's Gemini 3 Pro Preview - a highly capable multimodal model with an industry-leading 1M token context window. Optimized for complex reasoning, code generation, and long document analysis. \nAnonymized, not TEE-protected.","modelIcon":"https://www.gstatic.com/lamda/images/gemini_sparkle_aurora_33f86dc0c0257da337c63.svg","ownedBy":"google","providerType":"external","providerConfig":{"backend":"gemini","base_url":"https://generativelanguage.googleapis.com/v1beta","model_name":"gemini-3-pro-preview"},"attestationSupported":false}},{"modelId":"openai/gpt-5.2","inputCostPerToken":{"amount":1800,"scale":9,"currency":"USD"},"outputCostPerToken":{"amount":15500,"scale":9,"currency":"USD"},"costPerImage":{"amount":0,"scale":9,"currency":"USD"},"cacheReadCostPerToken":{"amount":0,"scale":9,"currency":"USD"},"metadata":{"verifiable":false,"contextLength":400000,"modelDisplayName":"OpenAI GPT-5.2","modelDescription":"OpenAI GPT-5.2 with 400k context window. Anonymized endpoint optimized for deep reasoning and large-context workflows.","modelIcon":"https://cdn.openai.com/API/docs/images/model-page/model-icons/gpt-5.2.png","ownedBy":"openai","aliases":["openai-gpt-5.2","gpt-5.2","gpt-5.2-400k"],"providerType":"external","providerConfig":{"backend":"openai_compatible","base_url":"https://api.openai.com/v1","model_name":"gpt-5.2"},"attestationSupported":false}},{"modelId":"openai/gpt-oss-120b","inputCostPerToken":{"amount":150,"scale":9,"currency":"USD"},"outputCostPerToken":{"amount":550,"scale":9,"currency":"USD"},"costPerImage":{"amount":0,"scale":9,"currency":"USD"},"cacheReadCostPerToken":{"amount":0,"scale":9,"currency":"USD"},"metadata":{"verifiable":true,"contextLength":131000,"modelDisplayName":"GPT OSS 120B","modelDescription":"gpt-oss-120b is an open-weight, 117B-parameter Mixture-of-Experts (MoE) language model from OpenAI designed for high-reasoning, agentic, and general-purpose production use cases. It activates 5.1B parameters per forward pass and is optimized to run on a single H100 GPU with native MXFP4 quantization. \nThe model supports configurable reasoning depth, full chain-of-thought access, and native tool use, including function calling, browsing, and structured output generation.","modelIcon":"https://avatars.githubusercontent.com/u/14957082?s=200&v=4","ownedBy":"nearai","aliases":["nearai/gpt-oss-120b","gpt-oss-120b"],"providerType":"vllm","attestationSupported":true}},{"modelId":"openai/whisper-large-v3","inputCostPerToken":{"amount":10,"scale":9,"currency":"USD"},"outputCostPerToken":{"amount":10,"scale":9,"currency":"USD"},"costPerImage":{"amount":0,"scale":9,"currency":"USD"},"cacheReadCostPerToken":{"amount":0,"scale":9,"currency":"USD"},"metadata":{"verifiable":true,"contextLength":448,"modelDisplayName":"Whisper Large v3","modelDescription":"Whisper is a state-of-the-art model for automatic speech recognition (ASR) and speech translation.","modelIcon":"https://avatars.githubusercontent.com/u/14957082?s=200&v=4","ownedBy":"nearai","providerType":"vllm","attestationSupported":true,"architecture":{"inputModalities":["audio"],"outputModalities":["text"]}}},{"modelId":"Qwen/Qwen3-30B-A3B-Instruct-2507","inputCostPerToken":{"amount":150,"scale":9,"currency":"USD"},"outputCostPerToken":{"amount":550,"scale":9,"currency":"USD"},"costPerImage":{"amount":0,"scale":9,"currency":"USD"},"cacheReadCostPerToken":{"amount":0,"scale":9,"currency":"USD"},"metadata":{"verifiable":true,"contextLength":262144,"modelDisplayName":"Qwen3 30B A3B Instruct 2507","modelDescription":"Qwen3-30B-A3B-Instruct-2507 is a mixture-of-experts (MoE) causal language model featuring 30.5 billion total parameters and 3.3 billion activated parameters per inference. \nIt supports ultra-long context up to 262 K tokens and operates exclusively in non-thinking mode, delivering strong enhancements in instruction following, reasoning, logical comprehension, mathematics, coding, multilingual understanding, and alignment with user preferences.","modelIcon":"https://avatars.githubusercontent.com/u/141221163?s=200&v=4","ownedBy":"nearai","aliases":["qwen/qwen3-30b-a3b-instruct-2507","qwen3-30b-a3b-instruct-2507"],"providerType":"vllm","attestationSupported":true}},{"modelId":"Qwen/Qwen3.5-122B-A10B","inputCostPerToken":{"amount":400,"scale":9,"currency":"USD"},"outputCostPerToken":{"amount":3200,"scale":9,"currency":"USD"},"costPerImage":{"amount":0,"scale":9,"currency":"USD"},"cacheReadCostPerToken":{"amount":0,"scale":9,"currency":"USD"},"metadata":{"verifiable":true,"contextLength":131072,"modelDisplayName":"Qwen3.5 122B A10B","modelDescription":"Qwen3.5 122B MoE model with 10B active parameters, supporting reasoning and tool calling","modelIcon":"https://avatars.githubusercontent.com/u/141221163?s=200&v=4","ownedBy":"nearai","aliases":["qwen3.5-122b","qwen3.5-122b-a10b","qwen3-5-122b","deepseek-ai/DeepSeek-V3.1"],"providerType":"vllm","attestationSupported":true,"architecture":{"inputModalities":["text"],"outputModalities":["text"]}}},{"modelId":"Qwen/Qwen3-Embedding-0.6B","inputCostPerToken":{"amount":10,"scale":9,"currency":"USD"},"outputCostPerToken":{"amount":10,"scale":9,"currency":"USD"},"costPerImage":{"amount":0,"scale":9,"currency":"USD"},"cacheReadCostPerToken":{"amount":0,"scale":9,"currency":"USD"},"metadata":{"verifiable":true,"contextLength":40960,"modelDisplayName":"Qwen3-Embedding-0.6B","modelDescription":"The Qwen3 Embedding model series is the latest proprietary model of the Qwen family, specifically designed for text embedding \ntasks.","modelIcon":"https://avatars.githubusercontent.com/u/223098841?s=200&v=4","ownedBy":"nearai","providerType":"vllm","attestationSupported":true,"architecture":{"inputModalities":["text"],"outputModalities":["text"]}}},{"modelId":"Qwen/Qwen3-Reranker-0.6B","inputCostPerToken":{"amount":10,"scale":9,"currency":"USD"},"outputCostPerToken":{"amount":10,"scale":9,"currency":"USD"},"costPerImage":{"amount":0,"scale":9,"currency":"USD"},"cacheReadCostPerToken":{"amount":0,"scale":9,"currency":"USD"},"metadata":{"verifiable":true,"contextLength":40960,"modelDisplayName":"Qwen3-Reranker-0.6B","modelDescription":"The Qwen3 Embedding model series is the latest proprietary model of the Qwen family, specifically designed for text embedding and ranking tasks.","modelIcon":"https://avatars.githubusercontent.com/u/223098841?s=200&v=4","ownedBy":"nearai","providerType":"vllm","attestationSupported":true,"architecture":{"inputModalities":["text"],"outputModalities":["text"]}}},{"modelId":"Qwen/Qwen3-VL-30B-A3B-Instruct","inputCostPerToken":{"amount":150,"scale":9,"currency":"USD"},"outputCostPerToken":{"amount":550,"scale":9,"currency":"USD"},"costPerImage":{"amount":0,"scale":9,"currency":"USD"},"cacheReadCostPerToken":{"amount":0,"scale":9,"currency":"USD"},"metadata":{"verifiable":true,"contextLength":256000,"modelDisplayName":"Qwen3-VL-30B-A3B-Instruct","modelDescription":"Qwen3-VL-30B-A3B-Instruct is a vision-language model supporting text and image \ninputs.","modelIcon":"https://avatars.githubusercontent.com/u/141221163?s=200&v=4","ownedBy":"nearai","providerType":"vllm","attestationSupported":true,"architecture":{"inputModalities":["text"],"outputModalities":["text"]}}},{"modelId":"zai-org/GLM-5-FP8","inputCostPerToken":{"amount":850,"scale":9,"currency":"USD"},"outputCostPerToken":{"amount":3300,"scale":9,"currency":"USD"},"costPerImage":{"amount":0,"scale":9,"currency":"USD"},"cacheReadCostPerToken":{"amount":0,"scale":9,"currency":"USD"},"metadata":{"verifiable":true,"contextLength":202752,"modelDisplayName":"GLM 5","modelDescription":"GLM-5 is an open-source foundation model built for complex systems engineering and long-horizon agent workflows. It delivers production-grade productivity for large-scale programming tasks, with performance aligned to top closed-source models, and is designed for expert developers building at the system level.","modelIcon":"https://avatars.githubusercontent.com/u/223098841?s=200&v=4","ownedBy":"nearai","aliases":["glm-latest","zai-org/GLM-latest","GLM-5","zai-org/GLM-4.7"],"providerType":"vllm","attestationSupported":true,"architecture":{"inputModalities":["text"],"outputModalities":["text"]}}}],"limit":100,"offset":0,"total":13}