{"models":[{"modelId":"anthropic/claude-haiku-4-5","inputCostPerToken":{"amount":1000,"scale":9,"currency":"USD"},"outputCostPerToken":{"amount":5000,"scale":9,"currency":"USD"},"costPerImage":{"amount":0,"scale":9,"currency":"USD"},"cacheReadCostPerToken":{"amount":100,"scale":9,"currency":"USD"},"metadata":{"verifiable":false,"contextLength":200000,"modelDisplayName":"Claude Haiku 4.5","modelDescription":"Anthropic's fastest model with near-frontier intelligence. Extended thinking support with 200K context window. Best for high-throughput, cost-sensitive workloads.","modelIcon":"https://avatars.githubusercontent.com/u/76263028?v=4","ownedBy":"anthropic","aliases":["claude-haiku-4-5","haiku-4-5"],"providerType":"external","providerConfig":{"backend":"anthropic","base_url":"https://api.anthropic.com/v1","model_name":"claude-haiku-4-5"},"attestationSupported":false,"architecture":{"inputModalities":["text","image"],"outputModalities":["text"]},"maxOutputLength":8192,"supportedSamplingParameters":["temperature","top_p","top_k","max_tokens","stop"],"supportedFeatures":["tools","structured_outputs"],"isReady":false}},{"modelId":"anthropic/claude-opus-4-6","inputCostPerToken":{"amount":5000,"scale":9,"currency":"USD"},"outputCostPerToken":{"amount":25000,"scale":9,"currency":"USD"},"costPerImage":{"amount":0,"scale":9,"currency":"USD"},"cacheReadCostPerToken":{"amount":0,"scale":9,"currency":"USD"},"metadata":{"verifiable":false,"contextLength":200000,"modelDisplayName":"Claude Opus 4.6","modelDescription":"Anthropic's most intelligent model for building agents and 
coding","modelIcon":"https://avatars.githubusercontent.com/u/76263028?v=4","ownedBy":"anthropic","aliases":["claude-opus-4-6","claude-opus-latest","opus-4-6"],"providerType":"external","providerConfig":{"backend":"anthropic","base_url":"https://api.anthropic.com/v1","model_name":"claude-opus-4-6"},"attestationSupported":false,"architecture":{"inputModalities":["text"],"outputModalities":["text"]},"maxOutputLength":32768,"supportedSamplingParameters":["temperature","top_p","top_k","max_tokens","stop"],"supportedFeatures":["tools","structured_outputs","reasoning"],"isReady":false}},{"modelId":"anthropic/claude-opus-4-7","inputCostPerToken":{"amount":5000,"scale":9,"currency":"USD"},"outputCostPerToken":{"amount":25000,"scale":9,"currency":"USD"},"costPerImage":{"amount":0,"scale":9,"currency":"USD"},"cacheReadCostPerToken":{"amount":500,"scale":9,"currency":"USD"},"metadata":{"verifiable":false,"contextLength":1000000,"modelDisplayName":"Claude Opus 4.7","modelDescription":"Anthropic's most capable model. Next-generation built for long-running agents and complex coding tasks. 
1M token context window with 128K max output.","modelIcon":"https://avatars.githubusercontent.com/u/76263028?v=4","ownedBy":"anthropic","aliases":["claude-opus-4-7","opus-4-7"],"providerType":"external","providerConfig":{"backend":"anthropic","base_url":"https://api.anthropic.com/v1","model_name":"claude-opus-4-7"},"attestationSupported":false,"architecture":{"inputModalities":["text","image"],"outputModalities":["text"]},"maxOutputLength":32768,"supportedSamplingParameters":["temperature","top_p","top_k","max_tokens","stop"],"supportedFeatures":["tools","structured_outputs","reasoning"],"isReady":false}},{"modelId":"anthropic/claude-sonnet-4-5","inputCostPerToken":{"amount":3000,"scale":9,"currency":"USD"},"outputCostPerToken":{"amount":15500,"scale":9,"currency":"USD"},"costPerImage":{"amount":0,"scale":9,"currency":"USD"},"cacheReadCostPerToken":{"amount":0,"scale":9,"currency":"USD"},"metadata":{"verifiable":false,"contextLength":200000,"modelDisplayName":"Claude Sonnet 4.5","modelDescription":"Anthropic's Claude Sonnet 4.5 - a powerful, efficient model balancing intelligence and speed. Excels at complex reasoning, coding, and creative tasks with 200K context window. 
Anonymized, not TEE-protected.","modelIcon":"https://avatars.githubusercontent.com/u/76263028?v=4","ownedBy":"anthropic","providerType":"external","providerConfig":{"backend":"anthropic","base_url":"https://api.anthropic.com/v1","model_name":"claude-sonnet-4-5-20250929"},"attestationSupported":false,"architecture":{"inputModalities":["text","image"],"outputModalities":["text"]},"maxOutputLength":16384,"supportedSamplingParameters":["temperature","top_p","top_k","max_tokens","stop"],"supportedFeatures":["tools","structured_outputs"],"isReady":false}},{"modelId":"anthropic/claude-sonnet-4-6","inputCostPerToken":{"amount":3000,"scale":9,"currency":"USD"},"outputCostPerToken":{"amount":15000,"scale":9,"currency":"USD"},"costPerImage":{"amount":0,"scale":9,"currency":"USD"},"cacheReadCostPerToken":{"amount":300,"scale":9,"currency":"USD"},"metadata":{"verifiable":false,"contextLength":1000000,"modelDisplayName":"Claude Sonnet 4.6","modelDescription":"Anthropic's best balance of speed and intelligence. Extended thinking support with 1M token context window and 64K max output. 
Ideal for most production workloads.","modelIcon":"https://avatars.githubusercontent.com/u/76263028?v=4","ownedBy":"anthropic","aliases":["claude-sonnet-4-6","sonnet-4-6"],"providerType":"external","providerConfig":{"backend":"anthropic","base_url":"https://api.anthropic.com/v1","model_name":"claude-sonnet-4-6"},"attestationSupported":false,"architecture":{"inputModalities":["text","image"],"outputModalities":["text"]},"maxOutputLength":16384,"supportedSamplingParameters":["temperature","top_p","top_k","max_tokens","stop"],"supportedFeatures":["tools","structured_outputs","reasoning"],"isReady":false}},{"modelId":"black-forest-labs/FLUX.2-klein-4B","inputCostPerToken":{"amount":1000,"scale":9,"currency":"USD"},"outputCostPerToken":{"amount":1000,"scale":9,"currency":"USD"},"costPerImage":{"amount":12000000,"scale":9,"currency":"USD"},"cacheReadCostPerToken":{"amount":0,"scale":9,"currency":"USD"},"metadata":{"verifiable":true,"contextLength":128000,"modelDisplayName":"FLUX.2-klein-4B","modelDescription":"The FLUX.2 [klein] model family are our fastest image models to date. FLUX.2 [klein] unifies generation and editing in a single compact architecture, delivering state-of-the-art quality with end-to-end inference in as low as under a second. 
Built for applications that require real-time image generation without sacrificing quality.","modelIcon":"https://cdn-avatars.huggingface.co/v1/production/uploads/633f7a8f4be90e06da248e0f/m5YoF33abJ09vcwFxt1Mj.png","ownedBy":"nearai","providerType":"vllm","attestationSupported":true,"architecture":{"inputModalities":["text"],"outputModalities":["image"]},"huggingFaceId":"black-forest-labs/FLUX.2-klein-4B","quantization":"bf16","maxOutputLength":1,"supportedSamplingParameters":["seed"],"datacenters":[{"country_code":"US"}]}},{"modelId":"deepseek-ai/DeepSeek-V4-Flash","inputCostPerToken":{"amount":170,"scale":9,"currency":"USD"},"outputCostPerToken":{"amount":350,"scale":9,"currency":"USD"},"costPerImage":{"amount":0,"scale":9,"currency":"USD"},"cacheReadCostPerToken":{"amount":35,"scale":9,"currency":"USD"},"metadata":{"verifiable":true,"contextLength":1048576,"modelDisplayName":"DeepSeek V4 Flash","modelDescription":"DeepSeek V4 Flash — large mixture-of-experts language model from DeepSeek, FP8-quantized. 
Served on H200 with TP=4 and EAGLE speculative decoding in a TDX-confidential inference CVM.","modelIcon":"https://avatars.githubusercontent.com/u/148330874?v=4","ownedBy":"nearai","providerType":"vllm","attestationSupported":true,"architecture":{"inputModalities":["text"],"outputModalities":["text"]},"huggingFaceId":"deepseek-ai/DeepSeek-V4-Flash","quantization":"fp8","maxOutputLength":8192,"supportedSamplingParameters":["temperature","top_p","top_k","min_p","frequency_penalty","presence_penalty","repetition_penalty","max_tokens","stop","seed","logit_bias"],"supportedFeatures":["tools","reasoning","json_mode","structured_outputs"],"datacenters":[{"country_code":"US"}],"isReady":true,"openrouterSlug":"deepseek/deepseek-v4-flash"}},{"modelId":"deepseek/deepseek-v3.2","inputCostPerToken":{"amount":1100,"scale":9,"currency":"USD"},"outputCostPerToken":{"amount":1100,"scale":9,"currency":"USD"},"costPerImage":{"amount":0,"scale":9,"currency":"USD"},"cacheReadCostPerToken":{"amount":550,"scale":9,"currency":"USD"},"metadata":{"verifiable":true,"contextLength":128000,"modelDisplayName":"deepseek-v3.2","modelDescription":"Attested model served via Chutes TEE (verified end-to-end by NEAR AI).","modelIcon":"https://avatars.githubusercontent.com/u/148330874?v=4","ownedBy":"attested 3p","providerType":"chutes","attestationSupported":true,"architecture":{"inputModalities":["text"],"outputModalities":["text"]},"supportedSamplingParameters":["temperature","top_p","frequency_penalty","presence_penalty","stop","seed","max_tokens"],"supportedFeatures":["tools","json_mode"],"isReady":false}},{"modelId":"google/gemini-2.5-flash","inputCostPerToken":{"amount":300,"scale":9,"currency":"USD"},"outputCostPerToken":{"amount":2500,"scale":9,"currency":"USD"},"costPerImage":{"amount":0,"scale":9,"currency":"USD"},"cacheReadCostPerToken":{"amount":0,"scale":9,"currency":"USD"},"metadata":{"verifiable":false,"contextLength":1000000,"modelDisplayName":"Gemini 2.5 
Flash","modelDescription":"Google's fast hybrid reasoning model with 1M token context window. Optimized for speed and cost while maintaining strong performance across tasks.","modelIcon":"https://avatars.githubusercontent.com/u/1342004?v=4","ownedBy":"google","aliases":["gemini-2.5-flash"],"providerType":"external","providerConfig":{"backend":"gemini","base_url":"https://generativelanguage.googleapis.com/v1beta","model_name":"gemini-2.5-flash"},"attestationSupported":false,"architecture":{"inputModalities":["text","image"],"outputModalities":["text"]},"maxOutputLength":8192,"supportedSamplingParameters":["temperature","top_p","top_k","max_tokens","stop"],"supportedFeatures":["tools","structured_outputs","json_mode"],"isReady":false}},{"modelId":"google/gemini-2.5-flash-lite","inputCostPerToken":{"amount":100,"scale":9,"currency":"USD"},"outputCostPerToken":{"amount":400,"scale":9,"currency":"USD"},"costPerImage":{"amount":0,"scale":9,"currency":"USD"},"cacheReadCostPerToken":{"amount":0,"scale":9,"currency":"USD"},"metadata":{"verifiable":false,"contextLength":1048576,"modelDisplayName":"Gemini 2.5 Flash Lite","modelDescription":"Gemini 2.5 Flash-Lite is a lightweight reasoning model in the Gemini 2.5 family, optimized for ultra-low latency and cost efficiency. 
It offers improved throughput, faster token generation, and better performance...","modelIcon":"https://avatars.githubusercontent.com/u/1342004?v=4","ownedBy":"google","providerType":"external","providerConfig":{"backend":"gemini","base_url":"https://generativelanguage.googleapis.com/v1beta","model_name":"gemini-2.5-flash-lite"},"attestationSupported":false,"architecture":{"inputModalities":["text"],"outputModalities":["text"]},"maxOutputLength":8192,"supportedSamplingParameters":["temperature","top_p","top_k","max_tokens","stop"],"supportedFeatures":["tools","structured_outputs","json_mode"],"isReady":false}},{"modelId":"google/gemini-2.5-pro","inputCostPerToken":{"amount":1250,"scale":9,"currency":"USD"},"outputCostPerToken":{"amount":10000,"scale":9,"currency":"USD"},"costPerImage":{"amount":0,"scale":9,"currency":"USD"},"cacheReadCostPerToken":{"amount":0,"scale":9,"currency":"USD"},"metadata":{"verifiable":false,"contextLength":1000000,"modelDisplayName":"Gemini 2.5 Pro","modelDescription":"Google's strongest reasoning model. Excels at coding, math, and complex analysis with 1M token context window. 
Supports text and image input.","modelIcon":"https://avatars.githubusercontent.com/u/1342004?v=4","ownedBy":"google","aliases":["gemini-2.5-pro"],"providerType":"external","providerConfig":{"backend":"gemini","base_url":"https://generativelanguage.googleapis.com/v1beta","model_name":"gemini-2.5-pro"},"attestationSupported":false,"architecture":{"inputModalities":["text","image"],"outputModalities":["text"]},"maxOutputLength":16384,"supportedSamplingParameters":["temperature","top_p","top_k","max_tokens","stop"],"supportedFeatures":["tools","structured_outputs","json_mode","reasoning"],"isReady":false}},{"modelId":"google/gemini-3.1-flash-lite","inputCostPerToken":{"amount":250,"scale":9,"currency":"USD"},"outputCostPerToken":{"amount":1500,"scale":9,"currency":"USD"},"costPerImage":{"amount":0,"scale":9,"currency":"USD"},"cacheReadCostPerToken":{"amount":0,"scale":9,"currency":"USD"},"metadata":{"verifiable":false,"contextLength":1048576,"modelDisplayName":"Gemini 3.1 Flash Lite","modelDescription":"Gemini 3.1 Flash Lite is Google’s GA high-efficiency multimodal model optimized for low-latency, high-volume workloads. 
It supports text, image, video, audio, and PDF inputs, and is designed for lightweight agentic...","modelIcon":"https://avatars.githubusercontent.com/u/1342004?v=4","ownedBy":"google","providerType":"external","providerConfig":{"backend":"gemini","base_url":"https://generativelanguage.googleapis.com/v1beta","model_name":"gemini-3.1-flash-lite"},"attestationSupported":false,"architecture":{"inputModalities":["text"],"outputModalities":["text"]},"maxOutputLength":8192,"supportedSamplingParameters":["temperature","top_p","top_k","max_tokens","stop"],"supportedFeatures":["tools","structured_outputs","json_mode"],"isReady":false}},{"modelId":"google/gemini-3.5-flash","inputCostPerToken":{"amount":1500,"scale":9,"currency":"USD"},"outputCostPerToken":{"amount":9000,"scale":9,"currency":"USD"},"costPerImage":{"amount":0,"scale":9,"currency":"USD"},"cacheReadCostPerToken":{"amount":150,"scale":9,"currency":"USD"},"metadata":{"verifiable":false,"contextLength":1000000,"modelDisplayName":"Gemini 3.5 Flash","modelDescription":"Google's high-efficiency multimodal model with 1M token context. 
Strong agentic and coding performance, rivaling larger flagship models on many tasks.","modelIcon":"https://avatars.githubusercontent.com/u/1342004?v=4","ownedBy":"google","aliases":["gemini-3.5-flash"],"providerType":"external","providerConfig":{"backend":"gemini","base_url":"https://generativelanguage.googleapis.com/v1beta","model_name":"gemini-3.5-flash"},"attestationSupported":false,"architecture":{"inputModalities":["text","image"],"outputModalities":["text"]},"maxOutputLength":8192,"supportedSamplingParameters":["temperature","top_p","top_k","max_tokens","stop"],"supportedFeatures":["tools","structured_outputs","json_mode"],"isReady":false}},{"modelId":"google/gemma-4-31B-it","inputCostPerToken":{"amount":130,"scale":9,"currency":"USD"},"outputCostPerToken":{"amount":400,"scale":9,"currency":"USD"},"costPerImage":{"amount":0,"scale":9,"currency":"USD"},"cacheReadCostPerToken":{"amount":26,"scale":9,"currency":"USD"},"metadata":{"verifiable":true,"contextLength":262144,"modelDisplayName":"Gemma 4 31B Instruct","modelDescription":"Gemma 4 31B Instruct is Google's open-weight 31B-parameter language model, tuned for instruction following and dialogue. 
Strong general-purpose performance with 256K context window.","modelIcon":"https://avatars.githubusercontent.com/u/1342004?s=200&v=4","ownedBy":"nearai","aliases":["gemma-4-31b-it","google/gemma-4-31b-it"],"providerType":"vllm","attestationSupported":true,"architecture":{"inputModalities":["text","image"],"outputModalities":["text"]},"huggingFaceId":"google/gemma-4-31B-it","quantization":"bf16","maxOutputLength":8192,"supportedSamplingParameters":["temperature","top_p","top_k","min_p","frequency_penalty","presence_penalty","repetition_penalty","max_tokens","stop","seed"],"supportedFeatures":["tools","reasoning","structured_outputs","json_mode","logprobs"],"datacenters":[{"country_code":"US"}],"isReady":true,"openrouterSlug":"google/gemma-4-31b-it"}},{"modelId":"minimax/minimax-m2.5","inputCostPerToken":{"amount":170,"scale":9,"currency":"USD"},"outputCostPerToken":{"amount":1320,"scale":9,"currency":"USD"},"costPerImage":{"amount":0,"scale":9,"currency":"USD"},"cacheReadCostPerToken":{"amount":80,"scale":9,"currency":"USD"},"metadata":{"verifiable":true,"contextLength":128000,"modelDisplayName":"minimax-m2.5","modelDescription":"Attested model served via Chutes TEE (verified end-to-end by NEAR AI).","modelIcon":"https://avatars.githubusercontent.com/u/194880281?v=4","ownedBy":"attested 
3p","providerType":"chutes","attestationSupported":true,"architecture":{"inputModalities":["text"],"outputModalities":["text"]},"supportedSamplingParameters":["temperature","top_p","frequency_penalty","presence_penalty","stop","seed","max_tokens"],"supportedFeatures":["tools","json_mode"],"isReady":false}},{"modelId":"moonshotai/kimi-k2.5","inputCostPerToken":{"amount":480,"scale":9,"currency":"USD"},"outputCostPerToken":{"amount":2200,"scale":9,"currency":"USD"},"costPerImage":{"amount":0,"scale":9,"currency":"USD"},"cacheReadCostPerToken":{"amount":240,"scale":9,"currency":"USD"},"metadata":{"verifiable":true,"contextLength":128000,"modelDisplayName":"kimi-k2.5","modelDescription":"Attested model served via Chutes TEE (verified end-to-end by NEAR AI).","modelIcon":"https://avatars.githubusercontent.com/u/129152888?v=4","ownedBy":"attested 3p","providerType":"chutes","attestationSupported":true,"architecture":{"inputModalities":["text"],"outputModalities":["text"]},"supportedSamplingParameters":["temperature","top_p","frequency_penalty","presence_penalty","stop","seed","max_tokens"],"supportedFeatures":["tools","json_mode"],"isReady":false}},{"modelId":"moonshotai/kimi-k2.6","inputCostPerToken":{"amount":810,"scale":9,"currency":"USD"},"outputCostPerToken":{"amount":3850,"scale":9,"currency":"USD"},"costPerImage":{"amount":0,"scale":9,"currency":"USD"},"cacheReadCostPerToken":{"amount":410,"scale":9,"currency":"USD"},"metadata":{"verifiable":true,"contextLength":262144,"modelDisplayName":"Kimi K2.6","modelDescription":"Moonshot AI's frontier MoE model with 256K context window. 
Excels at complex reasoning, math, coding, and multilingual tasks with native vision support.","modelIcon":"https://avatars.githubusercontent.com/u/148031206?s=200&v=4","ownedBy":"attested 3p","aliases":["kimi-k2.6"],"providerType":"chutes","providerConfig":{"api_key":"***","backend":"openai_compatible","base_url":"https://openrouter.ai/api/v1","model_name":"moonshotai/kimi-k2.6"},"attestationSupported":true,"architecture":{"inputModalities":["text","image"],"outputModalities":["text"]},"huggingFaceId":"moonshotai/Kimi-K2.6","maxOutputLength":8192,"supportedSamplingParameters":["temperature","top_p","top_k","frequency_penalty","presence_penalty","max_tokens","stop","seed"],"supportedFeatures":["tools","structured_outputs"],"isReady":true}},{"modelId":"openai/gpt-4.1","inputCostPerToken":{"amount":2000,"scale":9,"currency":"USD"},"outputCostPerToken":{"amount":8000,"scale":9,"currency":"USD"},"costPerImage":{"amount":0,"scale":9,"currency":"USD"},"cacheReadCostPerToken":{"amount":500,"scale":9,"currency":"USD"},"metadata":{"verifiable":false,"contextLength":1000000,"modelDisplayName":"OpenAI GPT-4.1","modelDescription":"OpenAI's flagship production model with 1M token context window. Excels at instruction following, coding, and long-context tasks. 
75% cheaper cached input reads.","modelIcon":"https://avatars.githubusercontent.com/u/14957082?s=200&v=4","ownedBy":"openai","aliases":["gpt-4.1"],"providerType":"external","providerConfig":{"backend":"openai_compatible","base_url":"https://api.openai.com/v1","model_name":"gpt-4.1"},"attestationSupported":false,"architecture":{"inputModalities":["text","image"],"outputModalities":["text"]},"maxOutputLength":16384,"supportedSamplingParameters":["temperature","top_p","frequency_penalty","presence_penalty","max_tokens","stop","seed"],"supportedFeatures":["tools","structured_outputs","json_mode"],"isReady":false}},{"modelId":"openai/gpt-4.1-mini","inputCostPerToken":{"amount":400,"scale":9,"currency":"USD"},"outputCostPerToken":{"amount":1600,"scale":9,"currency":"USD"},"costPerImage":{"amount":0,"scale":9,"currency":"USD"},"cacheReadCostPerToken":{"amount":100,"scale":9,"currency":"USD"},"metadata":{"verifiable":false,"contextLength":1000000,"modelDisplayName":"OpenAI GPT-4.1 Mini","modelDescription":"Cost-effective version of GPT-4.1 with the same 1M token context window. 
Great balance of capability and cost for production workloads.","modelIcon":"https://avatars.githubusercontent.com/u/14957082?s=200&v=4","ownedBy":"openai","aliases":["gpt-4.1-mini"],"providerType":"external","providerConfig":{"backend":"openai_compatible","base_url":"https://api.openai.com/v1","model_name":"gpt-4.1-mini"},"attestationSupported":false,"architecture":{"inputModalities":["text","image"],"outputModalities":["text"]},"maxOutputLength":16384,"supportedSamplingParameters":["temperature","top_p","frequency_penalty","presence_penalty","max_tokens","stop","seed"],"supportedFeatures":["tools","structured_outputs","json_mode"],"isReady":false}},{"modelId":"openai/gpt-4.1-nano","inputCostPerToken":{"amount":100,"scale":9,"currency":"USD"},"outputCostPerToken":{"amount":400,"scale":9,"currency":"USD"},"costPerImage":{"amount":0,"scale":9,"currency":"USD"},"cacheReadCostPerToken":{"amount":25,"scale":9,"currency":"USD"},"metadata":{"verifiable":false,"contextLength":1000000,"modelDisplayName":"OpenAI GPT-4.1 Nano","modelDescription":"OpenAI's most cost-efficient model with 1M token context. 
Ideal for classification, extraction, and high-volume tasks where cost matters most.","modelIcon":"https://avatars.githubusercontent.com/u/14957082?s=200&v=4","ownedBy":"openai","aliases":["gpt-4.1-nano"],"providerType":"external","providerConfig":{"backend":"openai_compatible","base_url":"https://api.openai.com/v1","model_name":"gpt-4.1-nano"},"attestationSupported":false,"architecture":{"inputModalities":["text","image"],"outputModalities":["text"]},"maxOutputLength":16384,"supportedSamplingParameters":["temperature","top_p","frequency_penalty","presence_penalty","max_tokens","stop","seed"],"supportedFeatures":["tools","structured_outputs","json_mode"],"isReady":false}},{"modelId":"openai/gpt-5","inputCostPerToken":{"amount":1250,"scale":9,"currency":"USD"},"outputCostPerToken":{"amount":10000,"scale":9,"currency":"USD"},"costPerImage":{"amount":0,"scale":9,"currency":"USD"},"cacheReadCostPerToken":{"amount":125,"scale":9,"currency":"USD"},"metadata":{"verifiable":false,"contextLength":400000,"modelDisplayName":"OpenAI GPT-5","modelDescription":"OpenAI's next-generation model with enhanced reasoning and 400K context window. 
Strong performance across coding, math, and creative tasks.","modelIcon":"https://avatars.githubusercontent.com/u/14957082?s=200&v=4","ownedBy":"openai","aliases":["gpt-5"],"providerType":"external","providerConfig":{"backend":"openai_compatible","base_url":"https://api.openai.com/v1","model_name":"gpt-5"},"attestationSupported":false,"architecture":{"inputModalities":["text","image"],"outputModalities":["text"]},"maxOutputLength":16384,"supportedSamplingParameters":["temperature","top_p","frequency_penalty","presence_penalty","max_tokens","stop","seed"],"supportedFeatures":["tools","structured_outputs","json_mode","reasoning"],"isReady":false}},{"modelId":"openai/gpt-5.1","inputCostPerToken":{"amount":1250,"scale":9,"currency":"USD"},"outputCostPerToken":{"amount":10000,"scale":9,"currency":"USD"},"costPerImage":{"amount":0,"scale":9,"currency":"USD"},"cacheReadCostPerToken":{"amount":0,"scale":9,"currency":"USD"},"metadata":{"verifiable":false,"contextLength":400000,"modelDisplayName":"GPT-5.1","modelDescription":"GPT-5.1 is the latest frontier-grade model in the GPT-5 series, offering stronger general-purpose reasoning, improved instruction adherence, and a more natural conversational style compared to GPT-5. 
It uses adaptive reasoning...","modelIcon":"https://avatars.githubusercontent.com/u/14957082?s=200&v=4","ownedBy":"openai","providerType":"external","providerConfig":{"backend":"openai_compatible","base_url":"https://api.openai.com/v1","model_name":"gpt-5.1"},"attestationSupported":false,"architecture":{"inputModalities":["text"],"outputModalities":["text"]},"maxOutputLength":16384,"supportedSamplingParameters":["temperature","top_p","frequency_penalty","presence_penalty","max_tokens","stop","seed"],"supportedFeatures":["tools","structured_outputs","json_mode","reasoning"],"isReady":false}},{"modelId":"openai/gpt-5.2","inputCostPerToken":{"amount":1800,"scale":9,"currency":"USD"},"outputCostPerToken":{"amount":15500,"scale":9,"currency":"USD"},"costPerImage":{"amount":0,"scale":9,"currency":"USD"},"cacheReadCostPerToken":{"amount":180,"scale":9,"currency":"USD"},"metadata":{"verifiable":false,"contextLength":400000,"modelDisplayName":"OpenAI GPT-5.2","modelDescription":"OpenAI GPT-5.2 with 400k context window. 
Anonymized endpoint optimized for deep reasoning and large-context workflows.","modelIcon":"https://avatars.githubusercontent.com/u/14957082?s=200&v=4","ownedBy":"openai","aliases":["openai-gpt-5.2","gpt-5.2","gpt-5.2-400k"],"providerType":"external","providerConfig":{"backend":"openai_compatible","base_url":"https://api.openai.com/v1","model_name":"gpt-5.2"},"attestationSupported":false,"architecture":{"inputModalities":["text","image"],"outputModalities":["text"]},"maxOutputLength":16384,"supportedSamplingParameters":["temperature","top_p","frequency_penalty","presence_penalty","max_tokens","stop","seed"],"supportedFeatures":["tools","structured_outputs","json_mode","reasoning"],"isReady":false}},{"modelId":"openai/gpt-5.4","inputCostPerToken":{"amount":2500,"scale":9,"currency":"USD"},"outputCostPerToken":{"amount":15000,"scale":9,"currency":"USD"},"costPerImage":{"amount":0,"scale":9,"currency":"USD"},"cacheReadCostPerToken":{"amount":0,"scale":9,"currency":"USD"},"metadata":{"verifiable":false,"contextLength":1050000,"modelDisplayName":"GPT-5.4","modelDescription":"GPT-5.4 is OpenAI’s latest frontier model, unifying the Codex and GPT lines into a single system. 
It features a 1M+ token context window (922K input, 128K output) with support for...","modelIcon":"https://avatars.githubusercontent.com/u/14957082?s=200&v=4","ownedBy":"openai","providerType":"external","providerConfig":{"backend":"openai_compatible","base_url":"https://api.openai.com/v1","model_name":"gpt-5.4"},"attestationSupported":false,"architecture":{"inputModalities":["text"],"outputModalities":["text"]},"maxOutputLength":16384,"supportedSamplingParameters":["temperature","top_p","frequency_penalty","presence_penalty","max_tokens","stop","seed"],"supportedFeatures":["tools","structured_outputs","json_mode","reasoning"],"isReady":false}},{"modelId":"openai/gpt-5.4-mini","inputCostPerToken":{"amount":750,"scale":9,"currency":"USD"},"outputCostPerToken":{"amount":4500,"scale":9,"currency":"USD"},"costPerImage":{"amount":0,"scale":9,"currency":"USD"},"cacheReadCostPerToken":{"amount":0,"scale":9,"currency":"USD"},"metadata":{"verifiable":false,"contextLength":400000,"modelDisplayName":"GPT-5.4 Mini","modelDescription":"GPT-5.4 mini brings the core capabilities of GPT-5.4 to a faster, more efficient model optimized for high-throughput workloads. 
It supports text and image inputs with strong performance across reasoning, coding,...","modelIcon":"https://avatars.githubusercontent.com/u/14957082?s=200&v=4","ownedBy":"openai","providerType":"external","providerConfig":{"backend":"openai_compatible","base_url":"https://api.openai.com/v1","model_name":"gpt-5.4-mini"},"attestationSupported":false,"architecture":{"inputModalities":["text"],"outputModalities":["text"]},"maxOutputLength":16384,"supportedSamplingParameters":["temperature","top_p","frequency_penalty","presence_penalty","max_tokens","stop","seed"],"supportedFeatures":["tools","structured_outputs","json_mode","reasoning"],"isReady":false}},{"modelId":"openai/gpt-5.4-nano","inputCostPerToken":{"amount":200,"scale":9,"currency":"USD"},"outputCostPerToken":{"amount":1250,"scale":9,"currency":"USD"},"costPerImage":{"amount":0,"scale":9,"currency":"USD"},"cacheReadCostPerToken":{"amount":0,"scale":9,"currency":"USD"},"metadata":{"verifiable":false,"contextLength":400000,"modelDisplayName":"GPT-5.4 Nano","modelDescription":"GPT-5.4 nano is the most lightweight and cost-efficient variant of the GPT-5.4 family, optimized for speed-critical and high-volume tasks. 
It supports text and image inputs and is designed for low-latency...","modelIcon":"https://avatars.githubusercontent.com/u/14957082?s=200&v=4","ownedBy":"openai","providerType":"external","providerConfig":{"backend":"openai_compatible","base_url":"https://api.openai.com/v1","model_name":"gpt-5.4-nano"},"attestationSupported":false,"architecture":{"inputModalities":["text"],"outputModalities":["text"]},"maxOutputLength":16384,"supportedSamplingParameters":["temperature","top_p","frequency_penalty","presence_penalty","max_tokens","stop","seed"],"supportedFeatures":["tools","structured_outputs","json_mode","reasoning"],"isReady":false}},{"modelId":"openai/gpt-5.5","inputCostPerToken":{"amount":5000,"scale":9,"currency":"USD"},"outputCostPerToken":{"amount":30000,"scale":9,"currency":"USD"},"costPerImage":{"amount":0,"scale":9,"currency":"USD"},"cacheReadCostPerToken":{"amount":0,"scale":9,"currency":"USD"},"metadata":{"verifiable":false,"contextLength":1050000,"modelDisplayName":"GPT-5.5","modelDescription":"GPT-5.5 is OpenAI’s frontier model designed for complex professional workloads, building on GPT-5.4 with stronger reasoning, higher reliability, and improved token efficiency on hard tasks. 
It features a 1M+ token...","modelIcon":"https://avatars.githubusercontent.com/u/14957082?s=200&v=4","ownedBy":"openai","providerType":"external","providerConfig":{"backend":"openai_compatible","base_url":"https://api.openai.com/v1","model_name":"gpt-5.5"},"attestationSupported":false,"architecture":{"inputModalities":["text"],"outputModalities":["text"]},"maxOutputLength":16384,"supportedSamplingParameters":["temperature","top_p","frequency_penalty","presence_penalty","max_tokens","stop","seed"],"supportedFeatures":["tools","structured_outputs","json_mode","reasoning"],"isReady":false}},{"modelId":"openai/gpt-5-mini","inputCostPerToken":{"amount":250,"scale":9,"currency":"USD"},"outputCostPerToken":{"amount":2000,"scale":9,"currency":"USD"},"costPerImage":{"amount":0,"scale":9,"currency":"USD"},"cacheReadCostPerToken":{"amount":0,"scale":9,"currency":"USD"},"metadata":{"verifiable":false,"contextLength":400000,"modelDisplayName":"GPT-5 Mini","modelDescription":"GPT-5 Mini is a compact version of GPT-5, designed to handle lighter-weight reasoning tasks. 
It provides the same instruction-following and safety-tuning benefits as GPT-5, but with reduced latency and cost....","modelIcon":"https://avatars.githubusercontent.com/u/14957082?s=200&v=4","ownedBy":"openai","providerType":"external","providerConfig":{"backend":"openai_compatible","base_url":"https://api.openai.com/v1","model_name":"gpt-5-mini"},"attestationSupported":false,"architecture":{"inputModalities":["text"],"outputModalities":["text"]},"maxOutputLength":16384,"supportedSamplingParameters":["temperature","top_p","frequency_penalty","presence_penalty","max_tokens","stop","seed"],"supportedFeatures":["tools","structured_outputs","json_mode","reasoning"],"isReady":false}},{"modelId":"openai/gpt-5-nano","inputCostPerToken":{"amount":50,"scale":9,"currency":"USD"},"outputCostPerToken":{"amount":400,"scale":9,"currency":"USD"},"costPerImage":{"amount":0,"scale":9,"currency":"USD"},"cacheReadCostPerToken":{"amount":0,"scale":9,"currency":"USD"},"metadata":{"verifiable":false,"contextLength":400000,"modelDisplayName":"GPT-5 Nano","modelDescription":"GPT-5-Nano is the smallest and fastest variant in the GPT-5 system, optimized for developer tools, rapid interactions, and ultra-low latency environments. 
While limited in reasoning depth compared to its larger...","modelIcon":"https://avatars.githubusercontent.com/u/14957082?s=200&v=4","ownedBy":"openai","providerType":"external","providerConfig":{"backend":"openai_compatible","base_url":"https://api.openai.com/v1","model_name":"gpt-5-nano"},"attestationSupported":false,"architecture":{"inputModalities":["text"],"outputModalities":["text"]},"maxOutputLength":16384,"supportedSamplingParameters":["temperature","top_p","frequency_penalty","presence_penalty","max_tokens","stop","seed"],"supportedFeatures":["tools","structured_outputs","json_mode","reasoning"],"isReady":false}},{"modelId":"openai/gpt-oss-120b","inputCostPerToken":{"amount":150,"scale":9,"currency":"USD"},"outputCostPerToken":{"amount":550,"scale":9,"currency":"USD"},"costPerImage":{"amount":0,"scale":9,"currency":"USD"},"cacheReadCostPerToken":{"amount":30,"scale":9,"currency":"USD"},"metadata":{"verifiable":true,"contextLength":131072,"modelDisplayName":"GPT OSS 120B","modelDescription":"gpt-oss-120b is an open-weight, 117B-parameter Mixture-of-Experts (MoE) language model from OpenAI designed for high-reasoning, agentic, and general-purpose production use cases. It activates 5.1B parameters per forward pass and is optimized to run on a single H100 GPU with native MXFP4 quantization. 
The model supports configurable reasoning depth, full chain-of-thought access, and native tool use, including function calling, browsing, and structured output generation.","modelIcon":"https://avatars.githubusercontent.com/u/14957082?s=200&v=4","ownedBy":"nearai","aliases":["nearai/gpt-oss-120b","gpt-oss-120b"],"providerType":"vllm","attestationSupported":true,"architecture":{"inputModalities":["text"],"outputModalities":["text"]},"huggingFaceId":"openai/gpt-oss-120b","quantization":"fp4","maxOutputLength":16384,"supportedSamplingParameters":["temperature","top_p","top_k","min_p","frequency_penalty","presence_penalty","repetition_penalty","max_tokens","stop","seed","logit_bias"],"supportedFeatures":["tools","json_mode","structured_outputs","logprobs","reasoning"],"datacenters":[{"country_code":"US"}],"isReady":true}},{"modelId":"openai/o3","inputCostPerToken":{"amount":2000,"scale":9,"currency":"USD"},"outputCostPerToken":{"amount":8000,"scale":9,"currency":"USD"},"costPerImage":{"amount":0,"scale":9,"currency":"USD"},"cacheReadCostPerToken":{"amount":1000,"scale":9,"currency":"USD"},"metadata":{"verifiable":false,"contextLength":200000,"modelDisplayName":"OpenAI o3","modelDescription":"OpenAI's flagship reasoning model. Uses chain-of-thought to solve complex math, coding, and logic problems. 
200K context window.","modelIcon":"https://avatars.githubusercontent.com/u/14957082?s=200&v=4","ownedBy":"openai","aliases":["o3"],"providerType":"external","providerConfig":{"backend":"openai_compatible","base_url":"https://api.openai.com/v1","model_name":"o3"},"attestationSupported":false,"architecture":{"inputModalities":["text","image"],"outputModalities":["text"]},"maxOutputLength":32768,"supportedSamplingParameters":["max_tokens","stop","seed"],"supportedFeatures":["tools","structured_outputs","reasoning"],"isReady":false}},{"modelId":"openai/o3-mini","inputCostPerToken":{"amount":1100,"scale":9,"currency":"USD"},"outputCostPerToken":{"amount":4400,"scale":9,"currency":"USD"},"costPerImage":{"amount":0,"scale":9,"currency":"USD"},"cacheReadCostPerToken":{"amount":0,"scale":9,"currency":"USD"},"metadata":{"verifiable":false,"contextLength":200000,"modelDisplayName":"o3 Mini","modelDescription":"OpenAI o3-mini is a cost-efficient language model optimized for STEM reasoning tasks, particularly excelling in science, mathematics, and coding. 
This model supports the `reasoning_effort` parameter, which can be set to...","modelIcon":"https://avatars.githubusercontent.com/u/14957082?s=200&v=4","ownedBy":"openai","providerType":"external","providerConfig":{"backend":"openai_compatible","base_url":"https://api.openai.com/v1","model_name":"o3-mini"},"attestationSupported":false,"architecture":{"inputModalities":["text"],"outputModalities":["text"]},"maxOutputLength":32768,"supportedSamplingParameters":["max_tokens","stop","seed"],"supportedFeatures":["tools","structured_outputs","reasoning"],"isReady":false}},{"modelId":"openai/o4-mini","inputCostPerToken":{"amount":1100,"scale":9,"currency":"USD"},"outputCostPerToken":{"amount":4400,"scale":9,"currency":"USD"},"costPerImage":{"amount":0,"scale":9,"currency":"USD"},"cacheReadCostPerToken":{"amount":550,"scale":9,"currency":"USD"},"metadata":{"verifiable":false,"contextLength":200000,"modelDisplayName":"OpenAI o4 Mini","modelDescription":"OpenAI's cost-effective reasoning model. Strong performance on math, coding, and scientific reasoning at a fraction of o3's cost. 
200K context window.","modelIcon":"https://avatars.githubusercontent.com/u/14957082?s=200&v=4","ownedBy":"openai","aliases":["o4-mini"],"providerType":"external","providerConfig":{"backend":"openai_compatible","base_url":"https://api.openai.com/v1","model_name":"o4-mini"},"attestationSupported":false,"architecture":{"inputModalities":["text","image"],"outputModalities":["text"]},"maxOutputLength":32768,"supportedSamplingParameters":["max_tokens","stop","seed"],"supportedFeatures":["tools","structured_outputs","reasoning"],"isReady":false}},{"modelId":"openai/privacy-filter","inputCostPerToken":{"amount":10,"scale":9,"currency":"USD"},"outputCostPerToken":{"amount":0,"scale":9,"currency":"USD"},"costPerImage":{"amount":0,"scale":9,"currency":"USD"},"cacheReadCostPerToken":{"amount":0,"scale":9,"currency":"USD"},"metadata":{"verifiable":true,"contextLength":512,"modelDisplayName":"Privacy Filter","modelDescription":"PII detection (token classification) — returns spans for emails, phones, addresses, names, account numbers, secrets. 
NEAR AI runs this model in a TEE; prompts are not anonymized by the model itself, the cloud-api wraps it to do redaction.","modelIcon":"https://avatars.githubusercontent.com/u/29134221?v=4","ownedBy":"nearai","providerType":"vllm","attestationSupported":true,"architecture":{"inputModalities":["text"],"outputModalities":["text"]},"maxOutputLength":1024,"datacenters":[{"country_code":"US"}]}},{"modelId":"openai/whisper-large-v3","inputCostPerToken":{"amount":10,"scale":9,"currency":"USD"},"outputCostPerToken":{"amount":10,"scale":9,"currency":"USD"},"costPerImage":{"amount":0,"scale":9,"currency":"USD"},"cacheReadCostPerToken":{"amount":0,"scale":9,"currency":"USD"},"metadata":{"verifiable":true,"contextLength":448,"modelDisplayName":"Whisper Large v3","modelDescription":"Whisper is a state-of-the-art model for automatic speech recognition (ASR) and speech translation.","modelIcon":"https://avatars.githubusercontent.com/u/14957082?s=200&v=4","ownedBy":"nearai","providerType":"vllm","attestationSupported":true,"architecture":{"inputModalities":["audio"],"outputModalities":["text"]},"huggingFaceId":"openai/whisper-large-v3","maxOutputLength":1024,"supportedSamplingParameters":["temperature"],"datacenters":[{"country_code":"US"}]}},{"modelId":"qwen/qwen3-32b","inputCostPerToken":{"amount":110,"scale":9,"currency":"USD"},"outputCostPerToken":{"amount":460,"scale":9,"currency":"USD"},"costPerImage":{"amount":0,"scale":9,"currency":"USD"},"cacheReadCostPerToken":{"amount":60,"scale":9,"currency":"USD"},"metadata":{"verifiable":true,"contextLength":128000,"modelDisplayName":"qwen3-32b","modelDescription":"Attested model served via Chutes TEE (verified end-to-end by NEAR AI).","modelIcon":"https://avatars.githubusercontent.com/u/141221163?v=4","ownedBy":"attested 
3p","providerType":"chutes","attestationSupported":true,"architecture":{"inputModalities":["text"],"outputModalities":["text"]},"supportedSamplingParameters":["temperature","top_p","frequency_penalty","presence_penalty","stop","seed","max_tokens"],"supportedFeatures":["tools","json_mode"],"isReady":false}},{"modelId":"Qwen/Qwen3.5-122B-A10B","inputCostPerToken":{"amount":400,"scale":9,"currency":"USD"},"outputCostPerToken":{"amount":3200,"scale":9,"currency":"USD"},"costPerImage":{"amount":0,"scale":9,"currency":"USD"},"cacheReadCostPerToken":{"amount":80,"scale":9,"currency":"USD"},"metadata":{"verifiable":true,"contextLength":262144,"modelDisplayName":"Qwen3.5 122B A10B","modelDescription":"Qwen3.5 122B MoE model with 10B active parameters, supporting reasoning and tool calling","modelIcon":"https://avatars.githubusercontent.com/u/141221163?s=200&v=4","ownedBy":"nearai","aliases":["qwen3.5-122b","qwen3.5-122b-a10b","qwen3-5-122b","deepseek-ai/DeepSeek-V3.1"],"providerType":"vllm","attestationSupported":true,"architecture":{"inputModalities":["text","image"],"outputModalities":["text"]},"huggingFaceId":"Qwen/Qwen3.5-122B-A10B","quantization":"bf16","maxOutputLength":16384,"supportedSamplingParameters":["temperature","top_p","top_k","min_p","frequency_penalty","presence_penalty","repetition_penalty","max_tokens","stop","seed","logit_bias"],"supportedFeatures":["tools","structured_outputs","json_mode","reasoning"],"datacenters":[{"country_code":"US"}],"isReady":true,"openrouterSlug":"qwen/qwen3.5-122b-a10b"}},{"modelId":"qwen/qwen3.5-397b-a17b","inputCostPerToken":{"amount":500,"scale":9,"currency":"USD"},"outputCostPerToken":{"amount":3300,"scale":9,"currency":"USD"},"costPerImage":{"amount":0,"scale":9,"currency":"USD"},"cacheReadCostPerToken":{"amount":250,"scale":9,"currency":"USD"},"metadata":{"verifiable":true,"contextLength":128000,"modelDisplayName":"qwen3.5-397b-a17b","modelDescription":"Attested model served via Chutes TEE (verified end-to-end by NEAR 
AI).","modelIcon":"https://avatars.githubusercontent.com/u/141221163?v=4","ownedBy":"attested 3p","providerType":"chutes","attestationSupported":true,"architecture":{"inputModalities":["text"],"outputModalities":["text"]},"supportedSamplingParameters":["temperature","top_p","frequency_penalty","presence_penalty","stop","seed","max_tokens"],"supportedFeatures":["tools","json_mode"],"isReady":false}},{"modelId":"Qwen/Qwen3.6-27B-FP8","inputCostPerToken":{"amount":325,"scale":9,"currency":"USD"},"outputCostPerToken":{"amount":3250,"scale":9,"currency":"USD"},"costPerImage":{"amount":0,"scale":9,"currency":"USD"},"cacheReadCostPerToken":{"amount":160,"scale":9,"currency":"USD"},"metadata":{"verifiable":true,"contextLength":262144,"modelDisplayName":"Qwen 3.6 27B FP8","modelDescription":"Qwen 3.6 27B is a dense FP8 language model with strong reasoning, coding, and tool-use. 256K context window.","modelIcon":"https://avatars.githubusercontent.com/u/141221163?v=4","ownedBy":"nearai","providerType":"vllm","attestationSupported":true,"architecture":{"inputModalities":["text","image"],"outputModalities":["text"]},"huggingFaceId":"Qwen/Qwen3.6-27B-FP8","quantization":"fp8","maxOutputLength":8192,"supportedSamplingParameters":["temperature","top_p","top_k","min_p","frequency_penalty","presence_penalty","repetition_penalty","stop","seed","max_tokens","logit_bias"],"supportedFeatures":["tools","structured_outputs","reasoning","json_mode","logprobs"],"datacenters":[{"country_code":"US"}],"isReady":true,"openrouterSlug":"qwen/qwen3.6-27b"}},{"modelId":"Qwen/Qwen3.6-35B-A3B-FP8","inputCostPerToken":{"amount":170,"scale":9,"currency":"USD"},"outputCostPerToken":{"amount":1100,"scale":9,"currency":"USD"},"costPerImage":{"amount":0,"scale":9,"currency":"USD"},"cacheReadCostPerToken":{"amount":56,"scale":9,"currency":"USD"},"metadata":{"verifiable":true,"contextLength":262144,"modelDisplayName":"Qwen 3.6 35B A3B FP8","modelDescription":"Qwen 3.6 35B is a fast mixture-of-experts 
language model with ~3B active parameters per token. Strong at reasoning, coding, and multilingual tasks with 256K context window.","modelIcon":"https://avatars.githubusercontent.com/u/141221163?s=200&v=4","ownedBy":"nearai","aliases":["qwen3.6-35b-a3b-fp8","qwen/qwen3.6-35b-a3b-fp8","Qwen/Qwen3-30B-A3B-Instruct-2507","qwen/qwen3-30b-a3b-instruct-2507","qwen3-30b-a3b-instruct-2507"],"providerType":"vllm","attestationSupported":true,"architecture":{"inputModalities":["text"],"outputModalities":["text"]},"huggingFaceId":"Qwen/Qwen3.6-35B-A3B-FP8","quantization":"fp8","maxOutputLength":8192,"supportedSamplingParameters":["temperature","top_p","top_k","min_p","frequency_penalty","presence_penalty","repetition_penalty","stop","seed","max_tokens","logit_bias"],"supportedFeatures":["tools","json_mode","structured_outputs","logprobs","reasoning"],"datacenters":[{"country_code":"US"}],"isReady":true,"openrouterSlug":"qwen/qwen3.6-35b-a3b"}},{"modelId":"qwen/qwen3.7-max","inputCostPerToken":{"amount":2800,"scale":9,"currency":"USD"},"outputCostPerToken":{"amount":7500,"scale":9,"currency":"USD"},"costPerImage":{"amount":0,"scale":9,"currency":"USD"},"cacheReadCostPerToken":{"amount":0,"scale":9,"currency":"USD"},"metadata":{"verifiable":false,"contextLength":1000000,"modelDisplayName":"Qwen3.7 Max","modelDescription":"Qwen's most capable proprietary model with 1M context window. 
Strong at reasoning, coding, math, and multilingual tasks.","modelIcon":"https://avatars.githubusercontent.com/u/141221163?s=200&v=4","ownedBy":"qwen","aliases":["qwen3.7-max"],"providerType":"external","providerConfig":{"api_key":"***","backend":"openai_compatible","base_url":"https://openrouter.ai/api/v1","model_name":"qwen/qwen3.7-max"},"attestationSupported":false,"architecture":{"inputModalities":["text"],"outputModalities":["text"]},"maxOutputLength":16384,"supportedSamplingParameters":["temperature","top_p","top_k","frequency_penalty","presence_penalty","max_tokens","stop","seed"],"supportedFeatures":["tools","structured_outputs","json_mode"],"isReady":false}},{"modelId":"Qwen/Qwen3-Embedding-0.6B","inputCostPerToken":{"amount":10,"scale":9,"currency":"USD"},"outputCostPerToken":{"amount":10,"scale":9,"currency":"USD"},"costPerImage":{"amount":0,"scale":9,"currency":"USD"},"cacheReadCostPerToken":{"amount":0,"scale":9,"currency":"USD"},"metadata":{"verifiable":true,"contextLength":32768,"modelDisplayName":"Qwen3-Embedding-0.6B","modelDescription":"The Qwen3 Embedding model series is the latest proprietary model of the Qwen family, specifically designed for text embedding tasks.","modelIcon":"https://avatars.githubusercontent.com/u/223098841?s=200&v=4","ownedBy":"nearai","providerType":"vllm","attestationSupported":true,"architecture":{"inputModalities":["text"],"outputModalities":["embedding"]},"huggingFaceId":"Qwen/Qwen3-Embedding-0.6B","quantization":"bf16","maxOutputLength":1024,"datacenters":[{"country_code":"US"}]}},{"modelId":"Qwen/Qwen3-Reranker-0.6B","inputCostPerToken":{"amount":10,"scale":9,"currency":"USD"},"outputCostPerToken":{"amount":10,"scale":9,"currency":"USD"},"costPerImage":{"amount":0,"scale":9,"currency":"USD"},"cacheReadCostPerToken":{"amount":0,"scale":9,"currency":"USD"},"metadata":{"verifiable":true,"contextLength":40960,"modelDisplayName":"Qwen3-Reranker-0.6B","modelDescription":"The Qwen3 Embedding model series is the latest 
proprietary model of the Qwen family, specifically designed for text embedding and ranking tasks.","modelIcon":"https://avatars.githubusercontent.com/u/223098841?s=200&v=4","ownedBy":"nearai","providerType":"vllm","attestationSupported":true,"architecture":{"inputModalities":["text"],"outputModalities":["text"]},"huggingFaceId":"Qwen/Qwen3-Reranker-0.6B","quantization":"bf16","maxOutputLength":1024,"datacenters":[{"country_code":"US"}]}},{"modelId":"Qwen/Qwen3-VL-30B-A3B-Instruct","inputCostPerToken":{"amount":150,"scale":9,"currency":"USD"},"outputCostPerToken":{"amount":550,"scale":9,"currency":"USD"},"costPerImage":{"amount":0,"scale":9,"currency":"USD"},"cacheReadCostPerToken":{"amount":30,"scale":9,"currency":"USD"},"metadata":{"verifiable":true,"contextLength":16384,"modelDisplayName":"Qwen3-VL-30B-A3B-Instruct","modelDescription":"Qwen3-VL-30B-A3B-Instruct is a vision-language model supporting text and image inputs.","modelIcon":"https://avatars.githubusercontent.com/u/141221163?s=200&v=4","ownedBy":"nearai","providerType":"vllm","attestationSupported":true,"architecture":{"inputModalities":["text","image"],"outputModalities":["text"]},"huggingFaceId":"Qwen/Qwen3-VL-30B-A3B-Instruct","quantization":"fp8","maxOutputLength":8192,"supportedSamplingParameters":["temperature","top_p","top_k","min_p","frequency_penalty","presence_penalty","repetition_penalty","max_tokens","stop","seed","logit_bias"],"supportedFeatures":["structured_outputs","logprobs"],"datacenters":[{"country_code":"US"}],"isReady":true,"openrouterSlug":"qwen/qwen3-vl-30b-a3b-instruct"}},{"modelId":"z-ai/glm-5","inputCostPerToken":{"amount":1050,"scale":9,"currency":"USD"},"outputCostPerToken":{"amount":2810,"scale":9,"currency":"USD"},"costPerImage":{"amount":0,"scale":9,"currency":"USD"},"cacheReadCostPerToken":{"amount":520,"scale":9,"currency":"USD"},"metadata":{"verifiable":true,"contextLength":128000,"modelDisplayName":"glm-5","modelDescription":"Attested model served via Chutes TEE 
(verified end-to-end by NEAR AI).","modelIcon":"https://avatars.githubusercontent.com/u/223098841?v=4","ownedBy":"attested 3p","providerType":"chutes","attestationSupported":true,"architecture":{"inputModalities":["text"],"outputModalities":["text"]},"supportedSamplingParameters":["temperature","top_p","frequency_penalty","presence_penalty","stop","seed","max_tokens"],"supportedFeatures":["tools","json_mode"],"isReady":false}},{"modelId":"z-ai/glm-5.2","inputCostPerToken":{"amount":1400,"scale":9,"currency":"USD"},"outputCostPerToken":{"amount":4400,"scale":9,"currency":"USD"},"costPerImage":{"amount":0,"scale":9,"currency":"USD"},"cacheReadCostPerToken":{"amount":300,"scale":9,"currency":"USD"},"metadata":{"verifiable":true,"contextLength":1048576,"modelDisplayName":"GLM 5.2","modelDescription":"GLM-5.2 is an open-source foundation model featuring improved MTP and IndexShare over GLM-5.1. 753B MoE architecture, FP8 precision, optimized for complex systems engineering and long-horizon agent workflows.","modelIcon":"https://avatars.githubusercontent.com/u/223098841?v=4","ownedBy":"nearai","providerType":"vllm","attestationSupported":true,"architecture":{"inputModalities":["text"],"outputModalities":["text"]},"huggingFaceId":"zai-org/GLM-5.2-FP8","quantization":"fp8","maxOutputLength":131072,"supportedSamplingParameters":["temperature","top_p","top_k","min_p","frequency_penalty","presence_penalty","repetition_penalty","max_tokens","stop","seed","logit_bias"],"supportedFeatures":["tools","structured_outputs","reasoning","json_mode"]}},{"modelId":"zai-org/GLM-5.1-FP8","inputCostPerToken":{"amount":850,"scale":9,"currency":"USD"},"outputCostPerToken":{"amount":3300,"scale":9,"currency":"USD"},"costPerImage":{"amount":0,"scale":9,"currency":"USD"},"cacheReadCostPerToken":{"amount":170,"scale":9,"currency":"USD"},"metadata":{"verifiable":true,"contextLength":202752,"modelDisplayName":"GLM 5.1","modelDescription":"GLM-5.1 is an open-source foundation model built for 
complex systems engineering and long-horizon agent workflows. It delivers production-grade productivity for large-scale programming tasks, with performance aligned to top closed-source models, and is designed for expert developers building at the system level.","modelIcon":"https://avatars.githubusercontent.com/u/223098841?s=200&v=4","ownedBy":"nearai","aliases":["zai-org/GLM-5-FP8","glm-latest","zai-org/GLM-latest","GLM-5","zai-org/GLM-4.7","glm","GLM-5.1","zai-org/GLM-5.1"],"providerType":"vllm","attestationSupported":true,"architecture":{"inputModalities":["text"],"outputModalities":["text"]},"huggingFaceId":"zai-org/GLM-5.1-FP8","quantization":"fp8","maxOutputLength":16384,"supportedSamplingParameters":["temperature","top_p","top_k","min_p","frequency_penalty","presence_penalty","repetition_penalty","max_tokens","stop","seed","logit_bias"],"supportedFeatures":["tools","structured_outputs","reasoning","json_mode"],"datacenters":[{"country_code":"US"}],"isReady":true,"openrouterSlug":"z-ai/glm-5.1"}}],"limit":100,"offset":0,"total":47}