{
  "generated_at": "2026-05-14T01:04:26",
  "total_models": 113,
  "tested_count": 72,
  "tokens_per_call_assumption": {
    "input": 300,
    "output": 1500
  },
  "default_weights": {
    "quality": 0.5,
    "cost": 0.2,
    "tool_calling": 0.15,
    "speed": 0.075,
    "latency": 0.075
  },
  "subscriptions_catalog": {
    "ollama_cloud_pro": {
      "name": "Ollama Cloud",
      "plan": "Pro",
      "price_month_usd": 30,
      "url": "https://ollama.com/cloud",
      "notes": "Rate limit varía por modelo. Recomendado para uso a volumen mid (1-10k calls/día)."
    },
    "xiaomi_standard": {
      "name": "Xiaomi MiMo Standard",
      "plan": "Standard",
      "price_month_usd": 14,
      "url": "https://mimo.xiaomi.com",
      "notes": "200M credits/mes. Off-peak 16-24 UTC = 0.8x consumption. Acceso a 8 modelos MiMo."
    },
    "minimax_agent_pro": {
      "name": "MiniMax Agent Pro",
      "plan": "Agent Pro",
      "price_month_usd": 19,
      "url": "https://api.minimax.io",
      "notes": "Acceso a M2.7 highspeed + límites generosos para agentes (1k+ calls/día)."
    },
    "anthropic_pro": {
      "name": "Anthropic Pro",
      "plan": "Pro",
      "price_month_usd": 20,
      "url": "https://www.anthropic.com/pricing",
      "notes": "Sub Anthropic Pro $20/mes. NO incluye API access (solo claude.ai web)."
    }
  },
  "models": [
    {
      "key": "groq-llama-4-scout",
      "id": "meta-llama/llama-4-scout-17b-16e-instruct",
      "name": "Llama 4 Scout 17B (Groq preview)",
      "tier": "cheap",
      "provider": "groq_direct",
      "open_source": true,
      "license": "Llama Community",
      "cost_input_per_M": 0.11,
      "cost_output_per_M": 0.34,
      "cost_per_1k_calls_usd": 0.543,
      "subscriptions": [],
      "notes": "",
      "tested": true,
      "tool_calling": true,
      "thinking": false,
      "multimodal": true,
      "runs": 157,
      "score_global": 7.69,
      "score_by_pillar": {
        "Contenido": 8.26,
        "Agentes": 7.79,
        "Coding": 8.15,
        "Razonamiento": 8.26
      },
      "score_by_suite": {
        "content_generation": 8.59,
        "tool_calling": 8.25,
        "task_management": 8.44,
        "code_generation": 8.63,
        "reasoning": 8.26,
        "summarization": 8.25,
        "presentation": 8.57,
        "startup_content": 8.4,
        "deep_reasoning": 8.1,
        "customer_support": 8.28,
        "structured_output": 8.37,
        "hallucination": 8.44,
        "creativity": 8.34,
        "string_precision": 8.05,
        "news_seo_writing": 7.49,
        "ocr_extraction": 7.73,
        "orchestration": 7.71,
        "multi_turn": 7.54,
        "policy_adherence": 8.04,
        "agent_capabilities": 6.7,
        "strategy": 8.38,
        "sales_outreach": 8.53,
        "translation": 8.29,
        "agent_long_horizon": 8.26,
        "niah_es": 6.91
      },
      "quality_avg": 7.7,
      "cost_score_avg": 8.32,
      "speed_score_avg": 7.81,
      "latency_score_avg": 5.87,
      "tool_calling_score_avg": 7.04,
      "judge_score_avg": 4.04,
      "tokens_per_second": 169.7,
      "latency_avg_s": 2.68,
      "total_input_tokens": 1518574,
      "total_output_tokens": 73662
    },
    {
      "key": "groq-llama-3.1-8b",
      "id": "llama-3.1-8b-instant",
      "name": "Llama 3.1 8B Instant (Groq)",
      "tier": "ultra_cheap",
      "provider": "groq_direct",
      "open_source": true,
      "license": "Llama Community",
      "cost_input_per_M": 0.05,
      "cost_output_per_M": 0.08,
      "cost_per_1k_calls_usd": 0.135,
      "subscriptions": [],
      "notes": "",
      "tested": true,
      "tool_calling": true,
      "thinking": false,
      "multimodal": false,
      "runs": 145,
      "score_global": 7.67,
      "score_by_pillar": {
        "Contenido": 8.27,
        "Agentes": 8.06,
        "Coding": 8.09,
        "Razonamiento": 7.8
      },
      "score_by_suite": {
        "content_generation": 8.65,
        "tool_calling": 8.64,
        "task_management": 8.46,
        "code_generation": 8.32,
        "reasoning": 8.11,
        "summarization": 8.14,
        "presentation": 8.34,
        "startup_content": 8.36,
        "deep_reasoning": 7.09,
        "customer_support": 7.77,
        "structured_output": 8.62,
        "hallucination": 8.32,
        "creativity": 8.49,
        "string_precision": 7.76,
        "news_seo_writing": 7.43,
        "ocr_extraction": 7.87,
        "orchestration": 7.83,
        "multi_turn": 8.0,
        "policy_adherence": 7.94,
        "agent_capabilities": 8.0,
        "strategy": 8.4,
        "sales_outreach": 8.43,
        "translation": 8.62,
        "agent_long_horizon": 8.21,
        "niah_es_lite": 6.7
      },
      "quality_avg": 7.33,
      "cost_score_avg": 8.72,
      "speed_score_avg": 8.02,
      "latency_score_avg": 6.3,
      "tool_calling_score_avg": 7.1,
      "judge_score_avg": 3.9,
      "tokens_per_second": 262.2,
      "latency_avg_s": 2.97,
      "total_input_tokens": 1533194,
      "total_output_tokens": 77242
    },
    {
      "key": "devstral",
      "id": "mistralai/devstral-small",
      "name": "Devstral Small",
      "tier": "ultra_cheap",
      "provider": "openrouter",
      "open_source": true,
      "license": "Apache 2.0",
      "cost_input_per_M": 0.1,
      "cost_output_per_M": 0.3,
      "cost_per_1k_calls_usd": 0.48,
      "subscriptions": [],
      "notes": "",
      "tested": true,
      "tool_calling": true,
      "thinking": false,
      "multimodal": false,
      "runs": 223,
      "score_global": 7.52,
      "score_by_pillar": {
        "Contenido": 7.48,
        "Agentes": 7.21,
        "Coding": 8.09,
        "Razonamiento": 7.71
      },
      "score_by_suite": {
        "content_generation": 7.82,
        "tool_calling": 6.55,
        "task_management": 7.77,
        "code_generation": 8.14,
        "reasoning": 7.92,
        "summarization": 7.32,
        "presentation": 7.6,
        "startup_content": 7.8,
        "deep_reasoning": 7.64,
        "customer_support": 7.29,
        "structured_output": 7.77,
        "hallucination": 7.65,
        "creativity": 7.44,
        "string_precision": 8.69,
        "news_seo_writing": 6.77,
        "agent_capabilities": 7.07,
        "ocr_extraction": 7.11,
        "orchestration": 7.23,
        "multi_turn": 7.47,
        "policy_adherence": 7.54,
        "strategy": 7.65,
        "sales_outreach": 7.7,
        "translation": 8.29,
        "agent_long_horizon": 8.12,
        "niah_es": 7.24
      },
      "quality_avg": 7.89,
      "cost_score_avg": 7.57,
      "speed_score_avg": 8.47,
      "latency_score_avg": 5.42,
      "tool_calling_score_avg": 6.81,
      "judge_score_avg": 4.1,
      "tokens_per_second": 139.3,
      "latency_avg_s": 4.89,
      "total_input_tokens": 1644856,
      "total_output_tokens": 119582
    },
    {
      "key": "mistral-small-4",
      "id": "mistralai/mistral-small-2603",
      "name": "Mistral Small 4",
      "tier": "cheap",
      "provider": "openrouter",
      "open_source": true,
      "license": "Apache 2.0",
      "cost_input_per_M": 0.15,
      "cost_output_per_M": 0.6,
      "cost_per_1k_calls_usd": 0.945,
      "subscriptions": [],
      "notes": "",
      "tested": true,
      "tool_calling": true,
      "thinking": false,
      "multimodal": false,
      "runs": 166,
      "score_global": 7.51,
      "score_by_pillar": {
        "Contenido": 7.91,
        "Agentes": 7.7,
        "Coding": 7.83,
        "Razonamiento": 7.87
      },
      "score_by_suite": {
        "content_generation": 8.22,
        "tool_calling": 7.42,
        "task_management": 8.28,
        "code_generation": 8.11,
        "reasoning": 8.17,
        "summarization": 7.6,
        "presentation": 7.63,
        "startup_content": 8.05,
        "deep_reasoning": 7.56,
        "customer_support": 8.3,
        "structured_output": 8.08,
        "hallucination": 8.03,
        "creativity": 8.08,
        "string_precision": 7.85,
        "news_seo_writing": 7.3,
        "ocr_extraction": 7.41,
        "orchestration": 6.87,
        "multi_turn": 7.35,
        "policy_adherence": 7.89,
        "agent_capabilities": 8.07,
        "strategy": 8.01,
        "sales_outreach": 8.02,
        "translation": 8.38,
        "agent_long_horizon": 7.68,
        "niah_es": 7.01
      },
      "quality_avg": 7.88,
      "cost_score_avg": 7.76,
      "speed_score_avg": 7.3,
      "latency_score_avg": 4.53,
      "tool_calling_score_avg": 7.07,
      "judge_score_avg": 4.13,
      "tokens_per_second": 81.8,
      "latency_avg_s": 9.93,
      "total_input_tokens": 1960762,
      "total_output_tokens": 140488
    },
    {
      "key": "groq-gpt-oss-20b",
      "id": "openai/gpt-oss-20b",
      "name": "GPT-OSS 20B (Groq)",
      "tier": "ultra_cheap",
      "provider": "groq_direct",
      "open_source": true,
      "license": "Apache 2.0",
      "cost_input_per_M": 0.075,
      "cost_output_per_M": 0.3,
      "cost_per_1k_calls_usd": 0.472,
      "subscriptions": [],
      "notes": "",
      "tested": true,
      "tool_calling": true,
      "thinking": false,
      "multimodal": false,
      "runs": 144,
      "score_global": 7.47,
      "score_by_pillar": {
        "Contenido": 8.25,
        "Agentes": 7.37,
        "Coding": 7.92,
        "Razonamiento": 7.78
      },
      "score_by_suite": {
        "content_generation": 8.82,
        "tool_calling": 7.32,
        "task_management": 7.78,
        "code_generation": 8.28,
        "reasoning": 8.28,
        "summarization": 8.0,
        "presentation": 8.11,
        "startup_content": 8.52,
        "deep_reasoning": 7.22,
        "customer_support": 7.0,
        "structured_output": 8.34,
        "hallucination": 8.21,
        "creativity": 8.27,
        "string_precision": 8.32,
        "news_seo_writing": 7.27,
        "ocr_extraction": 6.8,
        "orchestration": 6.41,
        "multi_turn": 8.0,
        "policy_adherence": 7.89,
        "agent_capabilities": 7.52,
        "strategy": 8.01,
        "sales_outreach": 8.57,
        "translation": 8.62,
        "agent_long_horizon": 7.98,
        "niah_es_lite": 6.66
      },
      "quality_avg": 7.1,
      "cost_score_avg": 8.47,
      "speed_score_avg": 9.26,
      "latency_score_avg": 5.89,
      "tool_calling_score_avg": 6.94,
      "judge_score_avg": 4.07,
      "tokens_per_second": 474.4,
      "latency_avg_s": 2.86,
      "total_input_tokens": 1367972,
      "total_output_tokens": 161997
    },
    {
      "key": "mimo-v2-omni-xiaomi",
      "id": "mimo-v2-omni",
      "name": "MiMo V2-Omni (Xiaomi direct)",
      "tier": "subscription",
      "provider": "xiaomi_direct",
      "open_source": true,
      "license": "MIT",
      "cost_input_per_M": 0.07,
      "cost_output_per_M": 0.07,
      "cost_per_1k_calls_usd": 0.126,
      "subscriptions": [
        {
          "key": "xiaomi_standard",
          "name": "Xiaomi MiMo Standard",
          "plan": "Standard",
          "price_month_usd": 14,
          "url": "https://mimo.xiaomi.com",
          "notes": "200M credits/mes. Off-peak 16-24 UTC = 0.8x consumption. Acceso a 8 modelos MiMo."
        }
      ],
      "notes": "Mismo modelo que via OpenRouter — comparar provider stability",
      "tested": true,
      "tool_calling": true,
      "thinking": false,
      "multimodal": true,
      "runs": 102,
      "score_global": 7.46,
      "score_by_pillar": {
        "Contenido": 7.8,
        "Agentes": 7.25,
        "Coding": 6.9,
        "Razonamiento": 7.35
      },
      "score_by_suite": {
        "content_generation": 8.4,
        "tool_calling": 7.27,
        "task_management": 8.54,
        "code_generation": 6.54,
        "reasoning": 8.36,
        "summarization": 6.51,
        "presentation": 8.5,
        "startup_content": 8.42,
        "deep_reasoning": 6.01,
        "customer_support": 6.93,
        "structured_output": 7.14,
        "hallucination": 8.1,
        "creativity": 8.16,
        "string_precision": 7.7,
        "news_seo_writing": 7.05,
        "ocr_extraction": 6.09,
        "orchestration": 6.09,
        "multi_turn": 7.58,
        "policy_adherence": 7.75,
        "agent_capabilities": 7.19,
        "strategy": 8.29,
        "sales_outreach": 8.38,
        "translation": 6.55,
        "agent_long_horizon": 8.17
      },
      "quality_avg": 7.27,
      "cost_score_avg": 8.84,
      "speed_score_avg": 8.93,
      "latency_score_avg": 2.09,
      "tool_calling_score_avg": 6.93,
      "judge_score_avg": 4.01,
      "tokens_per_second": 102.3,
      "latency_avg_s": 21.65,
      "total_input_tokens": 188123,
      "total_output_tokens": 170549
    },
    {
      "key": "mimo-v2.5",
      "id": "mimo-v2.5",
      "name": "MiMo V2.5 (Xiaomi)",
      "tier": "subscription",
      "provider": "xiaomi_direct",
      "open_source": false,
      "license": "Xiaomi Commercial",
      "cost_input_per_M": 0.07,
      "cost_output_per_M": 0.07,
      "cost_per_1k_calls_usd": 0.126,
      "subscriptions": [
        {
          "key": "xiaomi_standard",
          "name": "Xiaomi MiMo Standard",
          "plan": "Standard",
          "price_month_usd": 14,
          "url": "https://mimo.xiaomi.com",
          "notes": "200M credits/mes. Off-peak 16-24 UTC = 0.8x consumption. Acceso a 8 modelos MiMo."
        }
      ],
      "notes": "All-in-one multimodal nativo, 1M context, lanzado 22 abril 2026",
      "tested": true,
      "tool_calling": true,
      "thinking": false,
      "multimodal": true,
      "runs": 148,
      "score_global": 7.45,
      "score_by_pillar": {
        "Contenido": 7.97,
        "Agentes": 7.44,
        "Coding": 7.43,
        "Razonamiento": 7.26
      },
      "score_by_suite": {
        "content_generation": 8.29,
        "tool_calling": 7.31,
        "task_management": 6.75,
        "code_generation": 7.81,
        "reasoning": 6.78,
        "summarization": 7.64,
        "presentation": 8.2,
        "startup_content": 8.26,
        "deep_reasoning": 6.65,
        "customer_support": 8.08,
        "structured_output": 7.68,
        "hallucination": 8.08,
        "creativity": 8.07,
        "string_precision": 7.6,
        "news_seo_writing": 7.0,
        "ocr_extraction": 6.73,
        "orchestration": 7.38,
        "multi_turn": 7.49,
        "policy_adherence": 7.68,
        "agent_capabilities": 7.26,
        "strategy": 8.13,
        "sales_outreach": 8.49,
        "translation": 8.11,
        "agent_long_horizon": 8.01,
        "niah_es_lite": 7.05
      },
      "quality_avg": 7.63,
      "cost_score_avg": 8.47,
      "speed_score_avg": 7.8,
      "latency_score_avg": 1.98,
      "tool_calling_score_avg": 7.14,
      "judge_score_avg": 4.12,
      "tokens_per_second": 71.1,
      "latency_avg_s": 20.04,
      "total_input_tokens": 1691782,
      "total_output_tokens": 186765
    },
    {
      "key": "gemini-3.1-flash-lite",
      "id": "google/gemini-3.1-flash-lite-preview",
      "name": "Gemini 3.1 Flash Lite",
      "tier": "cheap",
      "provider": "openrouter",
      "open_source": false,
      "license": "",
      "cost_input_per_M": 0.25,
      "cost_output_per_M": 1.5,
      "cost_per_1k_calls_usd": 2.325,
      "subscriptions": [],
      "notes": "",
      "tested": true,
      "tool_calling": true,
      "thinking": false,
      "multimodal": true,
      "runs": 148,
      "score_global": 7.44,
      "score_by_pillar": {
        "Contenido": 7.92,
        "Agentes": 7.49,
        "Coding": 7.8,
        "Razonamiento": 7.89
      },
      "score_by_suite": {
        "content_generation": 8.25,
        "tool_calling": 7.84,
        "task_management": 8.25,
        "code_generation": 8.1,
        "reasoning": 8.13,
        "summarization": 7.55,
        "presentation": 7.77,
        "startup_content": 8.0,
        "deep_reasoning": 7.54,
        "customer_support": 7.16,
        "structured_output": 7.92,
        "hallucination": 8.18,
        "creativity": 8.08,
        "string_precision": 7.83,
        "news_seo_writing": 7.12,
        "ocr_extraction": 7.45,
        "orchestration": 6.83,
        "multi_turn": 7.82,
        "policy_adherence": 7.75,
        "agent_capabilities": 7.17,
        "strategy": 8.06,
        "sales_outreach": 8.43,
        "translation": 8.3,
        "agent_long_horizon": 7.57,
        "niah_es_lite": 6.78
      },
      "quality_avg": 7.82,
      "cost_score_avg": 7.36,
      "speed_score_avg": 7.59,
      "latency_score_avg": 4.98,
      "tool_calling_score_avg": 7.08,
      "judge_score_avg": 4.11,
      "tokens_per_second": 110.3,
      "latency_avg_s": 4.08,
      "total_input_tokens": 1348663,
      "total_output_tokens": 83980
    },
    {
      "key": "nemotron-nano",
      "id": "nvidia/nemotron-3-nano-30b-a3b",
      "name": "Nemotron 3 Nano 30B",
      "tier": "ultra_cheap",
      "provider": "openrouter",
      "open_source": true,
      "license": "NVIDIA Open",
      "cost_input_per_M": 0.05,
      "cost_output_per_M": 0.2,
      "cost_per_1k_calls_usd": 0.315,
      "subscriptions": [],
      "notes": "",
      "tested": true,
      "tool_calling": true,
      "thinking": true,
      "multimodal": false,
      "runs": 103,
      "score_global": 7.43,
      "score_by_pillar": {
        "Contenido": 7.85,
        "Agentes": 6.67,
        "Coding": 7.66,
        "Razonamiento": 7.74
      },
      "score_by_suite": {
        "content_generation": 8.0,
        "tool_calling": 6.84,
        "task_management": 7.95,
        "code_generation": 8.05,
        "reasoning": 7.89,
        "summarization": 7.5,
        "presentation": 8.15,
        "startup_content": 8.2,
        "deep_reasoning": 7.58,
        "customer_support": 6.4,
        "structured_output": 7.81,
        "hallucination": 7.72,
        "creativity": 7.97,
        "string_precision": 7.8,
        "news_seo_writing": 7.19,
        "ocr_extraction": 7.07,
        "orchestration": 5.72,
        "multi_turn": 7.17,
        "policy_adherence": 7.46,
        "agent_capabilities": 5.91,
        "strategy": 7.93,
        "sales_outreach": 7.95,
        "translation": 7.93,
        "agent_long_horizon": 7.5
      },
      "quality_avg": 7.79,
      "cost_score_avg": 8.65,
      "speed_score_avg": 8.21,
      "latency_score_avg": 1.65,
      "tool_calling_score_avg": 6.6,
      "judge_score_avg": 4.03,
      "tokens_per_second": 86.3,
      "latency_avg_s": 37.48,
      "total_input_tokens": 375017,
      "total_output_tokens": 310848
    },
    {
      "key": "mimo-v2.5-pro",
      "id": "mimo-v2.5-pro",
      "name": "MiMo V2.5-Pro (Xiaomi)",
      "tier": "subscription",
      "provider": "xiaomi_direct",
      "open_source": false,
      "license": "Xiaomi Commercial",
      "cost_input_per_M": 0.14,
      "cost_output_per_M": 0.14,
      "cost_per_1k_calls_usd": 0.252,
      "subscriptions": [
        {
          "key": "xiaomi_standard",
          "name": "Xiaomi MiMo Standard",
          "plan": "Standard",
          "price_month_usd": 14,
          "url": "https://mimo.xiaomi.com",
          "notes": "200M credits/mes. Off-peak 16-24 UTC = 0.8x consumption. Acceso a 8 modelos MiMo."
        }
      ],
      "notes": "Flagship reasoning, agentic, 1M context, lanzado 22 abril 2026",
      "tested": true,
      "tool_calling": true,
      "thinking": false,
      "multimodal": true,
      "runs": 103,
      "score_global": 7.42,
      "score_by_pillar": {
        "Contenido": 7.7,
        "Agentes": 7.3,
        "Coding": 7.53,
        "Razonamiento": 7.18
      },
      "score_by_suite": {
        "content_generation": 8.35,
        "tool_calling": 6.99,
        "task_management": 7.69,
        "code_generation": 8.04,
        "reasoning": 6.59,
        "summarization": 7.66,
        "presentation": 8.09,
        "startup_content": 8.23,
        "deep_reasoning": 6.81,
        "customer_support": 7.31,
        "structured_output": 7.54,
        "hallucination": 7.87,
        "creativity": 7.47,
        "string_precision": 7.26,
        "news_seo_writing": 7.01,
        "ocr_extraction": 7.42,
        "orchestration": 7.44,
        "multi_turn": 7.67,
        "policy_adherence": 7.28,
        "agent_capabilities": 6.89,
        "strategy": 7.82,
        "sales_outreach": 8.45,
        "translation": 6.41,
        "agent_long_horizon": 7.16
      },
      "quality_avg": 7.65,
      "cost_score_avg": 8.83,
      "speed_score_avg": 6.97,
      "latency_score_avg": 1.44,
      "tool_calling_score_avg": 7.11,
      "judge_score_avg": 4.13,
      "tokens_per_second": 49.1,
      "latency_avg_s": 31.75,
      "total_input_tokens": 209698,
      "total_output_tokens": 164371
    },
    {
      "key": "mimo-v2-flash",
      "id": "xiaomi/mimo-v2-flash",
      "name": "MiMo-V2-Flash",
      "tier": "ultra_cheap",
      "provider": "openrouter",
      "open_source": true,
      "license": "MIT",
      "cost_input_per_M": 0.09,
      "cost_output_per_M": 0.29,
      "cost_per_1k_calls_usd": 0.462,
      "subscriptions": [],
      "notes": "",
      "tested": true,
      "tool_calling": true,
      "thinking": false,
      "multimodal": false,
      "runs": 103,
      "score_global": 7.41,
      "score_by_pillar": {
        "Contenido": 7.86,
        "Agentes": 6.6,
        "Coding": 7.59,
        "Razonamiento": 7.79
      },
      "score_by_suite": {
        "content_generation": 8.21,
        "tool_calling": 6.78,
        "task_management": 8.1,
        "code_generation": 8.05,
        "reasoning": 8.19,
        "summarization": 7.7,
        "presentation": 7.96,
        "startup_content": 7.93,
        "deep_reasoning": 7.53,
        "customer_support": 5.28,
        "structured_output": 7.56,
        "hallucination": 7.73,
        "creativity": 7.9,
        "string_precision": 7.42,
        "news_seo_writing": 7.12,
        "ocr_extraction": 7.45,
        "orchestration": 5.88,
        "multi_turn": 7.67,
        "policy_adherence": 7.41,
        "agent_capabilities": 5.83,
        "strategy": 7.96,
        "sales_outreach": 8.22,
        "translation": 8.11,
        "agent_long_horizon": 7.55
      },
      "quality_avg": 7.72,
      "cost_score_avg": 8.81,
      "speed_score_avg": 6.25,
      "latency_score_avg": 2.46,
      "tool_calling_score_avg": 6.87,
      "judge_score_avg": 4.14,
      "tokens_per_second": 54.2,
      "latency_avg_s": 33.67,
      "total_input_tokens": 231731,
      "total_output_tokens": 129827
    },
    {
      "key": "mimo-v2-pro-xiaomi",
      "id": "mimo-v2-pro",
      "name": "MiMo V2-Pro (Xiaomi direct)",
      "tier": "subscription",
      "provider": "xiaomi_direct",
      "open_source": true,
      "license": "MIT",
      "cost_input_per_M": 0.07,
      "cost_output_per_M": 0.07,
      "cost_per_1k_calls_usd": 0.126,
      "subscriptions": [
        {
          "key": "xiaomi_standard",
          "name": "Xiaomi MiMo Standard",
          "plan": "Standard",
          "price_month_usd": 14,
          "url": "https://mimo.xiaomi.com",
          "notes": "200M credits/mes. Off-peak 16-24 UTC = 0.8x consumption. Acceso a 8 modelos MiMo."
        }
      ],
      "notes": "Mismo modelo que via OpenRouter — comparar provider stability",
      "tested": true,
      "tool_calling": true,
      "thinking": false,
      "multimodal": false,
      "runs": 103,
      "score_global": 7.39,
      "score_by_pillar": {
        "Contenido": 7.87,
        "Agentes": 7.28,
        "Coding": 7.16,
        "Razonamiento": 6.74
      },
      "score_by_suite": {
        "content_generation": 8.14,
        "tool_calling": 6.67,
        "task_management": 8.08,
        "code_generation": 8.28,
        "reasoning": 5.15,
        "summarization": 7.66,
        "presentation": 8.18,
        "startup_content": 8.23,
        "deep_reasoning": 7.24,
        "customer_support": 7.39,
        "structured_output": 7.46,
        "hallucination": 7.85,
        "creativity": 7.94,
        "string_precision": 7.14,
        "news_seo_writing": 7.07,
        "ocr_extraction": 6.05,
        "orchestration": 7.5,
        "multi_turn": 7.14,
        "policy_adherence": 7.22,
        "agent_capabilities": 7.16,
        "strategy": 6.23,
        "sales_outreach": 8.24,
        "translation": 7.73,
        "agent_long_horizon": 7.7
      },
      "quality_avg": 7.61,
      "cost_score_avg": 8.86,
      "speed_score_avg": 6.5,
      "latency_score_avg": 1.26,
      "tool_calling_score_avg": 6.96,
      "judge_score_avg": 4.14,
      "tokens_per_second": 44.8,
      "latency_avg_s": 36.83,
      "total_input_tokens": 248425,
      "total_output_tokens": 165247
    },
    {
      "key": "gpt-oss-120b-cloud",
      "id": "gpt-oss:120b-cloud",
      "name": "GPT-OSS 120B (Ollama Cloud)",
      "tier": "cloud_ollama",
      "provider": "ollama_cloud",
      "open_source": true,
      "license": "Apache 2.0",
      "cost_input_per_M": 0,
      "cost_output_per_M": 0,
      "cost_per_1k_calls_usd": 0.0,
      "subscriptions": [
        {
          "key": "ollama_cloud_pro",
          "name": "Ollama Cloud",
          "plan": "Pro",
          "price_month_usd": 30,
          "url": "https://ollama.com/cloud",
          "notes": "Rate limit varía por modelo. Recomendado para uso a volumen mid (1-10k calls/día)."
        }
      ],
      "notes": "",
      "tested": true,
      "tool_calling": true,
      "thinking": false,
      "multimodal": false,
      "runs": 148,
      "score_global": 7.37,
      "score_by_pillar": {
        "Contenido": 8.07,
        "Agentes": 6.9,
        "Coding": 7.67,
        "Razonamiento": 7.8
      },
      "score_by_suite": {
        "content_generation": 8.32,
        "tool_calling": 7.12,
        "task_management": 6.67,
        "code_generation": 8.16,
        "reasoning": 8.29,
        "summarization": 7.76,
        "presentation": 8.11,
        "startup_content": 8.31,
        "deep_reasoning": 7.69,
        "customer_support": 6.31,
        "structured_output": 7.82,
        "hallucination": 7.41,
        "creativity": 8.09,
        "string_precision": 7.65,
        "news_seo_writing": 7.56,
        "ocr_extraction": 7.21,
        "orchestration": 6.33,
        "multi_turn": 7.41,
        "policy_adherence": 7.44,
        "agent_capabilities": 7.07,
        "strategy": 7.91,
        "sales_outreach": 8.28,
        "translation": 8.08,
        "agent_long_horizon": 8.6,
        "niah_es_lite": 6.65
      },
      "quality_avg": 7.15,
      "cost_score_avg": 10.0,
      "speed_score_avg": 7.47,
      "latency_score_avg": 2.79,
      "tool_calling_score_avg": 6.87,
      "judge_score_avg": 4.04,
      "tokens_per_second": 67.7,
      "latency_avg_s": 14.3,
      "total_input_tokens": 1465863,
      "total_output_tokens": 180251
    },
    {
      "key": "gemini-flash",
      "id": "google/gemini-2.5-flash",
      "name": "Gemini 2.5 Flash",
      "tier": "cheap",
      "provider": "openrouter",
      "open_source": false,
      "license": "",
      "cost_input_per_M": 0.3,
      "cost_output_per_M": 2.5,
      "cost_per_1k_calls_usd": 3.84,
      "subscriptions": [],
      "notes": "",
      "tested": true,
      "tool_calling": true,
      "thinking": false,
      "multimodal": true,
      "runs": 135,
      "score_global": 7.36,
      "score_by_pillar": {
        "Razonamiento": 7.37,
        "Agentes": 6.81,
        "Coding": 7.79,
        "Contenido": 7.53
      },
      "score_by_suite": {
        "deep_reasoning": 7.29,
        "customer_support": 5.85,
        "structured_output": 7.41,
        "hallucination": 7.55,
        "creativity": 7.26,
        "string_precision": 8.38,
        "news_seo_writing": 7.26,
        "content_generation": 7.9,
        "tool_calling": 6.63,
        "task_management": 7.57,
        "code_generation": 7.5,
        "reasoning": 7.43,
        "summarization": 7.56,
        "presentation": 7.48,
        "startup_content": 7.68,
        "ocr_extraction": 7.19,
        "orchestration": 7.28,
        "multi_turn": 7.1,
        "policy_adherence": 7.35,
        "agent_capabilities": 6.89,
        "strategy": 7.27,
        "sales_outreach": 8.03,
        "translation": 7.94,
        "agent_long_horizon": 7.34
      },
      "quality_avg": 7.87,
      "cost_score_avg": 7.28,
      "speed_score_avg": 8.61,
      "latency_score_avg": 3.6,
      "tool_calling_score_avg": 6.69,
      "judge_score_avg": 4.13,
      "tokens_per_second": 115.8,
      "latency_avg_s": 7.88,
      "total_input_tokens": 223199,
      "total_output_tokens": 154266
    },
    {
      "key": "groq-llama-3.3-70b",
      "id": "llama-3.3-70b-versatile",
      "name": "Llama 3.3 70B (Groq)",
      "tier": "cheap",
      "provider": "groq_direct",
      "open_source": true,
      "license": "Llama Community",
      "cost_input_per_M": 0.59,
      "cost_output_per_M": 0.79,
      "cost_per_1k_calls_usd": 1.362,
      "subscriptions": [],
      "notes": "",
      "tested": true,
      "tool_calling": true,
      "thinking": false,
      "multimodal": false,
      "runs": 169,
      "score_global": 7.36,
      "score_by_pillar": {
        "Contenido": 8.06,
        "Agentes": 7.6,
        "Coding": 7.88,
        "Razonamiento": 7.84
      },
      "score_by_suite": {
        "content_generation": 8.35,
        "tool_calling": 6.94,
        "task_management": 8.13,
        "code_generation": 8.01,
        "reasoning": 8.02,
        "summarization": 7.64,
        "presentation": 7.97,
        "startup_content": 8.07,
        "deep_reasoning": 7.54,
        "customer_support": 7.85,
        "structured_output": 8.29,
        "hallucination": 8.07,
        "creativity": 8.26,
        "string_precision": 7.62,
        "news_seo_writing": 7.29,
        "ocr_extraction": 7.76,
        "orchestration": 7.23,
        "multi_turn": 7.69,
        "policy_adherence": 7.74,
        "agent_capabilities": 7.74,
        "strategy": 8.05,
        "sales_outreach": 8.5,
        "translation": 8.58,
        "agent_long_horizon": 7.91,
        "niah_es": 6.27
      },
      "quality_avg": 7.72,
      "cost_score_avg": 7.13,
      "speed_score_avg": 7.77,
      "latency_score_avg": 5.67,
      "tool_calling_score_avg": 7.09,
      "judge_score_avg": 4.0,
      "tokens_per_second": 172.9,
      "latency_avg_s": 3.81,
      "total_input_tokens": 1953899,
      "total_output_tokens": 115755
    },
    {
      "key": "gemini-flash-lite",
      "id": "google/gemini-2.5-flash-lite",
      "name": "Gemini 2.5 Flash Lite",
      "tier": "ultra_cheap",
      "provider": "openrouter",
      "open_source": false,
      "license": "",
      "cost_input_per_M": 0.1,
      "cost_output_per_M": 0.4,
      "cost_per_1k_calls_usd": 0.63,
      "subscriptions": [],
      "notes": "",
      "tested": true,
      "tool_calling": true,
      "thinking": false,
      "multimodal": true,
      "runs": 155,
      "score_global": 7.34,
      "score_by_pillar": {
        "Contenido": 7.47,
        "Agentes": 6.76,
        "Coding": 7.89,
        "Razonamiento": 7.38
      },
      "score_by_suite": {
        "content_generation": 7.51,
        "tool_calling": 6.25,
        "task_management": 7.5,
        "code_generation": 7.62,
        "reasoning": 7.43,
        "summarization": 7.43,
        "presentation": 7.42,
        "startup_content": 7.54,
        "deep_reasoning": 7.27,
        "customer_support": 6.17,
        "structured_output": 7.71,
        "hallucination": 7.63,
        "creativity": 7.25,
        "string_precision": 8.48,
        "news_seo_writing": 7.17,
        "agent_capabilities": 6.51,
        "ocr_extraction": 7.2,
        "orchestration": 6.76,
        "multi_turn": 7.6,
        "policy_adherence": 7.67,
        "strategy": 7.24,
        "sales_outreach": 8.04,
        "translation": 8.23
      },
      "quality_avg": 7.79,
      "cost_score_avg": 7.25,
      "speed_score_avg": 9.06,
      "latency_score_avg": 4.48,
      "tool_calling_score_avg": 6.42,
      "judge_score_avg": 4.15,
      "tokens_per_second": 170.9,
      "latency_avg_s": 3.94,
      "total_input_tokens": 40134,
      "total_output_tokens": 129841
    },
    {
      "key": "gemma-4-31b",
      "id": "google/gemma-4-31b-it",
      "name": "Gemma 4 31B",
      "tier": "cheap",
      "provider": "openrouter",
      "open_source": true,
      "license": "Apache 2.0",
      "cost_input_per_M": 0.3,
      "cost_output_per_M": 0.6,
      "cost_per_1k_calls_usd": 0.99,
      "subscriptions": [],
      "notes": "",
      "tested": true,
      "tool_calling": true,
      "thinking": true,
      "multimodal": false,
      "runs": 119,
      "score_global": 7.3,
      "score_by_pillar": {
        "Contenido": 7.4,
        "Agentes": 7.07,
        "Coding": 7.31,
        "Razonamiento": 7.53
      },
      "score_by_suite": {
        "content_generation": 7.05,
        "tool_calling": 7.06,
        "startup_content": 7.57,
        "task_management": 7.76,
        "code_generation": 7.7,
        "reasoning": 7.99,
        "summarization": 7.38,
        "presentation": 7.57,
        "deep_reasoning": 7.06,
        "customer_support": 7.5,
        "structured_output": 7.65,
        "hallucination": 7.48,
        "creativity": 7.89,
        "string_precision": 7.38,
        "news_seo_writing": 6.95,
        "ocr_extraction": 6.64,
        "orchestration": 6.05,
        "multi_turn": 7.36,
        "policy_adherence": 7.06,
        "agent_capabilities": 7.09,
        "strategy": 7.59,
        "sales_outreach": 7.81,
        "translation": 7.46,
        "agent_long_horizon": 7.34
      },
      "quality_avg": 8.19,
      "cost_score_avg": 8.39,
      "speed_score_avg": 3.61,
      "latency_score_avg": 1.82,
      "tool_calling_score_avg": 7.23,
      "judge_score_avg": 4.17,
      "tokens_per_second": 22.1,
      "latency_avg_s": 139.35,
      "total_input_tokens": 293775,
      "total_output_tokens": 129546
    },
    {
      "key": "nim-gemma-4-31b",
      "id": "google/gemma-4-31b-it",
      "name": "Gemma 4 31B (NIM)",
      "tier": "cloud_nim",
      "provider": "nvidia_nim",
      "open_source": true,
      "license": "Gemma Terms",
      "cost_input_per_M": 0,
      "cost_output_per_M": 0,
      "cost_per_1k_calls_usd": 0.0,
      "subscriptions": [],
      "notes": "Mismo modelo que via OpenRouter (10 runs parciales) - completar via NIM gratis",
      "tested": true,
      "tool_calling": true,
      "thinking": true,
      "multimodal": false,
      "runs": 119,
      "score_global": 7.3,
      "score_by_pillar": {
        "Contenido": 7.4,
        "Agentes": 7.07,
        "Coding": 7.31,
        "Razonamiento": 7.53
      },
      "score_by_suite": {
        "content_generation": 7.05,
        "tool_calling": 7.06,
        "startup_content": 7.57,
        "task_management": 7.76,
        "code_generation": 7.7,
        "reasoning": 7.99,
        "summarization": 7.38,
        "presentation": 7.57,
        "deep_reasoning": 7.06,
        "customer_support": 7.5,
        "structured_output": 7.65,
        "hallucination": 7.48,
        "creativity": 7.89,
        "string_precision": 7.38,
        "news_seo_writing": 6.95,
        "ocr_extraction": 6.64,
        "orchestration": 6.05,
        "multi_turn": 7.36,
        "policy_adherence": 7.06,
        "agent_capabilities": 7.09,
        "strategy": 7.59,
        "sales_outreach": 7.81,
        "translation": 7.46,
        "agent_long_horizon": 7.34
      },
      "quality_avg": 8.19,
      "cost_score_avg": 8.39,
      "speed_score_avg": 3.61,
      "latency_score_avg": 1.82,
      "tool_calling_score_avg": 7.23,
      "judge_score_avg": 4.17,
      "tokens_per_second": 22.1,
      "latency_avg_s": 139.35,
      "total_input_tokens": 293775,
      "total_output_tokens": 129546
    },
    {
      "key": "gpt-5.4-mini",
      "id": "gpt-5.4-mini",
      "name": "GPT-5.4 Mini",
      "tier": "cheap",
      "provider": "openai_direct",
      "open_source": false,
      "license": "",
      "cost_input_per_M": 0.5,
      "cost_output_per_M": 1.5,
      "cost_per_1k_calls_usd": 2.4,
      "subscriptions": [],
      "notes": "",
      "tested": true,
      "tool_calling": true,
      "thinking": true,
      "multimodal": true,
      "runs": 155,
      "score_global": 7.23,
      "score_by_pillar": {
        "Contenido": 7.16,
        "Agentes": 6.86,
        "Coding": 7.76,
        "Razonamiento": 7.33
      },
      "score_by_suite": {
        "content_generation": 7.64,
        "tool_calling": 6.71,
        "task_management": 7.03,
        "code_generation": 7.46,
        "reasoning": 7.01,
        "summarization": 7.15,
        "presentation": 7.54,
        "startup_content": 6.75,
        "deep_reasoning": 7.29,
        "customer_support": 6.51,
        "structured_output": 7.36,
        "hallucination": 7.69,
        "creativity": 6.57,
        "string_precision": 8.49,
        "news_seo_writing": 7.09,
        "agent_capabilities": 6.43,
        "ocr_extraction": 7.13,
        "orchestration": 7.4,
        "multi_turn": 7.28,
        "policy_adherence": 7.54,
        "strategy": 7.43,
        "sales_outreach": 7.98,
        "translation": 7.72
      },
      "quality_avg": 7.37,
      "cost_score_avg": 7.39,
      "speed_score_avg": 9.1,
      "latency_score_avg": 4.03,
      "tool_calling_score_avg": 7.07,
      "judge_score_avg": 4.15,
      "tokens_per_second": 117.9,
      "latency_avg_s": 5.02,
      "total_input_tokens": 37650,
      "total_output_tokens": 108242
    },
    {
      "key": "devstral-medium",
      "id": "mistralai/devstral-medium",
      "name": "Devstral Medium",
      "tier": "cheap",
      "provider": "openrouter",
      "open_source": true,
      "license": "Apache 2.0",
      "cost_input_per_M": 0.4,
      "cost_output_per_M": 2.0,
      "cost_per_1k_calls_usd": 3.12,
      "subscriptions": [],
      "notes": "",
      "tested": true,
      "tool_calling": true,
      "thinking": false,
      "multimodal": false,
      "runs": 88,
      "score_global": 7.21,
      "score_by_pillar": {
        "Contenido": 7.51,
        "Agentes": 6.79,
        "Coding": 7.44,
        "Razonamiento": 7.26
      },
      "score_by_suite": {
        "content_generation": 7.72,
        "tool_calling": 6.3,
        "task_management": 7.84,
        "code_generation": 7.59,
        "reasoning": 7.37,
        "summarization": 6.92,
        "presentation": 7.86,
        "startup_content": 7.72,
        "deep_reasoning": 7.16,
        "customer_support": 6.53,
        "structured_output": 7.51,
        "hallucination": 7.29,
        "creativity": 7.79,
        "string_precision": 7.63,
        "news_seo_writing": 6.77,
        "ocr_extraction": 7.02,
        "orchestration": 6.88,
        "multi_turn": 6.43,
        "policy_adherence": 7.16,
        "agent_capabilities": 6.67,
        "strategy": 7.33,
        "sales_outreach": 7.87
      },
      "quality_avg": 7.89,
      "cost_score_avg": 7.98,
      "speed_score_avg": 6.11,
      "latency_score_avg": 2.51,
      "tool_calling_score_avg": 6.6,
      "judge_score_avg": 4.07,
      "tokens_per_second": 60.2,
      "latency_avg_s": 23.72,
      "total_input_tokens": 27995,
      "total_output_tokens": 45938
    },
    {
      "key": "grok-4.1-fast",
      "id": "x-ai/grok-4.1-fast",
      "name": "Grok 4.1 Fast",
      "tier": "cheap",
      "provider": "openrouter",
      "open_source": false,
      "license": "",
      "cost_input_per_M": 0.2,
      "cost_output_per_M": 0.5,
      "cost_per_1k_calls_usd": 0.81,
      "subscriptions": [],
      "notes": "",
      "tested": true,
      "tool_calling": true,
      "thinking": false,
      "multimodal": false,
      "runs": 148,
      "score_global": 7.21,
      "score_by_pillar": {
        "Contenido": 7.67,
        "Agentes": 7.51,
        "Coding": 7.7,
        "Razonamiento": 7.81
      },
      "score_by_suite": {
        "content_generation": 7.75,
        "tool_calling": 7.91,
        "task_management": 7.91,
        "code_generation": 7.84,
        "reasoning": 8.06,
        "summarization": 7.55,
        "presentation": 7.94,
        "startup_content": 7.97,
        "deep_reasoning": 7.7,
        "customer_support": 7.15,
        "structured_output": 7.92,
        "hallucination": 7.94,
        "creativity": 7.72,
        "string_precision": 7.92,
        "news_seo_writing": 7.01,
        "ocr_extraction": 7.14,
        "orchestration": 7.39,
        "multi_turn": 7.4,
        "policy_adherence": 7.41,
        "agent_capabilities": 7.53,
        "strategy": 7.63,
        "sales_outreach": 7.78,
        "translation": 7.91,
        "agent_long_horizon": 7.41,
        "niah_es_lite": 6.29
      },
      "quality_avg": 7.53,
      "cost_score_avg": 7.66,
      "speed_score_avg": 8.84,
      "latency_score_avg": 2.29,
      "tool_calling_score_avg": 7.14,
      "judge_score_avg": 4.19,
      "tokens_per_second": 104.6,
      "latency_avg_s": 21.37,
      "total_input_tokens": 1366100,
      "total_output_tokens": 220481
    },
    {
      "key": "nim-qwen3-next-instruct",
      "id": "qwen/qwen3-next-80b-a3b-instruct",
      "name": "Qwen 3-Next 80B Instruct (NIM)",
      "tier": "cloud_nim",
      "provider": "nvidia_nim",
      "open_source": true,
      "license": "Apache 2.0",
      "cost_input_per_M": 0,
      "cost_output_per_M": 0,
      "cost_per_1k_calls_usd": 0.0,
      "subscriptions": [],
      "notes": "Próxima generación Qwen — pendiente desde Lote 4",
      "tested": true,
      "tool_calling": true,
      "thinking": false,
      "multimodal": false,
      "runs": 103,
      "score_global": 7.2,
      "score_by_pillar": {
        "Contenido": 7.41,
        "Agentes": 7.02,
        "Coding": 7.26,
        "Razonamiento": 7.16
      },
      "score_by_suite": {
        "content_generation": 7.94,
        "tool_calling": 7.45,
        "task_management": 7.58,
        "code_generation": 7.51,
        "reasoning": 7.23,
        "summarization": 7.2,
        "presentation": 7.48,
        "startup_content": 7.48,
        "deep_reasoning": 6.91,
        "customer_support": 7.52,
        "structured_output": 7.55,
        "hallucination": 7.6,
        "creativity": 7.46,
        "string_precision": 7.48,
        "news_seo_writing": 6.7,
        "ocr_extraction": 6.56,
        "orchestration": 6.11,
        "multi_turn": 6.65,
        "policy_adherence": 6.96,
        "agent_capabilities": 7.2,
        "strategy": 7.15,
        "sales_outreach": 7.36,
        "translation": 7.87,
        "agent_long_horizon": 7.11
      },
      "quality_avg": 8.11,
      "cost_score_avg": 6.73,
      "speed_score_avg": 6.91,
      "latency_score_avg": 2.54,
      "tool_calling_score_avg": 7.11,
      "judge_score_avg": 4.14,
      "tokens_per_second": 52.5,
      "latency_avg_s": 24.79,
      "total_input_tokens": 250769,
      "total_output_tokens": 137859
    },
    {
      "key": "gpt-4.1-mini",
      "id": "gpt-4.1-mini",
      "name": "GPT-4.1 Mini",
      "tier": "cheap",
      "provider": "openai_direct",
      "open_source": false,
      "license": "",
      "cost_input_per_M": 0.4,
      "cost_output_per_M": 1.6,
      "cost_per_1k_calls_usd": 2.52,
      "subscriptions": [],
      "notes": "",
      "tested": true,
      "tool_calling": true,
      "thinking": false,
      "multimodal": false,
      "runs": 155,
      "score_global": 7.17,
      "score_by_pillar": {
        "Contenido": 7.34,
        "Agentes": 6.43,
        "Coding": 7.72,
        "Razonamiento": 7.43
      },
      "score_by_suite": {
        "content_generation": 7.49,
        "tool_calling": 6.73,
        "task_management": 7.36,
        "code_generation": 7.53,
        "reasoning": 7.61,
        "summarization": 6.88,
        "presentation": 7.23,
        "startup_content": 7.7,
        "deep_reasoning": 7.34,
        "customer_support": 5.46,
        "structured_output": 7.49,
        "hallucination": 7.33,
        "creativity": 7.17,
        "string_precision": 8.28,
        "news_seo_writing": 6.92,
        "agent_capabilities": 5.93,
        "ocr_extraction": 7.02,
        "orchestration": 6.45,
        "multi_turn": 6.88,
        "policy_adherence": 7.16,
        "strategy": 7.6,
        "sales_outreach": 7.87,
        "translation": 7.87
      },
      "quality_avg": 7.49,
      "cost_score_avg": 7.83,
      "speed_score_avg": 7.65,
      "latency_score_avg": 3.52,
      "tool_calling_score_avg": 6.98,
      "judge_score_avg": 4.18,
      "tokens_per_second": 71.1,
      "latency_avg_s": 7.53,
      "total_input_tokens": 35337,
      "total_output_tokens": 80931
    },
    {
      "key": "hermes-4-405b",
      "id": "nousresearch/hermes-4-405b",
      "name": "Hermes 4 405B",
      "tier": "cheap",
      "provider": "openrouter",
      "open_source": true,
      "license": "Llama 3 community",
      "cost_input_per_M": 1.0,
      "cost_output_per_M": 3.0,
      "cost_per_1k_calls_usd": 4.8,
      "subscriptions": [],
      "notes": "Flagship Hermes 4 con reasoning híbrido. Sin reasoning explícito en este config.",
      "tested": true,
      "tool_calling": true,
      "thinking": false,
      "multimodal": false,
      "runs": 74,
      "score_global": 7.17,
      "score_by_pillar": {
        "Contenido": 7.19,
        "Agentes": 7.11,
        "Coding": 7.2,
        "Razonamiento": 7.13
      },
      "score_by_suite": {
        "content_generation": 7.31,
        "task_management": 7.48,
        "code_generation": 7.18,
        "reasoning": 7.39,
        "summarization": 6.56,
        "presentation": 7.17,
        "startup_content": 7.22,
        "deep_reasoning": 6.86,
        "structured_output": 7.55,
        "hallucination": 7.31,
        "creativity": 7.46,
        "string_precision": 7.41,
        "news_seo_writing": 6.47,
        "ocr_extraction": 6.68,
        "multi_turn": 7.14,
        "policy_adherence": 6.71,
        "agent_capabilities": 7.47,
        "strategy": 7.22,
        "sales_outreach": 7.69,
        "translation": 7.8
      },
      "quality_avg": 8.05,
      "cost_score_avg": 7.62,
      "speed_score_avg": 5.65,
      "latency_score_avg": 2.04,
      "tool_calling_score_avg": 7.0,
      "judge_score_avg": 4.18,
      "tokens_per_second": 33.5,
      "latency_avg_s": 15.37,
      "total_input_tokens": 16394,
      "total_output_tokens": 39971
    },
    {
      "key": "nim-devstral-2-123b",
      "id": "mistralai/devstral-2-123b-instruct-2512",
      "name": "Devstral 2 123B (NIM)",
      "tier": "cloud_nim",
      "provider": "nvidia_nim",
      "open_source": true,
      "license": "Apache 2.0",
      "cost_input_per_M": 0,
      "cost_output_per_M": 0,
      "cost_per_1k_calls_usd": 0.0,
      "subscriptions": [],
      "notes": "Mismo modelo que probamos via OpenRouter - comparar provider stability",
      "tested": true,
      "tool_calling": true,
      "thinking": false,
      "multimodal": false,
      "runs": 68,
      "score_global": 7.16,
      "score_by_pillar": {
        "Contenido": 7.26,
        "Agentes": 6.91,
        "Coding": 7.22,
        "Razonamiento": 7.22
      },
      "score_by_suite": {
        "content_generation": 7.56,
        "tool_calling": 6.58,
        "task_management": 7.61,
        "code_generation": 7.38,
        "reasoning": 7.4,
        "presentation": 7.18,
        "startup_content": 7.47,
        "deep_reasoning": 7.05,
        "customer_support": 6.68,
        "structured_output": 7.49,
        "hallucination": 7.38,
        "creativity": 7.58,
        "string_precision": 7.48,
        "news_seo_writing": 6.57,
        "ocr_extraction": 6.59,
        "orchestration": 6.77,
        "multi_turn": 7.3
      },
      "quality_avg": 7.98,
      "cost_score_avg": 7.5,
      "speed_score_avg": 6.22,
      "latency_score_avg": 2.56,
      "tool_calling_score_avg": 6.87,
      "judge_score_avg": 4.1,
      "tokens_per_second": 42.3,
      "latency_avg_s": 17.58,
      "total_input_tokens": 19474,
      "total_output_tokens": 45580
    },
    {
      "key": "llama-4-maverick",
      "id": "meta-llama/llama-4-maverick",
      "name": "Llama 4 Maverick",
      "tier": "cheap",
      "provider": "openrouter",
      "open_source": true,
      "license": "Llama Community",
      "cost_input_per_M": 0.5,
      "cost_output_per_M": 1.0,
      "cost_per_1k_calls_usd": 1.65,
      "subscriptions": [],
      "notes": "",
      "tested": true,
      "tool_calling": true,
      "thinking": false,
      "multimodal": true,
      "runs": 134,
      "score_global": 7.13,
      "score_by_pillar": {
        "Contenido": 7.13,
        "Agentes": 6.59,
        "Coding": 7.51,
        "Razonamiento": 7.14
      },
      "score_by_suite": {
        "content_generation": 7.08,
        "tool_calling": 5.73,
        "task_management": 7.26,
        "code_generation": 7.25,
        "reasoning": 7.23,
        "summarization": 6.95,
        "presentation": 7.2,
        "startup_content": 7.5,
        "deep_reasoning": 7.01,
        "customer_support": 5.12,
        "structured_output": 7.24,
        "hallucination": 7.34,
        "creativity": 6.89,
        "string_precision": 8.09,
        "news_seo_writing": 6.66,
        "agent_capabilities": 7.47,
        "ocr_extraction": 6.98,
        "multi_turn": 6.77,
        "policy_adherence": 7.26,
        "strategy": 7.1,
        "sales_outreach": 7.69,
        "translation": 7.75
      },
      "quality_avg": 7.93,
      "cost_score_avg": 7.58,
      "speed_score_avg": 6.01,
      "latency_score_avg": 2.42,
      "tool_calling_score_avg": 6.81,
      "judge_score_avg": 4.16,
      "tokens_per_second": 47.1,
      "latency_avg_s": 13.17,
      "total_input_tokens": 31998,
      "total_output_tokens": 72629
    },
    {
      "key": "devstral-2",
      "id": "mistralai/devstral-2512",
      "name": "Devstral 2 (Dic 2025)",
      "tier": "cheap",
      "provider": "openrouter",
      "open_source": true,
      "license": "Apache 2.0",
      "cost_input_per_M": 0.4,
      "cost_output_per_M": 2.0,
      "cost_per_1k_calls_usd": 3.12,
      "subscriptions": [],
      "notes": "",
      "tested": true,
      "tool_calling": true,
      "thinking": false,
      "multimodal": false,
      "runs": 160,
      "score_global": 7.09,
      "score_by_pillar": {
        "Contenido": 7.53,
        "Agentes": 6.86,
        "Coding": 7.6,
        "Razonamiento": 7.37
      },
      "score_by_suite": {
        "content_generation": 7.84,
        "tool_calling": 6.45,
        "task_management": 7.87,
        "code_generation": 7.88,
        "reasoning": 7.59,
        "summarization": 7.42,
        "presentation": 7.33,
        "startup_content": 7.61,
        "deep_reasoning": 7.06,
        "customer_support": 5.07,
        "structured_output": 7.79,
        "hallucination": 7.55,
        "creativity": 7.69,
        "string_precision": 7.81,
        "news_seo_writing": 6.7,
        "ocr_extraction": 6.97,
        "orchestration": 6.69,
        "multi_turn": 7.57,
        "policy_adherence": 7.42,
        "agent_capabilities": 7.18,
        "strategy": 7.6,
        "sales_outreach": 7.92,
        "translation": 7.98,
        "agent_long_horizon": 7.72,
        "niah_es": 6.75,
        "niah_es_lite": 6.59
      },
      "quality_avg": 7.8,
      "cost_score_avg": 6.89,
      "speed_score_avg": 6.28,
      "latency_score_avg": 3.83,
      "tool_calling_score_avg": 6.98,
      "judge_score_avg": 4.22,
      "tokens_per_second": 53.0,
      "latency_avg_s": 13.15,
      "total_input_tokens": 1757050,
      "total_output_tokens": 138276
    },
    {
      "key": "claude-opus-4.6",
      "id": "anthropic/claude-opus-4-6",
      "name": "Claude Opus 4.6",
      "tier": "premium",
      "provider": "openrouter",
      "open_source": false,
      "license": "",
      "cost_input_per_M": 15.0,
      "cost_output_per_M": 75.0,
      "cost_per_1k_calls_usd": 117.0,
      "subscriptions": [],
      "notes": "",
      "tested": true,
      "tool_calling": true,
      "thinking": false,
      "multimodal": true,
      "runs": 246,
      "score_global": 7.09,
      "score_by_pillar": {
        "Contenido": 7.07,
        "Agentes": 6.91,
        "Coding": 7.2,
        "Razonamiento": 7.33
      },
      "score_by_suite": {
        "content_generation": 7.02,
        "tool_calling": 6.38,
        "task_management": 7.56,
        "code_generation": 7.42,
        "reasoning": 7.43,
        "summarization": 7.23,
        "presentation": 7.22,
        "startup_content": 7.24,
        "deep_reasoning": 7.22,
        "customer_support": 7.27,
        "structured_output": 7.1,
        "hallucination": 7.56,
        "creativity": 6.98,
        "string_precision": 7.35,
        "news_seo_writing": 6.6,
        "agent_capabilities": 6.86,
        "ocr_extraction": 6.8,
        "orchestration": 6.48,
        "multi_turn": 6.86,
        "policy_adherence": 7.08,
        "strategy": 7.14,
        "sales_outreach": 7.49,
        "translation": 7.33
      },
      "quality_avg": 8.04,
      "cost_score_avg": 6.83,
      "speed_score_avg": 6.51,
      "latency_score_avg": 1.8,
      "tool_calling_score_avg": 7.14,
      "judge_score_avg": 4.18,
      "tokens_per_second": 46.7,
      "latency_avg_s": 19.14,
      "total_input_tokens": 105497,
      "total_output_tokens": 232473
    },
    {
      "key": "mistral-large",
      "id": "mistralai/mistral-large",
      "name": "Mistral Large",
      "tier": "medium",
      "provider": "openrouter",
      "open_source": true,
      "license": "Apache 2.0",
      "cost_input_per_M": 2.0,
      "cost_output_per_M": 6.0,
      "cost_per_1k_calls_usd": 9.6,
      "subscriptions": [],
      "notes": "",
      "tested": true,
      "tool_calling": true,
      "thinking": false,
      "multimodal": false,
      "runs": 152,
      "score_global": 7.08,
      "score_by_pillar": {
        "Contenido": 7.14,
        "Agentes": 6.35,
        "Coding": 7.74,
        "Razonamiento": 7.38
      },
      "score_by_suite": {
        "content_generation": 7.21,
        "tool_calling": 6.45,
        "task_management": 7.56,
        "code_generation": 7.87,
        "reasoning": 7.55,
        "summarization": 7.28,
        "presentation": 7.41,
        "startup_content": 7.24,
        "deep_reasoning": 7.22,
        "customer_support": 4.29,
        "structured_output": 7.43,
        "hallucination": 7.56,
        "creativity": 6.83,
        "string_precision": 8.19,
        "news_seo_writing": 6.55,
        "agent_capabilities": 6.58,
        "ocr_extraction": 6.97,
        "orchestration": 6.75,
        "multi_turn": 6.75,
        "policy_adherence": 6.96,
        "strategy": 7.35,
        "sales_outreach": 7.63,
        "translation": 7.83
      },
      "quality_avg": 7.81,
      "cost_score_avg": 7.11,
      "speed_score_avg": 6.81,
      "latency_score_avg": 2.36,
      "tool_calling_score_avg": 7.01,
      "judge_score_avg": 4.21,
      "tokens_per_second": 49.8,
      "latency_avg_s": 15.92,
      "total_input_tokens": 44800,
      "total_output_tokens": 128547
    },
    {
      "key": "gemma-4-26b",
      "id": "google/gemma-4-26b-a4b-it",
      "name": "Gemma 4 26B MoE (3.8B activos)",
      "tier": "cheap",
      "provider": "openrouter",
      "open_source": true,
      "license": "Apache 2.0",
      "cost_input_per_M": 0.15,
      "cost_output_per_M": 0.3,
      "cost_per_1k_calls_usd": 0.495,
      "subscriptions": [],
      "notes": "",
      "tested": true,
      "tool_calling": true,
      "thinking": true,
      "multimodal": false,
      "runs": 104,
      "score_global": 7.06,
      "score_by_pillar": {
        "Contenido": 7.25,
        "Agentes": 6.62,
        "Coding": 7.31,
        "Razonamiento": 7.24
      },
      "score_by_suite": {
        "content_generation": 7.23,
        "tool_calling": 6.11,
        "startup_content": 7.31,
        "task_management": 7.58,
        "code_generation": 7.56,
        "reasoning": 7.42,
        "summarization": 6.75,
        "presentation": 7.21,
        "deep_reasoning": 7.06,
        "customer_support": 5.68,
        "structured_output": 7.53,
        "hallucination": 7.37,
        "creativity": 7.54,
        "string_precision": 7.47,
        "news_seo_writing": 6.9,
        "ocr_extraction": 6.75,
        "orchestration": 7.07,
        "multi_turn": 7.11,
        "policy_adherence": 7.08,
        "agent_capabilities": 6.38,
        "strategy": 7.29,
        "sales_outreach": 7.71,
        "translation": 7.24
      },
      "quality_avg": 7.8,
      "cost_score_avg": 7.6,
      "speed_score_avg": 5.91,
      "latency_score_avg": 2.18,
      "tool_calling_score_avg": 7.2,
      "judge_score_avg": 4.2,
      "tokens_per_second": 44.3,
      "latency_avg_s": 17.38,
      "total_input_tokens": 31577,
      "total_output_tokens": 68689
    },
    {
      "key": "nim-nemotron-3-nano-omni-reasoning",
      "id": "nvidia/nemotron-3-nano-omni-30b-a3b-reasoning",
      "name": "Nemotron 3 Nano Omni 30B-A3B Reasoning (NIM)",
      "tier": "cloud_nim",
      "provider": "nvidia_nim",
      "open_source": true,
      "license": "NVIDIA Open License",
      "cost_input_per_M": 0,
      "cost_output_per_M": 0,
      "cost_per_1k_calls_usd": 0.0,
      "subscriptions": [],
      "notes": "Lanzado 20 abril 2026. MoE 30B totales / 3B activos (A3B). Multimodal (texto+imagen+audio+video) + reasoning. Comparar contra Nano 30B normal y Nano 9B v2 — sirve para medir el costo de razonar y la ganancia de multimodal en single-turn texto.",
      "tested": true,
      "tool_calling": true,
      "thinking": true,
      "multimodal": true,
      "runs": 91,
      "score_global": 7.04,
      "score_by_pillar": {
        "Contenido": 7.23,
        "Agentes": 6.67,
        "Coding": 7.24,
        "Razonamiento": 7.14
      },
      "score_by_suite": {
        "content_generation": 7.51,
        "tool_calling": 6.83,
        "task_management": 6.97,
        "code_generation": 7.25,
        "reasoning": 7.43,
        "summarization": 7.12,
        "presentation": 7.18,
        "startup_content": 7.62,
        "deep_reasoning": 6.8,
        "customer_support": 6.69,
        "structured_output": 7.55,
        "hallucination": 7.49,
        "creativity": 7.16,
        "string_precision": 7.66,
        "news_seo_writing": 6.69,
        "ocr_extraction": 6.48,
        "orchestration": 6.16,
        "multi_turn": 6.88,
        "policy_adherence": 6.95,
        "agent_capabilities": 6.46,
        "strategy": 7.19,
        "sales_outreach": 7.23,
        "translation": 7.3
      },
      "quality_avg": 7.75,
      "cost_score_avg": 5.81,
      "speed_score_avg": 9.89,
      "latency_score_avg": 2.57,
      "tool_calling_score_avg": 6.87,
      "judge_score_avg": 4.09,
      "tokens_per_second": 202.7,
      "latency_avg_s": 10.17,
      "total_input_tokens": 35902,
      "total_output_tokens": 198864
    },
    {
      "key": "mistral-nemo",
      "id": "mistralai/mistral-nemo",
      "name": "Mistral Nemo",
      "tier": "ultra_cheap",
      "provider": "openrouter",
      "open_source": false,
      "license": "",
      "cost_input_per_M": 0.02,
      "cost_output_per_M": 0.02,
      "cost_per_1k_calls_usd": 0.036,
      "subscriptions": [],
      "notes": "",
      "tested": true,
      "tool_calling": true,
      "thinking": false,
      "multimodal": false,
      "runs": 122,
      "score_global": 7.03,
      "score_by_pillar": {
        "Razonamiento": 7.09,
        "Agentes": 6.38,
        "Coding": 7.44,
        "Contenido": 7.24
      },
      "score_by_suite": {
        "deep_reasoning": 6.74,
        "customer_support": 4.98,
        "structured_output": 7.28,
        "hallucination": 7.38,
        "creativity": 6.93,
        "string_precision": 7.91,
        "news_seo_writing": 7.04,
        "content_generation": 7.79,
        "tool_calling": 7.06,
        "task_management": 7.68,
        "code_generation": 7.01,
        "reasoning": 7.31,
        "summarization": 6.76,
        "presentation": 7.59,
        "startup_content": 7.44,
        "ocr_extraction": 6.95,
        "orchestration": 6.36,
        "multi_turn": 6.76,
        "policy_adherence": 7.13,
        "agent_capabilities": 6.41,
        "strategy": 7.73,
        "sales_outreach": 7.38,
        "translation": 7.58
      },
      "quality_avg": 6.97,
      "cost_score_avg": 9.0,
      "speed_score_avg": 4.64,
      "latency_score_avg": 2.66,
      "tool_calling_score_avg": 6.67,
      "judge_score_avg": 3.91,
      "tokens_per_second": 33.7,
      "latency_avg_s": 17.47,
      "total_input_tokens": 34975,
      "total_output_tokens": 42864
    },
    {
      "key": "hermes-4-70b",
      "id": "nousresearch/hermes-4-70b",
      "name": "Hermes 4 70B",
      "tier": "cheap",
      "provider": "openrouter",
      "open_source": true,
      "license": "Llama 3 community",
      "cost_input_per_M": 0.13,
      "cost_output_per_M": 0.4,
      "cost_per_1k_calls_usd": 0.639,
      "subscriptions": [],
      "notes": "Hybrid reasoning mode. Open-source de Nous Research. Sin reasoning explícito en este config.",
      "tested": true,
      "tool_calling": true,
      "thinking": false,
      "multimodal": false,
      "runs": 131,
      "score_global": 6.99,
      "score_by_pillar": {
        "Contenido": 7.35,
        "Agentes": 7.2,
        "Coding": 7.46,
        "Razonamiento": 7.26
      },
      "score_by_suite": {
        "content_generation": 7.77,
        "task_management": 7.52,
        "code_generation": 7.29,
        "reasoning": 7.42,
        "summarization": 7.22,
        "presentation": 7.24,
        "startup_content": 7.45,
        "deep_reasoning": 7.04,
        "structured_output": 7.9,
        "hallucination": 7.38,
        "creativity": 7.44,
        "string_precision": 7.76,
        "news_seo_writing": 6.64,
        "ocr_extraction": 6.89,
        "multi_turn": 6.92,
        "policy_adherence": 7.18,
        "agent_capabilities": 7.42,
        "strategy": 7.43,
        "sales_outreach": 7.64,
        "translation": 7.61,
        "agent_long_horizon": 7.37,
        "niah_es_lite": 6.3
      },
      "quality_avg": 7.84,
      "cost_score_avg": 6.16,
      "speed_score_avg": 6.3,
      "latency_score_avg": 3.82,
      "tool_calling_score_avg": 7.0,
      "judge_score_avg": 4.12,
      "tokens_per_second": 50.0,
      "latency_avg_s": 7.32,
      "total_input_tokens": 1492840,
      "total_output_tokens": 60488
    },
    {
      "key": "kimi-k2",
      "id": "moonshotai/kimi-k2",
      "name": "Kimi K2",
      "tier": "cheap",
      "provider": "openrouter",
      "open_source": true,
      "license": "Modified MIT",
      "cost_input_per_M": 0.2,
      "cost_output_per_M": 0.8,
      "cost_per_1k_calls_usd": 1.26,
      "subscriptions": [],
      "notes": "",
      "tested": true,
      "tool_calling": true,
      "thinking": false,
      "multimodal": false,
      "runs": 138,
      "score_global": 6.96,
      "score_by_pillar": {
        "Contenido": 6.86,
        "Agentes": 6.63,
        "Coding": 7.34,
        "Razonamiento": 7.1
      },
      "score_by_suite": {
        "content_generation": 6.75,
        "tool_calling": 6.2,
        "task_management": 6.89,
        "code_generation": 6.99,
        "reasoning": 7.52,
        "summarization": 6.69,
        "presentation": 6.75,
        "startup_content": 7.31,
        "deep_reasoning": 6.99,
        "customer_support": 6.93,
        "structured_output": 7.16,
        "hallucination": 6.99,
        "creativity": 6.36,
        "string_precision": 7.96,
        "news_seo_writing": 6.71,
        "agent_capabilities": 6.29,
        "ocr_extraction": 6.57,
        "orchestration": 6.91,
        "multi_turn": 6.94,
        "policy_adherence": 7.1,
        "strategy": 6.84,
        "sales_outreach": 7.37,
        "translation": 7.53
      },
      "quality_avg": 7.76,
      "cost_score_avg": 7.36,
      "speed_score_avg": 4.9,
      "latency_score_avg": 1.99,
      "tool_calling_score_avg": 7.21,
      "judge_score_avg": 4.12,
      "tokens_per_second": 28.3,
      "latency_avg_s": 20.64,
      "total_input_tokens": 44693,
      "total_output_tokens": 88841
    },
    {
      "key": "qwen3-coder",
      "id": "qwen/qwen3-coder",
      "name": "Qwen3 Coder",
      "tier": "cheap",
      "provider": "openrouter",
      "open_source": true,
      "license": "Apache 2.0",
      "cost_input_per_M": 0.2,
      "cost_output_per_M": 0.6,
      "cost_per_1k_calls_usd": 0.96,
      "subscriptions": [],
      "notes": "",
      "tested": true,
      "tool_calling": true,
      "thinking": false,
      "multimodal": false,
      "runs": 155,
      "score_global": 6.94,
      "score_by_pillar": {
        "Contenido": 7.08,
        "Agentes": 6.0,
        "Coding": 7.64,
        "Razonamiento": 7.37
      },
      "score_by_suite": {
        "content_generation": 6.99,
        "tool_calling": 6.25,
        "task_management": 7.4,
        "code_generation": 7.8,
        "reasoning": 7.43,
        "summarization": 6.72,
        "presentation": 7.47,
        "startup_content": 7.52,
        "deep_reasoning": 7.35,
        "customer_support": 4.12,
        "structured_output": 7.23,
        "hallucination": 7.34,
        "creativity": 6.74,
        "string_precision": 8.13,
        "news_seo_writing": 6.71,
        "agent_capabilities": 5.56,
        "ocr_extraction": 6.89,
        "orchestration": 6.35,
        "multi_turn": 6.84,
        "policy_adherence": 6.98,
        "strategy": 7.35,
        "sales_outreach": 7.39,
        "translation": 7.63
      },
      "quality_avg": 7.57,
      "cost_score_avg": 7.46,
      "speed_score_avg": 5.97,
      "latency_score_avg": 2.54,
      "tool_calling_score_avg": 6.89,
      "judge_score_avg": 4.22,
      "tokens_per_second": 53.8,
      "latency_avg_s": 17.65,
      "total_input_tokens": 57007,
      "total_output_tokens": 95691
    },
    {
      "key": "mimo-v2-omni",
      "id": "xiaomi/mimo-v2-omni",
      "name": "MiMo-V2-Omni (multimodal)",
      "tier": "cheap",
      "provider": "openrouter",
      "open_source": false,
      "license": "",
      "cost_input_per_M": 0.4,
      "cost_output_per_M": 2.0,
      "cost_per_1k_calls_usd": 3.12,
      "subscriptions": [],
      "notes": "",
      "tested": true,
      "tool_calling": true,
      "thinking": false,
      "multimodal": true,
      "runs": 103,
      "score_global": 6.94,
      "score_by_pillar": {
        "Contenido": 7.29,
        "Agentes": 6.63,
        "Coding": 6.77,
        "Razonamiento": 6.86
      },
      "score_by_suite": {
        "content_generation": 7.88,
        "tool_calling": 6.86,
        "task_management": 7.9,
        "code_generation": 6.43,
        "reasoning": 7.54,
        "summarization": 6.76,
        "presentation": 7.51,
        "startup_content": 7.6,
        "deep_reasoning": 5.98,
        "customer_support": 6.2,
        "structured_output": 6.79,
        "hallucination": 7.35,
        "creativity": 7.51,
        "string_precision": 7.5,
        "news_seo_writing": 6.49,
        "ocr_extraction": 6.14,
        "orchestration": 5.48,
        "multi_turn": 6.88,
        "policy_adherence": 7.24,
        "agent_capabilities": 6.52,
        "strategy": 7.45,
        "sales_outreach": 7.35,
        "translation": 7.11,
        "agent_long_horizon": 7.27
      },
      "quality_avg": 7.52,
      "cost_score_avg": 7.0,
      "speed_score_avg": 8.41,
      "latency_score_avg": 1.84,
      "tool_calling_score_avg": 6.91,
      "judge_score_avg": 4.0,
      "tokens_per_second": 85.8,
      "latency_avg_s": 18.52,
      "total_input_tokens": 208736,
      "total_output_tokens": 173093
    },
    {
      "key": "grok-4.20",
      "id": "x-ai/grok-4.20",
      "name": "Grok 4.20",
      "tier": "medium",
      "provider": "openrouter",
      "open_source": false,
      "license": "",
      "cost_input_per_M": 2.0,
      "cost_output_per_M": 6.0,
      "cost_per_1k_calls_usd": 9.6,
      "subscriptions": [],
      "notes": "",
      "tested": true,
      "tool_calling": true,
      "thinking": false,
      "multimodal": false,
      "runs": 91,
      "score_global": 6.91,
      "score_by_pillar": {
        "Contenido": 7.13,
        "Agentes": 6.48,
        "Coding": 7.18,
        "Razonamiento": 7.01
      },
      "score_by_suite": {
        "content_generation": 7.36,
        "tool_calling": 6.97,
        "task_management": 7.51,
        "code_generation": 7.59,
        "reasoning": 7.43,
        "summarization": 7.12,
        "presentation": 7.11,
        "startup_content": 7.43,
        "deep_reasoning": 6.76,
        "customer_support": 6.01,
        "structured_output": 7.19,
        "hallucination": 7.0,
        "creativity": 7.23,
        "string_precision": 7.18,
        "news_seo_writing": 6.41,
        "ocr_extraction": 6.85,
        "orchestration": 5.6,
        "multi_turn": 6.76,
        "policy_adherence": 6.84,
        "agent_capabilities": 6.2,
        "strategy": 7.12,
        "sales_outreach": 7.37,
        "translation": 7.2
      },
      "quality_avg": 7.83,
      "cost_score_avg": 6.32,
      "speed_score_avg": 6.26,
      "latency_score_avg": 2.46,
      "tool_calling_score_avg": 6.96,
      "judge_score_avg": 4.1,
      "tokens_per_second": 50.8,
      "latency_avg_s": 10.82,
      "total_input_tokens": 37549,
      "total_output_tokens": 53178
    },
    {
      "key": "nim-qwen3.5-397b",
      "id": "qwen/qwen3.5-397b-a17b",
      "name": "Qwen 3.5 397B (NIM)",
      "tier": "cloud_nim",
      "provider": "nvidia_nim",
      "open_source": true,
      "license": "Apache 2.0",
      "cost_input_per_M": 0,
      "cost_output_per_M": 0,
      "cost_per_1k_calls_usd": 0.0,
      "subscriptions": [],
      "notes": "Mismo modelo que Cristian usa via Ollama Cloud para producción — comparar",
      "tested": true,
      "tool_calling": true,
      "thinking": false,
      "multimodal": false,
      "runs": 97,
      "score_global": 6.9,
      "score_by_pillar": {
        "Contenido": 6.95,
        "Agentes": 7.03,
        "Coding": 6.89,
        "Razonamiento": 6.94
      },
      "score_by_suite": {
        "content_generation": 7.05,
        "tool_calling": 6.92,
        "task_management": 7.28,
        "code_generation": 7.18,
        "reasoning": 7.12,
        "summarization": 6.29,
        "presentation": 6.99,
        "startup_content": 7.15,
        "deep_reasoning": 6.83,
        "customer_support": 7.44,
        "structured_output": 7.14,
        "hallucination": 7.1,
        "creativity": 7.33,
        "string_precision": 6.89,
        "news_seo_writing": 6.36,
        "ocr_extraction": 6.47,
        "orchestration": 6.5,
        "multi_turn": 6.66,
        "policy_adherence": 7.05,
        "agent_capabilities": 7.44,
        "strategy": 6.84,
        "sales_outreach": 7.06,
        "translation": 7.26,
        "agent_long_horizon": 5.96
      },
      "quality_avg": 8.07,
      "cost_score_avg": 6.81,
      "speed_score_avg": 3.71,
      "latency_score_avg": 1.47,
      "tool_calling_score_avg": 7.22,
      "judge_score_avg": 4.18,
      "tokens_per_second": 20.7,
      "latency_avg_s": 57.29,
      "total_input_tokens": 166523,
      "total_output_tokens": 110548
    },
    {
      "key": "nim-nemotron-nano-9b-v2",
      "id": "nvidia/nvidia-nemotron-nano-9b-v2",
      "name": "Nemotron Nano 9B v2 (NIM)",
      "tier": "cloud_nim",
      "provider": "nvidia_nim",
      "open_source": false,
      "license": "NVIDIA Open License",
      "cost_input_per_M": 0,
      "cost_output_per_M": 0,
      "cost_per_1k_calls_usd": 0.0,
      "subscriptions": [],
      "notes": "Variante chica de Nemotron 3, comparar con Nano 30B",
      "tested": true,
      "tool_calling": true,
      "thinking": true,
      "multimodal": false,
      "runs": 91,
      "score_global": 6.86,
      "score_by_pillar": {
        "Contenido": 7.16,
        "Agentes": 6.44,
        "Coding": 7.05,
        "Razonamiento": 6.86
      },
      "score_by_suite": {
        "content_generation": 7.48,
        "tool_calling": 6.68,
        "task_management": 7.42,
        "code_generation": 7.35,
        "reasoning": 7.42,
        "summarization": 6.75,
        "presentation": 7.37,
        "startup_content": 7.42,
        "deep_reasoning": 6.17,
        "customer_support": 6.75,
        "structured_output": 7.29,
        "hallucination": 7.3,
        "creativity": 7.19,
        "string_precision": 7.23,
        "news_seo_writing": 6.48,
        "ocr_extraction": 6.4,
        "orchestration": 4.92,
        "multi_turn": 6.66,
        "policy_adherence": 6.96,
        "agent_capabilities": 6.37,
        "strategy": 7.23,
        "sales_outreach": 7.34,
        "translation": 7.29
      },
      "quality_avg": 7.73,
      "cost_score_avg": 6.25,
      "speed_score_avg": 7.56,
      "latency_score_avg": 1.27,
      "tool_calling_score_avg": 7.1,
      "judge_score_avg": 4.1,
      "tokens_per_second": 54.3,
      "latency_avg_s": 26.4,
      "total_input_tokens": 31101,
      "total_output_tokens": 132905
    },
    {
      "key": "gpt-5.4",
      "id": "gpt-5.4",
      "name": "GPT-5.4",
      "tier": "premium",
      "provider": "openai_direct",
      "open_source": false,
      "license": "",
      "cost_input_per_M": 5.0,
      "cost_output_per_M": 15.0,
      "cost_per_1k_calls_usd": 24.0,
      "subscriptions": [],
      "notes": "",
      "tested": true,
      "tool_calling": true,
      "thinking": true,
      "multimodal": true,
      "runs": 150,
      "score_global": 6.85,
      "score_by_pillar": {
        "Contenido": 6.77,
        "Agentes": 6.35,
        "Coding": 7.58,
        "Razonamiento": 6.87
      },
      "score_by_suite": {
        "content_generation": 7.17,
        "tool_calling": 6.68,
        "task_management": 6.73,
        "code_generation": 7.25,
        "reasoning": 5.96,
        "summarization": 6.88,
        "presentation": 7.29,
        "startup_content": 5.97,
        "deep_reasoning": 6.98,
        "customer_support": 5.21,
        "structured_output": 7.29,
        "hallucination": 7.36,
        "creativity": 6.27,
        "string_precision": 8.25,
        "news_seo_writing": 6.78,
        "ocr_extraction": 6.93,
        "orchestration": 6.92,
        "multi_turn": 6.79,
        "policy_adherence": 7.11,
        "agent_capabilities": 5.65,
        "strategy": 7.29,
        "sales_outreach": 7.84,
        "translation": 7.72
      },
      "quality_avg": 7.09,
      "cost_score_avg": 7.35,
      "speed_score_avg": 7.17,
      "latency_score_avg": 2.84,
      "tool_calling_score_avg": 7.14,
      "judge_score_avg": 4.18,
      "tokens_per_second": 56.8,
      "latency_avg_s": 12.35,
      "total_input_tokens": 34875,
      "total_output_tokens": 116069
    },
    {
      "key": "nim-ministral-14b",
      "id": "mistralai/ministral-14b-instruct-2512",
      "name": "Ministral 14B (NIM)",
      "tier": "cloud_nim",
      "provider": "nvidia_nim",
      "open_source": true,
      "license": "Apache 2.0",
      "cost_input_per_M": 0,
      "cost_output_per_M": 0,
      "cost_per_1k_calls_usd": 0.0,
      "subscriptions": [],
      "notes": "Mistral chico nuevo dic 2025 - comparar con Mistral Small 4",
      "tested": true,
      "tool_calling": true,
      "thinking": false,
      "multimodal": false,
      "runs": 90,
      "score_global": 6.84,
      "score_by_pillar": {
        "Contenido": 7.07,
        "Agentes": 6.6,
        "Coding": 6.92,
        "Razonamiento": 6.77
      },
      "score_by_suite": {
        "content_generation": 7.33,
        "tool_calling": 7.13,
        "task_management": 7.26,
        "code_generation": 6.98,
        "reasoning": 7.15,
        "summarization": 7.13,
        "presentation": 6.96,
        "startup_content": 7.2,
        "deep_reasoning": 6.48,
        "customer_support": 6.05,
        "structured_output": 6.59,
        "hallucination": 6.87,
        "creativity": 7.18,
        "string_precision": 7.36,
        "news_seo_writing": 6.33,
        "ocr_extraction": 6.61,
        "orchestration": 6.39,
        "multi_turn": 6.72,
        "policy_adherence": 6.78,
        "agent_capabilities": 6.16,
        "strategy": 6.87,
        "sales_outreach": 7.14,
        "translation": 7.55
      },
      "quality_avg": 8.02,
      "cost_score_avg": 6.89,
      "speed_score_avg": 3.9,
      "latency_score_avg": 1.69,
      "tool_calling_score_avg": 6.72,
      "judge_score_avg": 4.12,
      "tokens_per_second": 21.5,
      "latency_avg_s": 49.5,
      "total_input_tokens": 27548,
      "total_output_tokens": 86481
    },
    {
      "key": "nemotron-super",
      "id": "nvidia/nemotron-3-super-120b-a12b",
      "name": "Nemotron 3 Super",
      "tier": "ultra_cheap",
      "provider": "openrouter",
      "open_source": true,
      "license": "NVIDIA Open",
      "cost_input_per_M": 0.1,
      "cost_output_per_M": 0.5,
      "cost_per_1k_calls_usd": 0.78,
      "subscriptions": [],
      "notes": "",
      "tested": true,
      "tool_calling": true,
      "thinking": true,
      "multimodal": false,
      "runs": 91,
      "score_global": 6.83,
      "score_by_pillar": {
        "Contenido": 7.45,
        "Agentes": 5.75,
        "Coding": 7.29,
        "Razonamiento": 7.19
      },
      "score_by_suite": {
        "content_generation": 7.66,
        "tool_calling": 6.06,
        "task_management": 7.45,
        "code_generation": 7.65,
        "reasoning": 7.57,
        "summarization": 7.1,
        "presentation": 7.89,
        "startup_content": 7.65,
        "deep_reasoning": 6.8,
        "customer_support": 4.36,
        "structured_output": 7.48,
        "hallucination": 7.37,
        "creativity": 7.5,
        "string_precision": 7.37,
        "news_seo_writing": 6.7,
        "ocr_extraction": 6.75,
        "orchestration": 4.49,
        "multi_turn": 7.32,
        "policy_adherence": 7.21,
        "agent_capabilities": 4.43,
        "strategy": 7.42,
        "sales_outreach": 7.72,
        "translation": 7.72
      },
      "quality_avg": 7.17,
      "cost_score_avg": 8.38,
      "speed_score_avg": 4.42,
      "latency_score_avg": 1.26,
      "tool_calling_score_avg": 6.85,
      "judge_score_avg": 4.27,
      "tokens_per_second": 30.6,
      "latency_avg_s": 46.1,
      "total_input_tokens": 35068,
      "total_output_tokens": 96589
    },
    {
      "key": "nim-mistral-large-3",
      "id": "mistralai/mistral-large-3-675b-instruct-2512",
      "name": "Mistral Large 3 675B (NIM)",
      "tier": "cloud_nim",
      "provider": "nvidia_nim",
      "open_source": true,
      "license": "Apache 2.0",
      "cost_input_per_M": 0,
      "cost_output_per_M": 0,
      "cost_per_1k_calls_usd": 0.0,
      "subscriptions": [],
      "notes": "Flagship Mistral diciembre 2025. Comparar contra Mistral Small 4 (#4 ranking).",
      "tested": true,
      "tool_calling": true,
      "thinking": false,
      "multimodal": false,
      "runs": 87,
      "score_global": 6.83,
      "score_by_pillar": {
        "Contenido": 6.95,
        "Agentes": 6.67,
        "Coding": 6.9,
        "Razonamiento": 6.84
      },
      "score_by_suite": {
        "content_generation": 7.44,
        "tool_calling": 6.92,
        "task_management": 7.09,
        "code_generation": 7.16,
        "reasoning": 6.97,
        "summarization": 6.97,
        "presentation": 6.9,
        "startup_content": 7.0,
        "deep_reasoning": 6.61,
        "customer_support": 6.44,
        "structured_output": 6.88,
        "hallucination": 6.99,
        "creativity": 6.97,
        "string_precision": 7.07,
        "news_seo_writing": 5.85,
        "ocr_extraction": 6.51,
        "orchestration": 6.91,
        "multi_turn": 6.46,
        "policy_adherence": 6.63,
        "agent_capabilities": 6.43,
        "strategy": 7.03,
        "sales_outreach": 7.33,
        "translation": 7.41
      },
      "quality_avg": 8.18,
      "cost_score_avg": 7.25,
      "speed_score_avg": 2.14,
      "latency_score_avg": 1.31,
      "tool_calling_score_avg": 6.88,
      "judge_score_avg": 4.2,
      "tokens_per_second": 10.6,
      "latency_avg_s": 141.03,
      "total_input_tokens": 27281,
      "total_output_tokens": 67680
    },
    {
      "key": "mimo-v2-pro",
      "id": "xiaomi/mimo-v2-pro",
      "name": "MiMo-V2-Pro",
      "tier": "medium",
      "provider": "openrouter",
      "open_source": false,
      "license": "Proprietary",
      "cost_input_per_M": 1.0,
      "cost_output_per_M": 3.0,
      "cost_per_1k_calls_usd": 4.8,
      "subscriptions": [],
      "notes": "API-only Xiaomi (NO confundir con MiMo V2 Flash que sí es MIT en HF). El Pro no se publica en HuggingFace.",
      "tested": true,
      "tool_calling": true,
      "thinking": false,
      "multimodal": false,
      "runs": 91,
      "score_global": 6.8,
      "score_by_pillar": {
        "Contenido": 7.21,
        "Agentes": 6.6,
        "Coding": 6.65,
        "Razonamiento": 6.61
      },
      "score_by_suite": {
        "content_generation": 7.68,
        "tool_calling": 6.2,
        "task_management": 7.6,
        "code_generation": 6.87,
        "reasoning": 7.18,
        "summarization": 7.11,
        "presentation": 7.41,
        "startup_content": 7.44,
        "deep_reasoning": 5.58,
        "customer_support": 6.96,
        "structured_output": 7.06,
        "hallucination": 7.34,
        "creativity": 7.3,
        "string_precision": 7.36,
        "news_seo_writing": 6.36,
        "ocr_extraction": 5.29,
        "orchestration": 6.14,
        "multi_turn": 6.66,
        "policy_adherence": 7.0,
        "agent_capabilities": 6.1,
        "strategy": 7.35,
        "sales_outreach": 7.49,
        "translation": 7.18
      },
      "quality_avg": 7.52,
      "cost_score_avg": 6.45,
      "speed_score_avg": 6.92,
      "latency_score_avg": 1.47,
      "tool_calling_score_avg": 7.23,
      "judge_score_avg": 4.15,
      "tokens_per_second": 51.5,
      "latency_avg_s": 20.89,
      "total_input_tokens": 43909,
      "total_output_tokens": 97394
    },
    {
      "key": "local-nemotron3-base-33b",
      "id": "nemotron3:33b-q4_K_M",
      "name": "Nemotron 3 Base 33B (DGX Spark Q4_K_M)",
      "tier": "local",
      "provider": "openrouter",
      "open_source": true,
      "license": "NVIDIA Open License",
      "cost_input_per_M": 0,
      "cost_output_per_M": 0,
      "cost_per_1k_calls_usd": 0.0,
      "subscriptions": [],
      "notes": "Nemotron 3 base 33B (NO la versión Omni Reasoning) en Q4_K_M via Ollama oficial. Comparar con Nemotron 3 Super 120B (DGX, también Q4) y Nemotron 3 Nano Omni Reasoning cuando esté disponible.",
      "tested": true,
      "tool_calling": true,
      "thinking": true,
      "multimodal": false,
      "runs": 103,
      "score_global": 6.77,
      "score_by_pillar": {
        "Contenido": 7.09,
        "Agentes": 6.34,
        "Coding": 6.93,
        "Razonamiento": 6.93
      },
      "score_by_suite": {
        "content_generation": 7.42,
        "tool_calling": 6.11,
        "task_management": 6.98,
        "code_generation": 7.18,
        "reasoning": 7.33,
        "summarization": 6.93,
        "presentation": 7.14,
        "startup_content": 7.4,
        "deep_reasoning": 6.64,
        "customer_support": 6.37,
        "structured_output": 7.41,
        "hallucination": 7.11,
        "creativity": 6.95,
        "string_precision": 7.04,
        "news_seo_writing": 6.47,
        "ocr_extraction": 6.21,
        "orchestration": 6.0,
        "multi_turn": 6.49,
        "policy_adherence": 6.78,
        "agent_capabilities": 5.98,
        "strategy": 6.95,
        "sales_outreach": 7.38,
        "translation": 7.1,
        "agent_long_horizon": 6.65
      },
      "quality_avg": 7.83,
      "cost_score_avg": 5.27,
      "speed_score_avg": 7.92,
      "latency_score_avg": 1.52,
      "tool_calling_score_avg": 6.74,
      "judge_score_avg": 4.07,
      "tokens_per_second": 62.7,
      "latency_avg_s": 58.29,
      "total_input_tokens": 262662,
      "total_output_tokens": 344974
    },
    {
      "key": "gpt-4.1",
      "id": "gpt-4.1",
      "name": "GPT-4.1",
      "tier": "medium",
      "provider": "openai_direct",
      "open_source": false,
      "license": "",
      "cost_input_per_M": 2.0,
      "cost_output_per_M": 8.0,
      "cost_per_1k_calls_usd": 12.6,
      "subscriptions": [],
      "notes": "",
      "tested": true,
      "tool_calling": true,
      "thinking": false,
      "multimodal": false,
      "runs": 254,
      "score_global": 6.74,
      "score_by_pillar": {
        "Contenido": 7.43,
        "Agentes": 6.6,
        "Coding": 7.92,
        "Razonamiento": 7.52
      },
      "score_by_suite": {
        "content_generation": 7.55,
        "tool_calling": 6.83,
        "task_management": 7.59,
        "code_generation": 7.87,
        "reasoning": 7.73,
        "summarization": 7.35,
        "presentation": 7.52,
        "startup_content": 7.65,
        "deep_reasoning": 7.4,
        "customer_support": 5.6,
        "structured_output": 7.65,
        "hallucination": 7.56,
        "creativity": 7.11,
        "string_precision": 8.36,
        "news_seo_writing": 7.03,
        "agent_capabilities": 5.81,
        "ocr_extraction": 7.37,
        "orchestration": 6.93,
        "multi_turn": 7.47,
        "policy_adherence": 7.29,
        "strategy": 7.52,
        "sales_outreach": 7.95,
        "translation": 7.95,
        "agent_long_horizon": 7.13,
        "niah_es": 5.84,
        "niah_es_1m": 4.91
      },
      "quality_avg": 7.62,
      "cost_score_avg": 5.66,
      "speed_score_avg": 6.24,
      "latency_score_avg": 3.72,
      "tool_calling_score_avg": 7.06,
      "judge_score_avg": 4.28,
      "tokens_per_second": 60.8,
      "latency_avg_s": 21.07,
      "total_input_tokens": 19675632,
      "total_output_tokens": 113741
    },
    {
      "key": "nim-glm5",
      "id": "z-ai/glm5",
      "name": "GLM 5 (NIM)",
      "tier": "cloud_nim",
      "provider": "nvidia_nim",
      "open_source": true,
      "license": "MIT",
      "cost_input_per_M": 0,
      "cost_output_per_M": 0,
      "cost_per_1k_calls_usd": 0.0,
      "subscriptions": [],
      "notes": "Z.AI flagship base, comparar con 5.1",
      "tested": true,
      "tool_calling": true,
      "thinking": false,
      "multimodal": false,
      "runs": 90,
      "score_global": 6.73,
      "score_by_pillar": {
        "Contenido": 6.84,
        "Agentes": 6.54,
        "Coding": 6.86,
        "Razonamiento": 6.74
      },
      "score_by_suite": {
        "content_generation": 7.06,
        "tool_calling": 6.0,
        "task_management": 7.27,
        "code_generation": 7.01,
        "reasoning": 7.13,
        "summarization": 6.36,
        "presentation": 6.83,
        "startup_content": 7.0,
        "deep_reasoning": 6.43,
        "customer_support": 6.96,
        "structured_output": 7.18,
        "hallucination": 6.94,
        "creativity": 6.99,
        "string_precision": 6.96,
        "news_seo_writing": 6.09,
        "ocr_extraction": 6.34,
        "orchestration": 6.39,
        "multi_turn": 6.58,
        "policy_adherence": 6.65,
        "agent_capabilities": 6.24,
        "strategy": 6.79,
        "sales_outreach": 7.24,
        "translation": 6.98
      },
      "quality_avg": 7.97,
      "cost_score_avg": 7.27,
      "speed_score_avg": 1.72,
      "latency_score_avg": 1.1,
      "tool_calling_score_avg": 7.18,
      "judge_score_avg": 4.09,
      "tokens_per_second": 7.0,
      "latency_avg_s": 123.21,
      "total_input_tokens": 27716,
      "total_output_tokens": 67035
    },
    {
      "key": "glm-5.1",
      "id": "z-ai/glm-5.1",
      "name": "GLM-5.1",
      "tier": "cheap",
      "provider": "openrouter",
      "open_source": true,
      "license": "MIT",
      "cost_input_per_M": 0.95,
      "cost_output_per_M": 3.15,
      "cost_per_1k_calls_usd": 5.01,
      "subscriptions": [],
      "notes": "",
      "tested": true,
      "tool_calling": true,
      "thinking": true,
      "multimodal": false,
      "runs": 182,
      "score_global": 6.69,
      "score_by_pillar": {
        "Contenido": 6.76,
        "Agentes": 6.56,
        "Coding": 6.83,
        "Razonamiento": 6.65
      },
      "score_by_suite": {
        "content_generation": 7.15,
        "tool_calling": 6.23,
        "task_management": 7.26,
        "code_generation": 6.96,
        "reasoning": 7.14,
        "summarization": 6.57,
        "presentation": 6.59,
        "startup_content": 6.94,
        "deep_reasoning": 6.21,
        "customer_support": 6.73,
        "structured_output": 7.1,
        "hallucination": 6.79,
        "creativity": 6.89,
        "string_precision": 6.91,
        "news_seo_writing": 6.09,
        "ocr_extraction": 6.39,
        "orchestration": 6.33,
        "multi_turn": 6.45,
        "policy_adherence": 6.68,
        "agent_capabilities": 6.47,
        "strategy": 6.92,
        "sales_outreach": 6.98,
        "translation": 6.91
      },
      "quality_avg": 7.88,
      "cost_score_avg": 6.4,
      "speed_score_avg": 3.67,
      "latency_score_avg": 1.08,
      "tool_calling_score_avg": 7.14,
      "judge_score_avg": 4.09,
      "tokens_per_second": 23.4,
      "latency_avg_s": 82.48,
      "total_input_tokens": 56627,
      "total_output_tokens": 243382
    },
    {
      "key": "nim-glm-5.1",
      "id": "z-ai/glm-5.1",
      "name": "GLM 5.1 (NIM)",
      "tier": "cloud_nim",
      "provider": "nvidia_nim",
      "open_source": true,
      "license": "MIT",
      "cost_input_per_M": 0,
      "cost_output_per_M": 0,
      "cost_per_1k_calls_usd": 0.0,
      "subscriptions": [],
      "notes": "Z.AI agentic, no tenemos. Variante GLM 5 con mejoras",
      "tested": true,
      "tool_calling": true,
      "thinking": true,
      "multimodal": false,
      "runs": 182,
      "score_global": 6.69,
      "score_by_pillar": {
        "Contenido": 6.76,
        "Agentes": 6.56,
        "Coding": 6.83,
        "Razonamiento": 6.65
      },
      "score_by_suite": {
        "content_generation": 7.15,
        "tool_calling": 6.23,
        "task_management": 7.26,
        "code_generation": 6.96,
        "reasoning": 7.14,
        "summarization": 6.57,
        "presentation": 6.59,
        "startup_content": 6.94,
        "deep_reasoning": 6.21,
        "customer_support": 6.73,
        "structured_output": 7.1,
        "hallucination": 6.79,
        "creativity": 6.89,
        "string_precision": 6.91,
        "news_seo_writing": 6.09,
        "ocr_extraction": 6.39,
        "orchestration": 6.33,
        "multi_turn": 6.45,
        "policy_adherence": 6.68,
        "agent_capabilities": 6.47,
        "strategy": 6.92,
        "sales_outreach": 6.98,
        "translation": 6.91
      },
      "quality_avg": 7.88,
      "cost_score_avg": 6.4,
      "speed_score_avg": 3.67,
      "latency_score_avg": 1.08,
      "tool_calling_score_avg": 7.14,
      "judge_score_avg": 4.09,
      "tokens_per_second": 23.4,
      "latency_avg_s": 82.48,
      "total_input_tokens": 56627,
      "total_output_tokens": 243382
    },
    {
      "key": "local-gemma4-31b",
      "id": "gemma4:31b",
      "name": "Gemma 4 31B (DGX Spark Q4_K_M)",
      "tier": "local",
      "provider": "openrouter",
      "open_source": true,
      "license": "Gemma Terms",
      "cost_input_per_M": 0,
      "cost_output_per_M": 0,
      "cost_per_1k_calls_usd": 0.0,
      "subscriptions": [],
      "notes": "Q4_K_M en DGX Spark via Ollama. Comparar con Gemma 4 31B NIM y OpenRouter.",
      "tested": true,
      "tool_calling": true,
      "thinking": true,
      "multimodal": false,
      "runs": 89,
      "score_global": 6.69,
      "score_by_pillar": {
        "Contenido": 6.71,
        "Agentes": 6.66,
        "Coding": 6.6,
        "Razonamiento": 6.86
      },
      "score_by_suite": {
        "content_generation": 7.11,
        "tool_calling": 6.61,
        "task_management": 6.95,
        "code_generation": 6.87,
        "reasoning": 7.07,
        "summarization": 6.31,
        "presentation": 6.73,
        "startup_content": 6.98,
        "deep_reasoning": 6.7,
        "customer_support": 6.88,
        "structured_output": 6.83,
        "hallucination": 6.7,
        "creativity": 6.88,
        "string_precision": 6.72,
        "news_seo_writing": 6.18,
        "ocr_extraction": 6.05,
        "orchestration": 6.6,
        "multi_turn": 6.5,
        "policy_adherence": 6.41,
        "agent_capabilities": 6.72,
        "strategy": 7.07,
        "sales_outreach": 6.94,
        "translation": 6.5
      },
      "quality_avg": 8.22,
      "cost_score_avg": 6.42,
      "speed_score_avg": 1.98,
      "latency_score_avg": 1.01,
      "tool_calling_score_avg": 7.22,
      "judge_score_avg": 4.25,
      "tokens_per_second": 9.3,
      "latency_avg_s": 153.37,
      "total_input_tokens": 27238,
      "total_output_tokens": 117243
    },
    {
      "key": "gemma4-31b",
      "id": "gemma4:31b",
      "name": "Gemma 4 31B (local)",
      "tier": "local",
      "provider": "openrouter",
      "open_source": true,
      "license": "Apache 2.0",
      "cost_input_per_M": 0,
      "cost_output_per_M": 0,
      "cost_per_1k_calls_usd": 0.0,
      "subscriptions": [],
      "notes": "",
      "tested": true,
      "tool_calling": true,
      "thinking": true,
      "multimodal": false,
      "runs": 89,
      "score_global": 6.69,
      "score_by_pillar": {
        "Contenido": 6.71,
        "Agentes": 6.66,
        "Coding": 6.6,
        "Razonamiento": 6.86
      },
      "score_by_suite": {
        "content_generation": 7.11,
        "tool_calling": 6.61,
        "task_management": 6.95,
        "code_generation": 6.87,
        "reasoning": 7.07,
        "summarization": 6.31,
        "presentation": 6.73,
        "startup_content": 6.98,
        "deep_reasoning": 6.7,
        "customer_support": 6.88,
        "structured_output": 6.83,
        "hallucination": 6.7,
        "creativity": 6.88,
        "string_precision": 6.72,
        "news_seo_writing": 6.18,
        "ocr_extraction": 6.05,
        "orchestration": 6.6,
        "multi_turn": 6.5,
        "policy_adherence": 6.41,
        "agent_capabilities": 6.72,
        "strategy": 7.07,
        "sales_outreach": 6.94,
        "translation": 6.5
      },
      "quality_avg": 8.22,
      "cost_score_avg": 6.42,
      "speed_score_avg": 1.98,
      "latency_score_avg": 1.01,
      "tool_calling_score_avg": 7.22,
      "judge_score_avg": 4.25,
      "tokens_per_second": 9.3,
      "latency_avg_s": 153.37,
      "total_input_tokens": 27238,
      "total_output_tokens": 117243
    },
    {
      "key": "claude-sonnet-4.6",
      "id": "anthropic/claude-sonnet-4-6",
      "name": "Claude Sonnet 4.6 (ultimo Anthropic)",
      "tier": "medium",
      "provider": "openrouter",
      "open_source": false,
      "license": "",
      "cost_input_per_M": 3.0,
      "cost_output_per_M": 15.0,
      "cost_per_1k_calls_usd": 23.4,
      "subscriptions": [],
      "notes": "",
      "tested": true,
      "tool_calling": true,
      "thinking": false,
      "multimodal": true,
      "runs": 227,
      "score_global": 6.65,
      "score_by_pillar": {
        "Contenido": 7.15,
        "Agentes": 6.84,
        "Coding": 7.37,
        "Razonamiento": 7.24
      },
      "score_by_suite": {
        "content_generation": 7.0,
        "tool_calling": 6.34,
        "task_management": 7.45,
        "code_generation": 7.46,
        "reasoning": 7.4,
        "summarization": 7.32,
        "presentation": 7.23,
        "startup_content": 7.4,
        "deep_reasoning": 6.95,
        "customer_support": 7.18,
        "structured_output": 7.15,
        "hallucination": 7.61,
        "creativity": 6.81,
        "string_precision": 7.64,
        "news_seo_writing": 6.8,
        "agent_capabilities": 6.58,
        "ocr_extraction": 6.85,
        "orchestration": 6.72,
        "multi_turn": 6.63,
        "policy_adherence": 7.32,
        "strategy": 7.21,
        "sales_outreach": 7.76,
        "translation": 7.48,
        "niah_es_lite": 4.77
      },
      "quality_avg": 7.38,
      "cost_score_avg": 5.92,
      "speed_score_avg": 6.78,
      "latency_score_avg": 2.19,
      "tool_calling_score_avg": 7.27,
      "judge_score_avg": 4.13,
      "tokens_per_second": 51.0,
      "latency_avg_s": 14.58,
      "total_input_tokens": 1565765,
      "total_output_tokens": 184300
    },
    {
      "key": "local-nemotron-3-super-120b",
      "id": "nemotron-3-super:120b",
      "name": "Nemotron 3 Super 120B (DGX Spark Q4_K_M)",
      "tier": "local",
      "provider": "openrouter",
      "open_source": false,
      "license": "NVIDIA Open License",
      "cost_input_per_M": 0,
      "cost_output_per_M": 0,
      "cost_per_1k_calls_usd": 0.0,
      "subscriptions": [],
      "notes": "Q4_K_M en DGX Spark. Modelo gigante, primer test post cold start tarda ~3-5min.",
      "tested": true,
      "tool_calling": true,
      "thinking": true,
      "multimodal": false,
      "runs": 90,
      "score_global": 6.65,
      "score_by_pillar": {
        "Contenido": 6.88,
        "Agentes": 6.38,
        "Coding": 6.76,
        "Razonamiento": 6.65
      },
      "score_by_suite": {
        "content_generation": 7.06,
        "tool_calling": 6.23,
        "task_management": 6.84,
        "code_generation": 7.04,
        "reasoning": 6.97,
        "summarization": 6.65,
        "presentation": 7.07,
        "startup_content": 7.07,
        "deep_reasoning": 6.56,
        "customer_support": 6.79,
        "structured_output": 6.98,
        "hallucination": 6.58,
        "creativity": 6.84,
        "string_precision": 6.98,
        "news_seo_writing": 6.4,
        "ocr_extraction": 6.09,
        "orchestration": 5.8,
        "multi_turn": 6.61,
        "policy_adherence": 6.66,
        "agent_capabilities": 6.05,
        "strategy": 6.59,
        "sales_outreach": 7.09,
        "translation": 6.81
      },
      "quality_avg": 7.96,
      "cost_score_avg": 6.0,
      "speed_score_avg": 3.69,
      "latency_score_avg": 1.09,
      "tool_calling_score_avg": 7.07,
      "judge_score_avg": 4.2,
      "tokens_per_second": 16.8,
      "latency_avg_s": 123.69,
      "total_input_tokens": 34733,
      "total_output_tokens": 157283
    },
    {
      "key": "nim-nemotron-super-1.5",
      "id": "nvidia/llama-3.3-nemotron-super-49b-v1.5",
      "name": "Nemotron Super 49B v1.5 (NIM)",
      "tier": "cloud_nim",
      "provider": "nvidia_nim",
      "open_source": true,
      "license": "NVIDIA Open Model",
      "cost_input_per_M": 0,
      "cost_output_per_M": 0,
      "cost_per_1k_calls_usd": 0.0,
      "subscriptions": [],
      "notes": "Versión iterada del Nemotron Super 120B que ya medimos en Lote 2",
      "tested": true,
      "tool_calling": true,
      "thinking": true,
      "multimodal": false,
      "runs": 86,
      "score_global": 6.64,
      "score_by_pillar": {
        "Contenido": 6.96,
        "Agentes": 6.27,
        "Coding": 6.58,
        "Razonamiento": 6.86
      },
      "score_by_suite": {
        "content_generation": 7.09,
        "tool_calling": 6.4,
        "task_management": 7.09,
        "code_generation": 6.93,
        "reasoning": 6.85,
        "summarization": 6.67,
        "presentation": 6.98,
        "startup_content": 7.16,
        "deep_reasoning": 6.8,
        "customer_support": 5.87,
        "structured_output": 6.83,
        "hallucination": 6.71,
        "creativity": 7.14,
        "string_precision": 6.66,
        "news_seo_writing": 6.31,
        "ocr_extraction": 6.08,
        "orchestration": 5.6,
        "multi_turn": 6.25,
        "policy_adherence": 6.82,
        "agent_capabilities": 6.43,
        "strategy": 7.08,
        "sales_outreach": 7.05,
        "translation": 7.42
      },
      "quality_avg": 7.85,
      "cost_score_avg": 6.05,
      "speed_score_avg": 4.37,
      "latency_score_avg": 1.06,
      "tool_calling_score_avg": 7.19,
      "judge_score_avg": 4.09,
      "tokens_per_second": 25.4,
      "latency_avg_s": 83.0,
      "total_input_tokens": 29710,
      "total_output_tokens": 139881
    },
    {
      "key": "nim-deepseek-v4-flash",
      "id": "deepseek-ai/deepseek-v4-flash",
      "name": "DeepSeek V4 Flash (NIM)",
      "tier": "cloud_nim",
      "provider": "nvidia_nim",
      "open_source": true,
      "license": "MIT",
      "cost_input_per_M": 0,
      "cost_output_per_M": 0,
      "cost_per_1k_calls_usd": 0.0,
      "subscriptions": [],
      "notes": "Mismo modelo que probamos via OpenRouter — comparar latencia/calidad NIM vs OR",
      "tested": true,
      "tool_calling": true,
      "thinking": true,
      "multimodal": false,
      "runs": 153,
      "score_global": 6.6,
      "score_by_pillar": {
        "Contenido": 7.16,
        "Agentes": 6.85,
        "Coding": 7.09,
        "Razonamiento": 7.12
      },
      "score_by_suite": {
        "content_generation": 7.62,
        "tool_calling": 6.61,
        "task_management": 7.4,
        "code_generation": 7.18,
        "reasoning": 7.33,
        "summarization": 6.64,
        "presentation": 7.33,
        "startup_content": 7.21,
        "deep_reasoning": 6.98,
        "customer_support": 6.89,
        "structured_output": 7.45,
        "hallucination": 7.23,
        "creativity": 7.4,
        "string_precision": 7.21,
        "news_seo_writing": 6.34,
        "ocr_extraction": 6.45,
        "orchestration": 7.16,
        "multi_turn": 6.72,
        "policy_adherence": 7.0,
        "agent_capabilities": 6.54,
        "strategy": 7.07,
        "sales_outreach": 7.38,
        "translation": 7.56,
        "agent_long_horizon": 6.4,
        "niah_es": 5.92
      },
      "quality_avg": 7.9,
      "cost_score_avg": 5.96,
      "speed_score_avg": 3.14,
      "latency_score_avg": 1.87,
      "tool_calling_score_avg": 7.1,
      "judge_score_avg": 4.17,
      "tokens_per_second": 17.2,
      "latency_avg_s": 82.68,
      "total_input_tokens": 1787023,
      "total_output_tokens": 113273
    },
    {
      "key": "minimax-m2.7",
      "id": "minimax/minimax-m2.7",
      "name": "MiniMax M2.7",
      "tier": "cheap",
      "provider": "openrouter",
      "open_source": false,
      "license": "",
      "cost_input_per_M": 0.3,
      "cost_output_per_M": 1.2,
      "cost_per_1k_calls_usd": 1.89,
      "subscriptions": [],
      "notes": "",
      "tested": true,
      "tool_calling": true,
      "thinking": false,
      "multimodal": false,
      "runs": 167,
      "score_global": 6.59,
      "score_by_pillar": {
        "Contenido": 6.68,
        "Agentes": 6.4,
        "Coding": 7.1,
        "Razonamiento": 6.07
      },
      "score_by_suite": {
        "content_generation": 6.88,
        "tool_calling": 6.25,
        "task_management": 6.55,
        "code_generation": 6.5,
        "reasoning": 7.04,
        "summarization": 6.38,
        "presentation": 7.13,
        "startup_content": 7.16,
        "deep_reasoning": 4.87,
        "customer_support": 6.47,
        "structured_output": 7.01,
        "hallucination": 7.05,
        "creativity": 5.71,
        "string_precision": 7.82,
        "news_seo_writing": 6.62,
        "agent_capabilities": 6.3,
        "ocr_extraction": 6.48,
        "orchestration": 6.01,
        "multi_turn": 6.76,
        "policy_adherence": 6.72,
        "strategy": 6.97,
        "sales_outreach": 6.81,
        "translation": 7.06,
        "agent_long_horizon": 6.66
      },
      "quality_avg": 7.3,
      "cost_score_avg": 6.43,
      "speed_score_avg": 5.75,
      "latency_score_avg": 1.54,
      "tool_calling_score_avg": 7.06,
      "judge_score_avg": 4.11,
      "tokens_per_second": 39.5,
      "latency_avg_s": 33.79,
      "total_input_tokens": 162597,
      "total_output_tokens": 208835
    },
    {
      "key": "qwen-3.6-plus",
      "id": "qwen/qwen3.6-plus",
      "name": "Qwen 3.6 Plus",
      "tier": "cheap",
      "provider": "openrouter",
      "open_source": false,
      "license": "Proprietary",
      "cost_input_per_M": 0.33,
      "cost_output_per_M": 0.65,
      "cost_per_1k_calls_usd": 1.074,
      "subscriptions": [],
      "notes": "Plus = API-only propietario de Alibaba (NO confundir con Qwen 3.6 base que es Apache 2.0)",
      "tested": true,
      "tool_calling": true,
      "thinking": false,
      "multimodal": false,
      "runs": 150,
      "score_global": 6.56,
      "score_by_pillar": {
        "Contenido": 6.64,
        "Agentes": 5.74,
        "Coding": 7.1,
        "Razonamiento": 6.98
      },
      "score_by_suite": {
        "content_generation": 6.57,
        "tool_calling": 6.0,
        "task_management": 7.03,
        "code_generation": 7.38,
        "reasoning": 7.27,
        "summarization": 6.45,
        "presentation": 7.05,
        "startup_content": 7.21,
        "deep_reasoning": 7.01,
        "customer_support": 4.75,
        "structured_output": 6.71,
        "hallucination": 6.63,
        "creativity": 6.12,
        "string_precision": 7.5,
        "news_seo_writing": 6.27,
        "ocr_extraction": 6.3,
        "orchestration": 4.89,
        "multi_turn": 6.6,
        "policy_adherence": 6.65,
        "agent_capabilities": 4.77,
        "strategy": 6.98,
        "sales_outreach": 7.28,
        "translation": 6.68
      },
      "quality_avg": 7.56,
      "cost_score_avg": 4.65,
      "speed_score_avg": 7.0,
      "latency_score_avg": 1.07,
      "tool_calling_score_avg": 7.28,
      "judge_score_avg": 4.23,
      "tokens_per_second": 48.3,
      "latency_avg_s": 69.18,
      "total_input_tokens": 47383,
      "total_output_tokens": 463242
    },
    {
      "key": "claude-opus-4.7",
      "id": "anthropic/claude-opus-4-7",
      "name": "Claude Opus 4.7",
      "tier": "premium",
      "provider": "openrouter",
      "open_source": false,
      "license": "",
      "cost_input_per_M": 15.0,
      "cost_output_per_M": 75.0,
      "cost_per_1k_calls_usd": 117.0,
      "subscriptions": [],
      "notes": "",
      "tested": true,
      "tool_calling": true,
      "thinking": false,
      "multimodal": true,
      "runs": 254,
      "score_global": 6.54,
      "score_by_pillar": {
        "Contenido": 7.22,
        "Agentes": 6.97,
        "Coding": 7.31,
        "Razonamiento": 7.23
      },
      "score_by_suite": {
        "content_generation": 7.36,
        "tool_calling": 6.38,
        "task_management": 7.54,
        "code_generation": 7.48,
        "reasoning": 7.58,
        "summarization": 7.06,
        "presentation": 7.39,
        "startup_content": 7.42,
        "deep_reasoning": 6.85,
        "customer_support": 6.97,
        "structured_output": 7.49,
        "hallucination": 7.51,
        "creativity": 7.35,
        "string_precision": 7.43,
        "news_seo_writing": 6.5,
        "ocr_extraction": 6.88,
        "orchestration": 6.91,
        "multi_turn": 7.14,
        "policy_adherence": 7.02,
        "agent_capabilities": 7.01,
        "strategy": 7.38,
        "sales_outreach": 7.54,
        "translation": 7.35,
        "niah_es": 4.95
      },
      "quality_avg": 7.64,
      "cost_score_avg": 4.93,
      "speed_score_avg": 6.48,
      "latency_score_avg": 2.17,
      "tool_calling_score_avg": 7.19,
      "judge_score_avg": 4.22,
      "tokens_per_second": 51.9,
      "latency_avg_s": 12.68,
      "total_input_tokens": 9971743,
      "total_output_tokens": 189599
    },
    {
      "key": "qwen3.5-397b-cloud",
      "id": "qwen3.5:397b-cloud",
      "name": "Qwen 3.5 397B (Ollama Cloud)",
      "tier": "cloud_ollama",
      "provider": "ollama_cloud",
      "open_source": true,
      "license": "Apache 2.0",
      "cost_input_per_M": 0,
      "cost_output_per_M": 0,
      "cost_per_1k_calls_usd": 0.0,
      "subscriptions": [
        {
          "key": "ollama_cloud_pro",
          "name": "Ollama Cloud",
          "plan": "Pro",
          "price_month_usd": 30,
          "url": "https://ollama.com/cloud",
          "notes": "Rate limit varía por modelo. Recomendado para uso a volumen mid (1-10k calls/día)."
        }
      ],
      "notes": "",
      "tested": true,
      "tool_calling": true,
      "thinking": false,
      "multimodal": false,
      "runs": 94,
      "score_global": 6.49,
      "score_by_pillar": {
        "Contenido": 6.04,
        "Agentes": 7.14,
        "Coding": 6.98,
        "Razonamiento": 5.57
      },
      "score_by_suite": {
        "translation": 4.41,
        "content_generation": 8.04,
        "tool_calling": 7.72,
        "task_management": 8.23,
        "code_generation": 8.28,
        "reasoning": 5.16,
        "summarization": 7.38,
        "presentation": 6.94,
        "startup_content": 7.04,
        "deep_reasoning": 4.89,
        "customer_support": 7.79,
        "structured_output": 6.69,
        "hallucination": 6.42,
        "creativity": 6.64,
        "string_precision": 7.71,
        "news_seo_writing": 4.39,
        "ocr_extraction": 5.31,
        "orchestration": 7.51,
        "multi_turn": 6.19,
        "policy_adherence": 6.44,
        "agent_capabilities": 6.44,
        "strategy": 6.46,
        "sales_outreach": 5.4
      },
      "quality_avg": 5.5,
      "cost_score_avg": 9.84,
      "speed_score_avg": 7.78,
      "latency_score_avg": 1.35,
      "tool_calling_score_avg": 7.24,
      "judge_score_avg": 4.05,
      "tokens_per_second": 75.4,
      "latency_avg_s": 32.0,
      "total_input_tokens": 35683,
      "total_output_tokens": 168538
    },
    {
      "key": "kimi-k2.6",
      "id": "moonshotai/kimi-k2.6",
      "name": "Kimi K2.6",
      "tier": "cheap",
      "provider": "openrouter",
      "open_source": true,
      "license": "Modified MIT",
      "cost_input_per_M": 0.8,
      "cost_output_per_M": 3.5,
      "cost_per_1k_calls_usd": 5.49,
      "subscriptions": [],
      "notes": "Thinking model. Pesos públicos en HF (1.1T params)",
      "tested": true,
      "tool_calling": true,
      "thinking": true,
      "multimodal": false,
      "runs": 182,
      "score_global": 6.4,
      "score_by_pillar": {
        "Contenido": 6.5,
        "Agentes": 6.23,
        "Razonamiento": 6.29,
        "Coding": 6.61
      },
      "score_by_suite": {
        "content_generation": 6.79,
        "tool_calling": 5.66,
        "task_management": 6.73,
        "reasoning": 6.91,
        "summarization": 5.81,
        "deep_reasoning": 5.88,
        "customer_support": 6.67,
        "structured_output": 6.76,
        "hallucination": 6.77,
        "string_precision": 6.91,
        "news_seo_writing": 5.72,
        "ocr_extraction": 5.96,
        "orchestration": 6.2,
        "multi_turn": 6.33,
        "policy_adherence": 6.32,
        "agent_capabilities": 5.93,
        "sales_outreach": 6.92,
        "translation": 6.56,
        "code_generation": 6.8,
        "presentation": 6.68,
        "startup_content": 6.85,
        "creativity": 6.65,
        "strategy": 6.03
      },
      "quality_avg": 7.68,
      "cost_score_avg": 4.85,
      "speed_score_avg": 5.13,
      "latency_score_avg": 1.1,
      "tool_calling_score_avg": 6.94,
      "judge_score_avg": 4.06,
      "tokens_per_second": 35.2,
      "latency_avg_s": 117.96,
      "total_input_tokens": 52060,
      "total_output_tokens": 556926
    },
    {
      "key": "qwen3.5-cloud",
      "id": "qwen3.5:cloud",
      "name": "Qwen 3.5 (Ollama Cloud default)",
      "tier": "cloud_ollama",
      "provider": "ollama_cloud",
      "open_source": true,
      "license": "Apache 2.0",
      "cost_input_per_M": 0,
      "cost_output_per_M": 0,
      "cost_per_1k_calls_usd": 0.0,
      "subscriptions": [
        {
          "key": "ollama_cloud_pro",
          "name": "Ollama Cloud",
          "plan": "Pro",
          "price_month_usd": 30,
          "url": "https://ollama.com/cloud",
          "notes": "Rate limit varía por modelo. Recomendado para uso a volumen mid (1-10k calls/día)."
        }
      ],
      "notes": "",
      "tested": true,
      "tool_calling": true,
      "thinking": false,
      "multimodal": false,
      "runs": 91,
      "score_global": 6.4,
      "score_by_pillar": {
        "Contenido": 6.16,
        "Agentes": 6.82,
        "Coding": 6.11,
        "Razonamiento": 6.37
      },
      "score_by_suite": {
        "content_generation": 7.15,
        "tool_calling": 8.01,
        "task_management": 5.37,
        "code_generation": 6.7,
        "reasoning": 5.2,
        "summarization": 7.61,
        "presentation": 7.46,
        "startup_content": 6.13,
        "deep_reasoning": 6.41,
        "customer_support": 7.11,
        "structured_output": 6.33,
        "hallucination": 7.68,
        "creativity": 6.16,
        "string_precision": 6.82,
        "news_seo_writing": 4.23,
        "ocr_extraction": 4.61,
        "orchestration": 7.42,
        "multi_turn": 6.91,
        "policy_adherence": 5.82,
        "agent_capabilities": 6.64,
        "strategy": 6.15,
        "sales_outreach": 6.75,
        "translation": 5.69
      },
      "quality_avg": 5.29,
      "cost_score_avg": 10.0,
      "speed_score_avg": 7.52,
      "latency_score_avg": 1.32,
      "tool_calling_score_avg": 7.24,
      "judge_score_avg": 4.08,
      "tokens_per_second": 70.5,
      "latency_avg_s": 33.22,
      "total_input_tokens": 35995,
      "total_output_tokens": 154433
    },
    {
      "key": "nim-qwen3-next-thinking",
      "id": "qwen/qwen3-next-80b-a3b-thinking",
      "name": "Qwen 3-Next 80B Thinking (NIM)",
      "tier": "cloud_nim",
      "provider": "nvidia_nim",
      "open_source": true,
      "license": "Apache 2.0",
      "cost_input_per_M": 0,
      "cost_output_per_M": 0,
      "cost_per_1k_calls_usd": 0.0,
      "subscriptions": [],
      "notes": "Variante thinking — comparar con Qwen 3-Next Instruct",
      "tested": true,
      "tool_calling": true,
      "thinking": true,
      "multimodal": false,
      "runs": 182,
      "score_global": 6.35,
      "score_by_pillar": {
        "Contenido": 6.43,
        "Agentes": 6.16,
        "Coding": 6.7,
        "Razonamiento": 6.12
      },
      "score_by_suite": {
        "content_generation": 6.76,
        "tool_calling": 6.46,
        "task_management": 6.22,
        "code_generation": 6.75,
        "reasoning": 6.11,
        "summarization": 6.44,
        "presentation": 6.49,
        "startup_content": 6.38,
        "deep_reasoning": 5.93,
        "customer_support": 5.88,
        "structured_output": 6.89,
        "hallucination": 6.59,
        "creativity": 6.47,
        "string_precision": 6.99,
        "news_seo_writing": 6.12,
        "ocr_extraction": 6.18,
        "orchestration": 5.52,
        "multi_turn": 6.6,
        "policy_adherence": 6.54,
        "agent_capabilities": 6.1,
        "strategy": 6.06,
        "sales_outreach": 6.71,
        "translation": 6.18
      },
      "quality_avg": 6.89,
      "cost_score_avg": 5.42,
      "speed_score_avg": 9.1,
      "latency_score_avg": 1.18,
      "tool_calling_score_avg": 6.47,
      "judge_score_avg": 3.69,
      "tokens_per_second": 102.5,
      "latency_avg_s": 17.43,
      "total_input_tokens": 61088,
      "total_output_tokens": 324760
    },
    {
      "key": "gemini-pro",
      "id": "google/gemini-2.5-pro",
      "name": "Gemini 2.5 Pro",
      "tier": "medium",
      "provider": "openrouter",
      "open_source": false,
      "license": "",
      "cost_input_per_M": 1.25,
      "cost_output_per_M": 10.0,
      "cost_per_1k_calls_usd": 15.375,
      "subscriptions": [],
      "notes": "",
      "tested": true,
      "tool_calling": true,
      "thinking": true,
      "multimodal": true,
      "runs": 91,
      "score_global": 6.28,
      "score_by_pillar": {
        "Contenido": 6.34,
        "Agentes": 6.46,
        "Coding": 6.53,
        "Razonamiento": 5.52
      },
      "score_by_suite": {
        "content_generation": 7.36,
        "task_management": 6.65,
        "code_generation": 5.79,
        "reasoning": 5.74,
        "summarization": 6.24,
        "presentation": 6.22,
        "startup_content": 5.65,
        "deep_reasoning": 5.25,
        "customer_support": 7.0,
        "structured_output": 7.0,
        "hallucination": 6.22,
        "creativity": 6.77,
        "string_precision": 7.17,
        "news_seo_writing": 5.55,
        "ocr_extraction": 5.96,
        "orchestration": 6.58,
        "multi_turn": 6.99,
        "policy_adherence": 6.63,
        "agent_capabilities": 5.56,
        "strategy": 5.15,
        "sales_outreach": 6.71,
        "translation": 6.66,
        "tool_calling": 6.03
      },
      "quality_avg": 6.52,
      "cost_score_avg": 5.75,
      "speed_score_avg": 8.79,
      "latency_score_avg": 1.29,
      "tool_calling_score_avg": 7.07,
      "judge_score_avg": 3.76,
      "tokens_per_second": 91.2,
      "latency_avg_s": 18.53,
      "total_input_tokens": 24754,
      "total_output_tokens": 145620
    },
    {
      "key": "nim-step-3.5-flash",
      "id": "stepfun-ai/step-3.5-flash",
      "name": "Step 3.5 Flash (NIM)",
      "tier": "cloud_nim",
      "provider": "nvidia_nim",
      "open_source": true,
      "license": "Apache 2.0",
      "cost_input_per_M": 0,
      "cost_output_per_M": 0,
      "cost_per_1k_calls_usd": 0.0,
      "subscriptions": [],
      "notes": "Step 3.5 Flash - reemplazo de Step3 que fallo 91/91 con 404 en OpenRouter",
      "tested": true,
      "tool_calling": true,
      "thinking": false,
      "multimodal": false,
      "runs": 103,
      "score_global": 6.2,
      "score_by_pillar": {
        "Contenido": 6.55,
        "Agentes": 6.33,
        "Coding": 6.01,
        "Razonamiento": 5.28
      },
      "score_by_suite": {
        "content_generation": 7.53,
        "tool_calling": 5.88,
        "task_management": 5.9,
        "code_generation": 6.33,
        "reasoning": 4.07,
        "summarization": 6.77,
        "presentation": 4.79,
        "startup_content": 7.16,
        "deep_reasoning": 4.84,
        "customer_support": 6.8,
        "structured_output": 6.4,
        "hallucination": 7.03,
        "creativity": 7.2,
        "string_precision": 7.04,
        "news_seo_writing": 6.1,
        "ocr_extraction": 4.19,
        "orchestration": 6.03,
        "multi_turn": 6.91,
        "policy_adherence": 6.93,
        "agent_capabilities": 5.96,
        "strategy": 5.64,
        "sales_outreach": 5.94,
        "translation": 5.73,
        "agent_long_horizon": 6.51
      },
      "quality_avg": 6.85,
      "cost_score_avg": 5.74,
      "speed_score_avg": 5.84,
      "latency_score_avg": 1.32,
      "tool_calling_score_avg": 6.92,
      "judge_score_avg": 4.17,
      "tokens_per_second": 41.3,
      "latency_avg_s": 69.33,
      "total_input_tokens": 249653,
      "total_output_tokens": 215660
    },
    {
      "key": "gemini-3.1-pro",
      "id": "google/gemini-3.1-pro-preview",
      "name": "Gemini 3.1 Pro",
      "tier": "medium",
      "provider": "openrouter",
      "open_source": false,
      "license": "",
      "cost_input_per_M": 2.0,
      "cost_output_per_M": 12.0,
      "cost_per_1k_calls_usd": 18.6,
      "subscriptions": [],
      "notes": "",
      "tested": true,
      "tool_calling": true,
      "thinking": true,
      "multimodal": true,
      "runs": 175,
      "score_global": 6.18,
      "score_by_pillar": {
        "Contenido": 6.29,
        "Agentes": 6.5,
        "Coding": 6.35,
        "Razonamiento": 6.04
      },
      "score_by_suite": {
        "content_generation": 6.92,
        "tool_calling": 6.28,
        "task_management": 6.71,
        "code_generation": 6.26,
        "reasoning": 6.0,
        "summarization": 6.33,
        "presentation": 5.41,
        "startup_content": 6.61,
        "deep_reasoning": 5.75,
        "customer_support": 6.88,
        "structured_output": 6.58,
        "hallucination": 6.1,
        "creativity": 6.55,
        "string_precision": 6.83,
        "news_seo_writing": 5.3,
        "ocr_extraction": 5.64,
        "orchestration": 6.57,
        "multi_turn": 6.31,
        "policy_adherence": 6.4,
        "agent_capabilities": 6.4,
        "strategy": 6.59,
        "sales_outreach": 6.29,
        "translation": 6.81,
        "agent_long_horizon": 6.38,
        "niah_es": 5.95
      },
      "quality_avg": 7.4,
      "cost_score_avg": 2.95,
      "speed_score_avg": 8.26,
      "latency_score_avg": 1.55,
      "tool_calling_score_avg": 7.07,
      "judge_score_avg": 3.9,
      "tokens_per_second": 84.5,
      "latency_avg_s": 18.07,
      "total_input_tokens": 5984280,
      "total_output_tokens": 281979
    },
    {
      "key": "nim-kimi-k2-thinking",
      "id": "moonshotai/kimi-k2-thinking",
      "name": "Kimi K2 Thinking (NIM)",
      "tier": "cloud_nim",
      "provider": "nvidia_nim",
      "open_source": true,
      "license": "Modified MIT",
      "cost_input_per_M": 0,
      "cost_output_per_M": 0,
      "cost_per_1k_calls_usd": 0.0,
      "subscriptions": [],
      "notes": "Variante thinking de K2 — comparar con K2.6 thinking",
      "tested": true,
      "tool_calling": true,
      "thinking": true,
      "multimodal": false,
      "runs": 102,
      "score_global": 6.17,
      "score_by_pillar": {
        "Contenido": 6.3,
        "Agentes": 6.27,
        "Coding": 6.1,
        "Razonamiento": 5.25
      },
      "score_by_suite": {
        "content_generation": 7.02,
        "tool_calling": 6.23,
        "task_management": 5.57,
        "code_generation": 5.25,
        "reasoning": 4.27,
        "summarization": 6.36,
        "presentation": 6.43,
        "startup_content": 6.2,
        "deep_reasoning": 4.84,
        "customer_support": 6.77,
        "structured_output": 6.88,
        "hallucination": 6.9,
        "creativity": 6.41,
        "string_precision": 6.99,
        "news_seo_writing": 5.84,
        "ocr_extraction": 5.1,
        "orchestration": 6.57,
        "multi_turn": 6.24,
        "policy_adherence": 6.87,
        "agent_capabilities": 5.58,
        "strategy": 5.43,
        "sales_outreach": 7.01,
        "translation": 5.34,
        "agent_long_horizon": 6.88
      },
      "quality_avg": 6.94,
      "cost_score_avg": 5.78,
      "speed_score_avg": 5.08,
      "latency_score_avg": 1.11,
      "tool_calling_score_avg": 6.86,
      "judge_score_avg": 4.07,
      "tokens_per_second": 30.5,
      "latency_avg_s": 78.4,
      "total_input_tokens": 200238,
      "total_output_tokens": 209595
    },
    {
      "key": "kimi-k2.5",
      "id": "moonshotai/kimi-k2.5",
      "name": "Kimi K2.5",
      "tier": "cheap",
      "provider": "openrouter",
      "open_source": true,
      "license": "Modified MIT",
      "cost_input_per_M": 0.2,
      "cost_output_per_M": 0.8,
      "cost_per_1k_calls_usd": 1.26,
      "subscriptions": [],
      "notes": "",
      "tested": true,
      "tool_calling": true,
      "thinking": true,
      "multimodal": false,
      "runs": 130,
      "score_global": 6.14,
      "score_by_pillar": {
        "Contenido": 6.34,
        "Agentes": 6.31,
        "Coding": 6.18,
        "Razonamiento": 4.95
      },
      "score_by_suite": {
        "content_generation": 5.6,
        "tool_calling": 6.22,
        "task_management": 5.1,
        "code_generation": 5.96,
        "reasoning": 4.65,
        "summarization": 6.44,
        "presentation": 7.14,
        "startup_content": 6.57,
        "deep_reasoning": 3.96,
        "customer_support": 7.16,
        "structured_output": 7.35,
        "hallucination": 6.66,
        "creativity": 6.71,
        "string_precision": 7.09,
        "news_seo_writing": 5.64,
        "ocr_extraction": 4.51,
        "orchestration": 6.81,
        "multi_turn": 6.47,
        "policy_adherence": 6.54,
        "agent_capabilities": 6.42,
        "strategy": 5.82,
        "sales_outreach": 7.6,
        "translation": 5.74,
        "agent_long_horizon": 6.62
      },
      "quality_avg": 6.37,
      "cost_score_avg": 7.05,
      "speed_score_avg": 4.37,
      "latency_score_avg": 1.13,
      "tool_calling_score_avg": 7.12,
      "judge_score_avg": 3.98,
      "tokens_per_second": 29.0,
      "latency_avg_s": 93.31,
      "total_input_tokens": 228998,
      "total_output_tokens": 273771
    },
    {
      "key": "nim-kimi-k2.5",
      "id": "moonshotai/kimi-k2.5",
      "name": "Kimi K2.5 (NIM)",
      "tier": "cloud_nim",
      "provider": "nvidia_nim",
      "open_source": true,
      "license": "Modified MIT",
      "cost_input_per_M": 0,
      "cost_output_per_M": 0,
      "cost_per_1k_calls_usd": 0.0,
      "subscriptions": [],
      "notes": "Version nueva 2026, no tenemos K2.5 antes. Comparar con K2.6 thinking",
      "tested": true,
      "tool_calling": true,
      "thinking": true,
      "multimodal": false,
      "runs": 130,
      "score_global": 6.14,
      "score_by_pillar": {
        "Contenido": 6.34,
        "Agentes": 6.31,
        "Coding": 6.18,
        "Razonamiento": 4.95
      },
      "score_by_suite": {
        "content_generation": 5.6,
        "tool_calling": 6.22,
        "task_management": 5.1,
        "code_generation": 5.96,
        "reasoning": 4.65,
        "summarization": 6.44,
        "presentation": 7.14,
        "startup_content": 6.57,
        "deep_reasoning": 3.96,
        "customer_support": 7.16,
        "structured_output": 7.35,
        "hallucination": 6.66,
        "creativity": 6.71,
        "string_precision": 7.09,
        "news_seo_writing": 5.64,
        "ocr_extraction": 4.51,
        "orchestration": 6.81,
        "multi_turn": 6.47,
        "policy_adherence": 6.54,
        "agent_capabilities": 6.42,
        "strategy": 5.82,
        "sales_outreach": 7.6,
        "translation": 5.74,
        "agent_long_horizon": 6.62
      },
      "quality_avg": 6.37,
      "cost_score_avg": 7.05,
      "speed_score_avg": 4.37,
      "latency_score_avg": 1.13,
      "tool_calling_score_avg": 7.12,
      "judge_score_avg": 3.98,
      "tokens_per_second": 29.0,
      "latency_avg_s": 93.31,
      "total_input_tokens": 228998,
      "total_output_tokens": 273771
    },
    {
      "key": "gpt-5.5",
      "id": "gpt-5.5",
      "name": "GPT-5.5",
      "tier": "premium",
      "provider": "openai_direct",
      "open_source": false,
      "license": "",
      "cost_input_per_M": 5.0,
      "cost_output_per_M": 30.0,
      "cost_per_1k_calls_usd": 46.5,
      "subscriptions": [],
      "notes": "",
      "tested": true,
      "tool_calling": true,
      "thinking": true,
      "multimodal": true,
      "runs": 151,
      "score_global": 6.07,
      "score_by_pillar": {
        "Contenido": 6.67,
        "Agentes": 5.69,
        "Coding": 6.56,
        "Razonamiento": 6.51
      },
      "score_by_suite": {
        "translation": 6.79,
        "content_generation": 7.1,
        "tool_calling": 6.05,
        "task_management": 6.49,
        "code_generation": 6.64,
        "reasoning": 6.67,
        "summarization": 6.67,
        "presentation": 6.75,
        "startup_content": 6.64,
        "deep_reasoning": 6.19,
        "customer_support": 4.25,
        "structured_output": 6.93,
        "hallucination": 6.92,
        "creativity": 6.76,
        "string_precision": 6.53,
        "news_seo_writing": 5.84,
        "ocr_extraction": 6.26,
        "orchestration": 5.37,
        "multi_turn": 6.37,
        "policy_adherence": 6.59,
        "agent_capabilities": 5.15,
        "sales_outreach": 7.15,
        "strategy": 6.56,
        "agent_long_horizon": 6.51,
        "niah_es_lite": 5.44
      },
      "quality_avg": 7.29,
      "cost_score_avg": 2.75,
      "speed_score_avg": 6.74,
      "latency_score_avg": 2.78,
      "tool_calling_score_avg": 7.16,
      "judge_score_avg": 4.13,
      "tokens_per_second": 53.0,
      "latency_avg_s": 24.12,
      "total_input_tokens": 1391651,
      "total_output_tokens": 203823
    },
    {
      "key": "deepseek-v4-pro",
      "id": "deepseek/deepseek-v4-pro",
      "name": "DeepSeek V4 Pro",
      "tier": "medium",
      "provider": "openrouter",
      "open_source": true,
      "license": "MIT",
      "cost_input_per_M": 1.74,
      "cost_output_per_M": 3.48,
      "cost_per_1k_calls_usd": 5.742,
      "subscriptions": [],
      "notes": "1.6T params, 49B activos, 1M context. Flagship V4.",
      "tested": true,
      "tool_calling": true,
      "thinking": true,
      "multimodal": false,
      "runs": 126,
      "score_global": 5.62,
      "score_by_pillar": {
        "Contenido": 6.14,
        "Agentes": 6.42,
        "Coding": 6.42,
        "Razonamiento": 6.22
      },
      "score_by_suite": {
        "content_generation": 6.3,
        "tool_calling": 6.63,
        "task_management": 6.94,
        "code_generation": 7.11,
        "reasoning": 5.61,
        "summarization": 6.66,
        "presentation": 4.62,
        "startup_content": 5.77,
        "deep_reasoning": 5.88,
        "structured_output": 6.84,
        "hallucination": 6.88,
        "creativity": 7.04,
        "string_precision": 6.84,
        "news_seo_writing": 5.94,
        "ocr_extraction": 5.25,
        "orchestration": 6.83,
        "multi_turn": 6.43,
        "policy_adherence": 5.87,
        "agent_capabilities": 6.18,
        "strategy": 6.85,
        "sales_outreach": 7.1,
        "translation": 5.53,
        "agent_long_horizon": 6.81,
        "niah_es_lite": 4.29
      },
      "quality_avg": 6.32,
      "cost_score_avg": 4.73,
      "speed_score_avg": 4.55,
      "latency_score_avg": 1.22,
      "tool_calling_score_avg": 7.04,
      "judge_score_avg": 3.03,
      "tokens_per_second": 26.8,
      "latency_avg_s": 69.99,
      "total_input_tokens": 1540300,
      "total_output_tokens": 240893
    },
    {
      "key": "deepseek-v4-flash-cloud",
      "id": "deepseek-v4-flash",
      "name": "DeepSeek V4 Flash (Ollama Cloud)",
      "tier": "cloud_ollama",
      "provider": "ollama_cloud",
      "open_source": true,
      "license": "MIT",
      "cost_input_per_M": 0,
      "cost_output_per_M": 0,
      "cost_per_1k_calls_usd": 0.0,
      "subscriptions": [
        {
          "key": "ollama_cloud_pro",
          "name": "Ollama Cloud",
          "plan": "Pro",
          "price_month_usd": 30,
          "url": "https://ollama.com/cloud",
          "notes": "Rate limit varía por modelo. Recomendado para uso a volumen mid (1-10k calls/día)."
        }
      ],
      "notes": "Variante más chica de V4. Comparar con V4 Flash NIM (7.07 score).",
      "tested": true,
      "tool_calling": true,
      "thinking": true,
      "multimodal": false,
      "runs": 57,
      "score_global": 4.96,
      "score_by_pillar": {},
      "score_by_suite": {
        "agent_long_horizon": 6.79,
        "niah_es_lite": 4.47
      },
      "quality_avg": 5.15,
      "cost_score_avg": 4.26,
      "speed_score_avg": 4.44,
      "latency_score_avg": 2.0,
      "tool_calling_score_avg": 7.0,
      "judge_score_avg": 1.46,
      "tokens_per_second": 29.2,
      "latency_avg_s": 48.31,
      "total_input_tokens": 1478714,
      "total_output_tokens": 68911
    },
    {
      "key": "deepseek-v4-pro-cloud",
      "id": "deepseek-v4-pro",
      "name": "DeepSeek V4 Pro (Ollama Cloud)",
      "tier": "cloud_ollama",
      "provider": "ollama_cloud",
      "open_source": true,
      "license": "MIT",
      "cost_input_per_M": 0,
      "cost_output_per_M": 0,
      "cost_per_1k_calls_usd": 0.0,
      "subscriptions": [
        {
          "key": "ollama_cloud_pro",
          "name": "Ollama Cloud",
          "plan": "Pro",
          "price_month_usd": 30,
          "url": "https://ollama.com/cloud",
          "notes": "Rate limit varía por modelo. Recomendado para uso a volumen mid (1-10k calls/día)."
        }
      ],
      "notes": "Recién agregado a Ollama Cloud (abril 28). Smoke test 22s OK, sin timeouts vs OpenRouter (76% cobertura) y NIM (504s timeouts).",
      "tested": true,
      "tool_calling": true,
      "thinking": true,
      "multimodal": false,
      "runs": 55,
      "score_global": 4.86,
      "score_by_pillar": {},
      "score_by_suite": {
        "agent_long_horizon": 7.27,
        "niah_es_lite": 4.32
      },
      "quality_avg": 5.24,
      "cost_score_avg": 4.23,
      "speed_score_avg": 3.16,
      "latency_score_avg": 1.35,
      "tool_calling_score_avg": 7.0,
      "judge_score_avg": 1.44,
      "tokens_per_second": 18.6,
      "latency_avg_s": 91.49,
      "total_input_tokens": 1399556,
      "total_output_tokens": 65064
    },
    {
      "key": "deepseek-v4-flash",
      "id": "deepseek/deepseek-v4-flash",
      "name": "DeepSeek V4 Flash (OpenRouter)",
      "tier": "cheap",
      "provider": "openrouter",
      "open_source": true,
      "license": "MIT",
      "cost_input_per_M": 0.14,
      "cost_output_per_M": 0.28,
      "cost_per_1k_calls_usd": 0.462,
      "subscriptions": [],
      "notes": "284B params, 13B activos, 1M context. Sucesor V3.2.",
      "tested": false,
      "tool_calling": true,
      "thinking": true,
      "multimodal": false,
      "runs": 3,
      "score_global": 8.26,
      "score_by_pillar": {
        "Contenido": 8.26
      },
      "score_by_suite": {
        "content_generation": 8.26
      },
      "quality_avg": 9.16,
      "cost_score_avg": 9.0,
      "speed_score_avg": 8.0,
      "latency_score_avg": 1.33,
      "tool_calling_score_avg": 7.0,
      "judge_score_avg": 4.73,
      "tokens_per_second": 66.7,
      "latency_avg_s": 13.41,
      "total_input_tokens": 220,
      "total_output_tokens": 2682
    },
    {
      "key": "gemini-3.1-flash-lite-thinking",
      "id": "google/gemini-3.1-flash-lite-preview",
      "name": "Gemini 3.1 Flash Lite (thinking)",
      "tier": "cheap",
      "provider": "openrouter",
      "open_source": false,
      "license": "",
      "cost_input_per_M": 0.25,
      "cost_output_per_M": 1.5,
      "cost_per_1k_calls_usd": 2.325,
      "subscriptions": [],
      "notes": "Misma versión con thinking forzado.",
      "tested": false,
      "tool_calling": true,
      "thinking": true,
      "multimodal": true,
      "runs": 12,
      "score_global": 7.46,
      "score_by_pillar": {},
      "score_by_suite": {
        "agent_long_horizon": 7.46
      },
      "quality_avg": 9.0,
      "cost_score_avg": 4.0,
      "speed_score_avg": 9.5,
      "latency_score_avg": 3.0,
      "tool_calling_score_avg": 7.0,
      "judge_score_avg": null,
      "tokens_per_second": 203.0,
      "latency_avg_s": 41.13,
      "total_input_tokens": 146170,
      "total_output_tokens": 102530
    },
    {
      "key": "gemini-flash-thinking",
      "id": "google/gemini-2.5-flash",
      "name": "Gemini 2.5 Flash (thinking)",
      "tier": "cheap",
      "provider": "openrouter",
      "open_source": false,
      "license": "",
      "cost_input_per_M": 0.3,
      "cost_output_per_M": 2.5,
      "cost_per_1k_calls_usd": 3.84,
      "subscriptions": [],
      "notes": "Misma versión que gemini-flash con thinking forzado (effort=high).",
      "tested": false,
      "tool_calling": true,
      "thinking": true,
      "multimodal": true,
      "runs": 12,
      "score_global": 7.27,
      "score_by_pillar": {},
      "score_by_suite": {
        "agent_long_horizon": 7.27
      },
      "quality_avg": 8.92,
      "cost_score_avg": 3.83,
      "speed_score_avg": 8.83,
      "latency_score_avg": 3.25,
      "tool_calling_score_avg": 7.0,
      "judge_score_avg": null,
      "tokens_per_second": 136.0,
      "latency_avg_s": 86.41,
      "total_input_tokens": 186429,
      "total_output_tokens": 111130
    },
    {
      "key": "nim-deepseek-v4-pro",
      "id": "deepseek-ai/deepseek-v4-pro",
      "name": "DeepSeek V4 Pro (NIM)",
      "tier": "cloud_nim",
      "provider": "nvidia_nim",
      "open_source": true,
      "license": "MIT",
      "cost_input_per_M": 0,
      "cost_output_per_M": 0,
      "cost_per_1k_calls_usd": 0.0,
      "subscriptions": [],
      "notes": "Re-test del flagship via NIM (vs OpenRouter que dio 76% cobertura en Lote 7-8)",
      "tested": false,
      "tool_calling": true,
      "thinking": true,
      "multimodal": false,
      "runs": 3,
      "score_global": 7.26,
      "score_by_pillar": {
        "Contenido": 7.37,
        "Agentes": 7.04
      },
      "score_by_suite": {
        "content_generation": 7.37,
        "tool_calling": 7.04
      },
      "quality_avg": 8.83,
      "cost_score_avg": 7.0,
      "speed_score_avg": 1.33,
      "latency_score_avg": 1.0,
      "tool_calling_score_avg": 8.0,
      "judge_score_avg": 4.67,
      "tokens_per_second": 4.5,
      "latency_avg_s": 176.92,
      "total_input_tokens": 881,
      "total_output_tokens": 1584
    },
    {
      "key": "minimax-m2.7-direct",
      "id": "MiniMax-M2.7",
      "name": "MiniMax M2.7 (directo)",
      "tier": "cheap",
      "provider": "minimax_direct",
      "open_source": false,
      "license": "",
      "cost_input_per_M": 0.3,
      "cost_output_per_M": 1.2,
      "cost_per_1k_calls_usd": 1.89,
      "subscriptions": [
        {
          "key": "minimax_agent_pro",
          "name": "MiniMax Agent Pro",
          "plan": "Agent Pro",
          "price_month_usd": 19,
          "url": "https://api.minimax.io",
          "notes": "Acceso a M2.7 highspeed + límites generosos para agentes (1k+ calls/día)."
        }
      ],
      "notes": "API directa MiniMax. Disponible también en sub Agent Pro $19/mes.",
      "tested": false,
      "tool_calling": true,
      "thinking": false,
      "multimodal": false,
      "runs": 39,
      "score_global": 7.19,
      "score_by_pillar": {
        "Contenido": 7.37,
        "Agentes": 6.85,
        "Coding": 7.91,
        "Razonamiento": 7.68
      },
      "score_by_suite": {
        "content_generation": 7.32,
        "tool_calling": 6.32,
        "task_management": 7.56,
        "code_generation": 7.91,
        "reasoning": 7.68,
        "summarization": 6.71,
        "presentation": 7.61,
        "startup_content": 7.58,
        "agent_long_horizon": 6.84
      },
      "quality_avg": 8.5,
      "cost_score_avg": 5.87,
      "speed_score_avg": 6.72,
      "latency_score_avg": 1.64,
      "tool_calling_score_avg": 7.18,
      "judge_score_avg": null,
      "tokens_per_second": 52.0,
      "latency_avg_s": 52.21,
      "total_input_tokens": 142391,
      "total_output_tokens": 86778
    },
    {
      "key": "kimi-k2.5-thinking",
      "id": "moonshotai/kimi-k2.5",
      "name": "Kimi K2.5 (thinking)",
      "tier": "cheap",
      "provider": "openrouter",
      "open_source": true,
      "license": "Modified MIT",
      "cost_input_per_M": 0.2,
      "cost_output_per_M": 0.8,
      "cost_per_1k_calls_usd": 1.26,
      "subscriptions": [],
      "notes": "Misma versión que kimi-k2.5 pero con reasoning forzado vía OpenRouter (effort=high).",
      "tested": false,
      "tool_calling": true,
      "thinking": true,
      "multimodal": false,
      "runs": 12,
      "score_global": 7.16,
      "score_by_pillar": {},
      "score_by_suite": {
        "agent_long_horizon": 7.16
      },
      "quality_avg": 9.12,
      "cost_score_avg": 4.67,
      "speed_score_avg": 5.17,
      "latency_score_avg": 1.08,
      "tool_calling_score_avg": 7.0,
      "judge_score_avg": null,
      "tokens_per_second": 36.4,
      "latency_avg_s": 318.08,
      "total_input_tokens": 217776,
      "total_output_tokens": 113040
    },
    {
      "key": "minimax-m2.7-highspeed",
      "id": "MiniMax-M2.7-highspeed",
      "name": "MiniMax M2.7 Highspeed",
      "tier": "cheap",
      "provider": "minimax_direct",
      "open_source": false,
      "license": "",
      "cost_input_per_M": 0.3,
      "cost_output_per_M": 1.2,
      "cost_per_1k_calls_usd": 1.89,
      "subscriptions": [
        {
          "key": "minimax_agent_pro",
          "name": "MiniMax Agent Pro",
          "plan": "Agent Pro",
          "price_month_usd": 19,
          "url": "https://api.minimax.io",
          "notes": "Acceso a M2.7 highspeed + límites generosos para agentes (1k+ calls/día)."
        }
      ],
      "notes": "Acceso vía sub Agent Pro $19/mes. Misma calidad que M2.7 directo, latencia ultra baja.",
      "tested": false,
      "tool_calling": true,
      "thinking": false,
      "multimodal": false,
      "runs": 39,
      "score_global": 7.14,
      "score_by_pillar": {
        "Contenido": 7.46,
        "Agentes": 6.87,
        "Coding": 7.64,
        "Razonamiento": 7.85
      },
      "score_by_suite": {
        "content_generation": 7.34,
        "tool_calling": 6.22,
        "task_management": 7.73,
        "code_generation": 7.64,
        "reasoning": 7.85,
        "summarization": 7.4,
        "presentation": 7.63,
        "startup_content": 7.51,
        "agent_long_horizon": 6.61
      },
      "quality_avg": 8.36,
      "cost_score_avg": 5.97,
      "speed_score_avg": 6.97,
      "latency_score_avg": 1.69,
      "tool_calling_score_avg": 7.18,
      "judge_score_avg": null,
      "tokens_per_second": 49.3,
      "latency_avg_s": 54.47,
      "total_input_tokens": 171687,
      "total_output_tokens": 91414
    },
    {
      "key": "nim-mistral-nemotron",
      "id": "mistralai/mistral-nemotron",
      "name": "Mistral-Nemotron (NIM)",
      "tier": "cloud_nim",
      "provider": "nvidia_nim",
      "open_source": true,
      "license": "Apache 2.0",
      "cost_input_per_M": 0,
      "cost_output_per_M": 0,
      "cost_per_1k_calls_usd": 0.0,
      "subscriptions": [],
      "notes": "Colaboración Mistral × NVIDIA, optimizado en Nemo",
      "tested": false,
      "tool_calling": true,
      "thinking": true,
      "multimodal": false,
      "runs": 12,
      "score_global": 7.05,
      "score_by_pillar": {
        "Contenido": 7.72,
        "Agentes": 6.53,
        "Coding": 7.93
      },
      "score_by_suite": {
        "content_generation": 7.72,
        "tool_calling": 5.69,
        "task_management": 7.66,
        "code_generation": 7.93
      },
      "quality_avg": 8.04,
      "cost_score_avg": 8.0,
      "speed_score_avg": 5.83,
      "latency_score_avg": 3.17,
      "tool_calling_score_avg": 5.5,
      "judge_score_avg": 4.1,
      "tokens_per_second": 38.3,
      "latency_avg_s": 19.95,
      "total_input_tokens": 3013,
      "total_output_tokens": 5806
    },
    {
      "key": "claude-haiku-4.5",
      "id": "anthropic/claude-haiku-4.5",
      "name": "Claude Haiku 4.5",
      "tier": "cheap",
      "provider": "openrouter",
      "open_source": false,
      "license": "",
      "cost_input_per_M": 1.0,
      "cost_output_per_M": 5.0,
      "cost_per_1k_calls_usd": 7.8,
      "subscriptions": [],
      "notes": "Anthropic Haiku family. Hybrid (extended thinking opt-in).",
      "tested": false,
      "tool_calling": true,
      "thinking": false,
      "multimodal": false,
      "runs": 12,
      "score_global": 6.98,
      "score_by_pillar": {},
      "score_by_suite": {
        "agent_long_horizon": 6.98
      },
      "quality_avg": 8.38,
      "cost_score_avg": 3.67,
      "speed_score_avg": 8.83,
      "latency_score_avg": 2.83,
      "tool_calling_score_avg": 7.0,
      "judge_score_avg": null,
      "tokens_per_second": 102.4,
      "latency_avg_s": 44.33,
      "total_input_tokens": 196551,
      "total_output_tokens": 52340
    },
    {
      "key": "hermes-4-70b-thinking",
      "id": "nousresearch/hermes-4-70b",
      "name": "Hermes 4 70B (thinking)",
      "tier": "cheap",
      "provider": "openrouter",
      "open_source": true,
      "license": "Llama 3 community",
      "cost_input_per_M": 0.13,
      "cost_output_per_M": 0.4,
      "cost_per_1k_calls_usd": 0.639,
      "subscriptions": [],
      "notes": "Misma versión que hermes-4-70b pero con reasoning forzado vía OpenRouter (effort=high). Technical report reporta +12 puntos MMLU al activar reasoning (76.7 → 88.4).",
      "tested": false,
      "tool_calling": true,
      "thinking": true,
      "multimodal": false,
      "runs": 12,
      "score_global": 6.84,
      "score_by_pillar": {},
      "score_by_suite": {
        "agent_long_horizon": 6.84
      },
      "quality_avg": 8.79,
      "cost_score_avg": 2.33,
      "speed_score_avg": 8.0,
      "latency_score_avg": 1.42,
      "tool_calling_score_avg": 7.0,
      "judge_score_avg": null,
      "tokens_per_second": 73.1,
      "latency_avg_s": 95.39,
      "total_input_tokens": 301519,
      "total_output_tokens": 86335
    },
    {
      "key": "claude-haiku-4.5-thinking",
      "id": "anthropic/claude-haiku-4.5",
      "name": "Claude Haiku 4.5 (thinking)",
      "tier": "cheap",
      "provider": "openrouter",
      "open_source": false,
      "license": "",
      "cost_input_per_M": 1.0,
      "cost_output_per_M": 5.0,
      "cost_per_1k_calls_usd": 7.8,
      "subscriptions": [],
      "notes": "Misma versión con extended thinking forzado.",
      "tested": false,
      "tool_calling": true,
      "thinking": true,
      "multimodal": false,
      "runs": 12,
      "score_global": 6.58,
      "score_by_pillar": {},
      "score_by_suite": {
        "agent_long_horizon": 6.58
      },
      "quality_avg": 7.79,
      "cost_score_avg": 3.17,
      "speed_score_avg": 9.17,
      "latency_score_avg": 2.25,
      "tool_calling_score_avg": 7.0,
      "judge_score_avg": null,
      "tokens_per_second": 116.9,
      "latency_avg_s": 141.26,
      "total_input_tokens": 255657,
      "total_output_tokens": 107160
    },
    {
      "key": "claude-sonnet-4.6-thinking",
      "id": "anthropic/claude-sonnet-4-6",
      "name": "Claude Sonnet 4.6 (thinking)",
      "tier": "medium",
      "provider": "openrouter",
      "open_source": false,
      "license": "",
      "cost_input_per_M": 3.0,
      "cost_output_per_M": 15.0,
      "cost_per_1k_calls_usd": 23.4,
      "subscriptions": [],
      "notes": "Mismo modelo que claude-sonnet-4.6 con extended thinking forzado (effort=high). Reasoning tokens facturados como output a $15/M — ~5-7x más caro por test que sin thinking.",
      "tested": false,
      "tool_calling": true,
      "thinking": true,
      "multimodal": true,
      "runs": 12,
      "score_global": 6.49,
      "score_by_pillar": {},
      "score_by_suite": {
        "agent_long_horizon": 6.49
      },
      "quality_avg": 8.75,
      "cost_score_avg": 1.17,
      "speed_score_avg": 7.33,
      "latency_score_avg": 1.33,
      "tool_calling_score_avg": 7.0,
      "judge_score_avg": null,
      "tokens_per_second": 57.7,
      "latency_avg_s": 149.09,
      "total_input_tokens": 306217,
      "total_output_tokens": 97264
    },
    {
      "key": "gemini-3.1-pro-thinking",
      "id": "google/gemini-3.1-pro-preview",
      "name": "Gemini 3.1 Pro (thinking)",
      "tier": "medium",
      "provider": "openrouter",
      "open_source": false,
      "license": "",
      "cost_input_per_M": 2.0,
      "cost_output_per_M": 12.0,
      "cost_per_1k_calls_usd": 18.6,
      "subscriptions": [],
      "notes": "Misma versión con thinking forzado (effort=high). Costo ~5x mayor por test que sin thinking.",
      "tested": false,
      "tool_calling": true,
      "thinking": true,
      "multimodal": true,
      "runs": 12,
      "score_global": 6.42,
      "score_by_pillar": {},
      "score_by_suite": {
        "agent_long_horizon": 6.42
      },
      "quality_avg": 8.5,
      "cost_score_avg": 1.67,
      "speed_score_avg": 8.17,
      "latency_score_avg": 1.33,
      "tool_calling_score_avg": 7.0,
      "judge_score_avg": null,
      "tokens_per_second": 83.8,
      "latency_avg_s": 133.43,
      "total_input_tokens": 167761,
      "total_output_tokens": 111329
    },
    {
      "key": "kimi-k2.6-thinking",
      "id": "moonshotai/kimi-k2.6",
      "name": "Kimi K2.6 (thinking)",
      "tier": "cheap",
      "provider": "openrouter",
      "open_source": true,
      "license": "Modified MIT",
      "cost_input_per_M": 0.8,
      "cost_output_per_M": 3.5,
      "cost_per_1k_calls_usd": 5.49,
      "subscriptions": [],
      "notes": "Misma versión que kimi-k2.6 con thinking forzado vía OpenRouter (effort=high).",
      "tested": false,
      "tool_calling": true,
      "thinking": true,
      "multimodal": false,
      "runs": 12,
      "score_global": 6.25,
      "score_by_pillar": {},
      "score_by_suite": {
        "agent_long_horizon": 6.25
      },
      "quality_avg": 8.5,
      "cost_score_avg": 1.33,
      "speed_score_avg": 5.67,
      "latency_score_avg": 1.17,
      "tool_calling_score_avg": 7.0,
      "judge_score_avg": null,
      "tokens_per_second": 40.0,
      "latency_avg_s": 494.23,
      "total_input_tokens": 197721,
      "total_output_tokens": 128021
    },
    {
      "key": "hermes-4-405b-thinking",
      "id": "nousresearch/hermes-4-405b",
      "name": "Hermes 4 405B (thinking)",
      "tier": "cheap",
      "provider": "openrouter",
      "open_source": true,
      "license": "Llama 3 community",
      "cost_input_per_M": 1.0,
      "cost_output_per_M": 3.0,
      "cost_per_1k_calls_usd": 4.8,
      "subscriptions": [],
      "notes": "Misma versión que hermes-4-405b pero con reasoning forzado.",
      "tested": false,
      "tool_calling": true,
      "thinking": true,
      "multimodal": false,
      "runs": 12,
      "score_global": 6.24,
      "score_by_pillar": {},
      "score_by_suite": {
        "agent_long_horizon": 6.24
      },
      "quality_avg": 7.92,
      "cost_score_avg": 2.5,
      "speed_score_avg": 6.0,
      "latency_score_avg": 1.08,
      "tool_calling_score_avg": 7.0,
      "judge_score_avg": null,
      "tokens_per_second": 35.6,
      "latency_avg_s": 188.98,
      "total_input_tokens": 265460,
      "total_output_tokens": 76894
    },
    {
      "key": "claude-opus-4.7-thinking",
      "id": "anthropic/claude-opus-4-7",
      "name": "Claude Opus 4.7 (thinking)",
      "tier": "premium",
      "provider": "openrouter",
      "open_source": false,
      "license": "",
      "cost_input_per_M": 15.0,
      "cost_output_per_M": 75.0,
      "cost_per_1k_calls_usd": 117.0,
      "subscriptions": [],
      "notes": "Mismo modelo que claude-opus-4.7 con extended thinking forzado (effort=high). Reasoning tokens facturados como output a $75/M — ~5-7x más caro por test que sin thinking.",
      "tested": false,
      "tool_calling": true,
      "thinking": true,
      "multimodal": true,
      "runs": 12,
      "score_global": 6.01,
      "score_by_pillar": {},
      "score_by_suite": {
        "agent_long_horizon": 6.01
      },
      "quality_avg": 8.38,
      "cost_score_avg": 1.0,
      "speed_score_avg": 7.83,
      "latency_score_avg": 1.25,
      "tool_calling_score_avg": 7.0,
      "judge_score_avg": null,
      "tokens_per_second": 63.3,
      "latency_avg_s": 204.26,
      "total_input_tokens": 374227,
      "total_output_tokens": 113625
    },
    {
      "key": "nim-magistral-small",
      "id": "mistralai/magistral-small-2506",
      "name": "Mistral Magistral Small (NIM)",
      "tier": "cloud_nim",
      "provider": "nvidia_nim",
      "open_source": true,
      "license": "Apache 2.0",
      "cost_input_per_M": 0,
      "cost_output_per_M": 0,
      "cost_per_1k_calls_usd": 0.0,
      "subscriptions": [],
      "notes": "Mistral con razonamiento, tamaño medio",
      "tested": false,
      "tool_calling": true,
      "thinking": true,
      "multimodal": false,
      "runs": 10,
      "score_global": 5.51,
      "score_by_pillar": {
        "Contenido": 7.21,
        "Agentes": 4.38
      },
      "score_by_suite": {
        "content_generation": 7.21,
        "tool_calling": 5.31,
        "task_management": 2.54
      },
      "quality_avg": 5.9,
      "cost_score_avg": 6.6,
      "speed_score_avg": 4.0,
      "latency_score_avg": 2.1,
      "tool_calling_score_avg": 5.2,
      "judge_score_avg": 4.56,
      "tokens_per_second": 23.8,
      "latency_avg_s": 43.64,
      "total_input_tokens": 4528,
      "total_output_tokens": 10999
    },
    {
      "key": "deepseek-r1-free",
      "id": "deepseek/deepseek-r1:free",
      "name": "DeepSeek R1 (free)",
      "tier": "free",
      "provider": "openrouter",
      "open_source": false,
      "license": "",
      "cost_input_per_M": 0,
      "cost_output_per_M": 0,
      "cost_per_1k_calls_usd": 0.0,
      "subscriptions": [],
      "notes": "",
      "tested": false,
      "tool_calling": true,
      "thinking": true,
      "multimodal": false,
      "runs": 0,
      "score_global": null,
      "score_by_pillar": {},
      "score_by_suite": {},
      "judge_score_avg": null,
      "tokens_per_second": null,
      "latency_avg_s": null,
      "total_input_tokens": 0,
      "total_output_tokens": 0
    },
    {
      "key": "llama-3.3-70b-free",
      "id": "meta-llama/llama-3.3-70b-instruct:free",
      "name": "Llama 3.3 70B (free)",
      "tier": "free",
      "provider": "openrouter",
      "open_source": false,
      "license": "",
      "cost_input_per_M": 0,
      "cost_output_per_M": 0,
      "cost_per_1k_calls_usd": 0.0,
      "subscriptions": [],
      "notes": "",
      "tested": false,
      "tool_calling": true,
      "thinking": false,
      "multimodal": false,
      "runs": 0,
      "score_global": null,
      "score_by_pillar": {},
      "score_by_suite": {},
      "judge_score_avg": null,
      "tokens_per_second": null,
      "latency_avg_s": null,
      "total_input_tokens": 0,
      "total_output_tokens": 0
    },
    {
      "key": "qwen3-coder-free",
      "id": "qwen/qwen3-coder-480b:free",
      "name": "Qwen3 Coder 480B (free)",
      "tier": "free",
      "provider": "openrouter",
      "open_source": true,
      "license": "",
      "cost_input_per_M": 0,
      "cost_output_per_M": 0,
      "cost_per_1k_calls_usd": 0.0,
      "subscriptions": [],
      "notes": "",
      "tested": false,
      "tool_calling": true,
      "thinking": false,
      "multimodal": false,
      "runs": 0,
      "score_global": null,
      "score_by_pillar": {},
      "score_by_suite": {},
      "judge_score_avg": null,
      "tokens_per_second": null,
      "latency_avg_s": null,
      "total_input_tokens": 0,
      "total_output_tokens": 0
    },
    {
      "key": "deepseek-v3",
      "id": "deepseek-v3",
      "name": "DeepSeek V3 (local)",
      "tier": "local",
      "provider": "openrouter",
      "open_source": true,
      "license": "MIT",
      "cost_input_per_M": 0,
      "cost_output_per_M": 0,
      "cost_per_1k_calls_usd": 0.0,
      "subscriptions": [],
      "notes": "",
      "tested": false,
      "tool_calling": true,
      "thinking": false,
      "multimodal": false,
      "runs": 0,
      "score_global": null,
      "score_by_pillar": {},
      "score_by_suite": {},
      "judge_score_avg": null,
      "tokens_per_second": null,
      "latency_avg_s": null,
      "total_input_tokens": 0,
      "total_output_tokens": 0
    },
    {
      "key": "qwen-3.5-plus",
      "id": "qwen/qwen3.5-plus",
      "name": "Qwen 3.5 Plus",
      "tier": "cheap",
      "provider": "openrouter",
      "open_source": false,
      "license": "Proprietary",
      "cost_input_per_M": 1.2,
      "cost_output_per_M": 2.0,
      "cost_per_1k_calls_usd": 3.36,
      "subscriptions": [],
      "notes": "Plus = API-only propietario de Alibaba",
      "tested": false,
      "tool_calling": true,
      "thinking": false,
      "multimodal": false,
      "runs": 0,
      "score_global": null,
      "score_by_pillar": {},
      "score_by_suite": {},
      "judge_score_avg": null,
      "tokens_per_second": null,
      "latency_avg_s": null,
      "total_input_tokens": 0,
      "total_output_tokens": 0
    },
    {
      "key": "gpt-4o",
      "id": "openai/gpt-4o",
      "name": "GPT-4o",
      "tier": "medium",
      "provider": "openrouter",
      "open_source": false,
      "license": "",
      "cost_input_per_M": 2.5,
      "cost_output_per_M": 10.0,
      "cost_per_1k_calls_usd": 15.75,
      "subscriptions": [],
      "notes": "",
      "tested": false,
      "tool_calling": true,
      "thinking": false,
      "multimodal": true,
      "runs": 0,
      "score_global": null,
      "score_by_pillar": {},
      "score_by_suite": {},
      "judge_score_avg": null,
      "tokens_per_second": null,
      "latency_avg_s": null,
      "total_input_tokens": 0,
      "total_output_tokens": 0
    },
    {
      "key": "claude-sonnet",
      "id": "anthropic/claude-sonnet-4",
      "name": "Claude Sonnet 4",
      "tier": "medium",
      "provider": "openrouter",
      "open_source": false,
      "license": "",
      "cost_input_per_M": 3.0,
      "cost_output_per_M": 15.0,
      "cost_per_1k_calls_usd": 23.4,
      "subscriptions": [],
      "notes": "",
      "tested": false,
      "tool_calling": true,
      "thinking": false,
      "multimodal": true,
      "runs": 0,
      "score_global": null,
      "score_by_pillar": {},
      "score_by_suite": {},
      "judge_score_avg": null,
      "tokens_per_second": null,
      "latency_avg_s": null,
      "total_input_tokens": 0,
      "total_output_tokens": 0
    },
    {
      "key": "gpt-5.5-pro",
      "id": "gpt-5.5-pro",
      "name": "GPT-5.5 Pro",
      "tier": "premium",
      "provider": "openai_responses",
      "open_source": false,
      "license": "",
      "cost_input_per_M": 30.0,
      "cost_output_per_M": 180.0,
      "cost_per_1k_calls_usd": 279.0,
      "subscriptions": [],
      "notes": "Sólo /v1/responses (no chat/completions)",
      "tested": false,
      "tool_calling": true,
      "thinking": true,
      "multimodal": true,
      "runs": 0,
      "score_global": null,
      "score_by_pillar": {},
      "score_by_suite": {},
      "judge_score_avg": null,
      "tokens_per_second": null,
      "latency_avg_s": null,
      "total_input_tokens": 0,
      "total_output_tokens": 0
    },
    {
      "key": "groq-gpt-oss-120b",
      "id": "openai/gpt-oss-120b",
      "name": "GPT-OSS 120B (Groq)",
      "tier": "cheap",
      "provider": "groq_direct",
      "open_source": true,
      "license": "Apache 2.0",
      "cost_input_per_M": 0.15,
      "cost_output_per_M": 0.6,
      "cost_per_1k_calls_usd": 0.945,
      "subscriptions": [],
      "notes": "",
      "tested": false,
      "tool_calling": true,
      "thinking": false,
      "multimodal": false,
      "runs": 0,
      "score_global": null,
      "score_by_pillar": {},
      "score_by_suite": {},
      "judge_score_avg": null,
      "tokens_per_second": null,
      "latency_avg_s": null,
      "total_input_tokens": 0,
      "total_output_tokens": 0
    },
    {
      "key": "gpt-4o-high",
      "id": "openai/gpt-4o:high",
      "name": "GPT-4o High",
      "tier": "premium",
      "provider": "openrouter",
      "open_source": false,
      "license": "",
      "cost_input_per_M": 5.0,
      "cost_output_per_M": 15.0,
      "cost_per_1k_calls_usd": 24.0,
      "subscriptions": [],
      "notes": "",
      "tested": false,
      "tool_calling": true,
      "thinking": false,
      "multimodal": true,
      "runs": 0,
      "score_global": null,
      "score_by_pillar": {},
      "score_by_suite": {},
      "judge_score_avg": null,
      "tokens_per_second": null,
      "latency_avg_s": null,
      "total_input_tokens": 0,
      "total_output_tokens": 0
    },
    {
      "key": "nim-nemotron-ultra-253b",
      "id": "nvidia/llama-3.1-nemotron-ultra-253b-v1",
      "name": "Nemotron Ultra 253B (NIM)",
      "tier": "cloud_nim",
      "provider": "nvidia_nim",
      "open_source": true,
      "license": "NVIDIA Open Model",
      "cost_input_per_M": 0,
      "cost_output_per_M": 0,
      "cost_per_1k_calls_usd": 0.0,
      "subscriptions": [],
      "notes": "Modelo más grande de la familia Nemotron",
      "tested": false,
      "tool_calling": true,
      "thinking": true,
      "multimodal": false,
      "runs": 0,
      "score_global": null,
      "score_by_pillar": {},
      "score_by_suite": {},
      "judge_score_avg": null,
      "tokens_per_second": null,
      "latency_avg_s": null,
      "total_input_tokens": 0,
      "total_output_tokens": 0
    },
    {
      "key": "mimo-v2-flash-free",
      "id": "xiaomi/mimo-v2-flash:free",
      "name": "MiMo-V2-Flash (free)",
      "tier": "free",
      "provider": "openrouter",
      "open_source": true,
      "license": "MIT",
      "cost_input_per_M": 0,
      "cost_output_per_M": 0,
      "cost_per_1k_calls_usd": 0.0,
      "subscriptions": [],
      "notes": "",
      "tested": false,
      "tool_calling": true,
      "thinking": false,
      "multimodal": false,
      "runs": 0,
      "score_global": null,
      "score_by_pillar": {},
      "score_by_suite": {},
      "judge_score_avg": null,
      "tokens_per_second": null,
      "latency_avg_s": null,
      "total_input_tokens": 0,
      "total_output_tokens": 0
    },
    {
      "key": "qwen-3.6-plus-free",
      "id": "qwen/qwen3.6-plus:free",
      "name": "Qwen 3.6 Plus (free)",
      "tier": "free",
      "provider": "openrouter",
      "open_source": false,
      "license": "",
      "cost_input_per_M": 0,
      "cost_output_per_M": 0,
      "cost_per_1k_calls_usd": 0.0,
      "subscriptions": [],
      "notes": "",
      "tested": false,
      "tool_calling": true,
      "thinking": false,
      "multimodal": false,
      "runs": 0,
      "score_global": null,
      "score_by_pillar": {},
      "score_by_suite": {},
      "judge_score_avg": null,
      "tokens_per_second": null,
      "latency_avg_s": null,
      "total_input_tokens": 0,
      "total_output_tokens": 0
    },
    {
      "key": "step3",
      "id": "stepfun-ai/step3",
      "name": "Step3 (StepFun)",
      "tier": "cheap",
      "provider": "openrouter",
      "open_source": true,
      "license": "Apache 2.0",
      "cost_input_per_M": 1.0,
      "cost_output_per_M": 3.0,
      "cost_per_1k_calls_usd": 4.8,
      "subscriptions": [],
      "notes": "MoE 321B/38B activos, multimodal reasoning. 65K context. Lanzado ago 2025.",
      "tested": false,
      "tool_calling": true,
      "thinking": false,
      "multimodal": false,
      "runs": 0,
      "score_global": null,
      "score_by_pillar": {},
      "score_by_suite": {},
      "judge_score_avg": null,
      "tokens_per_second": null,
      "latency_avg_s": null,
      "total_input_tokens": 0,
      "total_output_tokens": 0
    },
    {
      "key": "seed-oss-36b",
      "id": "bytedance/seed-oss-36b-instruct",
      "name": "Seed-OSS 36B Instruct",
      "tier": "cheap",
      "provider": "openrouter",
      "open_source": true,
      "license": "Apache 2.0",
      "cost_input_per_M": 0.2,
      "cost_output_per_M": 0.6,
      "cost_per_1k_calls_usd": 0.96,
      "subscriptions": [],
      "notes": "Reasoning + math + coding + agentic. 131K context. ByteDance.",
      "tested": false,
      "tool_calling": true,
      "thinking": false,
      "multimodal": false,
      "runs": 0,
      "score_global": null,
      "score_by_pillar": {},
      "score_by_suite": {},
      "judge_score_avg": null,
      "tokens_per_second": null,
      "latency_avg_s": null,
      "total_input_tokens": 0,
      "total_output_tokens": 0
    },
    {
      "key": "local-nemotron-3-nano-omni-reasoning",
      "id": "hf.co/unsloth/NVIDIA-Nemotron-3-Nano-Omni-30B-A3B-Reasoning-GGUF:Q4_K_M",
      "name": "Nemotron 3 Nano Omni 30B-A3B Reasoning (DGX Spark Q4_K_M)",
      "tier": "local",
      "provider": "openrouter",
      "open_source": true,
      "license": "NVIDIA Open License",
      "cost_input_per_M": 0,
      "cost_output_per_M": 0,
      "cost_per_1k_calls_usd": 0.0,
      "subscriptions": [],
      "notes": "MoE 30B totales / 3B activos (A3B), thinking + multimodal. GGUF community por unsloth — los tags oficiales de Ollama (nemotron3:33b) son la versión base, no la Omni Reasoning. Ollama: `ollama pull hf.co/unsloth/NVIDIA-Nemotron-3-Nano-Omni-30B-A3B-Reasoning-GGUF:Q4_K_M`. Comparar contra el mismo modelo via NIM (FP16) para medir costo de cuantización.",
      "tested": false,
      "tool_calling": true,
      "thinking": true,
      "multimodal": true,
      "runs": 0,
      "score_global": null,
      "score_by_pillar": {},
      "score_by_suite": {},
      "judge_score_avg": null,
      "tokens_per_second": null,
      "latency_avg_s": null,
      "total_input_tokens": 0,
      "total_output_tokens": 0
    },
    {
      "key": "qwen3.5-25b",
      "id": "qwen3.5:25b",
      "name": "Qwen 3.5 25B (local)",
      "tier": "local",
      "provider": "openrouter",
      "open_source": true,
      "license": "Apache 2.0",
      "cost_input_per_M": 0,
      "cost_output_per_M": 0,
      "cost_per_1k_calls_usd": 0.0,
      "subscriptions": [],
      "notes": "",
      "tested": false,
      "tool_calling": true,
      "thinking": false,
      "multimodal": false,
      "runs": 0,
      "score_global": null,
      "score_by_pillar": {},
      "score_by_suite": {},
      "judge_score_avg": null,
      "tokens_per_second": null,
      "latency_avg_s": null,
      "total_input_tokens": 0,
      "total_output_tokens": 0
    },
    {
      "key": "qwen3.5-72b",
      "id": "qwen3.5:72b",
      "name": "Qwen 3.5 72B (local)",
      "tier": "local",
      "provider": "openrouter",
      "open_source": true,
      "license": "Apache 2.0",
      "cost_input_per_M": 0,
      "cost_output_per_M": 0,
      "cost_per_1k_calls_usd": 0.0,
      "subscriptions": [],
      "notes": "",
      "tested": false,
      "tool_calling": true,
      "thinking": false,
      "multimodal": false,
      "runs": 0,
      "score_global": null,
      "score_by_pillar": {},
      "score_by_suite": {},
      "judge_score_avg": null,
      "tokens_per_second": null,
      "latency_avg_s": null,
      "total_input_tokens": 0,
      "total_output_tokens": 0
    },
    {
      "key": "llama3.3-70b",
      "id": "llama3.3:70b",
      "name": "Llama 3.3 70B (local)",
      "tier": "local",
      "provider": "openrouter",
      "open_source": true,
      "license": "Llama Community",
      "cost_input_per_M": 0,
      "cost_output_per_M": 0,
      "cost_per_1k_calls_usd": 0.0,
      "subscriptions": [],
      "notes": "",
      "tested": false,
      "tool_calling": true,
      "thinking": false,
      "multimodal": false,
      "runs": 0,
      "score_global": null,
      "score_by_pillar": {},
      "score_by_suite": {},
      "judge_score_avg": null,
      "tokens_per_second": null,
      "latency_avg_s": null,
      "total_input_tokens": 0,
      "total_output_tokens": 0
    },
    {
      "key": "llama4-maverick",
      "id": "llama4-maverick",
      "name": "Llama 4 Maverick (local)",
      "tier": "local",
      "provider": "openrouter",
      "open_source": true,
      "license": "Llama Community",
      "cost_input_per_M": 0,
      "cost_output_per_M": 0,
      "cost_per_1k_calls_usd": 0.0,
      "subscriptions": [],
      "notes": "",
      "tested": false,
      "tool_calling": true,
      "thinking": false,
      "multimodal": false,
      "runs": 0,
      "score_global": null,
      "score_by_pillar": {},
      "score_by_suite": {},
      "judge_score_avg": null,
      "tokens_per_second": null,
      "latency_avg_s": null,
      "total_input_tokens": 0,
      "total_output_tokens": 0
    },
    {
      "key": "gemma4-26b-moe",
      "id": "gemma4:26b",
      "name": "Gemma 4 26B MoE (local, solo 3.8B activos)",
      "tier": "local",
      "provider": "openrouter",
      "open_source": true,
      "license": "Apache 2.0",
      "cost_input_per_M": 0,
      "cost_output_per_M": 0,
      "cost_per_1k_calls_usd": 0.0,
      "subscriptions": [],
      "notes": "",
      "tested": false,
      "tool_calling": true,
      "thinking": true,
      "multimodal": false,
      "runs": 0,
      "score_global": null,
      "score_by_pillar": {},
      "score_by_suite": {},
      "judge_score_avg": null,
      "tokens_per_second": null,
      "latency_avg_s": null,
      "total_input_tokens": 0,
      "total_output_tokens": 0
    },
    {
      "key": "mistral-nemo-12b",
      "id": "mistral-nemo",
      "name": "Mistral Nemo 12B (local)",
      "tier": "local",
      "provider": "openrouter",
      "open_source": true,
      "license": "Apache 2.0",
      "cost_input_per_M": 0,
      "cost_output_per_M": 0,
      "cost_per_1k_calls_usd": 0.0,
      "subscriptions": [],
      "notes": "",
      "tested": false,
      "tool_calling": true,
      "thinking": false,
      "multimodal": false,
      "runs": 0,
      "score_global": null,
      "score_by_pillar": {},
      "score_by_suite": {},
      "judge_score_avg": null,
      "tokens_per_second": null,
      "latency_avg_s": null,
      "total_input_tokens": 0,
      "total_output_tokens": 0
    },
    {
      "key": "phi-4-14b",
      "id": "phi4",
      "name": "Phi-4 14B (local)",
      "tier": "local",
      "provider": "openrouter",
      "open_source": true,
      "license": "MIT",
      "cost_input_per_M": 0,
      "cost_output_per_M": 0,
      "cost_per_1k_calls_usd": 0.0,
      "subscriptions": [],
      "notes": "",
      "tested": false,
      "tool_calling": false,
      "thinking": false,
      "multimodal": false,
      "runs": 0,
      "score_global": null,
      "score_by_pillar": {},
      "score_by_suite": {},
      "judge_score_avg": null,
      "tokens_per_second": null,
      "latency_avg_s": null,
      "total_input_tokens": 0,
      "total_output_tokens": 0
    },
    {
      "key": "minimax-m2.5",
      "id": "MiniMax-M2.5",
      "name": "MiniMax M2.5 (local, open-source)",
      "tier": "local",
      "provider": "openrouter",
      "open_source": true,
      "license": "MIT",
      "cost_input_per_M": 0,
      "cost_output_per_M": 0,
      "cost_per_1k_calls_usd": 0.0,
      "subscriptions": [],
      "notes": "",
      "tested": false,
      "tool_calling": true,
      "thinking": false,
      "multimodal": false,
      "runs": 0,
      "score_global": null,
      "score_by_pillar": {},
      "score_by_suite": {},
      "judge_score_avg": null,
      "tokens_per_second": null,
      "latency_avg_s": null,
      "total_input_tokens": 0,
      "total_output_tokens": 0
    }
  ]
}