{
  "name": "QuelLLM.fr",
  "description": "API publique des LLM open-source auto-hébergeables — VRAM, tokens/sec, licences, classements éditoriaux.",
  "version": "1.0.0",
  "lastUpdated": "2026-06-04",
  "license": "CC BY 4.0 — attribution via lien retour vers https://quelllm.fr",
  "source": "https://quelllm.fr",
  "docs": "https://quelllm.fr/api",
  "count": 195,
  "models": [
    {
      "id": "mistral-7b-instruct",
      "name": "Mistral 7B Instruct",
      "author": "Mistral AI",
      "origin": "fr",
      "family": "Mistral",
      "params": 7,
      "license": "Apache 2.0",
      "ctx": 32768,
      "tags": [
        "chat",
        "general"
      ],
      "vram": {
        "q4": 5,
        "q5": 6,
        "q8": 9,
        "fp16": 16
      },
      "url": "https://quelllm.fr/modele/mistral-7b-instruct",
      "apiUrl": "https://quelllm.fr/api/models/mistral-7b-instruct.json"
    },
    {
      "id": "mistral-small-24b",
      "name": "Mistral Small 3",
      "author": "Mistral AI",
      "origin": "fr",
      "family": "Mistral",
      "params": 24,
      "license": "Apache 2.0",
      "ctx": 32768,
      "tags": [
        "chat",
        "general",
        "code"
      ],
      "vram": {
        "q4": 14,
        "q5": 17,
        "q8": 26,
        "fp16": 48
      },
      "url": "https://quelllm.fr/modele/mistral-small-24b",
      "apiUrl": "https://quelllm.fr/api/models/mistral-small-24b.json"
    },
    {
      "id": "mixtral-8x7b",
      "name": "Mixtral 8x7B",
      "author": "Mistral AI",
      "origin": "fr",
      "family": "Mistral",
      "params": 47,
      "license": "Apache 2.0",
      "ctx": 32768,
      "tags": [
        "chat",
        "general",
        "moe"
      ],
      "vram": {
        "q4": 26,
        "q5": 32,
        "q8": 50,
        "fp16": 94
      },
      "url": "https://quelllm.fr/modele/mixtral-8x7b",
      "apiUrl": "https://quelllm.fr/api/models/mixtral-8x7b.json"
    },
    {
      "id": "lucie-7b",
      "name": "Lucie 7B",
      "author": "OpenLLM-France",
      "origin": "fr",
      "family": "Lucie",
      "params": 7,
      "license": "Apache 2.0",
      "ctx": 4096,
      "tags": [
        "chat",
        "fr"
      ],
      "vram": {
        "q4": 5,
        "q5": 6,
        "q8": 9,
        "fp16": 16
      },
      "url": "https://quelllm.fr/modele/lucie-7b",
      "apiUrl": "https://quelllm.fr/api/models/lucie-7b.json"
    },
    {
      "id": "croissant-llm",
      "name": "CroissantLLM 1.3B",
      "author": "CroissantLLM",
      "origin": "fr",
      "family": "Croissant",
      "params": 1.3,
      "license": "MIT",
      "ctx": 2048,
      "tags": [
        "chat",
        "fr",
        "small"
      ],
      "vram": {
        "q4": 1,
        "q5": 1.2,
        "q8": 2,
        "fp16": 3
      },
      "url": "https://quelllm.fr/modele/croissant-llm",
      "apiUrl": "https://quelllm.fr/api/models/croissant-llm.json"
    },
    {
      "id": "llama3-8b",
      "name": "Llama 3.1 8B",
      "author": "Meta",
      "origin": "us",
      "family": "Llama",
      "params": 8,
      "license": "Llama 3.1 Community",
      "ctx": 131072,
      "tags": [
        "chat",
        "general"
      ],
      "vram": {
        "q4": 6,
        "q5": 7,
        "q8": 10,
        "fp16": 18
      },
      "url": "https://quelllm.fr/modele/llama3-8b",
      "apiUrl": "https://quelllm.fr/api/models/llama3-8b.json"
    },
    {
      "id": "llama3-70b",
      "name": "Llama 3.1 70B",
      "author": "Meta",
      "origin": "us",
      "family": "Llama",
      "params": 70,
      "license": "Llama 3.1 Community",
      "ctx": 131072,
      "tags": [
        "chat",
        "general"
      ],
      "vram": {
        "q4": 40,
        "q5": 48,
        "q8": 75,
        "fp16": 140
      },
      "url": "https://quelllm.fr/modele/llama3-70b",
      "apiUrl": "https://quelllm.fr/api/models/llama3-70b.json"
    },
    {
      "id": "llama3-3b",
      "name": "Llama 3.2 3B",
      "author": "Meta",
      "origin": "us",
      "family": "Llama",
      "params": 3,
      "license": "Llama 3.2 Community",
      "ctx": 131072,
      "tags": [
        "chat",
        "small"
      ],
      "vram": {
        "q4": 2.5,
        "q5": 3,
        "q8": 4.5,
        "fp16": 7
      },
      "url": "https://quelllm.fr/modele/llama3-3b",
      "apiUrl": "https://quelllm.fr/api/models/llama3-3b.json"
    },
    {
      "id": "qwen25-7b",
      "name": "Qwen 2.5 7B",
      "author": "Alibaba",
      "origin": "cn",
      "family": "Qwen",
      "params": 7,
      "license": "Apache 2.0",
      "ctx": 131072,
      "tags": [
        "chat",
        "general",
        "multilingual"
      ],
      "vram": {
        "q4": 5,
        "q5": 6,
        "q8": 9,
        "fp16": 16
      },
      "url": "https://quelllm.fr/modele/qwen25-7b",
      "apiUrl": "https://quelllm.fr/api/models/qwen25-7b.json"
    },
    {
      "id": "qwen25-32b",
      "name": "Qwen 2.5 32B",
      "author": "Alibaba",
      "origin": "cn",
      "family": "Qwen",
      "params": 32,
      "license": "Apache 2.0",
      "ctx": 131072,
      "tags": [
        "chat",
        "general"
      ],
      "vram": {
        "q4": 19,
        "q5": 23,
        "q8": 35,
        "fp16": 64
      },
      "url": "https://quelllm.fr/modele/qwen25-32b",
      "apiUrl": "https://quelllm.fr/api/models/qwen25-32b.json"
    },
    {
      "id": "qwen25-coder-7b",
      "name": "Qwen 2.5 Coder 7B",
      "author": "Alibaba",
      "origin": "cn",
      "family": "Qwen",
      "params": 7,
      "license": "Apache 2.0",
      "ctx": 131072,
      "tags": [
        "code"
      ],
      "vram": {
        "q4": 5,
        "q5": 6,
        "q8": 9,
        "fp16": 16
      },
      "url": "https://quelllm.fr/modele/qwen25-coder-7b",
      "apiUrl": "https://quelllm.fr/api/models/qwen25-coder-7b.json"
    },
    {
      "id": "qwen25-coder-32b",
      "name": "Qwen 2.5 Coder 32B",
      "author": "Alibaba",
      "origin": "cn",
      "family": "Qwen",
      "params": 32,
      "license": "Apache 2.0",
      "ctx": 131072,
      "tags": [
        "code"
      ],
      "vram": {
        "q4": 19,
        "q5": 23,
        "q8": 35,
        "fp16": 64
      },
      "url": "https://quelllm.fr/modele/qwen25-coder-32b",
      "apiUrl": "https://quelllm.fr/api/models/qwen25-coder-32b.json"
    },
    {
      "id": "gemma2-2b",
      "name": "Gemma 2 2B",
      "author": "Google",
      "origin": "us",
      "family": "Gemma",
      "params": 2,
      "license": "Gemma",
      "ctx": 8192,
      "tags": [
        "chat",
        "small"
      ],
      "vram": {
        "q4": 1.8,
        "q5": 2.2,
        "q8": 3.2,
        "fp16": 5
      },
      "url": "https://quelllm.fr/modele/gemma2-2b",
      "apiUrl": "https://quelllm.fr/api/models/gemma2-2b.json"
    },
    {
      "id": "gemma2-9b",
      "name": "Gemma 2 9B",
      "author": "Google",
      "origin": "us",
      "family": "Gemma",
      "params": 9,
      "license": "Gemma",
      "ctx": 8192,
      "tags": [
        "chat",
        "general"
      ],
      "vram": {
        "q4": 6,
        "q5": 7.5,
        "q8": 11,
        "fp16": 20
      },
      "url": "https://quelllm.fr/modele/gemma2-9b",
      "apiUrl": "https://quelllm.fr/api/models/gemma2-9b.json"
    },
    {
      "id": "gemma2-27b",
      "name": "Gemma 2 27B",
      "author": "Google",
      "origin": "us",
      "family": "Gemma",
      "params": 27,
      "license": "Gemma",
      "ctx": 8192,
      "tags": [
        "chat",
        "general"
      ],
      "vram": {
        "q4": 16,
        "q5": 19,
        "q8": 29,
        "fp16": 54
      },
      "url": "https://quelllm.fr/modele/gemma2-27b",
      "apiUrl": "https://quelllm.fr/api/models/gemma2-27b.json"
    },
    {
      "id": "phi35-mini",
      "name": "Phi-3.5 Mini",
      "author": "Microsoft",
      "origin": "us",
      "family": "Phi",
      "params": 3.8,
      "license": "MIT",
      "ctx": 131072,
      "tags": [
        "chat",
        "small"
      ],
      "vram": {
        "q4": 3,
        "q5": 3.5,
        "q8": 5,
        "fp16": 8
      },
      "url": "https://quelllm.fr/modele/phi35-mini",
      "apiUrl": "https://quelllm.fr/api/models/phi35-mini.json"
    },
    {
      "id": "phi4-14b",
      "name": "Phi-4 14B",
      "author": "Microsoft",
      "origin": "us",
      "family": "Phi",
      "params": 14,
      "license": "MIT",
      "ctx": 16384,
      "tags": [
        "chat",
        "general",
        "reasoning"
      ],
      "vram": {
        "q4": 9,
        "q5": 11,
        "q8": 16,
        "fp16": 28
      },
      "url": "https://quelllm.fr/modele/phi4-14b",
      "apiUrl": "https://quelllm.fr/api/models/phi4-14b.json"
    },
    {
      "id": "deepseek-r1-7b",
      "name": "DeepSeek R1 Distill 7B",
      "author": "DeepSeek",
      "origin": "cn",
      "family": "DeepSeek",
      "params": 7,
      "license": "MIT",
      "ctx": 32768,
      "tags": [
        "reasoning"
      ],
      "vram": {
        "q4": 5,
        "q5": 6,
        "q8": 9,
        "fp16": 16
      },
      "url": "https://quelllm.fr/modele/deepseek-r1-7b",
      "apiUrl": "https://quelllm.fr/api/models/deepseek-r1-7b.json"
    },
    {
      "id": "deepseek-r1-32b",
      "name": "DeepSeek R1 Distill 32B",
      "author": "DeepSeek",
      "origin": "cn",
      "family": "DeepSeek",
      "params": 32,
      "license": "MIT",
      "ctx": 32768,
      "tags": [
        "reasoning"
      ],
      "vram": {
        "q4": 19,
        "q5": 23,
        "q8": 35,
        "fp16": 64
      },
      "url": "https://quelllm.fr/modele/deepseek-r1-32b",
      "apiUrl": "https://quelllm.fr/api/models/deepseek-r1-32b.json"
    },
    {
      "id": "deepseek-coder-v2-16b",
      "name": "DeepSeek Coder V2 Lite 16B",
      "author": "DeepSeek",
      "origin": "cn",
      "family": "DeepSeek",
      "params": 16,
      "license": "MIT",
      "ctx": 131072,
      "tags": [
        "code"
      ],
      "vram": {
        "q4": 10,
        "q5": 12,
        "q8": 18,
        "fp16": 32
      },
      "url": "https://quelllm.fr/modele/deepseek-coder-v2-16b",
      "apiUrl": "https://quelllm.fr/api/models/deepseek-coder-v2-16b.json"
    },
    {
      "id": "llama32-vision-11b",
      "name": "Llama 3.2 Vision 11B",
      "author": "Meta",
      "origin": "us",
      "family": "Llama",
      "params": 11,
      "license": "Llama 3.2 Community",
      "ctx": 131072,
      "tags": [
        "vision",
        "chat"
      ],
      "vram": {
        "q4": 8,
        "q5": 10,
        "q8": 14,
        "fp16": 24
      },
      "url": "https://quelllm.fr/modele/llama32-vision-11b",
      "apiUrl": "https://quelllm.fr/api/models/llama32-vision-11b.json"
    },
    {
      "id": "qwen2-vl-7b",
      "name": "Qwen 2 VL 7B",
      "author": "Alibaba",
      "origin": "cn",
      "family": "Qwen",
      "params": 7,
      "license": "Apache 2.0",
      "ctx": 32768,
      "tags": [
        "vision",
        "chat"
      ],
      "vram": {
        "q4": 6,
        "q5": 7,
        "q8": 10,
        "fp16": 18
      },
      "url": "https://quelllm.fr/modele/qwen2-vl-7b",
      "apiUrl": "https://quelllm.fr/api/models/qwen2-vl-7b.json"
    },
    {
      "id": "mistral-nemo-12b",
      "name": "Mistral Nemo 12B Instruct",
      "author": "Mistral AI",
      "origin": "fr",
      "family": "Mistral",
      "params": 12,
      "license": "Apache 2.0",
      "ctx": 128000,
      "tags": [
        "chat",
        "general",
        "multilingual",
        "fr"
      ],
      "vram": {
        "q4": 7,
        "q5": 9,
        "q8": 13,
        "fp16": 24
      },
      "url": "https://quelllm.fr/modele/mistral-nemo-12b",
      "apiUrl": "https://quelllm.fr/api/models/mistral-nemo-12b.json"
    },
    {
      "id": "mistral-small-31-24b",
      "name": "Mistral Small 3.1 24B",
      "author": "Mistral AI",
      "origin": "fr",
      "family": "Mistral",
      "params": 24,
      "license": "Apache 2.0",
      "ctx": 128000,
      "tags": [
        "chat",
        "general",
        "vision",
        "multilingual",
        "fr"
      ],
      "vram": {
        "q4": 14,
        "q5": 17,
        "q8": 26,
        "fp16": 48
      },
      "url": "https://quelllm.fr/modele/mistral-small-31-24b",
      "apiUrl": "https://quelllm.fr/api/models/mistral-small-31-24b.json"
    },
    {
      "id": "llama33-70b",
      "name": "Llama 3.3 70B Instruct",
      "author": "Meta",
      "origin": "us",
      "family": "Llama",
      "params": 70,
      "license": "Llama 3.3 Community",
      "ctx": 128000,
      "tags": [
        "chat",
        "general",
        "reasoning"
      ],
      "vram": {
        "q4": 40,
        "q5": 48,
        "q8": 75,
        "fp16": 140
      },
      "url": "https://quelllm.fr/modele/llama33-70b",
      "apiUrl": "https://quelllm.fr/api/models/llama33-70b.json"
    },
    {
      "id": "qwen3-8b",
      "name": "Qwen 3 8B",
      "author": "Alibaba",
      "origin": "cn",
      "family": "Qwen",
      "params": 8,
      "license": "Apache 2.0",
      "ctx": 131072,
      "tags": [
        "chat",
        "general",
        "reasoning",
        "multilingual"
      ],
      "vram": {
        "q4": 5,
        "q5": 6,
        "q8": 9,
        "fp16": 16
      },
      "url": "https://quelllm.fr/modele/qwen3-8b",
      "apiUrl": "https://quelllm.fr/api/models/qwen3-8b.json"
    },
    {
      "id": "qwen3-14b",
      "name": "Qwen 3 14B",
      "author": "Alibaba",
      "origin": "cn",
      "family": "Qwen",
      "params": 14,
      "license": "Apache 2.0",
      "ctx": 131072,
      "tags": [
        "chat",
        "general",
        "reasoning",
        "multilingual"
      ],
      "vram": {
        "q4": 9,
        "q5": 11,
        "q8": 16,
        "fp16": 28
      },
      "url": "https://quelllm.fr/modele/qwen3-14b",
      "apiUrl": "https://quelllm.fr/api/models/qwen3-14b.json"
    },
    {
      "id": "qwen3-32b",
      "name": "Qwen 3 32B",
      "author": "Alibaba",
      "origin": "cn",
      "family": "Qwen",
      "params": 32,
      "license": "Apache 2.0",
      "ctx": 131072,
      "tags": [
        "chat",
        "general",
        "reasoning",
        "multilingual"
      ],
      "vram": {
        "q4": 19,
        "q5": 23,
        "q8": 35,
        "fp16": 64
      },
      "url": "https://quelllm.fr/modele/qwen3-32b",
      "apiUrl": "https://quelllm.fr/api/models/qwen3-32b.json"
    },
    {
      "id": "qwen3-235b-a22b",
      "name": "Qwen 3 235B-A22B",
      "author": "Alibaba",
      "origin": "cn",
      "family": "Qwen",
      "params": 235,
      "license": "Apache 2.0",
      "ctx": 131072,
      "tags": [
        "chat",
        "general",
        "reasoning",
        "multilingual",
        "moe"
      ],
      "vram": {
        "q4": 142,
        "q5": 170,
        "q8": 250,
        "fp16": 470
      },
      "url": "https://quelllm.fr/modele/qwen3-235b-a22b",
      "apiUrl": "https://quelllm.fr/api/models/qwen3-235b-a22b.json"
    },
    {
      "id": "qwen25-vl-7b",
      "name": "Qwen 2.5 VL 7B",
      "author": "Alibaba",
      "origin": "cn",
      "family": "Qwen",
      "params": 7,
      "license": "Apache 2.0",
      "ctx": 128000,
      "tags": [
        "vision",
        "chat",
        "general"
      ],
      "vram": {
        "q4": 6,
        "q5": 7,
        "q8": 10,
        "fp16": 18
      },
      "url": "https://quelllm.fr/modele/qwen25-vl-7b",
      "apiUrl": "https://quelllm.fr/api/models/qwen25-vl-7b.json"
    },
    {
      "id": "qwen25-vl-72b",
      "name": "Qwen 2.5 VL 72B",
      "author": "Alibaba",
      "origin": "cn",
      "family": "Qwen",
      "params": 72,
      "license": "Qwen License",
      "ctx": 128000,
      "tags": [
        "vision",
        "chat",
        "general"
      ],
      "vram": {
        "q4": 42,
        "q5": 50,
        "q8": 78,
        "fp16": 144
      },
      "url": "https://quelllm.fr/modele/qwen25-vl-72b",
      "apiUrl": "https://quelllm.fr/api/models/qwen25-vl-72b.json"
    },
    {
      "id": "qwen25-omni-7b",
      "name": "Qwen 2.5 Omni 7B",
      "author": "Alibaba",
      "origin": "cn",
      "family": "Qwen",
      "params": 7,
      "license": "Apache 2.0",
      "ctx": 32768,
      "tags": [
        "vision",
        "audio",
        "chat"
      ],
      "vram": {
        "q4": 6,
        "q5": 7,
        "q8": 10,
        "fp16": 18
      },
      "url": "https://quelllm.fr/modele/qwen25-omni-7b",
      "apiUrl": "https://quelllm.fr/api/models/qwen25-omni-7b.json"
    },
    {
      "id": "qwq-32b",
      "name": "QwQ 32B",
      "author": "Alibaba",
      "origin": "cn",
      "family": "Qwen",
      "params": 32,
      "license": "Apache 2.0",
      "ctx": 131072,
      "tags": [
        "reasoning"
      ],
      "vram": {
        "q4": 19,
        "q5": 23,
        "q8": 35,
        "fp16": 64
      },
      "url": "https://quelllm.fr/modele/qwq-32b",
      "apiUrl": "https://quelllm.fr/api/models/qwq-32b.json"
    },
    {
      "id": "deepseek-r1-671b",
      "name": "DeepSeek R1 671B",
      "author": "DeepSeek",
      "origin": "cn",
      "family": "DeepSeek",
      "params": 671,
      "license": "MIT",
      "ctx": 128000,
      "tags": [
        "reasoning",
        "moe"
      ],
      "vram": {
        "q4": 400,
        "q5": 480,
        "q8": 720,
        "fp16": 1342
      },
      "url": "https://quelllm.fr/modele/deepseek-r1-671b",
      "apiUrl": "https://quelllm.fr/api/models/deepseek-r1-671b.json"
    },
    {
      "id": "deepseek-r1-distill-llama-70b",
      "name": "DeepSeek R1 Distill Llama 70B",
      "author": "DeepSeek",
      "origin": "cn",
      "family": "DeepSeek",
      "params": 70,
      "license": "Llama 3.3 Community + DeepSeek",
      "ctx": 128000,
      "tags": [
        "reasoning"
      ],
      "vram": {
        "q4": 40,
        "q5": 48,
        "q8": 75,
        "fp16": 140
      },
      "url": "https://quelllm.fr/modele/deepseek-r1-distill-llama-70b",
      "apiUrl": "https://quelllm.fr/api/models/deepseek-r1-distill-llama-70b.json"
    },
    {
      "id": "deepseek-v3-671b",
      "name": "DeepSeek V3 671B",
      "author": "DeepSeek",
      "origin": "cn",
      "family": "DeepSeek",
      "params": 671,
      "license": "DeepSeek License",
      "ctx": 128000,
      "tags": [
        "chat",
        "general",
        "moe"
      ],
      "vram": {
        "q4": 400,
        "q5": 480,
        "q8": 720,
        "fp16": 1342
      },
      "url": "https://quelllm.fr/modele/deepseek-v3-671b",
      "apiUrl": "https://quelllm.fr/api/models/deepseek-v3-671b.json"
    },
    {
      "id": "gemma3-4b",
      "name": "Gemma 3 4B",
      "author": "Google",
      "origin": "us",
      "family": "Gemma",
      "params": 4,
      "license": "Gemma",
      "ctx": 128000,
      "tags": [
        "chat",
        "general",
        "vision",
        "multilingual",
        "small"
      ],
      "vram": {
        "q4": 3,
        "q5": 4,
        "q8": 5.5,
        "fp16": 9
      },
      "url": "https://quelllm.fr/modele/gemma3-4b",
      "apiUrl": "https://quelllm.fr/api/models/gemma3-4b.json"
    },
    {
      "id": "gemma3-12b",
      "name": "Gemma 3 12B",
      "author": "Google",
      "origin": "us",
      "family": "Gemma",
      "params": 12,
      "license": "Gemma",
      "ctx": 128000,
      "tags": [
        "chat",
        "general",
        "vision",
        "multilingual"
      ],
      "vram": {
        "q4": 7,
        "q5": 9,
        "q8": 13,
        "fp16": 24
      },
      "url": "https://quelllm.fr/modele/gemma3-12b",
      "apiUrl": "https://quelllm.fr/api/models/gemma3-12b.json"
    },
    {
      "id": "gemma3-27b",
      "name": "Gemma 3 27B",
      "author": "Google",
      "origin": "us",
      "family": "Gemma",
      "params": 27,
      "license": "Gemma",
      "ctx": 128000,
      "tags": [
        "chat",
        "general",
        "vision",
        "multilingual"
      ],
      "vram": {
        "q4": 16,
        "q5": 19,
        "q8": 29,
        "fp16": 54
      },
      "url": "https://quelllm.fr/modele/gemma3-27b",
      "apiUrl": "https://quelllm.fr/api/models/gemma3-27b.json"
    },
    {
      "id": "phi4-multimodal",
      "name": "Phi-4 Multimodal 5.6B",
      "author": "Microsoft",
      "origin": "us",
      "family": "Phi",
      "params": 5.6,
      "license": "MIT",
      "ctx": 128000,
      "tags": [
        "chat",
        "vision",
        "audio",
        "small"
      ],
      "vram": {
        "q4": 4,
        "q5": 5,
        "q8": 7,
        "fp16": 12
      },
      "url": "https://quelllm.fr/modele/phi4-multimodal",
      "apiUrl": "https://quelllm.fr/api/models/phi4-multimodal.json"
    },
    {
      "id": "phi4-reasoning-14b",
      "name": "Phi-4 Reasoning 14B",
      "author": "Microsoft",
      "origin": "us",
      "family": "Phi",
      "params": 14,
      "license": "MIT",
      "ctx": 32768,
      "tags": [
        "reasoning"
      ],
      "vram": {
        "q4": 9,
        "q5": 11,
        "q8": 16,
        "fp16": 28
      },
      "url": "https://quelllm.fr/modele/phi4-reasoning-14b",
      "apiUrl": "https://quelllm.fr/api/models/phi4-reasoning-14b.json"
    },
    {
      "id": "command-r-plus-104b",
      "name": "Command R+ 104B (08-2024)",
      "author": "Cohere",
      "origin": "ca",
      "family": "Command",
      "params": 104,
      "license": "CC-BY-NC 4.0",
      "ctx": 128000,
      "tags": [
        "chat",
        "general",
        "multilingual"
      ],
      "vram": {
        "q4": 60,
        "q5": 72,
        "q8": 110,
        "fp16": 208
      },
      "url": "https://quelllm.fr/modele/command-r-plus-104b",
      "apiUrl": "https://quelllm.fr/api/models/command-r-plus-104b.json"
    },
    {
      "id": "aya-expanse-8b",
      "name": "Aya Expanse 8B",
      "author": "Cohere For AI",
      "origin": "ca",
      "family": "Aya",
      "params": 8,
      "license": "CC-BY-NC 4.0",
      "ctx": 8192,
      "tags": [
        "chat",
        "general",
        "multilingual"
      ],
      "vram": {
        "q4": 5,
        "q5": 6,
        "q8": 9,
        "fp16": 16
      },
      "url": "https://quelllm.fr/modele/aya-expanse-8b",
      "apiUrl": "https://quelllm.fr/api/models/aya-expanse-8b.json"
    },
    {
      "id": "aya-expanse-32b",
      "name": "Aya Expanse 32B",
      "author": "Cohere For AI",
      "origin": "ca",
      "family": "Aya",
      "params": 32,
      "license": "CC-BY-NC 4.0",
      "ctx": 8192,
      "tags": [
        "chat",
        "general",
        "multilingual"
      ],
      "vram": {
        "q4": 19,
        "q5": 23,
        "q8": 35,
        "fp16": 64
      },
      "url": "https://quelllm.fr/modele/aya-expanse-32b",
      "apiUrl": "https://quelllm.fr/api/models/aya-expanse-32b.json"
    },
    {
      "id": "eurollm-9b",
      "name": "EuroLLM 9B Instruct",
      "author": "Utter Project / UE",
      "origin": "eu",
      "family": "EuroLLM",
      "params": 9,
      "license": "Apache 2.0",
      "ctx": 4096,
      "tags": [
        "chat",
        "general",
        "multilingual",
        "fr"
      ],
      "vram": {
        "q4": 6,
        "q5": 7,
        "q8": 10,
        "fp16": 18
      },
      "url": "https://quelllm.fr/modele/eurollm-9b",
      "apiUrl": "https://quelllm.fr/api/models/eurollm-9b.json"
    },
    {
      "id": "teuken-7b",
      "name": "Teuken 7B Instruct",
      "author": "OpenGPT-X",
      "origin": "de",
      "family": "Teuken",
      "params": 7,
      "license": "Apache 2.0 (commercial)",
      "ctx": 4096,
      "tags": [
        "chat",
        "general",
        "multilingual",
        "fr"
      ],
      "vram": {
        "q4": 5,
        "q5": 6,
        "q8": 9,
        "fp16": 14
      },
      "url": "https://quelllm.fr/modele/teuken-7b",
      "apiUrl": "https://quelllm.fr/api/models/teuken-7b.json"
    },
    {
      "id": "pleias-3b",
      "name": "Pleias 3B Preview",
      "author": "PleIAs",
      "origin": "fr",
      "family": "Pleias",
      "params": 3,
      "license": "Apache 2.0",
      "ctx": 2048,
      "tags": [
        "chat",
        "multilingual",
        "fr",
        "small"
      ],
      "vram": {
        "q4": 2,
        "q5": 2.5,
        "q8": 3.5,
        "fp16": 6
      },
      "url": "https://quelllm.fr/modele/pleias-3b",
      "apiUrl": "https://quelllm.fr/api/models/pleias-3b.json"
    },
    {
      "id": "pleias-rag-1b",
      "name": "Pleias-RAG 1B",
      "author": "PleIAs",
      "origin": "fr",
      "family": "Pleias",
      "params": 1.2,
      "license": "Apache 2.0",
      "ctx": 2048,
      "tags": [
        "chat",
        "fr",
        "small"
      ],
      "vram": {
        "q4": 0.8,
        "q5": 1,
        "q8": 1.5,
        "fp16": 2.5
      },
      "url": "https://quelllm.fr/modele/pleias-rag-1b",
      "apiUrl": "https://quelllm.fr/api/models/pleias-rag-1b.json"
    },
    {
      "id": "moshi-7b",
      "name": "Moshi 7B",
      "author": "Kyutai",
      "origin": "fr",
      "family": "Moshi",
      "params": 7.6,
      "license": "CC-BY 4.0",
      "ctx": 4096,
      "tags": [
        "audio",
        "fr"
      ],
      "vram": {
        "q4": 5,
        "q5": 6,
        "q8": 9,
        "fp16": 15
      },
      "url": "https://quelllm.fr/modele/moshi-7b",
      "apiUrl": "https://quelllm.fr/api/models/moshi-7b.json"
    },
    {
      "id": "helium-1-2b",
      "name": "Helium 1 2B",
      "author": "Kyutai",
      "origin": "fr",
      "family": "Helium",
      "params": 2,
      "license": "CC-BY-SA 4.0",
      "ctx": 4096,
      "tags": [
        "chat",
        "general",
        "multilingual",
        "fr",
        "small"
      ],
      "vram": {
        "q4": 1.5,
        "q5": 2,
        "q8": 3,
        "fp16": 5
      },
      "url": "https://quelllm.fr/modele/helium-1-2b",
      "apiUrl": "https://quelllm.fr/api/models/helium-1-2b.json"
    },
    {
      "id": "smollm2-17b",
      "name": "SmolLM2 1.7B Instruct",
      "author": "HuggingFace",
      "origin": "fr",
      "family": "SmolLM",
      "params": 1.7,
      "license": "Apache 2.0",
      "ctx": 8192,
      "tags": [
        "chat",
        "general",
        "small"
      ],
      "vram": {
        "q4": 1.2,
        "q5": 1.5,
        "q8": 2.2,
        "fp16": 3.5
      },
      "url": "https://quelllm.fr/modele/smollm2-17b",
      "apiUrl": "https://quelllm.fr/api/models/smollm2-17b.json"
    },
    {
      "id": "smolvlm2-22b",
      "name": "SmolVLM2 2.2B Instruct",
      "author": "HuggingFace",
      "origin": "fr",
      "family": "SmolLM",
      "params": 2.2,
      "license": "Apache 2.0",
      "ctx": 8192,
      "tags": [
        "vision",
        "chat",
        "small"
      ],
      "vram": {
        "q4": 1.6,
        "q5": 2,
        "q8": 3,
        "fp16": 4.5
      },
      "url": "https://quelllm.fr/modele/smolvlm2-22b",
      "apiUrl": "https://quelllm.fr/api/models/smolvlm2-22b.json"
    },
    {
      "id": "glm-51",
      "name": "GLM-5.1",
      "author": "Z.AI",
      "origin": "cn",
      "family": "GLM",
      "params": 744,
      "license": "MIT",
      "ctx": 200000,
      "tags": [
        "chat",
        "general",
        "reasoning",
        "multilingual",
        "moe"
      ],
      "vram": {
        "q4": 445,
        "q5": 535,
        "q8": 800,
        "fp16": 1488
      },
      "url": "https://quelllm.fr/modele/glm-51",
      "apiUrl": "https://quelllm.fr/api/models/glm-51.json"
    },
    {
      "id": "minimax-m27",
      "name": "MiniMax-M2.7",
      "author": "MiniMax",
      "origin": "cn",
      "family": "MiniMax",
      "params": 229,
      "license": "Apache 2.0",
      "ctx": 205000,
      "tags": [
        "chat",
        "general",
        "reasoning",
        "moe"
      ],
      "vram": {
        "q4": 138,
        "q5": 165,
        "q8": 246,
        "fp16": 458
      },
      "url": "https://quelllm.fr/modele/minimax-m27",
      "apiUrl": "https://quelllm.fr/api/models/minimax-m27.json"
    },
    {
      "id": "gemma4-31b",
      "name": "Gemma 4 31B",
      "author": "Google",
      "origin": "us",
      "family": "Gemma",
      "params": 31,
      "license": "Gemma",
      "ctx": 256000,
      "tags": [
        "chat",
        "general",
        "vision",
        "audio",
        "multilingual"
      ],
      "vram": {
        "q4": 18,
        "q5": 22,
        "q8": 33,
        "fp16": 62
      },
      "url": "https://quelllm.fr/modele/gemma4-31b",
      "apiUrl": "https://quelllm.fr/api/models/gemma4-31b.json"
    },
    {
      "id": "gemma4-e4b",
      "name": "Gemma 4 E4B",
      "author": "Google",
      "origin": "us",
      "family": "Gemma",
      "params": 4,
      "license": "Gemma",
      "ctx": 128000,
      "tags": [
        "chat",
        "general",
        "vision",
        "audio",
        "multilingual",
        "small"
      ],
      "vram": {
        "q4": 3,
        "q5": 4,
        "q8": 5.5,
        "fp16": 9
      },
      "url": "https://quelllm.fr/modele/gemma4-e4b",
      "apiUrl": "https://quelllm.fr/api/models/gemma4-e4b.json"
    },
    {
      "id": "qwen35-9b",
      "name": "Qwen 3.5 9B",
      "author": "Alibaba",
      "origin": "cn",
      "family": "Qwen",
      "params": 9,
      "license": "Apache 2.0",
      "ctx": 262000,
      "tags": [
        "chat",
        "general",
        "reasoning",
        "multilingual"
      ],
      "vram": {
        "q4": 6,
        "q5": 7,
        "q8": 10,
        "fp16": 18
      },
      "url": "https://quelllm.fr/modele/qwen35-9b",
      "apiUrl": "https://quelllm.fr/api/models/qwen35-9b.json"
    },
    {
      "id": "qwen35-27b",
      "name": "Qwen 3.5 27B",
      "author": "Alibaba",
      "origin": "cn",
      "family": "Qwen",
      "params": 27,
      "license": "Apache 2.0",
      "ctx": 262000,
      "tags": [
        "chat",
        "general",
        "reasoning",
        "multilingual"
      ],
      "vram": {
        "q4": 16,
        "q5": 19,
        "q8": 29,
        "fp16": 54
      },
      "url": "https://quelllm.fr/modele/qwen35-27b",
      "apiUrl": "https://quelllm.fr/api/models/qwen35-27b.json"
    },
    {
      "id": "qwen35-397b-a17b",
      "name": "Qwen 3.5 397B-A17B",
      "author": "Alibaba",
      "origin": "cn",
      "family": "Qwen",
      "params": 397,
      "license": "Apache 2.0",
      "ctx": 262000,
      "tags": [
        "chat",
        "general",
        "reasoning",
        "multilingual",
        "moe"
      ],
      "vram": {
        "q4": 240,
        "q5": 285,
        "q8": 425,
        "fp16": 794
      },
      "url": "https://quelllm.fr/modele/qwen35-397b-a17b",
      "apiUrl": "https://quelllm.fr/api/models/qwen35-397b-a17b.json"
    },
    {
      "id": "qwen36-35b-a3b",
      "name": "Qwen 3.6 35B-A3B",
      "author": "Alibaba",
      "origin": "cn",
      "family": "Qwen",
      "params": 35,
      "license": "Apache 2.0",
      "ctx": 262000,
      "tags": [
        "chat",
        "code",
        "reasoning",
        "moe"
      ],
      "vram": {
        "q4": 21,
        "q5": 25,
        "q8": 38,
        "fp16": 70
      },
      "url": "https://quelllm.fr/modele/qwen36-35b-a3b",
      "apiUrl": "https://quelllm.fr/api/models/qwen36-35b-a3b.json"
    },
    {
      "id": "qwen3-coder-next",
      "name": "Qwen3-Coder-Next 80B-A3B",
      "author": "Alibaba",
      "origin": "cn",
      "family": "Qwen",
      "params": 80,
      "license": "Apache 2.0",
      "ctx": 262000,
      "tags": [
        "code",
        "moe"
      ],
      "vram": {
        "q4": 48,
        "q5": 58,
        "q8": 86,
        "fp16": 160
      },
      "url": "https://quelllm.fr/modele/qwen3-coder-next",
      "apiUrl": "https://quelllm.fr/api/models/qwen3-coder-next.json"
    },
    {
      "id": "mistral-small-4",
      "name": "Mistral Small 4",
      "author": "Mistral AI",
      "origin": "fr",
      "family": "Mistral",
      "params": 119,
      "license": "Apache 2.0",
      "ctx": 256000,
      "tags": [
        "chat",
        "general",
        "code",
        "vision",
        "reasoning",
        "multilingual",
        "fr",
        "moe"
      ],
      "vram": {
        "q4": 72,
        "q5": 86,
        "q8": 128,
        "fp16": 238
      },
      "url": "https://quelllm.fr/modele/mistral-small-4",
      "apiUrl": "https://quelllm.fr/api/models/mistral-small-4.json"
    },
    {
      "id": "devstral-small-2",
      "name": "Devstral Small 2 24B",
      "author": "Mistral AI",
      "origin": "fr",
      "family": "Mistral",
      "params": 24,
      "license": "Apache 2.0",
      "ctx": 256000,
      "tags": [
        "code",
        "fr"
      ],
      "vram": {
        "q4": 14,
        "q5": 17,
        "q8": 26,
        "fp16": 48
      },
      "url": "https://quelllm.fr/modele/devstral-small-2",
      "apiUrl": "https://quelllm.fr/api/models/devstral-small-2.json"
    },
    {
      "id": "voxtral-4b-tts",
      "name": "Voxtral-4B-TTS",
      "author": "Mistral AI",
      "origin": "fr",
      "family": "Mistral",
      "params": 4,
      "license": "CC-BY-NC 4.0",
      "ctx": 4096,
      "tags": [
        "audio",
        "multilingual",
        "fr",
        "small"
      ],
      "vram": {
        "q4": 10,
        "q5": 12,
        "q8": 18,
        "fp16": 33
      },
      "url": "https://quelllm.fr/modele/voxtral-4b-tts",
      "apiUrl": "https://quelllm.fr/api/models/voxtral-4b-tts.json"
    },
    {
      "id": "deepseek-r2-32b",
      "name": "DeepSeek R2 32B",
      "author": "DeepSeek",
      "origin": "cn",
      "family": "DeepSeek",
      "params": 32,
      "license": "MIT",
      "ctx": 128000,
      "tags": [
        "reasoning"
      ],
      "vram": {
        "q4": 19,
        "q5": 23,
        "q8": 35,
        "fp16": 64
      },
      "url": "https://quelllm.fr/modele/deepseek-r2-32b",
      "apiUrl": "https://quelllm.fr/api/models/deepseek-r2-32b.json"
    },
    {
      "id": "deepseek-v32",
      "name": "DeepSeek V3.2",
      "author": "DeepSeek",
      "origin": "cn",
      "family": "DeepSeek",
      "params": 685,
      "license": "MIT",
      "ctx": 128000,
      "tags": [
        "chat",
        "general",
        "moe"
      ],
      "vram": {
        "q4": 410,
        "q5": 490,
        "q8": 735,
        "fp16": 1370
      },
      "url": "https://quelllm.fr/modele/deepseek-v32",
      "apiUrl": "https://quelllm.fr/api/models/deepseek-v32.json"
    },
    {
      "id": "kimi-k25",
      "name": "Kimi K2.5",
      "author": "Moonshot AI",
      "origin": "cn",
      "family": "Kimi",
      "params": 1000,
      "license": "Modified MIT",
      "ctx": 256000,
      "tags": [
        "chat",
        "general",
        "moe"
      ],
      "vram": {
        "q4": 600,
        "q5": 720,
        "q8": 1080,
        "fp16": 2000
      },
      "url": "https://quelllm.fr/modele/kimi-k25",
      "apiUrl": "https://quelllm.fr/api/models/kimi-k25.json"
    },
    {
      "id": "nemotron-3-super-120b",
      "name": "Nemotron 3 Super 120B",
      "author": "NVIDIA",
      "origin": "us",
      "family": "Nemotron",
      "params": 120,
      "license": "NVIDIA Open Model License",
      "ctx": 128000,
      "tags": [
        "chat",
        "general",
        "reasoning",
        "moe"
      ],
      "vram": {
        "q4": 72,
        "q5": 86,
        "q8": 132,
        "fp16": 240
      },
      "url": "https://quelllm.fr/modele/nemotron-3-super-120b",
      "apiUrl": "https://quelllm.fr/api/models/nemotron-3-super-120b.json"
    },
    {
      "id": "olmo3-7b",
      "name": "OLMo 3 7B",
      "author": "Allen AI",
      "origin": "us",
      "family": "OLMo",
      "params": 7,
      "license": "Apache 2.0",
      "ctx": 8192,
      "tags": [
        "chat",
        "general"
      ],
      "vram": {
        "q4": 5,
        "q5": 6,
        "q8": 9,
        "fp16": 14
      },
      "url": "https://quelllm.fr/modele/olmo3-7b",
      "apiUrl": "https://quelllm.fr/api/models/olmo3-7b.json"
    },
    {
      "id": "olmo3-32b",
      "name": "OLMo 3 32B",
      "author": "Allen AI",
      "origin": "us",
      "family": "OLMo",
      "params": 32,
      "license": "Apache 2.0",
      "ctx": 65536,
      "tags": [
        "chat",
        "general",
        "reasoning"
      ],
      "vram": {
        "q4": 19,
        "q5": 23,
        "q8": 35,
        "fp16": 64
      },
      "url": "https://quelllm.fr/modele/olmo3-32b",
      "apiUrl": "https://quelllm.fr/api/models/olmo3-32b.json"
    },
    {
      "id": "tiny-aya-3b",
      "name": "Tiny Aya 3.35B",
      "author": "Cohere For AI",
      "origin": "ca",
      "family": "Aya",
      "params": 3.35,
      "license": "CC-BY-NC 4.0",
      "ctx": 8192,
      "tags": [
        "chat",
        "multilingual",
        "small"
      ],
      "vram": {
        "q4": 2.2,
        "q5": 2.7,
        "q8": 3.8,
        "fp16": 7
      },
      "url": "https://quelllm.fr/modele/tiny-aya-3b",
      "apiUrl": "https://quelllm.fr/api/models/tiny-aya-3b.json"
    },
    {
      "id": "granite4-3b-vision",
      "name": "Granite 4.0 3B Vision",
      "author": "IBM",
      "origin": "us",
      "family": "Granite",
      "params": 3,
      "license": "Apache 2.0",
      "ctx": 16384,
      "tags": [
        "vision",
        "chat",
        "small"
      ],
      "vram": {
        "q4": 2.2,
        "q5": 2.7,
        "q8": 3.8,
        "fp16": 6.5
      },
      "url": "https://quelllm.fr/modele/granite4-3b-vision",
      "apiUrl": "https://quelllm.fr/api/models/granite4-3b-vision.json"
    },
    {
      "id": "step-35-flash",
      "name": "Step 3.5 Flash",
      "author": "StepFun",
      "origin": "cn",
      "family": "Step",
      "params": 196,
      "license": "Apache 2.0",
      "ctx": 256000,
      "tags": [
        "chat",
        "general",
        "moe"
      ],
      "vram": {
        "q4": 118,
        "q5": 141,
        "q8": 210,
        "fp16": 392
      },
      "url": "https://quelllm.fr/modele/step-35-flash",
      "apiUrl": "https://quelllm.fr/api/models/step-35-flash.json"
    },
    {
      "id": "falcon-h1r-7b",
      "name": "Falcon H1R 7B",
      "author": "TII",
      "origin": "ae",
      "family": "Falcon",
      "params": 7,
      "license": "TII Falcon-LLM License 2.0",
      "ctx": 32768,
      "tags": [
        "reasoning"
      ],
      "vram": {
        "q4": 5,
        "q5": 6,
        "q8": 9,
        "fp16": 14
      },
      "url": "https://quelllm.fr/modele/falcon-h1r-7b",
      "apiUrl": "https://quelllm.fr/api/models/falcon-h1r-7b.json"
    },
    {
      "id": "mixtral-8x22b",
      "name": "Mixtral 8x22B Instruct",
      "author": "Mistral AI",
      "origin": "fr",
      "family": "Mistral",
      "params": 141,
      "license": "Apache 2.0",
      "ctx": 64000,
      "tags": [
        "chat",
        "general",
        "moe",
        "multilingual",
        "fr"
      ],
      "vram": {
        "q4": 82,
        "q5": 100,
        "q8": 150,
        "fp16": 282
      },
      "url": "https://quelllm.fr/modele/mixtral-8x22b",
      "apiUrl": "https://quelllm.fr/api/models/mixtral-8x22b.json"
    },
    {
      "id": "mistral-small-32-24b",
      "name": "Mistral Small 3.2 24B",
      "author": "Mistral AI",
      "origin": "fr",
      "family": "Mistral",
      "params": 24,
      "license": "Apache 2.0",
      "ctx": 128000,
      "tags": [
        "chat",
        "general",
        "vision",
        "multilingual",
        "fr"
      ],
      "vram": {
        "q4": 14,
        "q5": 17,
        "q8": 26,
        "fp16": 48
      },
      "url": "https://quelllm.fr/modele/mistral-small-32-24b",
      "apiUrl": "https://quelllm.fr/api/models/mistral-small-32-24b.json"
    },
    {
      "id": "codestral-22b",
      "name": "Codestral 22B v0.1",
      "author": "Mistral AI",
      "origin": "fr",
      "family": "Mistral",
      "params": 22,
      "license": "Mistral Non-Production License",
      "ctx": 32000,
      "tags": [
        "code",
        "fr"
      ],
      "vram": {
        "q4": 13,
        "q5": 16,
        "q8": 24,
        "fp16": 44
      },
      "url": "https://quelllm.fr/modele/codestral-22b",
      "apiUrl": "https://quelllm.fr/api/models/codestral-22b.json"
    },
    {
      "id": "codestral-mamba-7b",
      "name": "Codestral Mamba 7B",
      "author": "Mistral AI",
      "origin": "fr",
      "family": "Mistral",
      "params": 7,
      "license": "Apache 2.0",
      "ctx": 256000,
      "tags": [
        "code",
        "fr"
      ],
      "vram": {
        "q4": 5,
        "q5": 6,
        "q8": 9,
        "fp16": 14
      },
      "url": "https://quelllm.fr/modele/codestral-mamba-7b",
      "apiUrl": "https://quelllm.fr/api/models/codestral-mamba-7b.json"
    },
    {
      "id": "magistral-small-24b",
      "name": "Magistral Small 24B",
      "author": "Mistral AI",
      "origin": "fr",
      "family": "Mistral",
      "params": 24,
      "license": "Apache 2.0",
      "ctx": 128000,
      "tags": [
        "reasoning",
        "fr"
      ],
      "vram": {
        "q4": 14,
        "q5": 17,
        "q8": 26,
        "fp16": 48
      },
      "url": "https://quelllm.fr/modele/magistral-small-24b",
      "apiUrl": "https://quelllm.fr/api/models/magistral-small-24b.json"
    },
    {
      "id": "mistral-large-3",
      "name": "Mistral Large 3 675B",
      "author": "Mistral AI",
      "origin": "fr",
      "family": "Mistral",
      "params": 675,
      "license": "Apache 2.0",
      "ctx": 256000,
      "tags": [
        "chat",
        "general",
        "vision",
        "multilingual",
        "fr",
        "moe"
      ],
      "vram": {
        "q4": 405,
        "q5": 485,
        "q8": 720,
        "fp16": 1350
      },
      "url": "https://quelllm.fr/modele/mistral-large-3",
      "apiUrl": "https://quelllm.fr/api/models/mistral-large-3.json"
    },
    {
      "id": "mistral-medium-35",
      "name": "Mistral Medium 3.5 128B",
      "author": "Mistral AI",
      "origin": "fr",
      "family": "Mistral",
      "params": 128,
      "license": "Modified MIT",
      "ctx": 256000,
      "tags": [
        "chat",
        "general",
        "code",
        "reasoning",
        "vision",
        "multilingual",
        "fr"
      ],
      "vram": {
        "q4": 74,
        "q5": 91,
        "q8": 137,
        "fp16": 256
      },
      "url": "https://quelllm.fr/modele/mistral-medium-35",
      "apiUrl": "https://quelllm.fr/api/models/mistral-medium-35.json"
    },
    {
      "id": "llama-3-1-405b",
      "name": "Llama 3.1 405B Instruct",
      "author": "Meta",
      "origin": "us",
      "family": "Llama",
      "params": 405,
      "license": "Llama 3.1 Community",
      "ctx": 128000,
      "tags": [
        "chat",
        "general",
        "reasoning"
      ],
      "vram": {
        "q4": 240,
        "q5": 288,
        "q8": 435,
        "fp16": 810
      },
      "url": "https://quelllm.fr/modele/llama-3-1-405b",
      "apiUrl": "https://quelllm.fr/api/models/llama-3-1-405b.json"
    },
    {
      "id": "llama-4-scout",
      "name": "Llama 4 Scout 109B",
      "author": "Meta",
      "origin": "us",
      "family": "Llama",
      "params": 109,
      "license": "Llama 4 Community",
      "ctx": 10000000,
      "tags": [
        "chat",
        "general",
        "vision",
        "moe",
        "multilingual"
      ],
      "vram": {
        "q4": 65,
        "q5": 78,
        "q8": 117,
        "fp16": 218
      },
      "url": "https://quelllm.fr/modele/llama-4-scout",
      "apiUrl": "https://quelllm.fr/api/models/llama-4-scout.json"
    },
    {
      "id": "llama-4-maverick",
      "name": "Llama 4 Maverick 400B",
      "author": "Meta",
      "origin": "us",
      "family": "Llama",
      "params": 400,
      "license": "Llama 4 Community",
      "ctx": 1000000,
      "tags": [
        "chat",
        "general",
        "vision",
        "moe",
        "multilingual"
      ],
      "vram": {
        "q4": 240,
        "q5": 285,
        "q8": 425,
        "fp16": 800
      },
      "url": "https://quelllm.fr/modele/llama-4-maverick",
      "apiUrl": "https://quelllm.fr/api/models/llama-4-maverick.json"
    },
    {
      "id": "llama31-nemotron-70b",
      "name": "Llama 3.1 Nemotron 70B",
      "author": "NVIDIA",
      "origin": "us",
      "family": "Nemotron",
      "params": 70,
      "license": "Llama 3.1 Community",
      "ctx": 128000,
      "tags": [
        "chat",
        "general",
        "reasoning"
      ],
      "vram": {
        "q4": 40,
        "q5": 48,
        "q8": 75,
        "fp16": 140
      },
      "url": "https://quelllm.fr/modele/llama31-nemotron-70b",
      "apiUrl": "https://quelllm.fr/api/models/llama31-nemotron-70b.json"
    },
    {
      "id": "qwen25-3b",
      "name": "Qwen 2.5 3B Instruct",
      "author": "Alibaba",
      "origin": "cn",
      "family": "Qwen",
      "params": 3,
      "license": "Qwen Research License",
      "ctx": 32768,
      "tags": [
        "chat",
        "general",
        "multilingual",
        "small"
      ],
      "vram": {
        "q4": 2,
        "q5": 2.5,
        "q8": 4,
        "fp16": 6
      },
      "url": "https://quelllm.fr/modele/qwen25-3b",
      "apiUrl": "https://quelllm.fr/api/models/qwen25-3b.json"
    },
    {
      "id": "qwen25-14b",
      "name": "Qwen 2.5 14B Instruct",
      "author": "Alibaba",
      "origin": "cn",
      "family": "Qwen",
      "params": 14,
      "license": "Apache 2.0",
      "ctx": 131072,
      "tags": [
        "chat",
        "general",
        "multilingual"
      ],
      "vram": {
        "q4": 9,
        "q5": 11,
        "q8": 16,
        "fp16": 28
      },
      "url": "https://quelllm.fr/modele/qwen25-14b",
      "apiUrl": "https://quelllm.fr/api/models/qwen25-14b.json"
    },
    {
      "id": "qwen25-72b",
      "name": "Qwen 2.5 72B Instruct",
      "author": "Alibaba",
      "origin": "cn",
      "family": "Qwen",
      "params": 72,
      "license": "Qwen License",
      "ctx": 131072,
      "tags": [
        "chat",
        "general",
        "reasoning",
        "multilingual"
      ],
      "vram": {
        "q4": 42,
        "q5": 50,
        "q8": 78,
        "fp16": 144
      },
      "url": "https://quelllm.fr/modele/qwen25-72b",
      "apiUrl": "https://quelllm.fr/api/models/qwen25-72b.json"
    },
    {
      "id": "qwen25-coder-15b",
      "name": "Qwen 2.5 Coder 1.5B Instruct",
      "author": "Alibaba",
      "origin": "cn",
      "family": "Qwen",
      "params": 1.5,
      "license": "Apache 2.0",
      "ctx": 32768,
      "tags": [
        "code",
        "small"
      ],
      "vram": {
        "q4": 1,
        "q5": 1.2,
        "q8": 2,
        "fp16": 3
      },
      "url": "https://quelllm.fr/modele/qwen25-coder-15b",
      "apiUrl": "https://quelllm.fr/api/models/qwen25-coder-15b.json"
    },
    {
      "id": "qwen25-coder-3b",
      "name": "Qwen 2.5 Coder 3B Instruct",
      "author": "Alibaba",
      "origin": "cn",
      "family": "Qwen",
      "params": 3,
      "license": "Qwen Research License",
      "ctx": 32768,
      "tags": [
        "code",
        "small"
      ],
      "vram": {
        "q4": 2,
        "q5": 2.5,
        "q8": 4,
        "fp16": 6
      },
      "url": "https://quelllm.fr/modele/qwen25-coder-3b",
      "apiUrl": "https://quelllm.fr/api/models/qwen25-coder-3b.json"
    },
    {
      "id": "qwen25-coder-14b",
      "name": "Qwen 2.5 Coder 14B Instruct",
      "author": "Alibaba",
      "origin": "cn",
      "family": "Qwen",
      "params": 14,
      "license": "Apache 2.0",
      "ctx": 131072,
      "tags": [
        "code"
      ],
      "vram": {
        "q4": 9,
        "q5": 11,
        "q8": 16,
        "fp16": 28
      },
      "url": "https://quelllm.fr/modele/qwen25-coder-14b",
      "apiUrl": "https://quelllm.fr/api/models/qwen25-coder-14b.json"
    },
    {
      "id": "qwen3-30b-a3b",
      "name": "Qwen 3 30B-A3B",
      "author": "Alibaba",
      "origin": "cn",
      "family": "Qwen",
      "params": 30,
      "license": "Apache 2.0",
      "ctx": 131072,
      "tags": [
        "chat",
        "general",
        "reasoning",
        "multilingual",
        "moe"
      ],
      "vram": {
        "q4": 19,
        "q5": 23,
        "q8": 35,
        "fp16": 62
      },
      "url": "https://quelllm.fr/modele/qwen3-30b-a3b",
      "apiUrl": "https://quelllm.fr/api/models/qwen3-30b-a3b.json"
    },
    {
      "id": "deepseek-r1-distill-qwen-15b",
      "name": "DeepSeek R1 Distill Qwen 1.5B",
      "author": "DeepSeek",
      "origin": "cn",
      "family": "DeepSeek",
      "params": 1.5,
      "license": "MIT",
      "ctx": 131072,
      "tags": [
        "reasoning",
        "small"
      ],
      "vram": {
        "q4": 1,
        "q5": 1.2,
        "q8": 2,
        "fp16": 3
      },
      "url": "https://quelllm.fr/modele/deepseek-r1-distill-qwen-15b",
      "apiUrl": "https://quelllm.fr/api/models/deepseek-r1-distill-qwen-15b.json"
    },
    {
      "id": "deepseek-r1-distill-qwen-14b",
      "name": "DeepSeek R1 Distill Qwen 14B",
      "author": "DeepSeek",
      "origin": "cn",
      "family": "DeepSeek",
      "params": 14,
      "license": "MIT",
      "ctx": 131072,
      "tags": [
        "reasoning"
      ],
      "vram": {
        "q4": 9,
        "q5": 11,
        "q8": 16,
        "fp16": 28
      },
      "url": "https://quelllm.fr/modele/deepseek-r1-distill-qwen-14b",
      "apiUrl": "https://quelllm.fr/api/models/deepseek-r1-distill-qwen-14b.json"
    },
    {
      "id": "phi4-mini",
      "name": "Phi-4 Mini 3.8B",
      "author": "Microsoft",
      "origin": "us",
      "family": "Phi",
      "params": 3.8,
      "license": "MIT",
      "ctx": 128000,
      "tags": [
        "chat",
        "general",
        "small"
      ],
      "vram": {
        "q4": 10,
        "q5": 12,
        "q8": 18,
        "fp16": 33
      },
      "url": "https://quelllm.fr/modele/phi4-mini",
      "apiUrl": "https://quelllm.fr/api/models/phi4-mini.json"
    },
    {
      "id": "phi4-mini-reasoning",
      "name": "Phi-4 Mini Reasoning 3.8B",
      "author": "Microsoft",
      "origin": "us",
      "family": "Phi",
      "params": 3.8,
      "license": "MIT",
      "ctx": 128000,
      "tags": [
        "reasoning",
        "small"
      ],
      "vram": {
        "q4": 10,
        "q5": 12,
        "q8": 18,
        "fp16": 33
      },
      "url": "https://quelllm.fr/modele/phi4-mini-reasoning",
      "apiUrl": "https://quelllm.fr/api/models/phi4-mini-reasoning.json"
    },
    {
      "id": "gemma3n-e2b",
      "name": "Gemma 3n E2B",
      "author": "Google",
      "origin": "us",
      "family": "Gemma",
      "params": 2,
      "license": "Gemma",
      "ctx": 32768,
      "tags": [
        "chat",
        "general",
        "multilingual",
        "small"
      ],
      "vram": {
        "q4": 2,
        "q5": 2.5,
        "q8": 3.5,
        "fp16": 6
      },
      "url": "https://quelllm.fr/modele/gemma3n-e2b",
      "apiUrl": "https://quelllm.fr/api/models/gemma3n-e2b.json"
    },
    {
      "id": "gemma3n-e4b",
      "name": "Gemma 3n E4B",
      "author": "Google",
      "origin": "us",
      "family": "Gemma",
      "params": 4,
      "license": "Gemma",
      "ctx": 32768,
      "tags": [
        "chat",
        "general",
        "multilingual",
        "small"
      ],
      "vram": {
        "q4": 4.5,
        "q5": 5.5,
        "q8": 8,
        "fp16": 14
      },
      "url": "https://quelllm.fr/modele/gemma3n-e4b",
      "apiUrl": "https://quelllm.fr/api/models/gemma3n-e4b.json"
    },
    {
      "id": "granite32-8b",
      "name": "Granite 3.2 8B Instruct",
      "author": "IBM",
      "origin": "us",
      "family": "Granite",
      "params": 8,
      "license": "Apache 2.0",
      "ctx": 128000,
      "tags": [
        "chat",
        "general"
      ],
      "vram": {
        "q4": 5,
        "q5": 6,
        "q8": 9,
        "fp16": 16
      },
      "url": "https://quelllm.fr/modele/granite32-8b",
      "apiUrl": "https://quelllm.fr/api/models/granite32-8b.json"
    },
    {
      "id": "granite33-8b",
      "name": "Granite 3.3 8B Instruct",
      "author": "IBM",
      "origin": "us",
      "family": "Granite",
      "params": 8,
      "license": "Apache 2.0",
      "ctx": 128000,
      "tags": [
        "chat",
        "general",
        "code"
      ],
      "vram": {
        "q4": 5,
        "q5": 6,
        "q8": 9,
        "fp16": 16
      },
      "url": "https://quelllm.fr/modele/granite33-8b",
      "apiUrl": "https://quelllm.fr/api/models/granite33-8b.json"
    },
    {
      "id": "granite4-small",
      "name": "Granite 4.0 H-Small 32B-A9B",
      "author": "IBM",
      "origin": "us",
      "family": "Granite",
      "params": 32,
      "license": "Apache 2.0",
      "ctx": 128000,
      "tags": [
        "chat",
        "general",
        "moe"
      ],
      "vram": {
        "q4": 19,
        "q5": 23,
        "q8": 35,
        "fp16": 64
      },
      "url": "https://quelllm.fr/modele/granite4-small",
      "apiUrl": "https://quelllm.fr/api/models/granite4-small.json"
    },
    {
      "id": "granite4-tiny",
      "name": "Granite 4.0 H-Tiny 7B-A1B",
      "author": "IBM",
      "origin": "us",
      "family": "Granite",
      "params": 7,
      "license": "Apache 2.0",
      "ctx": 128000,
      "tags": [
        "chat",
        "general",
        "moe",
        "small"
      ],
      "vram": {
        "q4": 4,
        "q5": 5,
        "q8": 7,
        "fp16": 14
      },
      "url": "https://quelllm.fr/modele/granite4-tiny",
      "apiUrl": "https://quelllm.fr/api/models/granite4-tiny.json"
    },
    {
      "id": "tulu3-8b",
      "name": "Tülu 3 8B",
      "author": "Allen AI",
      "origin": "us",
      "family": "Tulu",
      "params": 8,
      "license": "Llama 3.1 Community",
      "ctx": 128000,
      "tags": [
        "chat",
        "general"
      ],
      "vram": {
        "q4": 6,
        "q5": 7,
        "q8": 10,
        "fp16": 16
      },
      "url": "https://quelllm.fr/modele/tulu3-8b",
      "apiUrl": "https://quelllm.fr/api/models/tulu3-8b.json"
    },
    {
      "id": "tulu3-70b",
      "name": "Tülu 3 70B",
      "author": "Allen AI",
      "origin": "us",
      "family": "Tulu",
      "params": 70,
      "license": "Llama 3.1 Community",
      "ctx": 128000,
      "tags": [
        "chat",
        "general",
        "reasoning"
      ],
      "vram": {
        "q4": 40,
        "q5": 48,
        "q8": 75,
        "fp16": 140
      },
      "url": "https://quelllm.fr/modele/tulu3-70b",
      "apiUrl": "https://quelllm.fr/api/models/tulu3-70b.json"
    },
    {
      "id": "olmoe-1b-7b",
      "name": "OLMoE 1B-7B Instruct",
      "author": "Allen AI",
      "origin": "us",
      "family": "OLMo",
      "params": 7,
      "license": "Apache 2.0",
      "ctx": 4096,
      "tags": [
        "chat",
        "general",
        "moe",
        "small"
      ],
      "vram": {
        "q4": 4,
        "q5": 5,
        "q8": 7,
        "fp16": 14
      },
      "url": "https://quelllm.fr/modele/olmoe-1b-7b",
      "apiUrl": "https://quelllm.fr/api/models/olmoe-1b-7b.json"
    },
    {
      "id": "molmo-7b",
      "name": "Molmo 7B-D",
      "author": "Allen AI",
      "origin": "us",
      "family": "Molmo",
      "params": 7,
      "license": "Apache 2.0",
      "ctx": 4096,
      "tags": [
        "vision",
        "chat"
      ],
      "vram": {
        "q4": 5,
        "q5": 6,
        "q8": 9,
        "fp16": 16
      },
      "url": "https://quelllm.fr/modele/molmo-7b",
      "apiUrl": "https://quelllm.fr/api/models/molmo-7b.json"
    },
    {
      "id": "molmo-72b",
      "name": "Molmo 72B",
      "author": "Allen AI",
      "origin": "us",
      "family": "Molmo",
      "params": 72,
      "license": "Apache 2.0",
      "ctx": 4096,
      "tags": [
        "vision",
        "chat"
      ],
      "vram": {
        "q4": 42,
        "q5": 50,
        "q8": 78,
        "fp16": 144
      },
      "url": "https://quelllm.fr/modele/molmo-72b",
      "apiUrl": "https://quelllm.fr/api/models/molmo-72b.json"
    },
    {
      "id": "smollm3-3b",
      "name": "SmolLM3 3B",
      "author": "HuggingFace",
      "origin": "fr",
      "family": "SmolLM",
      "params": 3,
      "license": "Apache 2.0",
      "ctx": 128000,
      "tags": [
        "chat",
        "general",
        "reasoning",
        "small"
      ],
      "vram": {
        "q4": 2,
        "q5": 2.5,
        "q8": 4,
        "fp16": 6
      },
      "url": "https://quelllm.fr/modele/smollm3-3b",
      "apiUrl": "https://quelllm.fr/api/models/smollm3-3b.json"
    },
    {
      "id": "minicpm-v-26",
      "name": "MiniCPM-V 2.6 8B",
      "author": "OpenBMB",
      "origin": "cn",
      "family": "MiniCPM",
      "params": 8,
      "license": "MiniCPM Model License",
      "ctx": 32000,
      "tags": [
        "vision",
        "chat"
      ],
      "vram": {
        "q4": 5.5,
        "q5": 7,
        "q8": 10,
        "fp16": 18
      },
      "url": "https://quelllm.fr/modele/minicpm-v-26",
      "apiUrl": "https://quelllm.fr/api/models/minicpm-v-26.json"
    },
    {
      "id": "minicpm-o-26",
      "name": "MiniCPM-o 2.6 8B",
      "author": "OpenBMB",
      "origin": "cn",
      "family": "MiniCPM",
      "params": 8,
      "license": "MiniCPM Model License",
      "ctx": 32000,
      "tags": [
        "vision",
        "audio",
        "chat"
      ],
      "vram": {
        "q4": 5.5,
        "q5": 7,
        "q8": 10,
        "fp16": 18
      },
      "url": "https://quelllm.fr/modele/minicpm-o-26",
      "apiUrl": "https://quelllm.fr/api/models/minicpm-o-26.json"
    },
    {
      "id": "falcon3-7b",
      "name": "Falcon 3 7B Instruct",
      "author": "TII",
      "origin": "ae",
      "family": "Falcon",
      "params": 7,
      "license": "TII Falcon-LLM License 2.0",
      "ctx": 32000,
      "tags": [
        "chat",
        "general",
        "multilingual"
      ],
      "vram": {
        "q4": 5,
        "q5": 6,
        "q8": 9,
        "fp16": 14
      },
      "url": "https://quelllm.fr/modele/falcon3-7b",
      "apiUrl": "https://quelllm.fr/api/models/falcon3-7b.json"
    },
    {
      "id": "falcon3-10b",
      "name": "Falcon 3 10B Instruct",
      "author": "TII",
      "origin": "ae",
      "family": "Falcon",
      "params": 10,
      "license": "TII Falcon-LLM License 2.0",
      "ctx": 32000,
      "tags": [
        "chat",
        "general",
        "multilingual"
      ],
      "vram": {
        "q4": 6,
        "q5": 8,
        "q8": 12,
        "fp16": 20
      },
      "url": "https://quelllm.fr/modele/falcon3-10b",
      "apiUrl": "https://quelllm.fr/api/models/falcon3-10b.json"
    },
    {
      "id": "falcon-mamba-7b",
      "name": "Falcon Mamba 7B",
      "author": "TII",
      "origin": "ae",
      "family": "Falcon",
      "params": 7,
      "license": "TII Falcon-LLM License 2.0",
      "ctx": 8192,
      "tags": [
        "chat",
        "general"
      ],
      "vram": {
        "q4": 5,
        "q5": 6,
        "q8": 9,
        "fp16": 14
      },
      "url": "https://quelllm.fr/modele/falcon-mamba-7b",
      "apiUrl": "https://quelllm.fr/api/models/falcon-mamba-7b.json"
    },
    {
      "id": "command-r-35b",
      "name": "Command R 35B v01",
      "author": "Cohere",
      "origin": "ca",
      "family": "Command",
      "params": 35,
      "license": "CC-BY-NC 4.0",
      "ctx": 128000,
      "tags": [
        "chat",
        "general",
        "multilingual"
      ],
      "vram": {
        "q4": 20,
        "q5": 25,
        "q8": 37,
        "fp16": 70
      },
      "url": "https://quelllm.fr/modele/command-r-35b",
      "apiUrl": "https://quelllm.fr/api/models/command-r-35b.json"
    },
    {
      "id": "aya-23-8b",
      "name": "Aya 23 8B",
      "author": "Cohere For AI",
      "origin": "ca",
      "family": "Aya",
      "params": 8,
      "license": "CC-BY-NC 4.0",
      "ctx": 8192,
      "tags": [
        "chat",
        "general",
        "multilingual"
      ],
      "vram": {
        "q4": 5,
        "q5": 6,
        "q8": 9,
        "fp16": 16
      },
      "url": "https://quelllm.fr/modele/aya-23-8b",
      "apiUrl": "https://quelllm.fr/api/models/aya-23-8b.json"
    },
    {
      "id": "aya-23-35b",
      "name": "Aya 23 35B",
      "author": "Cohere For AI",
      "origin": "ca",
      "family": "Aya",
      "params": 35,
      "license": "CC-BY-NC 4.0",
      "ctx": 8192,
      "tags": [
        "chat",
        "general",
        "multilingual"
      ],
      "vram": {
        "q4": 20,
        "q5": 25,
        "q8": 37,
        "fp16": 70
      },
      "url": "https://quelllm.fr/modele/aya-23-35b",
      "apiUrl": "https://quelllm.fr/api/models/aya-23-35b.json"
    },
    {
      "id": "yi-15-34b",
      "name": "Yi 1.5 34B Chat",
      "author": "01.AI",
      "origin": "cn",
      "family": "Yi",
      "params": 34,
      "license": "Apache 2.0",
      "ctx": 4096,
      "tags": [
        "chat",
        "general",
        "multilingual"
      ],
      "vram": {
        "q4": 20,
        "q5": 24,
        "q8": 36,
        "fp16": 68
      },
      "url": "https://quelllm.fr/modele/yi-15-34b",
      "apiUrl": "https://quelllm.fr/api/models/yi-15-34b.json"
    },
    {
      "id": "yi-coder-9b",
      "name": "Yi Coder 9B Chat",
      "author": "01.AI",
      "origin": "cn",
      "family": "Yi",
      "params": 9,
      "license": "Apache 2.0",
      "ctx": 128000,
      "tags": [
        "code"
      ],
      "vram": {
        "q4": 5.5,
        "q5": 7,
        "q8": 10,
        "fp16": 18
      },
      "url": "https://quelllm.fr/modele/yi-coder-9b",
      "apiUrl": "https://quelllm.fr/api/models/yi-coder-9b.json"
    },
    {
      "id": "dbrx-instruct",
      "name": "DBRX Instruct",
      "author": "Databricks",
      "origin": "us",
      "family": "DBRX",
      "params": 132,
      "license": "Databricks Open Model License",
      "ctx": 32768,
      "tags": [
        "chat",
        "general",
        "moe"
      ],
      "vram": {
        "q4": 76,
        "q5": 94,
        "q8": 140,
        "fp16": 264
      },
      "url": "https://quelllm.fr/modele/dbrx-instruct",
      "apiUrl": "https://quelllm.fr/api/models/dbrx-instruct.json"
    },
    {
      "id": "jais-30b",
      "name": "Jais 30B Chat v3",
      "author": "MBZUAI / Core42",
      "origin": "ae",
      "family": "Jais",
      "params": 30,
      "license": "Apache 2.0",
      "ctx": 8192,
      "tags": [
        "chat",
        "general",
        "multilingual"
      ],
      "vram": {
        "q4": 18,
        "q5": 22,
        "q8": 33,
        "fp16": 60
      },
      "url": "https://quelllm.fr/modele/jais-30b",
      "apiUrl": "https://quelllm.fr/api/models/jais-30b.json"
    },
    {
      "id": "jais-70b",
      "name": "Jais Adapted 70B Chat",
      "author": "MBZUAI / Core42",
      "origin": "ae",
      "family": "Jais",
      "params": 70,
      "license": "Apache 2.0",
      "ctx": 4096,
      "tags": [
        "chat",
        "general",
        "multilingual"
      ],
      "vram": {
        "q4": 40,
        "q5": 48,
        "q8": 75,
        "fp16": 140
      },
      "url": "https://quelllm.fr/modele/jais-70b",
      "apiUrl": "https://quelllm.fr/api/models/jais-70b.json"
    },
    {
      "id": "sarvam-m-24b",
      "name": "Sarvam-M 24B",
      "author": "Sarvam AI",
      "origin": "in",
      "family": "Sarvam",
      "params": 24,
      "license": "Apache 2.0",
      "ctx": 32768,
      "tags": [
        "chat",
        "general",
        "reasoning",
        "multilingual"
      ],
      "vram": {
        "q4": 14,
        "q5": 17,
        "q8": 26,
        "fp16": 48
      },
      "url": "https://quelllm.fr/modele/sarvam-m-24b",
      "apiUrl": "https://quelllm.fr/api/models/sarvam-m-24b.json"
    },
    {
      "id": "salamandra-7b",
      "name": "Salamandra 7B Instruct",
      "author": "BSC",
      "origin": "es",
      "family": "Salamandra",
      "params": 7.7,
      "license": "Apache 2.0",
      "ctx": 8192,
      "tags": [
        "chat",
        "general",
        "multilingual",
        "fr"
      ],
      "vram": {
        "q4": 5,
        "q5": 6,
        "q8": 9,
        "fp16": 16
      },
      "url": "https://quelllm.fr/modele/salamandra-7b",
      "apiUrl": "https://quelllm.fr/api/models/salamandra-7b.json"
    },
    {
      "id": "salamandra-40b",
      "name": "Salamandra 40B Instruct",
      "author": "BSC",
      "origin": "es",
      "family": "Salamandra",
      "params": 40,
      "license": "Apache 2.0",
      "ctx": 8192,
      "tags": [
        "chat",
        "general",
        "multilingual",
        "fr"
      ],
      "vram": {
        "q4": 24,
        "q5": 29,
        "q8": 43,
        "fp16": 80
      },
      "url": "https://quelllm.fr/modele/salamandra-40b",
      "apiUrl": "https://quelllm.fr/api/models/salamandra-40b.json"
    },
    {
      "id": "eurollm-22b",
      "name": "EuroLLM 22B Instruct 2512",
      "author": "Utter Project",
      "origin": "eu",
      "family": "EuroLLM",
      "params": 22.6,
      "license": "Apache 2.0",
      "ctx": 32768,
      "tags": [
        "chat",
        "general",
        "multilingual",
        "fr"
      ],
      "vram": {
        "q4": 13,
        "q5": 16,
        "q8": 24,
        "fp16": 45
      },
      "url": "https://quelllm.fr/modele/eurollm-22b",
      "apiUrl": "https://quelllm.fr/api/models/eurollm-22b.json"
    },
    {
      "id": "claire-7b",
      "name": "Claire 7B 0.1",
      "author": "LINAGORA",
      "origin": "fr",
      "family": "Claire",
      "params": 7,
      "license": "CC-BY-NC-SA 4.0",
      "ctx": 2048,
      "tags": [
        "chat",
        "fr"
      ],
      "vram": {
        "q4": 5,
        "q5": 6,
        "q8": 9,
        "fp16": 14
      },
      "url": "https://quelllm.fr/modele/claire-7b",
      "apiUrl": "https://quelllm.fr/api/models/claire-7b.json"
    },
    {
      "id": "jamba-15-mini",
      "name": "Jamba 1.5 Mini",
      "author": "AI21 Labs",
      "origin": "il",
      "family": "Jamba",
      "params": 52,
      "license": "Jamba Open Model License",
      "ctx": 256000,
      "tags": [
        "chat",
        "general",
        "moe",
        "multilingual"
      ],
      "vram": {
        "q4": 30,
        "q5": 37,
        "q8": 55,
        "fp16": 104
      },
      "url": "https://quelllm.fr/modele/jamba-15-mini",
      "apiUrl": "https://quelllm.fr/api/models/jamba-15-mini.json"
    },
    {
      "id": "hunyuan-a13b",
      "name": "Hunyuan-A13B Instruct",
      "author": "Tencent",
      "origin": "cn",
      "family": "Hunyuan",
      "params": 80,
      "license": "Tencent Hunyuan License",
      "ctx": 262144,
      "tags": [
        "chat",
        "general",
        "reasoning",
        "moe"
      ],
      "vram": {
        "q4": 48,
        "q5": 57,
        "q8": 85,
        "fp16": 160
      },
      "url": "https://quelllm.fr/modele/hunyuan-a13b",
      "apiUrl": "https://quelllm.fr/api/models/hunyuan-a13b.json"
    },
    {
      "id": "llava-onevision-7b",
      "name": "LLaVA-OneVision 7B",
      "author": "LMMs-Lab",
      "origin": "sg",
      "family": "LLaVA",
      "params": 7,
      "license": "Apache 2.0",
      "ctx": 32768,
      "tags": [
        "vision",
        "chat"
      ],
      "vram": {
        "q4": 5,
        "q5": 6,
        "q8": 9,
        "fp16": 16
      },
      "url": "https://quelllm.fr/modele/llava-onevision-7b",
      "apiUrl": "https://quelllm.fr/api/models/llava-onevision-7b.json"
    },
    {
      "id": "llava-onevision-72b",
      "name": "LLaVA-OneVision 72B",
      "author": "LMMs-Lab",
      "origin": "sg",
      "family": "LLaVA",
      "params": 72,
      "license": "Apache 2.0",
      "ctx": 32768,
      "tags": [
        "vision",
        "chat"
      ],
      "vram": {
        "q4": 42,
        "q5": 50,
        "q8": 78,
        "fp16": 144
      },
      "url": "https://quelllm.fr/modele/llava-onevision-72b",
      "apiUrl": "https://quelllm.fr/api/models/llava-onevision-72b.json"
    },
    {
      "id": "arctic-instruct",
      "name": "Snowflake Arctic Instruct",
      "author": "Snowflake",
      "origin": "us",
      "family": "Arctic",
      "params": 480,
      "license": "Apache 2.0",
      "ctx": 4096,
      "tags": [
        "chat",
        "general",
        "moe"
      ],
      "vram": {
        "q4": 290,
        "q5": 345,
        "q8": 510,
        "fp16": 960
      },
      "url": "https://quelllm.fr/modele/arctic-instruct",
      "apiUrl": "https://quelllm.fr/api/models/arctic-instruct.json"
    },
    {
      "id": "grok-1",
      "name": "Grok-1 (base)",
      "author": "xAI",
      "origin": "us",
      "family": "Grok",
      "params": 314,
      "license": "Apache 2.0",
      "ctx": 8192,
      "tags": [
        "chat",
        "general",
        "moe"
      ],
      "vram": {
        "q4": 188,
        "q5": 225,
        "q8": 335,
        "fp16": 630
      },
      "url": "https://quelllm.fr/modele/grok-1",
      "apiUrl": "https://quelllm.fr/api/models/grok-1.json"
    },
    {
      "id": "gpt-oss-120b",
      "name": "gpt-oss 120B",
      "author": "OpenAI",
      "origin": "us",
      "family": "gpt-oss",
      "params": 117,
      "license": "Apache 2.0",
      "ctx": 128000,
      "tags": [
        "chat",
        "general",
        "reasoning",
        "moe"
      ],
      "vram": {
        "q4": 70,
        "q5": 85,
        "q8": 125,
        "fp16": 234
      },
      "url": "https://quelllm.fr/modele/gpt-oss-120b",
      "apiUrl": "https://quelllm.fr/api/models/gpt-oss-120b.json"
    },
    {
      "id": "gpt-oss-20b",
      "name": "gpt-oss 20B",
      "author": "OpenAI",
      "origin": "us",
      "family": "gpt-oss",
      "params": 21,
      "license": "Apache 2.0",
      "ctx": 128000,
      "tags": [
        "chat",
        "general",
        "reasoning",
        "moe",
        "small"
      ],
      "vram": {
        "q4": 13,
        "q5": 16,
        "q8": 23,
        "fp16": 42
      },
      "url": "https://quelllm.fr/modele/gpt-oss-20b",
      "apiUrl": "https://quelllm.fr/api/models/gpt-oss-20b.json"
    },
    {
      "id": "kimi-k26",
      "name": "Kimi K2.6",
      "author": "Moonshot AI",
      "origin": "cn",
      "family": "Kimi",
      "params": 1000,
      "license": "Modified MIT",
      "ctx": 256000,
      "tags": [
        "chat",
        "general",
        "vision",
        "moe"
      ],
      "vram": {
        "q4": 600,
        "q5": 720,
        "q8": 1080,
        "fp16": 2000
      },
      "url": "https://quelllm.fr/modele/kimi-k26",
      "apiUrl": "https://quelllm.fr/api/models/kimi-k26.json"
    },
    {
      "id": "qwen3-vl-235b",
      "name": "Qwen 3 VL 235B-A22B",
      "author": "Alibaba",
      "origin": "cn",
      "family": "Qwen",
      "params": 235,
      "license": "Apache 2.0",
      "ctx": 262144,
      "tags": [
        "vision",
        "chat",
        "general",
        "moe",
        "multilingual"
      ],
      "vram": {
        "q4": 142,
        "q5": 170,
        "q8": 250,
        "fp16": 470
      },
      "url": "https://quelllm.fr/modele/qwen3-vl-235b",
      "apiUrl": "https://quelllm.fr/api/models/qwen3-vl-235b.json"
    },
    {
      "id": "qwen3-vl-30b",
      "name": "Qwen 3 VL 30B-A3B",
      "author": "Alibaba",
      "origin": "cn",
      "family": "Qwen",
      "params": 30,
      "license": "Apache 2.0",
      "ctx": 262144,
      "tags": [
        "vision",
        "chat",
        "general",
        "moe",
        "multilingual"
      ],
      "vram": {
        "q4": 19,
        "q5": 23,
        "q8": 35,
        "fp16": 62
      },
      "url": "https://quelllm.fr/modele/qwen3-vl-30b",
      "apiUrl": "https://quelllm.fr/api/models/qwen3-vl-30b.json"
    },
    {
      "id": "qwen3-vl-8b",
      "name": "Qwen 3 VL 8B",
      "author": "Alibaba",
      "origin": "cn",
      "family": "Qwen",
      "params": 8,
      "license": "Apache 2.0",
      "ctx": 262144,
      "tags": [
        "vision",
        "chat",
        "general",
        "multilingual"
      ],
      "vram": {
        "q4": 6,
        "q5": 7,
        "q8": 10,
        "fp16": 16
      },
      "url": "https://quelllm.fr/modele/qwen3-vl-8b",
      "apiUrl": "https://quelllm.fr/api/models/qwen3-vl-8b.json"
    },
    {
      "id": "ernie-45-300b",
      "name": "ERNIE 4.5 300B-A47B",
      "author": "Baidu",
      "origin": "cn",
      "family": "ERNIE",
      "params": 300,
      "license": "Apache 2.0",
      "ctx": 131072,
      "tags": [
        "chat",
        "general",
        "multilingual",
        "moe"
      ],
      "vram": {
        "q4": 180,
        "q5": 215,
        "q8": 320,
        "fp16": 600
      },
      "url": "https://quelllm.fr/modele/ernie-45-300b",
      "apiUrl": "https://quelllm.fr/api/models/ernie-45-300b.json"
    },
    {
      "id": "ernie-45-21b",
      "name": "ERNIE 4.5 21B-A3B Thinking",
      "author": "Baidu",
      "origin": "cn",
      "family": "ERNIE",
      "params": 21,
      "license": "Apache 2.0",
      "ctx": 131072,
      "tags": [
        "reasoning",
        "moe"
      ],
      "vram": {
        "q4": 13,
        "q5": 16,
        "q8": 23,
        "fp16": 42
      },
      "url": "https://quelllm.fr/modele/ernie-45-21b",
      "apiUrl": "https://quelllm.fr/api/models/ernie-45-21b.json"
    },
    {
      "id": "ring-1t",
      "name": "Ring-1T",
      "author": "Ant Group",
      "origin": "cn",
      "family": "Ring",
      "params": 1000,
      "license": "MIT",
      "ctx": 131072,
      "tags": [
        "reasoning",
        "moe"
      ],
      "vram": {
        "q4": 600,
        "q5": 720,
        "q8": 1080,
        "fp16": 2000
      },
      "url": "https://quelllm.fr/modele/ring-1t",
      "apiUrl": "https://quelllm.fr/api/models/ring-1t.json"
    },
    {
      "id": "seed-oss-36b",
      "name": "Seed-OSS 36B Instruct",
      "author": "ByteDance",
      "origin": "cn",
      "family": "Seed",
      "params": 36,
      "license": "Apache 2.0",
      "ctx": 524288,
      "tags": [
        "chat",
        "general"
      ],
      "vram": {
        "q4": 22,
        "q5": 26,
        "q8": 40,
        "fp16": 72
      },
      "url": "https://quelllm.fr/modele/seed-oss-36b",
      "apiUrl": "https://quelllm.fr/api/models/seed-oss-36b.json"
    },
    {
      "id": "exaone-45-33b",
      "name": "EXAONE 4.5 33B",
      "author": "LG AI Research",
      "origin": "kr",
      "family": "EXAONE",
      "params": 33,
      "license": "EXAONE AI Model License",
      "ctx": 262144,
      "tags": [
        "chat",
        "general",
        "vision",
        "multilingual"
      ],
      "vram": {
        "q4": 20,
        "q5": 24,
        "q8": 36,
        "fp16": 66
      },
      "url": "https://quelllm.fr/modele/exaone-45-33b",
      "apiUrl": "https://quelllm.fr/api/models/exaone-45-33b.json"
    },
    {
      "id": "nemotron-nano-3-30b",
      "name": "Nemotron Nano 3 30B-A3B",
      "author": "NVIDIA",
      "origin": "us",
      "family": "Nemotron",
      "params": 30,
      "license": "NVIDIA Open Model License",
      "ctx": 1000000,
      "tags": [
        "chat",
        "general",
        "reasoning",
        "moe"
      ],
      "vram": {
        "q4": 19,
        "q5": 23,
        "q8": 35,
        "fp16": 62
      },
      "url": "https://quelllm.fr/modele/nemotron-nano-3-30b",
      "apiUrl": "https://quelllm.fr/api/models/nemotron-nano-3-30b.json"
    },
    {
      "id": "nemotron-nano-v2-vl-12b",
      "name": "Nemotron Nano v2 VL 12B",
      "author": "NVIDIA",
      "origin": "us",
      "family": "Nemotron",
      "params": 12.6,
      "license": "NVIDIA Open Model License",
      "ctx": 128000,
      "tags": [
        "vision",
        "chat"
      ],
      "vram": {
        "q4": 8,
        "q5": 10,
        "q8": 14,
        "fp16": 25
      },
      "url": "https://quelllm.fr/modele/nemotron-nano-v2-vl-12b",
      "apiUrl": "https://quelllm.fr/api/models/nemotron-nano-v2-vl-12b.json"
    },
    {
      "id": "apertus-70b",
      "name": "Apertus 70B",
      "author": "Swiss AI",
      "origin": "ch",
      "family": "Apertus",
      "params": 70,
      "license": "Apache 2.0",
      "ctx": 65536,
      "tags": [
        "chat",
        "general",
        "multilingual",
        "fr"
      ],
      "vram": {
        "q4": 40,
        "q5": 48,
        "q8": 75,
        "fp16": 140
      },
      "url": "https://quelllm.fr/modele/apertus-70b",
      "apiUrl": "https://quelllm.fr/api/models/apertus-70b.json"
    },
    {
      "id": "apertus-8b",
      "name": "Apertus 8B",
      "author": "Swiss AI",
      "origin": "ch",
      "family": "Apertus",
      "params": 8,
      "license": "Apache 2.0",
      "ctx": 65536,
      "tags": [
        "chat",
        "general",
        "multilingual",
        "fr"
      ],
      "vram": {
        "q4": 6,
        "q5": 7,
        "q8": 10,
        "fp16": 16
      },
      "url": "https://quelllm.fr/modele/apertus-8b",
      "apiUrl": "https://quelllm.fr/api/models/apertus-8b.json"
    },
    {
      "id": "trinity-mini-26b",
      "name": "Trinity Mini 26B-A3B",
      "author": "Arcee AI",
      "origin": "us",
      "family": "Trinity",
      "params": 26,
      "license": "Apache 2.0",
      "ctx": 131072,
      "tags": [
        "chat",
        "general",
        "moe"
      ],
      "vram": {
        "q4": 15,
        "q5": 18,
        "q8": 28,
        "fp16": 52
      },
      "url": "https://quelllm.fr/modele/trinity-mini-26b",
      "apiUrl": "https://quelllm.fr/api/models/trinity-mini-26b.json"
    },
    {
      "id": "hunyuan-20-large",
      "name": "Hunyuan Large 2.0",
      "author": "Tencent",
      "origin": "cn",
      "family": "Hunyuan",
      "params": 406,
      "license": "Tencent Hunyuan License",
      "ctx": 262144,
      "tags": [
        "chat",
        "general",
        "reasoning",
        "moe"
      ],
      "vram": {
        "q4": 245,
        "q5": 290,
        "q8": 435,
        "fp16": 810
      },
      "url": "https://quelllm.fr/modele/hunyuan-20-large",
      "apiUrl": "https://quelllm.fr/api/models/hunyuan-20-large.json"
    },
    {
      "id": "internvl-35-8b",
      "name": "InternVL 3.5 8B",
      "author": "OpenGVLab",
      "origin": "cn",
      "family": "InternVL",
      "params": 8,
      "license": "Apache 2.0",
      "ctx": 32768,
      "tags": [
        "vision",
        "chat"
      ],
      "vram": {
        "q4": 6,
        "q5": 7,
        "q8": 10,
        "fp16": 16
      },
      "url": "https://quelllm.fr/modele/internvl-35-8b",
      "apiUrl": "https://quelllm.fr/api/models/internvl-35-8b.json"
    },
    {
      "id": "mimo-v2-flash",
      "name": "MiMo V2 Flash",
      "author": "Xiaomi",
      "origin": "cn",
      "family": "MiMo",
      "params": 309,
      "license": "MIT",
      "ctx": 128000,
      "tags": [
        "chat",
        "code",
        "moe"
      ],
      "vram": {
        "q4": 185,
        "q5": 222,
        "q8": 330,
        "fp16": 618
      },
      "url": "https://quelllm.fr/modele/mimo-v2-flash",
      "apiUrl": "https://quelllm.fr/api/models/mimo-v2-flash.json"
    },
    {
      "id": "rakuten-ai-3",
      "name": "Rakuten AI 3.0",
      "author": "Rakuten",
      "origin": "jp",
      "family": "Rakuten",
      "params": 700,
      "license": "Apache 2.0",
      "ctx": 32768,
      "tags": [
        "chat",
        "general",
        "multilingual",
        "moe"
      ],
      "vram": {
        "q4": 420,
        "q5": 500,
        "q8": 745,
        "fp16": 1400
      },
      "url": "https://quelllm.fr/modele/rakuten-ai-3",
      "apiUrl": "https://quelllm.fr/api/models/rakuten-ai-3.json"
    },
    {
      "id": "kanana-2-30b",
      "name": "Kanana 2 30B-A3B Thinking",
      "author": "Kakao",
      "origin": "kr",
      "family": "Kanana",
      "params": 30,
      "license": "Apache 2.0",
      "ctx": 131072,
      "tags": [
        "chat",
        "general",
        "reasoning",
        "multilingual",
        "moe"
      ],
      "vram": {
        "q4": 18,
        "q5": 22,
        "q8": 33,
        "fp16": 60
      },
      "url": "https://quelllm.fr/modele/kanana-2-30b",
      "apiUrl": "https://quelllm.fr/api/models/kanana-2-30b.json"
    },
    {
      "id": "deepseek-ocr",
      "name": "DeepSeek-OCR",
      "author": "DeepSeek",
      "origin": "cn",
      "family": "DeepSeek",
      "params": 3,
      "license": "MIT",
      "ctx": 8192,
      "tags": [
        "vision",
        "chat",
        "small"
      ],
      "vram": {
        "q4": 2,
        "q5": 2.5,
        "q8": 4,
        "fp16": 6
      },
      "url": "https://quelllm.fr/modele/deepseek-ocr",
      "apiUrl": "https://quelllm.fr/api/models/deepseek-ocr.json"
    },
    {
      "id": "hunyuan-ocr-1b",
      "name": "HunyuanOCR 1B",
      "author": "Tencent",
      "origin": "cn",
      "family": "Hunyuan",
      "params": 1,
      "license": "Tencent Hunyuan License",
      "ctx": 8192,
      "tags": [
        "vision",
        "chat",
        "small"
      ],
      "vram": {
        "q4": 0.8,
        "q5": 1,
        "q8": 1.5,
        "fp16": 2
      },
      "url": "https://quelllm.fr/modele/hunyuan-ocr-1b",
      "apiUrl": "https://quelllm.fr/api/models/hunyuan-ocr-1b.json"
    },
    {
      "id": "gemma4-26b-moe",
      "name": "Gemma 4 26B-A4B MoE",
      "author": "Google",
      "origin": "us",
      "family": "Gemma",
      "params": 26,
      "license": "Gemma",
      "ctx": 128000,
      "tags": [
        "chat",
        "general",
        "vision",
        "audio",
        "multilingual",
        "moe"
      ],
      "vram": {
        "q4": 16,
        "q5": 19,
        "q8": 28,
        "fp16": 52
      },
      "url": "https://quelllm.fr/modele/gemma4-26b-moe",
      "apiUrl": "https://quelllm.fr/api/models/gemma4-26b-moe.json"
    },
    {
      "id": "dots-llm1",
      "name": "dots.llm1 Instruct",
      "author": "Rednote",
      "origin": "cn",
      "family": "dots",
      "params": 142,
      "license": "MIT",
      "ctx": 32768,
      "tags": [
        "chat",
        "general",
        "moe"
      ],
      "vram": {
        "q4": 85,
        "q5": 102,
        "q8": 152,
        "fp16": 284
      },
      "url": "https://quelllm.fr/modele/dots-llm1",
      "apiUrl": "https://quelllm.fr/api/models/dots-llm1.json"
    },
    {
      "id": "qwen3-omni-30b",
      "name": "Qwen 3 Omni 30B-A3B",
      "author": "Alibaba",
      "origin": "cn",
      "family": "Qwen",
      "params": 30,
      "license": "Apache 2.0",
      "ctx": 131072,
      "tags": [
        "vision",
        "audio",
        "chat",
        "moe"
      ],
      "vram": {
        "q4": 19,
        "q5": 23,
        "q8": 35,
        "fp16": 62
      },
      "url": "https://quelllm.fr/modele/qwen3-omni-30b",
      "apiUrl": "https://quelllm.fr/api/models/qwen3-omni-30b.json"
    },
    {
      "id": "qwen35-122b-a10b",
      "name": "Qwen 3.5 122B-A10B",
      "author": "Alibaba",
      "origin": "cn",
      "family": "Qwen",
      "params": 122,
      "license": "Apache 2.0",
      "ctx": 262000,
      "tags": [
        "chat",
        "general",
        "reasoning",
        "multilingual",
        "moe"
      ],
      "vram": {
        "q4": 73,
        "q5": 88,
        "q8": 131,
        "fp16": 244
      },
      "url": "https://quelllm.fr/modele/qwen35-122b-a10b",
      "apiUrl": "https://quelllm.fr/api/models/qwen35-122b-a10b.json"
    },
    {
      "id": "pangu-pro-moe-72b",
      "name": "Pangu Pro MoE 72B",
      "author": "Huawei",
      "origin": "cn",
      "family": "Pangu",
      "params": 72,
      "license": "Pangu Model License",
      "ctx": 32768,
      "tags": [
        "chat",
        "general",
        "moe"
      ],
      "vram": {
        "q4": 42,
        "q5": 50,
        "q8": 78,
        "fp16": 144
      },
      "url": "https://quelllm.fr/modele/pangu-pro-moe-72b",
      "apiUrl": "https://quelllm.fr/api/models/pangu-pro-moe-72b.json"
    },
    {
      "id": "qwen36-27b",
      "name": "Qwen 3.6 27B",
      "author": "Alibaba",
      "origin": "cn",
      "family": "Qwen",
      "params": 27,
      "license": "Apache 2.0",
      "ctx": 262144,
      "tags": [
        "chat",
        "general",
        "code",
        "reasoning",
        "vision",
        "multilingual"
      ],
      "vram": {
        "q4": 16,
        "q5": 19,
        "q8": 29,
        "fp16": 54
      },
      "url": "https://quelllm.fr/modele/qwen36-27b",
      "apiUrl": "https://quelllm.fr/api/models/qwen36-27b.json"
    },
    {
      "id": "deepseek-v4-pro",
      "name": "DeepSeek V4 Pro 1.6T",
      "author": "DeepSeek",
      "origin": "cn",
      "family": "DeepSeek",
      "params": 1600,
      "license": "MIT",
      "ctx": 1000000,
      "tags": [
        "chat",
        "general",
        "reasoning",
        "moe",
        "multilingual"
      ],
      "vram": {
        "q4": 960,
        "q5": 1150,
        "q8": 1700,
        "fp16": 3200
      },
      "url": "https://quelllm.fr/modele/deepseek-v4-pro",
      "apiUrl": "https://quelllm.fr/api/models/deepseek-v4-pro.json"
    },
    {
      "id": "deepseek-v4-flash",
      "name": "DeepSeek V4 Flash 284B",
      "author": "DeepSeek",
      "origin": "cn",
      "family": "DeepSeek",
      "params": 284,
      "license": "MIT",
      "ctx": 1000000,
      "tags": [
        "chat",
        "general",
        "reasoning",
        "moe",
        "multilingual"
      ],
      "vram": {
        "q4": 170,
        "q5": 205,
        "q8": 305,
        "fp16": 568
      },
      "url": "https://quelllm.fr/modele/deepseek-v4-flash",
      "apiUrl": "https://quelllm.fr/api/models/deepseek-v4-flash.json"
    },
    {
      "id": "tencent-hy3-preview",
      "name": "Tencent Hy3 Preview 295B",
      "author": "Tencent",
      "origin": "cn",
      "family": "Hunyuan",
      "params": 295,
      "license": "Tencent Hunyuan License",
      "ctx": 256000,
      "tags": [
        "chat",
        "general",
        "reasoning",
        "moe"
      ],
      "vram": {
        "q4": 177,
        "q5": 210,
        "q8": 315,
        "fp16": 590
      },
      "url": "https://quelllm.fr/modele/tencent-hy3-preview",
      "apiUrl": "https://quelllm.fr/api/models/tencent-hy3-preview.json"
    },
    {
      "id": "llada2-uni",
      "name": "LLaDA 2.0 Uni 16B",
      "author": "Ant Group / inclusionAI",
      "origin": "cn",
      "family": "LLaDA",
      "params": 16,
      "license": "Apache 2.0",
      "ctx": 8192,
      "tags": [
        "chat",
        "vision",
        "general",
        "moe"
      ],
      "vram": {
        "q4": 18,
        "q5": 22,
        "q8": 30,
        "fp16": 47
      },
      "url": "https://quelllm.fr/modele/llada2-uni",
      "apiUrl": "https://quelllm.fr/api/models/llada2-uni.json"
    },
    {
      "id": "mimo-v25-pro",
      "name": "MiMo V2.5 Pro",
      "author": "Xiaomi",
      "origin": "cn",
      "family": "MiMo",
      "params": 1020,
      "license": "MIT",
      "ctx": 1000000,
      "tags": [
        "chat",
        "reasoning",
        "code",
        "moe",
        "multilingual"
      ],
      "vram": {
        "q4": 595,
        "q5": 720,
        "q8": 1090,
        "fp16": 2040
      },
      "url": "https://quelllm.fr/modele/mimo-v25-pro",
      "apiUrl": "https://quelllm.fr/api/models/mimo-v25-pro.json"
    },
    {
      "id": "mimo-v25",
      "name": "MiMo V2.5",
      "author": "Xiaomi",
      "origin": "cn",
      "family": "MiMo",
      "params": 310,
      "license": "MIT",
      "ctx": 1000000,
      "tags": [
        "chat",
        "vision",
        "audio",
        "moe",
        "multilingual"
      ],
      "vram": {
        "q4": 180,
        "q5": 220,
        "q8": 330,
        "fp16": 620
      },
      "url": "https://quelllm.fr/modele/mimo-v25",
      "apiUrl": "https://quelllm.fr/api/models/mimo-v25.json"
    },
    {
      "id": "granite41-8b",
      "name": "Granite 4.1 8B Instruct",
      "author": "IBM",
      "origin": "us",
      "family": "Granite",
      "params": 8,
      "license": "Apache 2.0",
      "ctx": 131072,
      "tags": [
        "chat",
        "general",
        "code",
        "multilingual"
      ],
      "vram": {
        "q4": 5,
        "q5": 6,
        "q8": 9,
        "fp16": 16
      },
      "url": "https://quelllm.fr/modele/granite41-8b",
      "apiUrl": "https://quelllm.fr/api/models/granite41-8b.json"
    },
    {
      "id": "nemotron-omni-30b",
      "name": "Nemotron 3 Nano Omni 30B-A3B",
      "author": "NVIDIA",
      "origin": "us",
      "family": "Nemotron",
      "params": 30,
      "license": "NVIDIA Open Model License",
      "ctx": 256000,
      "tags": [
        "chat",
        "vision",
        "audio",
        "reasoning",
        "moe"
      ],
      "vram": {
        "q4": 21,
        "q5": 25,
        "q8": 33,
        "fp16": 62
      },
      "url": "https://quelllm.fr/modele/nemotron-omni-30b",
      "apiUrl": "https://quelllm.fr/api/models/nemotron-omni-30b.json"
    },
    {
      "id": "laguna-xs2",
      "name": "Laguna XS.2",
      "author": "Poolside",
      "origin": "us",
      "family": "Laguna",
      "params": 33,
      "license": "Apache 2.0",
      "ctx": 131072,
      "tags": [
        "code",
        "moe"
      ],
      "vram": {
        "q4": 19,
        "q5": 23,
        "q8": 35,
        "fp16": 66
      },
      "url": "https://quelllm.fr/modele/laguna-xs2",
      "apiUrl": "https://quelllm.fr/api/models/laguna-xs2.json"
    },
    {
      "id": "granite41-30b",
      "name": "Granite 4.1 30B Instruct",
      "author": "IBM",
      "origin": "us",
      "family": "Granite",
      "params": 30,
      "license": "Apache 2.0",
      "ctx": 131072,
      "tags": [
        "chat",
        "general",
        "code",
        "multilingual"
      ],
      "vram": {
        "q4": 17,
        "q5": 21,
        "q8": 32,
        "fp16": 60
      },
      "url": "https://quelllm.fr/modele/granite41-30b",
      "apiUrl": "https://quelllm.fr/api/models/granite41-30b.json"
    },
    {
      "id": "granite41-3b",
      "name": "Granite 4.1 3B Instruct",
      "author": "IBM",
      "origin": "us",
      "family": "Granite",
      "params": 3,
      "license": "Apache 2.0",
      "ctx": 131072,
      "tags": [
        "chat",
        "general",
        "code",
        "multilingual",
        "small"
      ],
      "vram": {
        "q4": 2,
        "q5": 2.5,
        "q8": 3,
        "fp16": 6
      },
      "url": "https://quelllm.fr/modele/granite41-3b",
      "apiUrl": "https://quelllm.fr/api/models/granite41-3b.json"
    },
    {
      "id": "ling-26-1t",
      "name": "Ling 2.6 1T",
      "author": "Ant Group / inclusionAI",
      "origin": "cn",
      "family": "Ling",
      "params": 1000,
      "license": "MIT",
      "ctx": 262144,
      "tags": [
        "chat",
        "general",
        "moe",
        "multilingual"
      ],
      "vram": {
        "q4": 580,
        "q5": 710,
        "q8": 1070,
        "fp16": 2000
      },
      "url": "https://quelllm.fr/modele/ling-26-1t",
      "apiUrl": "https://quelllm.fr/api/models/ling-26-1t.json"
    },
    {
      "id": "gemma4-e2b",
      "name": "Gemma 4 E2B",
      "author": "Google",
      "origin": "us",
      "family": "Gemma",
      "params": 2,
      "license": "Gemma",
      "ctx": 128000,
      "tags": [
        "chat",
        "vision",
        "small",
        "multilingual",
        "reasoning"
      ],
      "vram": {
        "q4": 7,
        "q5": 9,
        "q8": 13,
        "fp16": 25
      },
      "url": "https://quelllm.fr/modele/gemma4-e2b",
      "apiUrl": "https://quelllm.fr/api/models/gemma4-e2b.json"
    },
    {
      "id": "nemotron-cascade-2",
      "name": "Nemotron Cascade 2 30B-A3B",
      "author": "NVIDIA",
      "origin": "us",
      "family": "Nemotron",
      "params": 30,
      "license": "NVIDIA Open Model License",
      "ctx": 128000,
      "tags": [
        "chat",
        "code",
        "reasoning",
        "moe"
      ],
      "vram": {
        "q4": 17,
        "q5": 21,
        "q8": 32,
        "fp16": 60
      },
      "url": "https://quelllm.fr/modele/nemotron-cascade-2",
      "apiUrl": "https://quelllm.fr/api/models/nemotron-cascade-2.json"
    },
    {
      "id": "nemotron3",
      "name": "Nemotron 3 33B",
      "author": "NVIDIA",
      "origin": "us",
      "family": "Nemotron",
      "params": 33,
      "license": "NVIDIA Open Model License",
      "ctx": 128000,
      "tags": [
        "chat",
        "code",
        "reasoning"
      ],
      "vram": {
        "q4": 19,
        "q5": 23,
        "q8": 35,
        "fp16": 66
      },
      "url": "https://quelllm.fr/modele/nemotron3",
      "apiUrl": "https://quelllm.fr/api/models/nemotron3.json"
    },
    {
      "id": "nemotron-3-nano",
      "name": "Nemotron 3 Nano 30B-A3B",
      "author": "NVIDIA",
      "origin": "us",
      "family": "Nemotron",
      "params": 30,
      "license": "NVIDIA Open Model License",
      "ctx": 128000,
      "tags": [
        "chat",
        "code",
        "reasoning",
        "moe"
      ],
      "vram": {
        "q4": 17,
        "q5": 21,
        "q8": 32,
        "fp16": 60
      },
      "url": "https://quelllm.fr/modele/nemotron-3-nano",
      "apiUrl": "https://quelllm.fr/api/models/nemotron-3-nano.json"
    },
    {
      "id": "medgemma",
      "name": "MedGemma 4B",
      "author": "Google",
      "origin": "us",
      "family": "Gemma",
      "params": 4,
      "license": "Gemma",
      "ctx": 128000,
      "tags": [
        "chat",
        "vision",
        "multilingual",
        "small"
      ],
      "vram": {
        "q4": 2.3,
        "q5": 2.8,
        "q8": 4.3,
        "fp16": 8
      },
      "url": "https://quelllm.fr/modele/medgemma",
      "apiUrl": "https://quelllm.fr/api/models/medgemma.json"
    },
    {
      "id": "gemma4",
      "name": "Gemma 4 2B",
      "author": "Google",
      "origin": "us",
      "family": "Gemma",
      "params": 2,
      "license": "Gemma",
      "ctx": 128000,
      "tags": [
        "chat",
        "vision",
        "multilingual",
        "small"
      ],
      "vram": {
        "q4": 1.2,
        "q5": 1.4,
        "q8": 2.1,
        "fp16": 4
      },
      "url": "https://quelllm.fr/modele/gemma4",
      "apiUrl": "https://quelllm.fr/api/models/gemma4.json"
    },
    {
      "id": "qwen3-5",
      "name": "Qwen 3.5 0.8B",
      "author": "Alibaba",
      "origin": "cn",
      "family": "Qwen",
      "params": 0.8,
      "license": "Apache 2.0",
      "ctx": 256000,
      "tags": [
        "chat",
        "general",
        "small",
        "multilingual"
      ],
      "vram": {
        "q4": 0.5,
        "q5": 0.6,
        "q8": 0.9,
        "fp16": 1.6
      },
      "url": "https://quelllm.fr/modele/qwen3-5",
      "apiUrl": "https://quelllm.fr/api/models/qwen3-5.json"
    },
    {
      "id": "medgemma1-5",
      "name": "MedGemma 1.5 4B",
      "author": "Google",
      "origin": "us",
      "family": "Gemma",
      "params": 4,
      "license": "Gemma",
      "ctx": 128000,
      "tags": [
        "chat",
        "vision",
        "multilingual",
        "small"
      ],
      "vram": {
        "q4": 2.3,
        "q5": 2.8,
        "q8": 4.3,
        "fp16": 8
      },
      "url": "https://quelllm.fr/modele/medgemma1-5",
      "apiUrl": "https://quelllm.fr/api/models/medgemma1-5.json"
    },
    {
      "id": "granite4-1",
      "name": "Granite 4.1",
      "author": "IBM",
      "origin": "us",
      "family": "Granite",
      "params": 3,
      "license": "Apache 2.0",
      "ctx": 128000,
      "tags": [
        "chat",
        "code"
      ],
      "vram": {
        "q4": 1.7,
        "q5": 2.1,
        "q8": 3.2,
        "fp16": 6
      },
      "url": "https://quelllm.fr/modele/granite4-1",
      "apiUrl": "https://quelllm.fr/api/models/granite4-1.json"
    },
    {
      "id": "qwen3-6",
      "name": "Qwen 3.6 27B",
      "author": "Alibaba",
      "origin": "cn",
      "family": "Qwen",
      "params": 27,
      "license": "Qwen License",
      "ctx": 256000,
      "tags": [
        "chat",
        "code",
        "reasoning",
        "vision",
        "multilingual"
      ],
      "vram": {
        "q4": 16,
        "q5": 19,
        "q8": 29,
        "fp16": 54
      },
      "url": "https://quelllm.fr/modele/qwen3-6",
      "apiUrl": "https://quelllm.fr/api/models/qwen3-6.json"
    },
    {
      "id": "lfm2-5-thinking",
      "name": "LFM2.5 Thinking 1.2B",
      "author": "Liquid AI",
      "origin": "us",
      "family": "LFM",
      "params": 1.2,
      "license": "LFM Open License v1.0",
      "ctx": 32768,
      "tags": [
        "chat",
        "general",
        "reasoning",
        "small"
      ],
      "vram": {
        "q4": 0.7,
        "q5": 0.9,
        "q8": 1.3,
        "fp16": 2.4
      },
      "url": "https://quelllm.fr/modele/lfm2-5-thinking",
      "apiUrl": "https://quelllm.fr/api/models/lfm2-5-thinking.json"
    },
    {
      "id": "glm-4-7-flash",
      "name": "GLM 4.7 Flash",
      "author": "Zhipu AI",
      "origin": "cn",
      "family": "GLM",
      "params": 3,
      "license": "MIT",
      "ctx": 128000,
      "tags": [
        "chat",
        "multilingual"
      ],
      "vram": {
        "q4": 1.7,
        "q5": 2.1,
        "q8": 3.2,
        "fp16": 6
      },
      "url": "https://quelllm.fr/modele/glm-4-7-flash",
      "apiUrl": "https://quelllm.fr/api/models/glm-4-7-flash.json"
    },
    {
      "id": "osint-researcher",
      "name": "OSINT Researcher 4B",
      "author": "Yemen-JPT",
      "origin": "ye",
      "family": "Qwen",
      "params": 4,
      "license": "Apache 2.0",
      "ctx": 32768,
      "tags": [
        "chat",
        "small",
        "multilingual"
      ],
      "vram": {
        "q4": 2.3,
        "q5": 2.8,
        "q8": 4.3,
        "fp16": 8
      },
      "url": "https://quelllm.fr/modele/osint-researcher",
      "apiUrl": "https://quelllm.fr/api/models/osint-researcher.json"
    },
    {
      "id": "lfm2",
      "name": "LFM2 24B",
      "author": "Liquid AI",
      "origin": "us",
      "family": "LFM",
      "params": 24,
      "license": "LFM Open License v1.0",
      "ctx": 32768,
      "tags": [
        "chat",
        "general"
      ],
      "vram": {
        "q4": 14,
        "q5": 17,
        "q8": 26,
        "fp16": 48
      },
      "url": "https://quelllm.fr/modele/lfm2",
      "apiUrl": "https://quelllm.fr/api/models/lfm2.json"
    },
    {
      "id": "glm-5",
      "name": "GLM 5 744B-A40B",
      "author": "Zhipu AI",
      "origin": "cn",
      "family": "GLM",
      "params": 744,
      "license": "MIT",
      "ctx": 128000,
      "tags": [
        "chat",
        "multilingual",
        "moe"
      ],
      "vram": {
        "q4": 432,
        "q5": 528,
        "q8": 796,
        "fp16": 1488
      },
      "url": "https://quelllm.fr/modele/glm-5",
      "apiUrl": "https://quelllm.fr/api/models/glm-5.json"
    },
    {
      "id": "minimax-m2-5",
      "name": "MiniMax M2.5 2.5B",
      "author": "MiniMaxAI",
      "origin": "cn",
      "family": "MiniMax",
      "params": 2.5,
      "license": "Apache 2.0",
      "ctx": 128000,
      "tags": [
        "chat",
        "code",
        "reasoning",
        "small",
        "multilingual"
      ],
      "vram": {
        "q4": 1.4,
        "q5": 1.8,
        "q8": 2.7,
        "fp16": 5
      },
      "url": "https://quelllm.fr/modele/minimax-m2-5",
      "apiUrl": "https://quelllm.fr/api/models/minimax-m2-5.json"
    },
    {
      "id": "glm-ocr",
      "name": "GLM-OCR 1.1B",
      "author": "Zhipu AI",
      "origin": "cn",
      "family": "GLM",
      "params": 1.1,
      "license": "MIT",
      "ctx": 131072,
      "tags": [
        "vision",
        "code",
        "small"
      ],
      "vram": {
        "q4": 0.6,
        "q5": 0.8,
        "q8": 1.2,
        "fp16": 2.2
      },
      "url": "https://quelllm.fr/modele/glm-ocr",
      "apiUrl": "https://quelllm.fr/api/models/glm-ocr.json"
    },
    {
      "id": "nemotron-3-super",
      "name": "Nemotron 3 Super 12B",
      "author": "NVIDIA",
      "origin": "us",
      "family": "Nemotron",
      "params": 12,
      "license": "NVIDIA Open Model License",
      "ctx": 128000,
      "tags": [
        "chat",
        "code",
        "reasoning"
      ],
      "vram": {
        "q4": 7,
        "q5": 9,
        "q8": 13,
        "fp16": 24
      },
      "url": "https://quelllm.fr/modele/nemotron-3-super",
      "apiUrl": "https://quelllm.fr/api/models/nemotron-3-super.json"
    },
    {
      "id": "minicpm5-1b-sft",
      "name": "MiniCPM5 1B SFT",
      "author": "OpenBMB",
      "origin": "cn",
      "family": "MiniCPM",
      "params": 1.1,
      "license": "Apache 2.0",
      "ctx": 32768,
      "tags": [
        "chat",
        "general",
        "small"
      ],
      "vram": {
        "q4": 0.6,
        "q5": 0.8,
        "q8": 1.2,
        "fp16": 2.2
      },
      "url": "https://quelllm.fr/modele/minicpm5-1b-sft",
      "apiUrl": "https://quelllm.fr/api/models/minicpm5-1b-sft.json"
    },
    {
      "id": "minicpm5-1b",
      "name": "MiniCPM5 1B",
      "author": "OpenBMB",
      "origin": "cn",
      "family": "MiniCPM",
      "params": 1.1,
      "license": "Apache 2.0",
      "ctx": 32768,
      "tags": [
        "general",
        "small"
      ],
      "vram": {
        "q4": 0.6,
        "q5": 0.8,
        "q8": 1.2,
        "fp16": 2.2
      },
      "url": "https://quelllm.fr/modele/minicpm5-1b",
      "apiUrl": "https://quelllm.fr/api/models/minicpm5-1b.json"
    },
    {
      "id": "hrm-text-1b",
      "name": "HRM-Text 1B",
      "author": "Sapient",
      "origin": "us",
      "family": "HRM",
      "params": 1.2,
      "license": "Apache 2.0",
      "ctx": 2048,
      "tags": [
        "reasoning",
        "small"
      ],
      "vram": {
        "q4": 0.7,
        "q5": 0.9,
        "q8": 1.3,
        "fp16": 2.4
      },
      "url": "https://quelllm.fr/modele/hrm-text-1b",
      "apiUrl": "https://quelllm.fr/api/models/hrm-text-1b.json"
    },
    {
      "id": "llama-3-1-70b-latamgpt-sft-1-0",
      "name": "Llama 3.1 70B LatamGPT SFT",
      "author": "LatamGPT (CENIA)",
      "origin": "cl",
      "family": "Llama",
      "params": 71,
      "license": "Llama 3.1 Community",
      "ctx": 128000,
      "tags": [
        "chat",
        "general",
        "multilingual"
      ],
      "vram": {
        "q4": 41,
        "q5": 50,
        "q8": 76,
        "fp16": 142
      },
      "url": "https://quelllm.fr/modele/llama-3-1-70b-latamgpt-sft-1-0",
      "apiUrl": "https://quelllm.fr/api/models/llama-3-1-70b-latamgpt-sft-1-0.json"
    }
  ]
}