Skip to content

Commit 45e1474

Browse files
eliasto, Wauplin, and github-actions[bot]
authored
Add OVHcloud AI Endpoints as an Inference Provider (#3541)
* Add OVHcloud AI Endpoints provider

  # Conflicts:
  # docs/source/en/guides/inference.md

* Only add text-generation and conversational task from feedback
* Edit name of class and text-generation
* Remove text_generation capability
* Apply style fixes

---------

Co-authored-by: Lucain <lucain@huggingface.co>
Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
1 parent dd8d310 commit 45e1474

File tree

8 files changed

+104
-33
lines changed

8 files changed

+104
-33
lines changed

docs/source/en/guides/inference.md

Lines changed: 31 additions & 31 deletions
Large diffs are not rendered by default.

src/huggingface_hub/inference/_client.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -135,7 +135,7 @@ class InferenceClient:
135135
Note: for better compatibility with OpenAI's client, `model` has been aliased as `base_url`. Those 2
136136
arguments are mutually exclusive. If a URL is passed as `model` or `base_url` for chat completion, the `(/v1)/chat/completions` suffix path will be appended to the URL.
137137
provider (`str`, *optional*):
138-
Name of the provider to use for inference. Can be `"black-forest-labs"`, `"cerebras"`, `"clarifai"`, `"cohere"`, `"fal-ai"`, `"featherless-ai"`, `"fireworks-ai"`, `"groq"`, `"hf-inference"`, `"hyperbolic"`, `"nebius"`, `"novita"`, `"nscale"`, `"openai"`, `"publicai"`, `"replicate"`, `"sambanova"`, `"scaleway"`, `"together"`, `"wavespeed"` or `"zai-org"`.
138+
Name of the provider to use for inference. Can be `"black-forest-labs"`, `"cerebras"`, `"clarifai"`, `"cohere"`, `"fal-ai"`, `"featherless-ai"`, `"fireworks-ai"`, `"groq"`, `"hf-inference"`, `"hyperbolic"`, `"nebius"`, `"novita"`, `"nscale"`, `"openai"`, `"ovhcloud"`, `"publicai"`, `"replicate"`, `"sambanova"`, `"scaleway"`, `"together"`, `"wavespeed"` or `"zai-org"`.
139139
Defaults to "auto" i.e. the first of the providers available for the model, sorted by the user's order in https://hf.co/settings/inference-providers.
140140
If model is a URL or `base_url` is passed, then `provider` is not used.
141141
token (`str`, *optional*):

src/huggingface_hub/inference/_generated/_async_client.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -126,7 +126,7 @@ class AsyncInferenceClient:
126126
Note: for better compatibility with OpenAI's client, `model` has been aliased as `base_url`. Those 2
127127
arguments are mutually exclusive. If a URL is passed as `model` or `base_url` for chat completion, the `(/v1)/chat/completions` suffix path will be appended to the URL.
128128
provider (`str`, *optional*):
129-
Name of the provider to use for inference. Can be `"black-forest-labs"`, `"cerebras"`, `"clarifai"`, `"cohere"`, `"fal-ai"`, `"featherless-ai"`, `"fireworks-ai"`, `"groq"`, `"hf-inference"`, `"hyperbolic"`, `"nebius"`, `"novita"`, `"nscale"`, `"openai"`, `"publicai"`, `"replicate"`, `"sambanova"`, `"scaleway"`, `"together"`, `"wavespeed"` or `"zai-org"`.
129+
Name of the provider to use for inference. Can be `"black-forest-labs"`, `"cerebras"`, `"clarifai"`, `"cohere"`, `"fal-ai"`, `"featherless-ai"`, `"fireworks-ai"`, `"groq"`, `"hf-inference"`, `"hyperbolic"`, `"nebius"`, `"novita"`, `"nscale"`, `"openai"`, `"ovhcloud"`, `"publicai"`, `"replicate"`, `"sambanova"`, `"scaleway"`, `"together"`, `"wavespeed"` or `"zai-org"`.
130130
Defaults to "auto" i.e. the first of the providers available for the model, sorted by the user's order in https://hf.co/settings/inference-providers.
131131
If model is a URL or `base_url` is passed, then `provider` is not used.
132132
token (`str`, *optional*):

src/huggingface_hub/inference/_providers/__init__.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
from .novita import NovitaConversationalTask, NovitaTextGenerationTask, NovitaTextToVideoTask
3939
from .nscale import NscaleConversationalTask, NscaleTextToImageTask
4040
from .openai import OpenAIConversationalTask
41+
from .ovhcloud import OVHcloudConversationalTask
4142
from .publicai import PublicAIConversationalTask
4243
from .replicate import (
4344
ReplicateAutomaticSpeechRecognitionTask,
@@ -76,6 +77,7 @@
7677
"novita",
7778
"nscale",
7879
"openai",
80+
"ovhcloud",
7981
"publicai",
8082
"replicate",
8183
"sambanova",
@@ -172,6 +174,9 @@
172174
"openai": {
173175
"conversational": OpenAIConversationalTask(),
174176
},
177+
"ovhcloud": {
178+
"conversational": OVHcloudConversationalTask(),
179+
},
175180
"publicai": {
176181
"conversational": PublicAIConversationalTask(),
177182
},

src/huggingface_hub/inference/_providers/_common.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
"hyperbolic": {},
3333
"nebius": {},
3434
"nscale": {},
35+
"ovhcloud": {},
3536
"replicate": {},
3637
"sambanova": {},
3738
"scaleway": {},
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
from huggingface_hub.inference._providers._common import BaseConversationalTask
2+
3+
4+
_PROVIDER = "ovhcloud"
5+
_BASE_URL = "https://oai.endpoints.kepler.ai.cloud.ovh.net"
6+
7+
8+
class OVHcloudConversationalTask(BaseConversationalTask):
9+
def __init__(self):
10+
super().__init__(provider=_PROVIDER, base_url=_BASE_URL)

tests/test_inference_client.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,9 @@
117117
"text-generation": "NousResearch/Nous-Hermes-Llama2-13b",
118118
"conversational": "meta-llama/Llama-3.1-8B-Instruct",
119119
},
120+
"ovhcloud": {
121+
"conversational": "meta-llama/Llama-3.1-8B-Instruct",
122+
},
120123
"replicate": {
121124
"text-to-image": "ByteDance/SDXL-Lightning",
122125
},

tests/test_inference_providers.py

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@
4646
from huggingface_hub.inference._providers.novita import NovitaConversationalTask, NovitaTextGenerationTask
4747
from huggingface_hub.inference._providers.nscale import NscaleConversationalTask, NscaleTextToImageTask
4848
from huggingface_hub.inference._providers.openai import OpenAIConversationalTask
49+
from huggingface_hub.inference._providers.ovhcloud import OVHcloudConversationalTask
4950
from huggingface_hub.inference._providers.publicai import PublicAIConversationalTask
5051
from huggingface_hub.inference._providers.replicate import (
5152
ReplicateAutomaticSpeechRecognitionTask,
@@ -1423,6 +1424,57 @@ def test_prepare_url(self):
14231424
assert helper._prepare_url("sk-XXXXXX", "gpt-4o-mini") == "https://api.openai.com/v1/chat/completions"
14241425

14251426

1427+
class TestOVHcloudAIEndpointsProvider:
1428+
def test_prepare_hf_url_conversational(self):
1429+
helper = OVHcloudConversationalTask()
1430+
url = helper._prepare_url("hf_token", "username/repo_name")
1431+
assert url == "https://router.huggingface.co/ovhcloud/v1/chat/completions"
1432+
1433+
def test_prepare_url_conversational(self):
1434+
helper = OVHcloudConversationalTask()
1435+
url = helper._prepare_url("ovhcloud_token", "username/repo_name")
1436+
assert url == "https://oai.endpoints.kepler.ai.cloud.ovh.net/v1/chat/completions"
1437+
1438+
def test_prepare_payload_as_dict(self):
1439+
helper = OVHcloudConversationalTask()
1440+
payload = helper._prepare_payload_as_dict(
1441+
[
1442+
{"role": "system", "content": "You are a helpful assistant"},
1443+
{"role": "user", "content": "Hello!"},
1444+
],
1445+
{
1446+
"max_tokens": 512,
1447+
"temperature": 0.15,
1448+
"top_p": 1,
1449+
"presence_penalty": 0,
1450+
"stream": True,
1451+
},
1452+
InferenceProviderMapping(
1453+
provider="ovhcloud",
1454+
hf_model_id="meta-llama/Llama-3.1-8B-Instruct",
1455+
providerId="Llama-3.1-8B-Instruct",
1456+
task="conversational",
1457+
status="live",
1458+
),
1459+
)
1460+
assert payload == {
1461+
"max_tokens": 512,
1462+
"messages": [
1463+
{"content": "You are a helpful assistant", "role": "system"},
1464+
{"role": "user", "content": "Hello!"},
1465+
],
1466+
"model": "Llama-3.1-8B-Instruct",
1467+
"presence_penalty": 0,
1468+
"stream": True,
1469+
"temperature": 0.15,
1470+
"top_p": 1,
1471+
}
1472+
1473+
def test_prepare_route_conversational(self):
1474+
helper = OVHcloudConversationalTask()
1475+
assert helper._prepare_route("username/repo_name", "hf_token") == "/v1/chat/completions"
1476+
1477+
14261478
class TestReplicateProvider:
14271479
def test_automatic_speech_recognition_payload(self):
14281480
helper = ReplicateAutomaticSpeechRecognitionTask()

0 commit comments

Comments
 (0)