> For clean Markdown of any page, append .md to the page URL.
> For a complete documentation index, see https://nemo-platform.docs.buildwithfern.com/nemo/platform/llms.txt.
> For AI client integration (Claude Code, Cursor, etc.), connect to the MCP server at https://nemo-platform.docs.buildwithfern.com/nemo/platform/_mcp/server.

# Create ModelDeploymentConfig

POST https://host.com/apis/models/v2/workspaces/{workspace}/deployment-configs
Content-Type: application/json

Create a new ModelDeploymentConfig (version 1).

Reference: https://nemo-platform.docs.buildwithfern.com/nemo/platform/nemo/platform/documentation/reference/api-reference/model-deployment-configs/create-deployment-config-apis-models-v-2-workspaces-workspace-deployment-configs-post

## OpenAPI Specification

```yaml
openapi: 3.1.0
info:
  title: Nemo Platform API
  version: 1.0.0
paths:
  /apis/models/v2/workspaces/{workspace}/deployment-configs:
    post:
      operationId: >-
        create-deployment-config-apis-models-v-2-workspaces-workspace-deployment-configs-post
      summary: Create ModelDeploymentConfig
      description: Create a new ModelDeploymentConfig (version 1).
      tags:
        - subpackage_modelDeploymentConfigs
      parameters:
        - name: workspace
          in: path
          required: true
          schema:
            type: string
      responses:
        '201':
          description: Create a new model deployment configuration
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ModelDeploymentConfig'
        '422':
          description: Validation Error
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/HTTPValidationError'
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/CreateModelDeploymentConfigRequest'
servers:
  - url: https://host.com
    description: Default
components:
  schemas:
    ModelType:
      type: string
      enum:
        - llm
        - embed
        - other
      description: Model type enum for NIM deployments.
      title: ModelType
    ToolCallConfig:
      type: object
      properties:
        tool_call_parser:
          type: string
          description: >-
            Name of the tool call parser to use (e.g., 'openai', 'hermes',
            'pythonic', 'llama3_json', 'mistral').
        tool_call_plugin:
          type: string
          description: >-
            Reference to a fileset containing the custom tool call plugin Python
            file. Expected format: '{workspace}/{fileset_name}'. The fileset is
            mounted separately from the model checkpoint at deployment time.
        auto_tool_choice:
          type: boolean
          description: >-
            Whether to enable automatic tool choice. When enabled, the model can
            decide to call tools without explicit user instruction.
      description: Configuration for tool calling support in NIM deployments.
      title: ToolCallConfig
    K8sNIMOperatorConfig:
      type: object
      properties:
        resources:
          type: object
          additionalProperties:
            description: Any type
          description: >-
            Kubernetes resource requirements including requests and limits.
            Example: {'requests': {'cpu': '2', 'memory': '8Gi'}, 'limits':
            {'memory': '16Gi'}}
        tolerations:
          type: array
          items:
            type: object
            additionalProperties:
              description: Any type
          description: >-
            Kubernetes tolerations for pod scheduling. Example: [{'key':
            'nvidia.com/gpu', 'operator': 'Exists', 'effect': 'NoSchedule'}]
        node_selector:
          type: object
          additionalProperties:
            type: string
          description: >-
            Kubernetes node selector for pod placement. Example: {'node-type':
            'gpu-node', 'zone': 'us-west1-a'}
        startup_probe_grace_seconds:
          type: integer
          description: >-
            Grace period in seconds for NIM startup. Determines how long
            Kubernetes will wait for the NIM to become ready before restarting
            it. Example: 600 (10 minutes). Must be a positive integer.
      description: >-
        Kubernetes configuration for NIM deployment via k8s-nim-operator.


        These fields provide typed access to commonly-used NIMService Spec
        fields and are applied before override_config in the compilation
        precedence.
      title: K8sNIMOperatorConfig
    NIMDeployment:
      type: object
      properties:
        model_type:
          $ref: '#/components/schemas/ModelType'
          description: Type of model being deployed
        lora_enabled:
          type: boolean
          default: false
          description: Whether to enable LoRA support
        gpu:
          type: integer
          description: Number of GPUs required for the deployment
        disk_size:
          type: string
          default: 50Gi
          description: Disk size for the deployment
        image_name:
          type: string
          description: >-
            Container image name from NGC. If not specified, defaults to
            multi-llm
        image_tag:
          type: string
          description: Container image tag from NGC
        model_namespace:
          type: string
          description: >-
            Model repository namespace - organization/user namespace as it
            exists in repo_id.
        model_name:
          type: string
          description: Model name - model repository name for model weights.
        model_revision:
          type: string
          description: >-
            Model revision (branch, tag, or commit). If not specified, parsed
            from model_name @revision suffix or defaults to 'main'
        model_provider:
          type: string
          description: 'Model provider: ''hf'' for HuggingFace or ''nmp'' for NeMo Platform'
        chat_template:
          type: string
          description: >-
            Jinja2 chat template string for the model. Overrides the
            chat_template from ModelEntity.spec if both are set. Used by NIM to
            format chat completions.
        tool_call_config:
          $ref: '#/components/schemas/ToolCallConfig'
          description: >-
            Tool calling configuration for NIM deployments. Overrides
            tool_call_config from ModelEntity.spec if both are set. Controls how
            the model handles function/tool calling.
        additional_envs:
          type: object
          additionalProperties:
            description: Any type
          description: Additional environment variables for the deployment
        k8s_nim_operator_config:
          $ref: '#/components/schemas/K8sNIMOperatorConfig'
          description: >-
            Typed Kubernetes configuration for common NIMService Spec fields.
            Applied after defaults but before override_config.
        override_config:
          type: object
          additionalProperties:
            description: Any type
          description: >-
            Raw NIMService spec configuration that takes precedence over
            generated config. Allows end users to provide advanced configuration
            options directly.
      required:
        - gpu
      description: Configuration for NIM-based model deployment.
      title: NIMDeployment
    CreateModelDeploymentConfigRequest:
      type: object
      properties:
        name:
          type: string
          description: >-
            Name of the deployment configuration. Allowed characters: letters
            (a-z, A-Z), digits (0-9), underscores, hyphens, and dots.
        project:
          type: string
          description: The URN of the project associated with this deployment configuration
        description:
          type: string
          description: Optional description of the deployment configuration
        nim_deployment:
          $ref: '#/components/schemas/NIMDeployment'
          description: Configuration for NIM-based deployment
        model_entity_id:
          type: string
          description: Optional reference to the base model entity ID for this deployment
      required:
        - name
        - nim_deployment
      description: Request model for creating a ModelDeploymentConfig.
      title: CreateModelDeploymentConfigRequest
    ModelDeploymentConfig:
      type: object
      properties:
        id:
          type: string
          description: Unique identifier for the deployment config
        name:
          type: string
          description: >-
            Name of the entity. Name/workspace combo must be unique across all
            entities. Allowed characters: letters (a-z, A-Z), digits (0-9),
            underscores, hyphens, and dots.
        workspace:
          type: string
          description: >-
            The workspace of the entity. Allowed characters: letters (a-z, A-Z),
            digits (0-9), underscores, hyphens, and dots.
        project:
          type: string
          description: The URN of the project associated with this entity.
        created_at:
          type: string
          format: date-time
          description: The timestamp of deployment config creation
        updated_at:
          type: string
          format: date-time
          description: The timestamp of the last deployment config update
        entity_version:
          type: integer
          description: Version of this deployment config. Automatically managed.
        description:
          type: string
          description: Optional description of the deployment configuration
        nim_deployment:
          $ref: '#/components/schemas/NIMDeployment'
          description: Configuration for NIM-based deployment
        model_entity_id:
          type: string
          description: Optional reference to the base model entity ID for this deployment
      required:
        - name
        - workspace
        - created_at
        - updated_at
        - entity_version
        - nim_deployment
      description: >-
        ModelDeploymentConfig stores the configuration details for deploying a
        model.

        These objects are immutable with automatic versioning.


        The unique identifier is the combination of
        workspace/name/entity_version.
      title: ModelDeploymentConfig
    ValidationErrorLocItems:
      oneOf:
        - type: string
        - type: integer
      title: ValidationErrorLocItems
    ValidationError:
      type: object
      properties:
        loc:
          type: array
          items:
            $ref: '#/components/schemas/ValidationErrorLocItems'
        msg:
          type: string
        type:
          type: string
        input:
          description: Any type
        ctx:
          type: object
          additionalProperties:
            description: Any type
      required:
        - loc
        - msg
        - type
      title: ValidationError
    HTTPValidationError:
      type: object
      properties:
        detail:
          type: array
          items:
            $ref: '#/components/schemas/ValidationError'
      title: HTTPValidationError

```

## Examples


**Request**

```json
{
  "name": "nim-config-v1",
  "nim_deployment": {
    "gpu": 2
  }
}
```

**Response**

```json
{
  "name": "nim-config-v1",
  "workspace": "research-team-alpha",
  "created_at": "2024-01-15T09:30:00Z",
  "updated_at": "2024-01-15T09:30:00Z",
  "entity_version": 1,
  "nim_deployment": {
    "gpu": 2,
    "model_type": "llm",
    "lora_enabled": false,
    "disk_size": "100Gi",
    "image_name": "nvcr.io/nemo/multi-llm",
    "image_tag": "v1.2.3",
    "model_namespace": "nvidia",
    "model_name": "llama-3-8b",
    "model_revision": "main",
    "model_provider": "hf",
    "chat_template": "default_chat_template.j2",
    "tool_call_config": {
      "tool_call_parser": "openai",
      "tool_call_plugin": "research-team-alpha/custom-tool-plugin",
      "auto_tool_choice": true
    },
    "additional_envs": {},
    "k8s_nim_operator_config": {
      "resources": {},
      "tolerations": [
        {}
      ],
      "node_selector": {},
      "startup_probe_grace_seconds": 600
    },
    "override_config": {}
  },
  "id": "d4f8a9b2-3c7e-4f1a-9b6d-2e5f7a8c9d01",
  "project": "projects/nemo-platform/llm-deployments",
  "description": "Deployment config for LLaMA 3 8B model with 2 GPUs",
  "model_entity_id": "model-entity-12345"
}
```

**SDK Code**

```python
import requests

# Endpoint for creating a deployment config; the "workspace" path segment
# stands in for the {workspace} path parameter.
url = "https://host.com/apis/models/v2/workspaces/workspace/deployment-configs"

# Minimal request body: only the required fields (name and nim_deployment.gpu).
payload = {
    "name": "nim-config-v1",
    "nim_deployment": { "gpu": 2 }
}
headers = {"Content-Type": "application/json"}

# Without an explicit timeout, requests waits indefinitely on an unresponsive
# server, so bound the call.
response = requests.post(url, json=payload, headers=headers, timeout=10)

print(response.json())
```

```javascript
// Endpoint for creating a deployment config in the given workspace.
const url = 'https://host.com/apis/models/v2/workspaces/workspace/deployment-configs';

// POST options carrying the minimal JSON request body.
const options = {
  method: 'POST',
  headers: {'Content-Type': 'application/json'},
  body: '{"name":"nim-config-v1","nim_deployment":{"gpu":2}}'
};

// Send the request, decode the JSON response, and log it; any failure
// (network or JSON parsing) is reported on stderr.
fetch(url, options)
  .then((response) => response.json())
  .then((data) => {
    console.log(data);
  })
  .catch((error) => {
    console.error(error);
  });
```

```go
package main

import (
	"fmt"
	"strings"
	"net/http"
	"io"
)

// main sends a POST request that creates a ModelDeploymentConfig and prints
// the response followed by its body. Any error is printed and ends the run.
func main() {

	url := "https://host.com/apis/models/v2/workspaces/workspace/deployment-configs"

	payload := strings.NewReader("{\n  \"name\": \"nim-config-v1\",\n  \"nim_deployment\": {\n    \"gpu\": 2\n  }\n}")

	req, err := http.NewRequest("POST", url, payload)
	if err != nil {
		fmt.Println(err)
		return
	}

	req.Header.Add("Content-Type", "application/json")

	res, err := http.DefaultClient.Do(req)
	if err != nil {
		// res is nil when Do fails, so res.Body must not be touched.
		fmt.Println(err)
		return
	}
	defer res.Body.Close()

	body, err := io.ReadAll(res.Body)
	if err != nil {
		fmt.Println(err)
		return
	}

	fmt.Println(res)
	fmt.Println(string(body))

}
```

```ruby
require 'uri'
require 'net/http'

# Endpoint for creating a deployment config in the given workspace.
url = URI("https://host.com/apis/models/v2/workspaces/workspace/deployment-configs")

# Build the POST request with the minimal JSON body.
request = Net::HTTP::Post.new(url)
request["Content-Type"] = 'application/json'
request.body = "{\n  \"name\": \"nim-config-v1\",\n  \"nim_deployment\": {\n    \"gpu\": 2\n  }\n}"

# Open a TLS connection, send the request, and print the response body.
response = Net::HTTP.start(url.host, url.port, use_ssl: true) do |http|
  http.request(request)
end
puts response.read_body
```

```java
import com.mashape.unirest.http.HttpResponse;
import com.mashape.unirest.http.Unirest;

// Endpoint for creating a deployment config in the given workspace.
String endpoint = "https://host.com/apis/models/v2/workspaces/workspace/deployment-configs";

// Minimal JSON request body: only the required fields.
String payload = "{\n  \"name\": \"nim-config-v1\",\n  \"nim_deployment\": {\n    \"gpu\": 2\n  }\n}";

// Send the POST request and read the response body as a string.
HttpResponse<String> response = Unirest.post(endpoint)
  .header("Content-Type", "application/json")
  .body(payload)
  .asString();
```

```php
<?php
require_once('vendor/autoload.php');

// Endpoint for creating a deployment config in the given workspace.
$endpoint = 'https://host.com/apis/models/v2/workspaces/workspace/deployment-configs';

// Minimal JSON request body: only the required fields.
$payload = '{
  "name": "nim-config-v1",
  "nim_deployment": {
    "gpu": 2
  }
}';

$requestHeaders = [
  'Content-Type' => 'application/json',
];

// Send the POST request and print the response body.
$client = new \GuzzleHttp\Client();
$response = $client->request('POST', $endpoint, [
  'body' => $payload,
  'headers' => $requestHeaders,
]);

echo $response->getBody();
```

```csharp
using RestSharp;

// Endpoint for creating a deployment config in the given workspace.
const string endpoint = "https://host.com/apis/models/v2/workspaces/workspace/deployment-configs";

// Minimal JSON request body: only the required fields.
const string payload = "{\n  \"name\": \"nim-config-v1\",\n  \"nim_deployment\": {\n    \"gpu\": 2\n  }\n}";

var client = new RestClient(endpoint);

// Build the POST request with a JSON content type and the payload as its body.
var request = new RestRequest(Method.POST);
request.AddHeader("Content-Type", "application/json");
request.AddParameter("application/json", payload, ParameterType.RequestBody);

IRestResponse response = client.Execute(request);
```

```swift
import Foundation

// Request headers and the minimal JSON payload (required fields only).
let headers = ["Content-Type": "application/json"]
let parameters: [String: Any] = [
  "name": "nim-config-v1",
  "nim_deployment": ["gpu": 2]
]

// `URL(string:)` returns an optional; fail with a clear message instead of a bare force-unwrap.
guard let url = URL(string: "https://host.com/apis/models/v2/workspaces/workspace/deployment-configs") else {
  fatalError("Invalid endpoint URL")
}

var request = URLRequest(url: url,
                         cachePolicy: .useProtocolCachePolicy,
                         timeoutInterval: 10.0)
request.httpMethod = "POST"
request.allHTTPHeaderFields = headers

// `JSONSerialization.data(withJSONObject:options:)` throws, so it must be called with `try`.
do {
  request.httpBody = try JSONSerialization.data(withJSONObject: parameters, options: [])
} catch {
  fatalError("Failed to encode request body: \(error)")
}

// Send the request; print the error on failure, otherwise the response and its body.
let dataTask = URLSession.shared.dataTask(with: request) { data, response, error in
  if let error = error {
    print(error)
    return
  }
  if let httpResponse = response as? HTTPURLResponse {
    print(httpResponse)
  }
  if let data = data, let body = String(data: data, encoding: .utf8) {
    print(body)
  }
}

dataTask.resume()
```