# -- Hardware Configuration --
# (String) A unique name for your deployment. If not provided, the base name
# of the current directory will be used.
# Limited to 15 characters; a longer name is truncated to its first 15 characters.
# Default: null
name: my-model
# (Integer) The number of GPUs to allocate to each replica.
# Default: 0
gpus: 1
# (String) The specific GPU model.
# Options: 'V100', 'A10G', 'T4', 'L4', 'L40S', 'A100', 'H100'
# Default: null
# NOTE(review): the value below is lowercase while the options are listed in
# uppercase - confirm the consumer matches GPU types case-insensitively.
gpu_type: l4
# (Float) CPU cores to allocate, in millicores (e.g., 1000 = 1 vCPU).
# Default: 100.0
# The value below (2000 millicores) is 2 vCPUs.
cpu: 2000
# (Float) RAM to allocate, in megabytes (MB).
# Default: 200.0
memory: 8192
# -- Scaling Configuration --
# (Integer) Minimum number of replicas to keep running. Can be 0 for scale-to-zero.
# Default: 0
min_scale: 0
# (Integer) Maximum number of replicas to scale out to under load.
# Default: 3
max_scale: 5
# (Integer) The number of concurrent requests a single replica can handle
# before the autoscaler adds a new one.
# Note: Scale-up starts at 70% of the concurrency value, keeping a 30% buffer
# for extra requests while a new pod is getting ready to serve.
# With the value below, that is 35 concurrent requests (70% of 50).
# Default: 100
concurrency: 50
# (List of Objects) A list of volumes to mount to the container.
# See the Volumes guide for more details.
# Default: []
# Each "- " entry is one volume. Its fields are indented to line up with the
# first field so they belong to that entry instead of becoming top-level keys.
volumes:
- name: my-volume
  type: efs
  mount_path: /mnt/my-efs-volume
# -- Application & Networking --
# (Integer) The port your container exposes for traffic.
# Default: 80
port: 8000
# (String) A custom domain to use for the endpoint. Requires CNAME configuration.
# NOTE(review): presumably a DNS CNAME record for this domain must point at the
# deployment's endpoint - confirm against the custom-domain guide.
# Default: null
domain_name: my-model.mydomain.com
# (Object) Health check to verify your application is ready to serve traffic.
# This is crucial for robust, zero-downtime deployments.
# Default: null
# The probe is nested under `readiness`: this example issues an HTTP GET to
# /healthz on port 8000. The nesting matters - without it, `path` and `port`
# become top-level keys and `port` duplicates the container port key.
readiness:
  httpGet:
    path: /healthz
    port: 8000
# -- Environment & Secrets --
# (String) The target environment for the deployment.
# Default: "default"
env: production
# (List of Strings) A list of secret names to mount as environment variables.
# Only the names of the secrets are listed here, not their values.
# See the Secrets guide for more details.
# Default: []
secret:
- my-api-keys
- hugging-face-token
# (Boolean) Set to true when deploying from a CI/CD pipeline like GitHub Actions.
# Default: false
github_actions: false