# Source: hive-apps/projects/ai/values/localai.yaml
# (file-viewer metadata at time of capture: 133 lines, 3.5 KiB, YAML)

# Replica count handed to the chart (presumably the number of LocalAI pods — confirm against the chart).
replicaCount: 1
deployment:
  # NOTE(review): nesting was reconstructed from a flattened copy of this file.
  # `pullPolicy` and `runtimeClassName` are placed directly under `deployment`;
  # confirm against the chart's values schema.

  # LocalAI server image. Per its name, the tag selects a cuBLAS / CUDA 12
  # build with ffmpeg ("core" variant).
  image:
    repository: quay.io/go-skynet/local-ai
    tag: v2.12.4-cublas-cuda12-ffmpeg-core
  pullPolicy: Always

  # RuntimeClass requested for the pod.
  runtimeClassName: nvidia

  # Settings passed to the LocalAI container.
  # Lines below written as `KEY=value` are reference notes in dotenv form;
  # to enable one, add it to this map as `KEY: "value"`.
  env:
    threads: 16
    context_size: 4096
    DEBUG: 'true'

    # Disabled tuning knobs (already in YAML form; uncomment to enable).
    # SINGLE_ACTIVE_BACKEND: "true"
    # PYTHON_GRPC_MAX_WORKERS: "1"
    # LLAMACPP_PARALLEL: "1"
    # PARALLEL_REQUESTS: "false"

    # Specify a different bind address (defaults to ":8080").
    # ADDRESS=127.0.0.1:8080

    # Model galleries. Models available to install are listed at
    # `/models/available`.
    GALLERIES: '[{"name":"model-gallery", "url":"github:go-skynet/model-gallery/index.yaml"}, {"url": "github:go-skynet/model-gallery/huggingface.yaml","name":"huggingface"}]'

    # Default path for models.
    # MODELS_PATH=/models

    # Enable debug mode.
    # DEBUG=true

    # Disable COMPEL (lets Stable Diffusion work; uncomment if you plan to use it).
    # COMPEL=0

    # Enable/disable single-backend mode (useful if only one GPU is available).
    # SINGLE_ACTIVE_BACKEND=true

    # Build type. Available: cublas, openblas, clblas.
    # BUILD_TYPE=cublas

    # Uncomment and set to true to enable rebuilding from source.
    # REBUILD=true

    # Go build tags (require REBUILD=true). Available:
    #   stablediffusion: image generation with stablediffusion
    #   tts: text-to-speech with go-piper
    # GO_TAGS=tts

    # Path where generated images are stored.
    # IMAGE_PATH=/tmp

    # Default upload limit in MB (whisper).
    # UPLOAD_LIMIT

    # HUGGINGFACEHUB_API_TOKEN=Token here

  modelsPath: /models

  # Helper image for the model download step.
  # To use a cloud-provided (e.g. AWS) image, give the full registry path,
  # like: 1234356789.dkr.ecr.us-REGION-X.amazonaws.com/busybox
  download_model:
    image: busybox

  # Helper image for the prompt-template step.
  # To use a cloud-provided (e.g. AWS) image, give the full registry path,
  # like: 1234356789.dkr.ecr.us-REGION-X.amazonaws.com/busybox
  prompt_templates:
    image: busybox
# Memory request and limit. No CPU values are set in this block.
resources:
  requests:
    memory: 100Mi
  limits:
    memory: 64Gi
# Prompt template files, currently none.
# Each key of this map becomes the name of a prompt template file.
promptTemplates: {}
# Example entry (replace `{}` above with a block mapping to use it):
#   ggml-gpt4all-j.tmpl: |
#     The prompt below is a question to answer, a task to complete, or a conversation to respond to; decide which and write an appropriate response.
#     ### Prompt:
#     {{.Input}}
#     ### Response:
# Models fetched at runtime.
models:
  # When true, models are downloaded again even if they already exist.
  forceDownload: false

  # URLs to download models from; the file name becomes the name of the
  # loaded model. Empty here, so nothing is downloaded.
  list: []
  # Example entry:
  #   - url: "https://gpt4all.io/models/ggml-gpt4all-j.bin"
  #     basicAuth: base64EncodedCredentials
# Persistent volumes. Both entries use the `ssd` storage class.
persistence:
  # Model storage, mounted at /models (same path as deployment.modelsPath).
  models:
    enabled: true
    annotations: {}
    storageClass: ssd
    accessModes: ReadWriteOnce
    size: 100Gi
    globalMount: /models

  # Volume for /tmp/generated; currently switched off.
  output:
    enabled: false
    annotations: {}
    storageClass: ssd
    accessModes: ReadWriteOnce
    size: 100Gi
    globalMount: /tmp/generated
# Cluster-internal Service on port 80.
service:
  type: ClusterIP
  port: 80
# Ingress for host ai.dc via the `ingress-internal` class, with TLS from
# secret `localai-tls` and the cert-manager `vault-issuer` cluster issuer.
ingress:
  enabled: true
  className: "ingress-internal"
  annotations:
    cert-manager.io/cluster-issuer: vault-issuer
    # Proxy settings: response and request buffering off, HTTP/1.1 upstream.
    # Values stay quoted so "off"/"on"/"1.1" are not parsed as booleans or a float.
    nginx.ingress.kubernetes.io/proxy-buffering: "off"
    nginx.ingress.kubernetes.io/proxy-http-version: "1.1"
    # NOTE(review): `chunked-transfer-encoding` does not appear to be a
    # documented ingress-nginx annotation; kept as-is, confirm whether it has
    # any effect.
    nginx.ingress.kubernetes.io/chunked-transfer-encoding: "on"
    # Annotation names use hyphens; the previous `proxy_request_buffering`
    # spelling is not a recognised ingress-nginx annotation.
    nginx.ingress.kubernetes.io/proxy-request-buffering: "off"
  hosts:
    - host: ai.dc
      paths:
        - path: /
          pathType: ImplementationSpecific
  tls:
    - secretName: localai-tls
      hosts:
        - ai.dc