hive-apps/projects/ai/values/localai.yaml

115 lines
3 KiB
YAML

replicaCount: 1
deployment:
image: quay.io/go-skynet/local-ai:master-ffmpeg-core
env:
threads: 16
context_size: 2048
DEBUG: "true"
## Specify a different bind address (defaults to ":8080")
# ADDRESS=127.0.0.1:8080
## Define galleries.
## models will to install will be visible in `/models/available`
#GALLERIES=[{"name":"model-gallery", "url":"github:go-skynet/model-gallery/index.yaml"}, {"url": "github:go-skynet/model-gallery/huggingface.yaml","name":"huggingface"}]
## Default path for models
#MODELS_PATH=/models
## Enable debug mode
#DEBUG=true
## Disables COMPEL (Lets Stable Diffuser work, uncomment if you plan on using it)
# COMPEL=0
## Enable/Disable single backend (useful if only one GPU is available)
# SINGLE_ACTIVE_BACKEND=true
## Specify a build type. Available: cublas, openblas, clblas.
#BUILD_TYPE=cublas
## Uncomment and set to true to enable rebuilding from source
# REBUILD=true
## Enable go tags, available: stablediffusion, tts
## stablediffusion: image generation with stablediffusion
## tts: enables text-to-speech with go-piper
## (requires REBUILD=true)
#
#GO_TAGS=tts
## Path where to store generated images
# IMAGE_PATH=/tmp
## Specify a default upload limit in MB (whisper)
# UPLOAD_LIMIT
# HUGGINGFACEHUB_API_TOKEN=Token here
modelsPath: "/models"
download_model:
# To use cloud provided (eg AWS) image, provide it like: 1234356789.dkr.ecr.us-REGION-X.amazonaws.com/busybox
image: busybox
prompt_templates:
# To use cloud provided (eg AWS) image, provide it like: 1234356789.dkr.ecr.us-REGION-X.amazonaws.com/busybox
image: busybox
pullPolicy: Always
imagePullSecrets: []
# - name: secret-names
resources:
limits:
memory: 64Gi
# Note: the keys of this map will be the names of the prompt template files
promptTemplates:
{}
# ggml-gpt4all-j.tmpl: |
# The prompt below is a question to answer, a task to complete, or a conversation to respond to; decide which and write an appropriate response.
# ### Prompt:
# {{.Input}}
# ### Response:
# Models to download at runtime
models:
# Whether to force download models even if they already exist
forceDownload: false
# The list of URLs to download models from
# Note: the name of the file will be the name of the loaded model
list:
- url: "https://gpt4all.io/models/ggml-gpt4all-j.bin"
# basicAuth: base64EncodedCredentials
persistence:
pvc:
enabled: true
size: 100Gi
accessModes:
- ReadWriteOnce
storageClass: "ssd"
service:
type: ClusterIP
port: 80
ingress:
enabled: true
className: "ingress-internal"
annotations:
cert-manager.io/cluster-issuer: vault-issuer
nginx.ingress.kubernetes.io/proxy-body-size: 100m
hosts:
- host: ai.dc
paths:
- path: /
pathType: ImplementationSpecific
tls:
- secretName: localai-tls
hosts:
- ai.dc
image:
pullPolicy: IfNotPresent