mirror of
https://github.com/nold360/hive-apps
synced 2025-01-25 16:32:49 +00:00
141 lines
3.7 KiB
YAML
141 lines
3.7 KiB
YAML
# A single replica is configured.
replicaCount: 1

# Workload settings: container image, runtime class, environment variables,
# secret-backed environment variables, and helper images.
# NOTE(review): the nesting below was rebuilt from a flattened copy of this
# file and follows the LocalAI Helm chart's values layout — confirm against
# the chart version in use.
deployment:
  image:
    repository: quay.io/go-skynet/local-ai
    #tag: latest-aio-gpu-nvidia-cuda-12
    tag: v2.22.1-cublas-cuda12-ffmpeg
  # NOTE(review): pullPolicy and runtimeClassName are assumed to be direct
  # children of `deployment` rather than of `image` — verify against the chart.
  pullPolicy: Always
  runtimeClassName: nvidia

  env:
    threads: 16
    context_size: 4096
    DEBUG: "true"
    #
    # SINGLE_ACTIVE_BACKEND: "true"
    # PYTHON_GRPC_MAX_WORKERS: "1"
    # LLAMACPP_PARALLEL: "1"
    # PARALLEL_REQUESTS: "false"

    ## Specify a different bind address (defaults to ":8080")
    # ADDRESS=127.0.0.1:8080

    ## Define galleries.
    ## Models available to install will be visible in `/models/available`
    #GALLERIES: '[{"name":"model-gallery", "url":"github:go-skynet/model-gallery/index.yaml"}, {"url": "github:go-skynet/model-gallery/huggingface.yaml","name":"huggingface"}]'

    ## Default path for models
    #MODELS_PATH=/models

    ## Enable debug mode
    #DEBUG=true

    ## Disables COMPEL (Lets Stable Diffuser work, uncomment if you plan on using it)
    # COMPEL=0

    ## Enable/Disable single backend (useful if only one GPU is available)
    # SINGLE_ACTIVE_BACKEND=true

    ## Specify a build type. Available: cublas, openblas, clblas.
    #BUILD_TYPE=cublas

    ## Uncomment and set to true to enable rebuilding from source
    # REBUILD=true

    ## Enable go tags, available: stablediffusion, tts
    ## stablediffusion: image generation with stablediffusion
    ## tts: enables text-to-speech with go-piper
    ## (requires REBUILD=true)
    #
    #GO_TAGS=tts

    ## Path where to store generated images
    # IMAGE_PATH=/tmp

    ## Specify a default upload limit in MB (whisper)
    # UPLOAD_LIMIT

    # HUGGINGFACEHUB_API_TOKEN=Token here

  # Inject Secrets into Environment:
  # HF_TOKEN is read from key `hf-token` of the secret named `localai`.
  secretEnv:
    - name: HF_TOKEN
      valueFrom:
        secretKeyRef:
          name: localai
          key: hf-token

  modelsPath: "/models"
  download_model:
    # To use cloud provided (eg AWS) image, provide it like: 1234356789.dkr.ecr.us-REGION-X.amazonaws.com/busybox
    image: busybox
  prompt_templates:
    # To use cloud provided (eg AWS) image, provide it like: 1234356789.dkr.ecr.us-REGION-X.amazonaws.com/busybox
    image: busybox

# Container resource settings. Only memory is specified: a small request
# (100Mi) with a much larger limit (64Gi).
resources:
  requests:
    memory: 100Mi
  limits:
    memory: 64Gi

# Note: the keys of this map will be the names of the prompt template files
# No templates are defined here; the commented entry below shows the expected shape.
promptTemplates: {}
  # ggml-gpt4all-j.tmpl: |
  #   The prompt below is a question to answer, a task to complete, or a conversation to respond to; decide which and write an appropriate response.
  #   ### Prompt:
  #   {{.Input}}
  #   ### Response:

# Models to download at runtime
models:
  # Whether to force download models even if they already exist
  forceDownload: false

  # The list of URLs to download models from
  # Note: the name of the file will be the name of the loaded model
  # The list is empty here; the commented entry shows the expected shape.
  list: []
  # - url: "https://gpt4all.io/models/ggml-gpt4all-j.bin"
  #   basicAuth: base64EncodedCredentials

# Persistent volume settings. Two volumes are described with identical
# storage class, access mode and size; only `models` is enabled.
persistence:
  models:
    enabled: true
    annotations: {}
    storageClass: ssd
    accessModes: ReadWriteOnce
    size: 100Gi
    globalMount: /models
  output:
    enabled: false
    annotations: {}
    storageClass: ssd
    accessModes: ReadWriteOnce
    size: 100Gi
    globalMount: /tmp/generated

# Service settings: a ClusterIP service on port 80.
service:
  type: ClusterIP
  port: 80

# Ingress settings: host `ai.dc` on the `ingress-internal` class, with a TLS
# secret `localai-tls` and a cert-manager cluster-issuer annotation.
ingress:
  enabled: true
  className: "ingress-internal"
  annotations:
    cert-manager.io/cluster-issuer: vault-issuer
    # Response and request buffering are both switched off.
    nginx.ingress.kubernetes.io/proxy-buffering: "off"
    nginx.ingress.kubernetes.io/proxy-http-version: "1.1"
    # NOTE(review): this key does not appear to be a documented ingress-nginx
    # annotation — confirm it has any effect, otherwise drop it.
    nginx.ingress.kubernetes.io/chunked-transfer-encoding: "on"
    # ingress-nginx annotation names are hyphenated; the earlier
    # `proxy_request_buffering` spelling is not a recognised key.
    nginx.ingress.kubernetes.io/proxy-request-buffering: "off"
  hosts:
    - host: ai.dc
      paths:
        - path: /
          pathType: ImplementationSpecific
  tls:
    - secretName: localai-tls
      hosts:
        - ai.dc