feat(localai): runtimeclass nvidia

This commit is contained in:
nold 2024-02-21 16:41:05 +01:00
parent d9a93a6fb9
commit 6dba741b68
2 changed files with 30 additions and 19 deletions

View file

@@ -3,9 +3,12 @@ config:
apps:
- name: localai
repoURL: https://go-skynet.github.io/helm-charts
chart: local-ai
targetRevision: 3.1.0
# repoURL: https://go-skynet.github.io/helm-charts
# chart: local-ai
# targetRevision: 3.1.0
repoURL: https://github.com/nold360/localai-charts.git
path: charts/local-ai
targetRevision: feat/runtimeclass
- name: anythingllm
repo: bjw-s

View file

@@ -1,9 +1,14 @@
replicaCount: 1
deployment:
image: quay.io/go-skynet/local-ai:master-ffmpeg-core
image:
repository: quay.io/go-skynet/local-ai
tag: v2.8.0-cublas-cuda12
pullPolicy: Always
runtimeClassName: nvidia
env:
threads: 16
# threads: 16
context_size: 2048
DEBUG: "true"
@@ -54,9 +59,6 @@ deployment:
prompt_templates:
# To use a cloud-provided (e.g. AWS) image, specify it like: 1234356789.dkr.ecr.us-REGION-X.amazonaws.com/busybox
image: busybox
pullPolicy: Always
imagePullSecrets: []
# - name: secret-names
resources:
requests:
@@ -80,17 +82,25 @@ models:
# The list of URLs to download models from
# Note: the name of the file will be the name of the loaded model
list:
- url: "https://gpt4all.io/models/ggml-gpt4all-j.bin"
list: []
# - url: "https://gpt4all.io/models/ggml-gpt4all-j.bin"
# basicAuth: base64EncodedCredentials
persistence:
pvc:
persistence:
models:
enabled: true
annotations: {}
storageClass: ssd
accessModes: ReadWriteOnce
size: 100Gi
accessModes:
- ReadWriteOnce
storageClass: "ssd"
globalMount: /models
output:
enabled: false
annotations: {}
storageClass: ssd
accessModes: ReadWriteOnce
size: 100Gi
globalMount: /tmp/generated
service:
type: ClusterIP
@@ -115,5 +125,3 @@ ingress:
hosts:
- ai.dc
image:
pullPolicy: IfNotPresent