feat(localai): runtimeclass nvidia

This commit is contained in:
nold 2024-02-21 16:41:05 +01:00
parent d9a93a6fb9
commit 6dba741b68
2 changed files with 30 additions and 19 deletions

View file

@@ -3,9 +3,12 @@ config:
apps: apps:
- name: localai - name: localai
repoURL: https://go-skynet.github.io/helm-charts # repoURL: https://go-skynet.github.io/helm-charts
chart: local-ai # chart: local-ai
targetRevision: 3.1.0 # targetRevision: 3.1.0
repoURL: https://github.com/nold360/localai-charts.git
path: charts/local-ai
targetRevision: feat/runtimeclass
- name: anythingllm - name: anythingllm
repo: bjw-s repo: bjw-s

View file

@@ -1,9 +1,14 @@
replicaCount: 1 replicaCount: 1
deployment: deployment:
image: quay.io/go-skynet/local-ai:master-ffmpeg-core image:
repository: quay.io/go-skynet/local-ai
tag: v2.8.0-cublas-cuda12
pullPolicy: Always
runtimeClassName: nvidia
env: env:
threads: 16 # threads: 16
context_size: 2048 context_size: 2048
DEBUG: "true" DEBUG: "true"
@@ -54,9 +59,6 @@ deployment:
prompt_templates: prompt_templates:
# To use cloud provided (eg AWS) image, provide it like: 1234356789.dkr.ecr.us-REGION-X.amazonaws.com/busybox # To use cloud provided (eg AWS) image, provide it like: 1234356789.dkr.ecr.us-REGION-X.amazonaws.com/busybox
image: busybox image: busybox
pullPolicy: Always
imagePullSecrets: []
# - name: secret-names
resources: resources:
requests: requests:
@@ -80,17 +82,25 @@ models:
# The list of URLs to download models from # The list of URLs to download models from
# Note: the name of the file will be the name of the loaded model # Note: the name of the file will be the name of the loaded model
list: list: []
- url: "https://gpt4all.io/models/ggml-gpt4all-j.bin" # - url: "https://gpt4all.io/models/ggml-gpt4all-j.bin"
# basicAuth: base64EncodedCredentials # basicAuth: base64EncodedCredentials
persistence: persistence:
pvc: models:
enabled: true enabled: true
annotations: {}
storageClass: ssd
accessModes: ReadWriteOnce
size: 100Gi size: 100Gi
accessModes: globalMount: /models
- ReadWriteOnce output:
storageClass: "ssd" enabled: false
annotations: {}
storageClass: ssd
accessModes: ReadWriteOnce
size: 100Gi
globalMount: /tmp/generated
service: service:
type: ClusterIP type: ClusterIP
@@ -115,5 +125,3 @@ ingress:
hosts: hosts:
- ai.dc - ai.dc
image:
pullPolicy: IfNotPresent