diff --git a/projects/ai/project.yaml b/projects/ai/project.yaml index f74c8b6c..b4398357 100644 --- a/projects/ai/project.yaml +++ b/projects/ai/project.yaml @@ -3,9 +3,12 @@ config: apps: - name: localai - repoURL: https://go-skynet.github.io/helm-charts - chart: local-ai - targetRevision: 3.1.0 + # repoURL: https://go-skynet.github.io/helm-charts + # chart: local-ai + # targetRevision: 3.1.0 + repoURL: https://github.com/nold360/localai-charts.git + path: charts/local-ai + targetRevision: feat/runtimeclass - name: anythingllm repo: bjw-s diff --git a/projects/ai/values/localai.yaml b/projects/ai/values/localai.yaml index 42eddf4a..38121f88 100644 --- a/projects/ai/values/localai.yaml +++ b/projects/ai/values/localai.yaml @@ -1,9 +1,14 @@ replicaCount: 1 deployment: - image: quay.io/go-skynet/local-ai:master-ffmpeg-core + image: + repository: quay.io/go-skynet/local-ai + tag: v2.8.0-cublas-cuda12 + pullPolicy: Always + runtimeClassName: nvidia + env: - threads: 16 + # threads: 16 context_size: 2048 DEBUG: "true" @@ -54,9 +59,6 @@ deployment: prompt_templates: # To use cloud provided (eg AWS) image, provide it like: 1234356789.dkr.ecr.us-REGION-X.amazonaws.com/busybox image: busybox - pullPolicy: Always - imagePullSecrets: [] - # - name: secret-names resources: requests: @@ -80,17 +82,25 @@ models: # The list of URLs to download models from # Note: the name of the file will be the name of the loaded model - list: - - url: "https://gpt4all.io/models/ggml-gpt4all-j.bin" + list: [] + # - url: "https://gpt4all.io/models/ggml-gpt4all-j.bin" # basicAuth: base64EncodedCredentials - persistence: - pvc: - enabled: true - size: 100Gi - accessModes: - - ReadWriteOnce - storageClass: "ssd" +persistence: + models: + enabled: true + annotations: {} + storageClass: ssd + accessModes: ReadWriteOnce + size: 100Gi + globalMount: /models + output: + enabled: false + annotations: {} + storageClass: ssd + accessModes: ReadWriteOnce + size: 100Gi + globalMount: /tmp/generated service: type: ClusterIP @@ -115,5 +125,3 @@ ingress: hosts: - ai.dc -image: - pullPolicy: IfNotPresent