feat(localai): runtimeclass nvidia

2024-12-22 21:51:20 +00:00 · 2024-02-21 16:41:05 +01:00 · 2024-02-21 16:41:05 +01:00 · 6dba741b68
commit 6dba741b68
parent d9a93a6fb9
2 changed files with 30 additions and 19 deletions
--- a/projects/ai/project.yaml
+++ b/projects/ai/project.yaml
@@ -3,9 +3,12 @@ config:

 apps:
 - name: localai
-  repoURL: https://go-skynet.github.io/helm-charts
-  chart: local-ai
-  targetRevision: 3.1.0
+  # repoURL: https://go-skynet.github.io/helm-charts
+  # chart: local-ai
+  # targetRevision: 3.1.0
+  repoURL: https://github.com/nold360/localai-charts.git
+  path: charts/local-ai
+  targetRevision: feat/runtimeclass

 - name: anythingllm
  repo: bjw-s 
--- a/projects/ai/values/localai.yaml
+++ b/projects/ai/values/localai.yaml
@@ -1,9 +1,14 @@
 replicaCount: 1

 deployment:
-  image: quay.io/go-skynet/local-ai:master-ffmpeg-core
+  image: 
+    repository: quay.io/go-skynet/local-ai
+    tag: v2.8.0-cublas-cuda12
+  pullPolicy: Always
+  runtimeClassName: nvidia
+
  env:
-    threads: 16
+    # threads: 16
    context_size: 2048
    DEBUG: "true"

@@ -54,9 +59,6 @@ deployment:
  prompt_templates:
    # To use cloud provided (eg AWS) image, provide it like: 1234356789.dkr.ecr.us-REGION-X.amazonaws.com/busybox
    image: busybox
-  pullPolicy: Always
-  imagePullSecrets: []
-    # - name: secret-names

 resources:
  requests:
@@ -80,17 +82,25 @@ models:

  # The list of URLs to download models from
  # Note: the name of the file will be the name of the loaded model
-  list:
-    - url: "https://gpt4all.io/models/ggml-gpt4all-j.bin"
+  list: []
+    # - url: "https://gpt4all.io/models/ggml-gpt4all-j.bin"
      # basicAuth: base64EncodedCredentials

-  persistence:
-    pvc:
+persistence:
+  models: 
    enabled: true
+    annotations: {}
+    storageClass: ssd
+    accessModes: ReadWriteOnce
    size: 100Gi
-      accessModes:
-        - ReadWriteOnce
-      storageClass: "ssd"
+    globalMount: /models
+  output:
+    enabled: false
+    annotations: {}
+    storageClass: ssd
+    accessModes: ReadWriteOnce
+    size: 100Gi
+    globalMount: /tmp/generated

 service:
  type: ClusterIP
@@ -115,5 +125,3 @@ ingress:
      hosts:
        - ai.dc

-image:
-  pullPolicy: IfNotPresent