feat(localai): runtimeclass nvidia

2024-12-22 21:51:20 +00:00 · 2024-02-21 16:41:05 +01:00 · 2024-02-21 16:41:05 +01:00 · 6dba741b68
commit 6dba741b68
parent d9a93a6fb9
2 changed files with 30 additions and 19 deletions
--- a/projects/ai/project.yaml
+++ b/projects/ai/project.yaml
@@ -3,9 +3,12 @@ config:
 apps:
 - name: localai
-  repoURL: https://go-skynet.github.io/helm-charts
+  # repoURL: https://go-skynet.github.io/helm-charts
-  chart: local-ai
+  # chart: local-ai
-  targetRevision: 3.1.0
+  # targetRevision: 3.1.0
  repoURL: https://github.com/nold360/localai-charts.git
  path: charts/local-ai
  targetRevision: feat/runtimeclass
 - name: anythingllm
  repo: bjw-s 
--- a/projects/ai/values/localai.yaml
+++ b/projects/ai/values/localai.yaml
@@ -1,9 +1,14 @@
 replicaCount: 1
 deployment:
-  image: quay.io/go-skynet/local-ai:master-ffmpeg-core
+  image: 
    repository: quay.io/go-skynet/local-ai
    tag: v2.8.0-cublas-cuda12
  pullPolicy: Always
  runtimeClassName: nvidia
  env:
-    threads: 16
+    # threads: 16
    context_size: 2048
    DEBUG: "true"
@@ -54,9 +59,6 @@ deployment:
  prompt_templates:
    # To use cloud provided (eg AWS) image, provide it like: 1234356789.dkr.ecr.us-REGION-X.amazonaws.com/busybox
    image: busybox
  pullPolicy: Always
  imagePullSecrets: []
    # - name: secret-names
 resources:
  requests:
@@ -80,17 +82,25 @@ models:
  # The list of URLs to download models from
  # Note: the name of the file will be the name of the loaded model
-  list:
+  list: []
-    - url: "https://gpt4all.io/models/ggml-gpt4all-j.bin"
+    # - url: "https://gpt4all.io/models/ggml-gpt4all-j.bin"
      # basicAuth: base64EncodedCredentials
-  persistence:
+persistence:
-    pvc:
+  models: 
    enabled: true
    annotations: {}
    storageClass: ssd
    accessModes: ReadWriteOnce
    size: 100Gi
-      accessModes:
+    globalMount: /models
-        - ReadWriteOnce
+  output:
-      storageClass: "ssd"
+    enabled: false
    annotations: {}
    storageClass: ssd
    accessModes: ReadWriteOnce
    size: 100Gi
    globalMount: /tmp/generated
 service:
  type: ClusterIP
@@ -115,5 +125,3 @@ ingress:
      hosts:
        - ai.dc
 image:
  pullPolicy: IfNotPresent