# hive-apps/projects/ai/values/localai.yaml
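# Usage sketch (assumption: this targets the go-skynet local-ai Helm chart;
# the release name and namespace below are illustrative):
#   helm repo add go-skynet https://go-skynet.github.io/helm-charts/
#   helm upgrade --install local-ai go-skynet/local-ai -n ai -f localai.yaml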

replicaCount: 1
deployment:
  image:
    repository: quay.io/go-skynet/local-ai
    tag: v2.16.0-cublas-cuda12-ffmpeg
    pullPolicy: Always
  runtimeClassName: nvidia
  env:
    threads: 16
    context_size: 4096
    DEBUG: "true"
    # Optional tuning/reference variables (commented out):
    # SINGLE_ACTIVE_BACKEND: "true"
    # PYTHON_GRPC_MAX_WORKERS: "1"
    # LLAMACPP_PARALLEL: "1"
    # PARALLEL_REQUESTS: "false"
    ## Specify a different bind address (defaults to ":8080")
    # ADDRESS=127.0.0.1:8080
    ## Define galleries.
    ## Models to install will be visible in `/models/available`
    # GALLERIES: '[{"name":"model-gallery", "url":"github:go-skynet/model-gallery/index.yaml"}, {"url": "github:go-skynet/model-gallery/huggingface.yaml","name":"huggingface"}]'
    ## Default path for models
    # MODELS_PATH=/models
    ## Enable debug mode
    # DEBUG=true
    ## Disable COMPEL (lets Stable Diffusion work; uncomment if you plan on using it)
    # COMPEL=0
    ## Enable/disable single active backend (useful if only one GPU is available)
    # SINGLE_ACTIVE_BACKEND=true
    ## Specify a build type. Available: cublas, openblas, clblas
    # BUILD_TYPE=cublas
    ## Uncomment and set to true to enable rebuilding from source
    # REBUILD=true
    ## Enable go tags, available: stablediffusion, tts
    ##   stablediffusion: image generation with stablediffusion
    ##   tts: text-to-speech with go-piper
    ## (requires REBUILD=true)
    # GO_TAGS=tts
    ## Path where generated images are stored
    # IMAGE_PATH=/tmp
    ## Specify a default upload limit in MB (whisper)
    # UPLOAD_LIMIT
    # HUGGINGFACEHUB_API_TOKEN=Token here
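    # Note (an assumption about the chart template): keys under env are
    # rendered as container environment variables, with lower-case keys
    # upper-cased (threads -> THREADS, context_size -> CONTEXT_SIZE).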
  modelsPath: "/models"
  download_model:
    # To use a cloud-provided image (e.g. AWS ECR), reference it like: 1234356789.dkr.ecr.us-REGION-X.amazonaws.com/busybox
    image: busybox
  prompt_templates:
    # To use a cloud-provided image (e.g. AWS ECR), reference it like: 1234356789.dkr.ecr.us-REGION-X.amazonaws.com/busybox
    image: busybox

resources:
  requests:
    memory: 100Mi
  limits:
    memory: 64Gi
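  # Sketch (assumption: the NVIDIA device plugin exposes nvidia.com/gpu):
  # to pin a GPU explicitly, in addition to runtimeClassName: nvidia above,
  # the limits block could be extended like so:
  #   limits:
  #     memory: 64Gi
  #     nvidia.com/gpu: 1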

# Note: the keys of this map will be the names of the prompt template files
promptTemplates:
  {}
  # ggml-gpt4all-j.tmpl: |
  #   The prompt below is a question to answer, a task to complete, or a conversation to respond to; decide which and write an appropriate response.
  #   ### Prompt:
  #   {{.Input}}
  #   ### Response:

# Models to download at runtime
models:
  # Whether to force download models even if they already exist
  forceDownload: false
  # The list of URLs to download models from
  # Note: the name of the file will be the name of the loaded model
  list: []
  # - url: "https://gpt4all.io/models/ggml-gpt4all-j.bin"
  #   basicAuth: base64EncodedCredentials
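  # A gallery reference can also be used as a URL (sketch, assuming the
  # LocalAI model-gallery addressing scheme):
  # - url: "github:go-skynet/model-gallery/gpt4all-j.yaml"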

persistence:
  models:
    enabled: true
    annotations: {}
    storageClass: ssd
    accessModes: ReadWriteOnce
    size: 100Gi
    globalMount: /models
  output:
    enabled: false
    annotations: {}
    storageClass: ssd
    accessModes: ReadWriteOnce
    size: 100Gi
    globalMount: /tmp/generated

service:
  type: ClusterIP
  port: 80

ingress:
  enabled: true
  className: "ingress-internal"
  annotations:
    cert-manager.io/cluster-issuer: vault-issuer
    nginx.ingress.kubernetes.io/proxy-buffering: "off"
    nginx.ingress.kubernetes.io/proxy-http-version: "1.1"
    nginx.ingress.kubernetes.io/chunked-transfer-encoding: "on"
    nginx.ingress.kubernetes.io/proxy-request-buffering: "off"
  hosts:
    - host: ai.dc
      paths:
        - path: /
          pathType: ImplementationSpecific
  tls:
    - secretName: localai-tls
      hosts:
        - ai.dc
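# Smoke test once deployed (sketch; host taken from the ingress above,
# /v1/models is LocalAI's OpenAI-compatible model listing endpoint):
#   curl -k https://ai.dc/v1/models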