;;; sigils/packages/llm.scm

(define-module (sigils packages llm)
  #:use-module (guix packages)
  #:use-module (guix git-download)
  #:use-module (guix build-system cmake)
  #:use-module (guix gexp)
  #:use-module ((guix licenses) #:prefix license:)
  #:use-module (gnu packages)
  #:use-module (gnu packages bash)
  #:use-module (gnu packages machine-learning)
  #:use-module (gnu packages maths)
  #:use-module (gnu packages pkg-config)
  #:use-module (gnu packages python)
  #:use-module (gnu packages python-xyz)
  #:use-module (gnu packages vulkan)
  #:use-module (gnu packages version-control))

(define-public llama-cpp
  ;; RELEASE-TAG is the upstream revision identifier.  It serves as the
  ;; `commit' of the Git reference, as the suffix of the package version, and
  ;; as the version component of the checkout's file name.
  (let ((release-tag "b4640"))
    (package
      (name "llama-cpp")
      (version (string-append "0.0.0-" release-tag))
      (source
       (origin
         (method git-fetch)
         (uri (git-reference
               (url "https://github.com/ggerganov/llama.cpp")
               (commit release-tag)))
         (file-name (git-file-name name release-tag))
         (sha256
          (base32 "13mc5cv3iwgc41k6mdpxh6cxb5a4zm3g1g9r3jr03fpw6426pvxr"))
         (patches
          (search-patches "llama-cpp-vulkan-optional.patch"))))
      (build-system cmake-build-system)
      (arguments
       (list
        #:configure-flags
        ;; Both BLAS flags point into the same OpenBLAS input, so resolve it
        ;; once and derive the header and library locations from it.
        #~(let ((openblas #$(this-package-input "openblas")))
            (list "-DBUILD_SHARED_LIBS=ON"
                  "-DGGML_VULKAN=ON"
                  "-DGGML_BLAS=ON"
                  "-DGGML_BLAS_VENDOR=OpenBLAS"
                  (string-append "-DBLAS_INCLUDE_DIRS=" openblas "/include")
                  (string-append "-DBLAS_LIBRARIES=" openblas
                                 "/lib/libopenblas.so")

                  ;; Switch off every CPU-specific code path: no
                  ;; '-march=native', no '-mfma', and no AVX2/AVX512 variants.
                  ;; See the `tunable?' property below.
                  "-DGGML_NATIVE=OFF"
                  "-DGGML_FMA=OFF"
                  "-DGGML_AVX2=OFF"
                  "-DGGML_AVX512=OFF"
                  "-DGGML_AVX512_VBMI=OFF"
                  "-DGGML_AVX512_VNNI=OFF"))

        ;; The python-build-system module is imported under the `python:'
        ;; prefix so that its phases do not clash with the CMake ones.
        #:modules '((ice-9 textual-ports)
                    (guix build utils)
                    ((guix build python-build-system) #:prefix python:)
                    (guix build cmake-build-system))
        #:imported-modules `(,@%cmake-build-system-modules
                             (guix build python-build-system))
        #:phases
        #~(modify-phases %standard-phases
            ;; Replace the hard-coded "/bin/sh" literal in the Vulkan shader
            ;; generator with the shell found among the build inputs.
            (add-after 'unpack 'patch-paths
              (lambda* (#:key inputs #:allow-other-keys)
                (substitute* "ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp"
                  (("\"/bin/sh\"")
                   (string-append "\"" (search-input-file inputs "/bin/sh") "\"")))))
            (add-after 'unpack 'disable-unrunable-tests
              (lambda _
                ;; test-eval-callback fetches an ML model over the network,
                ;; which is impossible inside the Guix build environment, so
                ;; its command is turned into a call to `true'.
                (substitute* "examples/eval-callback/CMakeLists.txt"
                  (("COMMAND llama-eval-callback")
                   "COMMAND true llama-eval-callback"))
                ;; Drop the line that registers test-chat.
                (substitute* "tests/CMakeLists.txt"
                  (("^    llama_target_and_test\\(test-chat\\.cpp\\)\n$") ""))))
            (add-before 'install 'install-python-scripts
              (lambda _
                (let ((bin-dir (string-append #$output "/bin/"))
                      (python-home #$(this-package-input "python")))
                  ;; Copy SCRIPT from the source tree into BIN-DIR, prefixed
                  ;; with a shebang that points at the python input, and mark
                  ;; it read-only and executable.  SCRIPT may be given with
                  ;; or without its ".py" extension; the installed file keeps
                  ;; the name exactly as given.
                  (define (install-script script)
                    (let* ((extension (if (string-suffix? ".py" script)
                                          ""
                                          ".py"))
                           (script-source (string-append "../source/"
                                                         script extension))
                           (target (string-append bin-dir script))
                           (contents (call-with-input-file script-source
                                       get-string-all)))
                      (call-with-output-file target
                        (lambda (port)
                          (format port "#!~a/bin/python3\n~a"
                                  python-home contents)))
                      (chmod target #o555)))
                  (mkdir-p bin-dir)
                  (for-each install-script
                            '("convert_hf_to_gguf"
                              "convert_llama_ggml_to_gguf"
                              "convert_hf_to_gguf_update.py")))))
            ;; Run python-build-system's `wrap' phase right after the scripts
            ;; above have been installed.
            (add-after 'install-python-scripts 'wrap-python-scripts
              (assoc-ref python:%standard-phases 'wrap))
            ;; Delete every installed file under bin/ whose name starts with
            ;; "test-".
            (add-after 'install 'remove-tests
              (lambda _
                (let ((bin-dir (string-append #$output "/bin")))
                  (for-each delete-file
                            (find-files bin-dir "^test-"))))))))
      (inputs (list python vulkan-headers vulkan-loader))
      (native-inputs (list pkg-config shaderc bash git))
      (propagated-inputs
       (list python-numpy python-pytorch python-sentencepiece openblas))
      (properties '((tunable? . #true))) ;use AVX512, FMA, etc. when available
      (home-page "https://github.com/ggerganov/llama.cpp")
      (synopsis "Port of Facebook's LLaMA model in C/C++")
      (description "This package provides a port to Facebook's LLaMA collection
of foundation language models.  It requires models parameters to be downloaded
independently to be able to run a LLaMA model.")
      (license license:expat))))