
Commit 1e7c5d0

fix: bugs (#241)
* fix: avoid duplicate context shifts
* fix: `onProgress` on `ModelDownloader`
* fix: re-enable CUDA binary compression
* fix: more thorough tests before loading a binary
* fix: increase compatibility of prebuilt binaries
1 parent 0d40ffc commit 1e7c5d0

File tree

8 files changed: +142 −99 lines changed


.github/workflows/build.yml

Lines changed: 71 additions & 77 deletions
```diff
@@ -53,24 +53,17 @@ jobs:
       fail-fast: false
       matrix:
         config:
-          - name: "Windows MSVC"
+          - name: "Windows for x64"
+            os: windows-2019
+            artifact: "win-x64"
+          - name: "Windows for Arm"
             os: windows-2022
-            cc: "cl"
-            cxx: "cl"
-            environment_script: "C:/Program Files (x86)/Microsoft Visual Studio/2019/Enterprise/VC/Auxiliary/Build/vcvars64.bat"
-            generators: "Visual Studio 17 2022"
-            artifact: "win"
-          - name: "Ubuntu GCC"
-            os: ubuntu-22.04
-            cc: "gcc"
-            cxx: "g++"
-            generators: "Ninja"
+            artifact: "win-arm"
+          - name: "Ubuntu"
+            os: ubuntu-20.04
             artifact: "linux"
-          - name: "macOS Clang"
+          - name: "macOS"
             os: macos-13
-            cc: "clang"
-            cxx: "clang++"
-            generators: "Xcode"
             artifact: "mac"

     steps:
@@ -97,7 +90,7 @@ jobs:
           choco install ninja cmake

       - name: Install dependencies on Ubuntu
-        if: startsWith(matrix.config.name, 'Ubuntu GCC')
+        if: matrix.config.name == 'Ubuntu'
         run: |
           sudo apt-get update
           sudo apt-get install ninja-build cmake libtbb-dev g++-aarch64-linux-gnu gcc-aarch64-linux-gnu g++-arm-linux-gnueabihf gcc-arm-linux-gnueabihf
@@ -108,24 +101,24 @@ jobs:
           which arm-linux-gnueabihf-gcc
           which arm-linux-gnueabihf-g++

-      - name: Install Cuda on Windows
-        if: startsWith(matrix.config.os, 'windows')
+      - name: Install Cuda on Windows for x64
+        if: matrix.config.name == 'Windows for x64'
         uses: Jimver/cuda-toolkit@v0.2.15
         with:
-          cuda: '12.4.1'
+          cuda: '12.2.0'
           method: 'network'
           sub-packages: '["nvcc", "cudart", "cublas", "cublas_dev", "thrust", "visual_studio_integration"]'
           use-local-cache: false

       - name: Install Cuda on Ubuntu
-        if: startsWith(matrix.config.name, 'Ubuntu GCC')
+        if: matrix.config.name == 'Ubuntu'
         uses: Jimver/cuda-toolkit@v0.2.15
         with:
-          cuda: '12.4.1'
+          cuda: '12.2.0'
           method: 'network'

-      - name: Install Vulkan SDK on Windows
-        if: startsWith(matrix.config.os, 'windows')
+      - name: Install Vulkan SDK on Windows for x64
+        if: matrix.config.name == 'Windows for x64'
         shell: powershell
         env:
           VULKAN_VERSION: 1.3.261.1
@@ -136,15 +129,15 @@ jobs:
           Add-Content $env:GITHUB_PATH "C:\VulkanSDK\${env:VULKAN_VERSION}\bin"

       - name: Install Vulkan SDK on Ubuntu
-        if: startsWith(matrix.config.name, 'Ubuntu GCC')
+        if: matrix.config.name == 'Ubuntu'
         run: |
-          wget -qO- https://packages.lunarg.com/lunarg-signing-key-pub.asc | sudo tee /etc/apt/trusted.gpg.d/lunarg.asc
-          sudo wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list
+          wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | sudo apt-key add -
+          sudo wget -qO /etc/apt/sources.list.d/lunarg-vulkan-focal.list https://packages.lunarg.com/vulkan/lunarg-vulkan-focal.list
           sudo apt update
           sudo apt install vulkan-sdk

       - name: Install dependencies on macOS
-        if: startsWith(matrix.config.os, 'macos')
+        if: matrix.config.name == 'macOS'
         run: |
           brew install cmake ninja
           alias make=cmake
@@ -203,10 +196,11 @@ jobs:
             }

             // build binaries
-            if (process.env.ARTIFACT_NAME === "win") {
+            if (process.env.ARTIFACT_NAME === "win-x64") {
                 await buildBinary("x64", ["--gpu", "false"]);
                 await buildBinary("x64", ["--gpu", "cuda"]);
                 await buildBinary("x64", ["--gpu", "vulkan"]);
+            } else if (process.env.ARTIFACT_NAME === "win-arm") {
                 await buildBinary("arm64", ["--gpu", "false"], windowsOnArmNodeVersion);
             } else if (process.env.ARTIFACT_NAME === "linux") {
                 await buildBinary("x64", ["--gpu", "false"]);
@@ -234,53 +228,53 @@ jobs:

           EOF

-      # - name: Cache UPX
-      #   id: cache-upx
-      #   uses: actions/cache@v4
-      #   with:
-      #     path: "upxInstallations/**"
-      #     key: cache-upx-${{ runner.os }}-${{ github.workflow }}
-      #
-      # - name: Compress CUDA binary on Windows
-      #   if: startsWith(matrix.config.os, 'windows')
-      #   shell: bash
-      #   env:
-      #     UPX_VERSION: 4.2.4
-      #   run: |
-      #     mkdir -p upxInstallations
-      #
-      #     if [ ! -f "./upxInstallations/upx-${UPX_VERSION}-win64.zip" ]; then
-      #       pushd upxInstallations
-      #       curl -OL "https://github.com/upx/upx/releases/download/v${UPX_VERSION}/upx-${UPX_VERSION}-win64.zip"
-      #       popd
-      #     fi
-      #
-      #     mkdir -p upx
-      #     unzip -d ./upx "./upxInstallations/upx-${UPX_VERSION}-win64.zip"
-      #     mv "./upx/upx-${UPX_VERSION}-win64" ./upx/upx
-      #
-      #     ./upx/upx/upx.exe --best ./bins/win-x64-cuda/llama-addon.node
-      #
-      # - name: Compress CUDA binary on Ubuntu
-      #   if: startsWith(matrix.config.name, 'Ubuntu GCC')
-      #   env:
-      #     UPX_VERSION: 4.2.4
-      #   run: |
-      #     mkdir -p upxInstallations
-      #
-      #     if [ ! -f "./upxInstallations/upx-${UPX_VERSION}-amd64_linux.tar.xz" ]; then
-      #       pushd upxInstallations
-      #       curl -OL "https://github.com/upx/upx/releases/download/v${UPX_VERSION}/upx-${UPX_VERSION}-amd64_linux.tar.xz"
-      #       popd
-      #     fi
-      #
-      #     mkdir -p upx
-      #     tar -xvf "./upxInstallations/upx-${UPX_VERSION}-amd64_linux.tar.xz" -C ./upx
-      #     mv "./upx/upx-${UPX_VERSION}-amd64_linux" ./upx/upx
-      #
-      #     chmod +x ./bins/linux-x64-cuda/llama-addon.node
-      #     ./upx/upx/upx --best ./bins/linux-x64-cuda/llama-addon.node
-      #     chmod -x ./bins/linux-x64-cuda/llama-addon.node
+      - name: Cache UPX
+        id: cache-upx
+        uses: actions/cache@v4
+        with:
+          path: "upxInstallations/**"
+          key: cache-upx-${{ runner.os }}-${{ github.workflow }}
+
+      - name: Compress CUDA binary on Windows
+        if: matrix.config.name == 'Windows for x64'
+        shell: bash
+        env:
+          UPX_VERSION: 4.2.4
+        run: |
+          mkdir -p upxInstallations
+
+          if [ ! -f "./upxInstallations/upx-${UPX_VERSION}-win64.zip" ]; then
+            pushd upxInstallations
+            curl -OL "https://github.com/upx/upx/releases/download/v${UPX_VERSION}/upx-${UPX_VERSION}-win64.zip"
+            popd
+          fi
+
+          mkdir -p upx
+          unzip -d ./upx "./upxInstallations/upx-${UPX_VERSION}-win64.zip"
+          mv "./upx/upx-${UPX_VERSION}-win64" ./upx/upx
+
+          ./upx/upx/upx.exe --best ./bins/win-x64-cuda/llama-addon.node
+
+      - name: Compress CUDA binary on Ubuntu
+        if: matrix.config.name == 'Ubuntu'
+        env:
+          UPX_VERSION: 4.2.4
+        run: |
+          mkdir -p upxInstallations
+
+          if [ ! -f "./upxInstallations/upx-${UPX_VERSION}-amd64_linux.tar.xz" ]; then
+            pushd upxInstallations
+            curl -OL "https://github.com/upx/upx/releases/download/v${UPX_VERSION}/upx-${UPX_VERSION}-amd64_linux.tar.xz"
+            popd
+          fi
+
+          mkdir -p upx
+          tar -xvf "./upxInstallations/upx-${UPX_VERSION}-amd64_linux.tar.xz" -C ./upx
+          mv "./upx/upx-${UPX_VERSION}-amd64_linux" ./upx/upx
+
+          chmod +x ./bins/linux-x64-cuda/llama-addon.node
+          ./upx/upx/upx --best ./bins/linux-x64-cuda/llama-addon.node
+          chmod -x ./bins/linux-x64-cuda/llama-addon.node

       - name: Publish artifact
         uses: actions/upload-artifact@v4
@@ -510,7 +504,7 @@ jobs:
           - name: "Windows"
             os: windows-2022
           - name: "Ubuntu"
-            os: ubuntu-22.04
+            os: ubuntu-20.04
           - name: "macOS"
             os: macos-13

@@ -521,7 +515,7 @@ jobs:
           node-version: "20"

       - name: Install dependencies on Ubuntu
-        if: startsWith(matrix.config.name, 'Ubuntu')
+        if: matrix.config.name == 'Ubuntu'
         run: |
           sudo apt-get update
           sudo apt-get install libarchive-tools rpm
```
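
The matrix now splits the old `win` artifact into `win-x64` and `win-arm`, and the Linux jobs move to ubuntu-20.04 so the produced binaries link against an older glibc and run on more distributions (the "increase compatibility of prebuilt binaries" fix). The UPX compression steps are also re-enabled, keyed to the new matrix names. As a hypothetical illustration (not code from this commit), a loader could map the running process onto the resulting artifact names like so:

```ts
// Hypothetical helper, not part of this commit: maps the current process
// onto the artifact names defined in the build matrix above.
function getPrebuiltArtifactName(
    platform: NodeJS.Platform = process.platform,
    arch: string = process.arch
): string {
    if (platform === "win32")
        return arch === "arm64" ? "win-arm" : "win-x64";
    else if (platform === "darwin")
        return "mac";

    return "linux";
}
```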

src/bindings/getLlama.ts

Lines changed: 19 additions & 5 deletions
```diff
@@ -190,11 +190,14 @@ const defaultBuildOption: Exclude<LlamaOptions["build"], undefined> = runningInE

 /**
  * Get a `llama.cpp` binding.
- * Defaults to prefer a prebuilt binary, and fallbacks to building from source if a prebuilt binary is not found.
- * Pass `"lastCliBuild"` to default to use the last successful build created using the `download` or `build` CLI commands if one exists.
+ *
+ * Defaults to use a local binary built using the `download` or `build` CLI commands if one exists,
+ * otherwise, uses a prebuilt binary, and fallbacks to building from source if a prebuilt binary is not found.
+ *
+ * Pass `"lastBuild"` to default to use the last successful build created using the `download` or `build` CLI commands if one exists.
  */
-export async function getLlama(type: "lastBuild", lastBuildOptions?: LastBuildOptions): Promise<Llama>;
 export async function getLlama(options?: LlamaOptions): Promise<Llama>;
+export async function getLlama(type: "lastBuild", lastBuildOptions?: LastBuildOptions): Promise<Llama>;
 export async function getLlama(options?: LlamaOptions | "lastBuild", lastBuildOptions?: LastBuildOptions) {
     if (options === "lastBuild") {
         const lastBuildInfo = await getLastBuildInfo();
```
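
The corrected doc comment (the old text referenced `"lastCliBuild"`, a literal the overloads never accepted) describes both call forms, and the overloads were reordered so the options-based form is listed first, which is the signature editors surface by default. A minimal usage sketch, assuming the package's public entry point is `node-llama-cpp`:

```ts
import {getLlama} from "node-llama-cpp";

// default resolution order: local CLI build -> prebuilt binary -> build from source
const llama = await getLlama();

// explicitly use the last successful `download`/`build` CLI build
const llamaFromLastBuild = await getLlama("lastBuild");
```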
```diff
@@ -515,14 +518,22 @@ async function loadExistingLlamaBinary({
                     skipLlamaInit,
                     debug
                 });
-            } else if (progressLogs)
+            } else if (progressLogs) {
+                const binaryDescription = describeBinary({
+                    ...buildOptions,
+                    customCmakeOptions: existingPrebuiltBinaryMustMatchBuildOptions
+                        ? buildOptions.customCmakeOptions
+                        : new Map()
+                });
                 console.warn(
-                    getConsoleLogPrefix() + "The prebuilt binary is not compatible with the current system" + (
+                    getConsoleLogPrefix() +
+                    `The prebuilt ${binaryDescription} is not compatible with the current system` + (
                         fallbackMessage != null
                             ? ", " + fallbackMessage
                             : ""
                     )
                 );
+            }
         } catch (err) {
             const binaryDescription = describeBinary({
                 ...buildOptions,
```
```diff
@@ -687,6 +698,9 @@ function getShouldTestBinaryBeforeLoading({
             platformInfo.version !== buildMetadata.buildOptions.platformInfo.version
         )
             return true;
+    } else if (platform === "win") {
+        if (buildMetadata.buildOptions.gpu !== false)
+            return true;
     }

     return false;
```
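
This new branch means any GPU-enabled build on Windows is test-loaded before use, in line with the "more thorough tests before loading a binary" fix. A minimal sketch of the resulting decision, with assumed shapes for the arguments (the real function takes a destructured options object):

```ts
// Minimal sketch (assumed shapes) of the decision in getShouldTestBinaryBeforeLoading:
type BuildGpu = false | "cuda" | "vulkan" | "metal";

function shouldTestBeforeLoading(
    platform: string,
    currentPlatformVersion: string,
    binary: {platformVersion: string, gpu: BuildGpu}
): boolean {
    // branch visible in the diff context: re-test when the platform version
    // the binary was built against differs from the current one
    if (currentPlatformVersion !== binary.platformVersion)
        return true;

    // new in this commit: on Windows, always test GPU-enabled binaries
    // before loading them into the main process
    if (platform === "win" && binary.gpu !== false)
        return true;

    return false;
}
```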

src/bindings/utils/testBindingBinary.ts

Lines changed: 2 additions & 0 deletions
```diff
@@ -101,6 +101,8 @@ if (process.env.TEST_BINDING_CP === "true" && process.send != null) {
         try {
             const binding: BindingModule = require(message.bindingBinaryPath);
             await binding.init();
+            binding.getGpuVramInfo();
+            binding.getGpuDeviceInfo();
             process.send({type: "done"} satisfies ChildToParentMessage);
         } catch (err) {
             console.error(err);
```
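
With these two calls, a binary that initializes but crashes on its first GPU query now fails the pre-load test instead of crashing the host process. For context, a sketch of the parent side of this handshake, inferred from the `TEST_BINDING_CP` guard and the `{type: "done"}` reply visible above; the function name and exact message protocol here are assumptions:

```ts
import {fork} from "node:child_process";

// Assumed parent-side flow: fork this module with TEST_BINDING_CP=true,
// send it the binary path over IPC, and treat anything other than a
// {type: "done"} reply (e.g. a crash or nonzero exit) as an incompatible binary.
async function testBindingBinaryInChildProcess(
    testerModulePath: string,
    bindingBinaryPath: string
): Promise<boolean> {
    return await new Promise<boolean>((resolve) => {
        const child = fork(testerModulePath, [], {
            env: {...process.env, TEST_BINDING_CP: "true"}
        });

        child.on("message", (message: any) => {
            if (message?.type === "done") {
                resolve(true);
                child.kill();
            }
        });
        child.on("exit", (code) => resolve(code === 0));

        child.send({bindingBinaryPath});
    });
}
```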

src/cli/commands/inspect/commands/InspectGpuCommand.ts

Lines changed: 39 additions & 7 deletions
```diff
@@ -10,6 +10,7 @@ import {getPrettyBuildGpuName} from "../../../../bindings/consts.js";
 import {getModuleVersion} from "../../../../utils/getModuleVersion.js";
 import {withCliCommandDescriptionDocsUrl} from "../../../utils/withCliCommandDescriptionDocsUrl.js";
 import {documentationPageUrls} from "../../../../config.js";
+import {Llama} from "../../../../bindings/Llama.js";

 type InspectGpuCommand = {
     // no options for now
@@ -26,6 +27,14 @@ export const InspectGpuCommand: CommandModule<object, InspectGpuCommand> = {
         const arch = process.arch;
         const availableComputeLayers = await detectAvailableComputeLayers({platform});
         const gpusToLogVramUsageOf: BuildGpu[] = [];
+        const gpuToLlama = new Map<BuildGpu, Llama | undefined>();
+
+        async function loadLlamaForGpu(gpu: BuildGpu) {
+            if (!gpuToLlama.has(gpu))
+                gpuToLlama.set(gpu, await getLlamaForGpu(gpu));
+
+            return gpuToLlama.get(gpu);
+        }

         console.info(`${chalk.yellow("OS:")} ${os.type()} ${os.release()} ${chalk.dim("(" + os.arch() + ")")}`);

@@ -62,28 +71,44 @@ export const InspectGpuCommand: CommandModule<object, InspectGpuCommand> = {
         } else if (availableComputeLayers.cuda.hasCudaRuntime && !availableComputeLayers.cuda.hasNvidiaDriver) {
             console.info(`${chalk.yellow("CUDA:")} ${chalk.red("CUDA runtime is installed, but NVIDIA driver is not")}`);
         } else if (availableComputeLayers.cuda.hasCudaRuntime && availableComputeLayers.cuda.hasNvidiaDriver) {
-            console.info(`${chalk.yellow("CUDA:")} ${chalk.green("available")}`);
-            gpusToLogVramUsageOf.push("cuda");
+            const llama = await loadLlamaForGpu("cuda");
+
+            if (llama == null)
+                console.info(`${chalk.yellow("CUDA:")} ${chalk.red("CUDA is detected, but using it failed")}`);
+            else {
+                console.info(`${chalk.yellow("CUDA:")} ${chalk.green("available")}`);
+                gpusToLogVramUsageOf.push("cuda");
+            }
         }

         if (availableComputeLayers.vulkan) {
-            console.info(`${chalk.yellow("Vulkan:")} ${chalk.green("available")}`);
-            gpusToLogVramUsageOf.push("vulkan");
+            const llama = await loadLlamaForGpu("vulkan");
+
+            if (llama == null)
+                console.info(`${chalk.yellow("Vulkan:")} ${chalk.red("Vulkan is detected, but using it failed")}`);
+            else {
+                console.info(`${chalk.yellow("Vulkan:")} ${chalk.green("available")}`);
+                gpusToLogVramUsageOf.push("vulkan");
+            }
         }

         for (const gpu of gpusToLogVramUsageOf) {
+            const llama = gpuToLlama.get(gpu);
+            if (llama == null)
+                continue;
+
             console.info();
-            await logGpuVramUsage(gpu);
+            await logGpuVramUsage(gpu, llama);
         }

         console.info();
         await logRamUsage();
     }
 };

-async function logGpuVramUsage(gpu: BuildGpu) {
+async function getLlamaForGpu(gpu: BuildGpu) {
     try {
-        const llama = await getLlamaForOptions({
+        return await getLlamaForOptions({
             gpu: gpu,
             build: "never",
             progressLogs: false,
@@ -92,6 +117,13 @@ async function logGpuVramUsage(gpu: BuildGpu) {
         }, {
             skipLlamaInit: true
         });
+    } catch (err) {
+        return undefined;
+    }
+}
+
+async function logGpuVramUsage(gpu: BuildGpu, llama: Llama) {
+    try {
         const gpuName = getPrettyBuildGpuName(gpu);
         const vramStatus = await llama.getVramState();
         const gpuDeviceNames = await llama.getGpuDeviceNames();
```
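
A note on the caching above: `loadLlamaForGpu` gates on `Map.has()` rather than on the stored value, so a backend that failed to load is cached as `undefined` and is never probed twice, and the result from the CUDA/Vulkan availability checks is reused by the VRAM-usage loop. The same pattern in isolation, as a minimal sketch:

```ts
// Minimal sketch of the memoization pattern used above: cache failures
// (undefined) as well as successes, so each backend is probed at most once.
const cache = new Map<string, object | undefined>();

async function probeOnce(
    key: string,
    probe: () => Promise<object | undefined>
): Promise<object | undefined> {
    if (!cache.has(key))
        cache.set(key, await probe());

    return cache.get(key);
}
```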
