Skip to content

Commit b89ad2d

Browse files
authored
feat: compress CUDA prebuilt binaries (#236)
* feat: compress CUDA prebuilt binaries
* feat: automatically solve more CUDA compilation errors
1 parent 23012d1 commit b89ad2d

File tree

3 files changed

+71
-14
lines changed

3 files changed

+71
-14
lines changed

.github/workflows/build.yml

Lines changed: 49 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -126,6 +126,7 @@ jobs:
126126

127127
- name: Install Vulkan SDK on Windows
128128
if: startsWith(matrix.config.os, 'windows')
129+
shell: powershell
129130
env:
130131
VULKAN_VERSION: 1.3.261.1
131132
run: |
@@ -233,6 +234,54 @@ jobs:
233234
234235
EOF
235236
237+
- name: Cache UPX
238+
id: cache-upx
239+
uses: actions/cache@v4
240+
with:
241+
path: "upxInstallations/**"
242+
key: cache-upx-${{ runner.os }}-${{ github.workflow }}
243+
244+
- name: Compress CUDA binary on Windows
245+
if: startsWith(matrix.config.os, 'windows')
246+
shell: bash
247+
env:
248+
UPX_VERSION: 4.2.4
249+
run: |
250+
mkdir -p upxInstallations
251+
252+
if [ ! -f "./upxInstallations/upx-${UPX_VERSION}-win64.zip" ]; then
253+
pushd upxInstallations
254+
curl -OL "https://github.com/upx/upx/releases/download/v${UPX_VERSION}/upx-${UPX_VERSION}-win64.zip"
255+
popd
256+
fi
257+
258+
mkdir -p upx
259+
unzip -d ./upx "./upxInstallations/upx-${UPX_VERSION}-win64.zip"
260+
mv "./upx/upx-${UPX_VERSION}-win64" ./upx/upx
261+
262+
./upx/upx/upx.exe --best ./bins/win-x64-cuda/llama-addon.node
263+
264+
- name: Compress CUDA binary on Ubuntu
265+
if: startsWith(matrix.config.name, 'Ubuntu GCC')
266+
env:
267+
UPX_VERSION: 4.2.4
268+
run: |
269+
mkdir -p upxInstallations
270+
271+
if [ ! -f "./upxInstallations/upx-${UPX_VERSION}-amd64_linux.tar.xz" ]; then
272+
pushd upxInstallations
273+
curl -OL "https://github.com/upx/upx/releases/download/v${UPX_VERSION}/upx-${UPX_VERSION}-amd64_linux.tar.xz"
274+
popd
275+
fi
276+
277+
mkdir -p upx
278+
tar -xvf "./upxInstallations/upx-${UPX_VERSION}-amd64_linux.tar.xz" -C ./upx
279+
mv "./upx/upx-${UPX_VERSION}-amd64_linux" ./upx/upx
280+
281+
chmod +x ./bins/linux-x64-cuda/llama-addon.node
282+
./upx/upx/upx --best ./bins/linux-x64-cuda/llama-addon.node
283+
chmod -x ./bins/linux-x64-cuda/llama-addon.node
284+
236285
- name: Publish artifact
237286
uses: actions/upload-artifact@v4
238287
with:

src/bindings/utils/compileLLamaCpp.ts

Lines changed: 11 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -192,9 +192,17 @@ export async function compileLlamaCpp(buildOptions: BuildOptions, compileOptions
192192
chalk.yellow('To install Xcode command line tools, run "xcode-select --install"')
193193
);
194194
else if (buildOptions.gpu === "cuda") {
195-
if (!ignoreWorkarounds.includes("cudaArchitecture") && (platform === "win" || platform === "linux") && err instanceof SpawnError &&
196-
err.combinedStd.toLowerCase().includes("Failed to detect a default CUDA architecture".toLowerCase())
197-
) {
195+
if (!ignoreWorkarounds.includes("cudaArchitecture") && (platform === "win" || platform === "linux") &&
196+
err instanceof SpawnError && (
197+
err.combinedStd.toLowerCase().includes("Failed to detect a default CUDA architecture".toLowerCase()) || (
198+
err.combinedStd.toLowerCase().includes(
199+
"Tell CMake where to find the compiler by setting either the environment".toLowerCase()
200+
) &&
201+
err.combinedStd.toLowerCase().includes(
202+
'variable "CUDACXX" or the CMake cache entry CMAKE_CUDA_COMPILER to the full'.toLowerCase()
203+
)
204+
)
205+
)) {
198206
for (const nvccPath of await getCudaNvccPaths()) {
199207
if (buildOptions.progressLogs)
200208
console.info(

test/modelDependent/stableCode/stableCodeModelGpuLayersOptions.test.ts

Lines changed: 11 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -102,7 +102,7 @@ describe("stableCode", () => {
102102
expect(res.contextSize).to.toMatchInlineSnapshot("8687");
103103
}
104104
try {
105-
resolveGpuLayers(16, {
105+
await resolveGpuLayers(16, {
106106
totalVram: s1GB * 6,
107107
freeVram: s1GB * 0
108108
});
@@ -111,7 +111,7 @@ describe("stableCode", () => {
111111
expect(err).toMatchInlineSnapshot("[Error: Not enough VRAM to fit the model with the specified settings]");
112112
}
113113
try {
114-
resolveGpuLayers(16, {
114+
await resolveGpuLayers(16, {
115115
totalVram: s1GB * 6,
116116
freeVram: s1GB * 0.2
117117
});
@@ -165,7 +165,7 @@ describe("stableCode", () => {
165165
expect(res.contextSize).to.toMatchInlineSnapshot("10905");
166166
}
167167
try {
168-
resolveGpuLayers(32, {
168+
await resolveGpuLayers(32, {
169169
totalVram: s1GB * 6,
170170
freeVram: s1GB * 0.2
171171
});
@@ -214,7 +214,7 @@ describe("stableCode", () => {
214214
expect(res.contextSize).to.toMatchInlineSnapshot("10905");
215215
}
216216
try {
217-
resolveGpuLayers(33, {
217+
await resolveGpuLayers(33, {
218218
totalVram: s1GB * 6,
219219
freeVram: s1GB * 0.2
220220
});
@@ -255,7 +255,7 @@ describe("stableCode", () => {
255255

256256
it('attempts to resolve "max"', async () => {
257257
try {
258-
resolveGpuLayers("max", {
258+
await resolveGpuLayers("max", {
259259
totalVram: s1GB * 6,
260260
freeVram: s1GB * 0
261261
});
@@ -265,7 +265,7 @@ describe("stableCode", () => {
265265
}
266266

267267
try {
268-
resolveGpuLayers("max", {
268+
await resolveGpuLayers("max", {
269269
totalVram: s1GB * 6,
270270
freeVram: s1GB * 0.2
271271
});
@@ -275,7 +275,7 @@ describe("stableCode", () => {
275275
}
276276

277277
try {
278-
resolveGpuLayers("max", {
278+
await resolveGpuLayers("max", {
279279
totalVram: s1GB * 6,
280280
freeVram: s1GB * 3.2
281281
});
@@ -467,7 +467,7 @@ describe("stableCode", () => {
467467
expect(res.contextSize).to.toMatchInlineSnapshot("16384");
468468
}
469469
try {
470-
resolveGpuLayers({min: 2}, {
470+
await resolveGpuLayers({min: 2}, {
471471
totalVram: s1GB * 6,
472472
freeVram: s1GB * 0
473473
});
@@ -476,7 +476,7 @@ describe("stableCode", () => {
476476
expect(err).toMatchInlineSnapshot("[Error: Not enough VRAM to fit the model with the specified settings]");
477477
}
478478
try {
479-
resolveGpuLayers({min: 2, max: 4}, {
479+
await resolveGpuLayers({min: 2, max: 4}, {
480480
totalVram: s1GB * 6,
481481
freeVram: s1GB * 0
482482
});
@@ -494,7 +494,7 @@ describe("stableCode", () => {
494494
expect(res.contextSize).to.toMatchInlineSnapshot("13167");
495495
}
496496
try {
497-
resolveGpuLayers({min: 16}, {
497+
await resolveGpuLayers({min: 16}, {
498498
totalVram: s1GB * 6,
499499
freeVram: s1GB * 2
500500
});
@@ -597,7 +597,7 @@ describe("stableCode", () => {
597597
}
598598
{
599599
try {
600-
resolveGpuLayers({min: 1, fitContext: {contextSize: 8192}}, {
600+
await resolveGpuLayers({min: 1, fitContext: {contextSize: 8192}}, {
601601
totalVram: s1GB * 0.2,
602602
freeVram: s1GB * 0
603603
});

0 commit comments

Comments
 (0)