Skip to content

Commit 3567250

Browse files
authored
[SPARKNLP-1194] Upgrade jsl-llamacpp to newest version (#14633)
* Upgrade jsl-llama.cpp
  - Embeddings Passing
  - Adjust metadata extraction
  - Fix changed parameters
  - Add default system prompt
  - Default params for AutoGGUFEmbeddings
* jsl-llama.cpp upgrade python side
1 parent 7e6e464 commit 3567250

File tree

16 files changed: +400 / -348 lines changed

16 files changed: +400 / -348 lines changed

build.sbt

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,7 @@ name := getPackageName(is_silicon, is_gpu, is_aarch64)
66

77
organization := "com.johnsnowlabs.nlp"
88

9-
version := "6.0.5"
9+
version := "6.1.0-rc1"
1010

1111
(ThisBuild / scalaVersion) := scalaVer
1212

project/Dependencies.scala

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -128,11 +128,11 @@ object Dependencies {
128128
val azureIdentity = "com.azure" % "azure-identity" % azureIdentityVersion % Provided
129129
val azureStorage = "com.azure" % "azure-storage-blob" % azureStorageVersion % Provided
130130

131-
val llamaCppVersion = "0.1.6"
132-
val llamaCppCPU = "com.johnsnowlabs.nlp" %% "jsl-llamacpp-cpu" % llamaCppVersion
133-
val llamaCppGPU = "com.johnsnowlabs.nlp" %% "jsl-llamacpp-gpu" % llamaCppVersion
134-
val llamaCppSilicon = "com.johnsnowlabs.nlp" %% "jsl-llamacpp-silicon" % llamaCppVersion
135-
val llamaCppAarch64 = "com.johnsnowlabs.nlp" %% "jsl-llamacpp-aarch64" % llamaCppVersion
131+
val llamaCppVersion = "1.0.1"
132+
val llamaCppCPU = "com.johnsnowlabs.nlp" % "jsl-llamacpp-cpu" % llamaCppVersion
133+
val llamaCppGPU = "com.johnsnowlabs.nlp" % "jsl-llamacpp-gpu" % llamaCppVersion
134+
val llamaCppSilicon = "com.johnsnowlabs.nlp" % "jsl-llamacpp-silicon" % llamaCppVersion
135+
val llamaCppAarch64 = "com.johnsnowlabs.nlp" % "jsl-llamacpp-aarch64" % llamaCppVersion
136136

137137
val jsoupVersion = "1.18.2"
138138

python/sparknlp/annotator/seq2seq/auto_gguf_model.py

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -253,7 +253,9 @@ def __init__(self, classname="com.johnsnowlabs.nlp.annotators.seq2seq.AutoGGUFMo
253253
nCtx=4096,
254254
nBatch=512,
255255
embedding=False,
256-
nPredict=100
256+
nPredict=100,
257+
nGpuLayers=99,
258+
systemPrompt="You are a helpful assistant."
257259
)
258260

259261
@staticmethod

python/sparknlp/common/properties.py

Lines changed: 100 additions & 66 deletions
Large diffs are not rendered by default.

python/test/annotator/embeddings/auto_gguf_embeddings_test.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -153,8 +153,8 @@ def runTest(self):
153153
.setInputCols("document")
154154
.setOutputCol("embeddings")
155155
.setBatchSize(4)
156-
.setNUbatch(2048)
157-
.setNBatch(2048)
156+
.setNUbatch(4096)
157+
.setNBatch(4096)
158158
)
159159
pipeline = Pipeline().setStages([self.document_assembler, model])
160160
results = pipeline.fit(self.long_data).transform(self.long_data)

python/test/annotator/seq2seq/auto_gguf_model_test.py

Lines changed: 15 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -49,7 +49,7 @@ def runTest(self):
4949
.setOutputCol("completions")
5050
.setBatchSize(4)
5151
.setNPredict(20)
52-
.setNGpuLayers(5)
52+
.setNGpuLayers(99)
5353
.setTemperature(0.4)
5454
.setTopK(40)
5555
.setTopP(0.9)
@@ -78,7 +78,7 @@ def runTest(self):
7878
DocumentAssembler().setInputCol("text").setOutputCol("document")
7979
)
8080

81-
model = (
81+
model: AutoGGUFModel = (
8282
AutoGGUFModel.pretrained()
8383
.setInputCols("document")
8484
.setOutputCol("completions")
@@ -87,23 +87,23 @@ def runTest(self):
8787

8888
# Model Parameters
8989
model.setNThreads(8)
90-
model.setNThreadsDraft(8)
90+
# model.setNThreadsDraft(8)
9191
model.setNThreadsBatch(8)
92-
model.setNThreadsBatchDraft(8)
92+
# model.setNThreadsBatchDraft(8)
9393
model.setNCtx(512)
9494
model.setNBatch(32)
9595
model.setNUbatch(32)
9696
model.setNDraft(5)
97-
model.setNChunks(-1)
98-
model.setNSequences(1)
99-
model.setPSplit(0.1)
97+
# model.setNChunks(-1)
98+
# model.setNSequences(1)
99+
# model.setPSplit(0.1)
100100
model.setNGpuLayers(99)
101101
model.setNGpuLayersDraft(99)
102102
model.setGpuSplitMode("NONE")
103103
model.setMainGpu(0)
104-
model.setTensorSplit([])
105-
model.setGrpAttnN(1)
106-
model.setGrpAttnW(512)
104+
# model.setTensorSplit([])
105+
# model.setGrpAttnN(1)
106+
# model.setGrpAttnW(512)
107107
model.setRopeFreqBase(1.0)
108108
model.setRopeFreqScale(1.0)
109109
model.setYarnExtFactor(1.0)
@@ -113,14 +113,14 @@ def runTest(self):
113113
model.setYarnOrigCtx(0)
114114
model.setDefragmentationThreshold(-1.0)
115115
model.setNumaStrategy("DISTRIBUTE")
116-
model.setRopeScalingType("UNSPECIFIED")
116+
model.setRopeScalingType("NONE")
117117
model.setPoolingType("NONE")
118118
model.setModelDraft("")
119-
model.setLookupCacheStaticFilePath("/tmp/sparknlp-llama-cpp-cache")
120-
model.setLookupCacheDynamicFilePath("/tmp/sparknlp-llama-cpp-cache")
119+
# model.setLookupCacheStaticFilePath("/tmp/sparknlp-llama-cpp-cache")
120+
# model.setLookupCacheDynamicFilePath("/tmp/sparknlp-llama-cpp-cache")
121121
model.setEmbedding(False)
122122
model.setFlashAttention(False)
123-
model.setInputPrefixBos(False)
123+
# model.setInputPrefixBos(False)
124124
model.setUseMmap(False)
125125
model.setUseMlock(False)
126126
model.setNoKvOffload(False)
@@ -164,7 +164,7 @@ def runTest(self):
164164
# Special PySpark Parameters (Scala StructFeatures)
165165
model.setTokenIdBias({0: 0.0, 1: 0.0})
166166
model.setTokenBias({"!": 0.0, "?": 0.0})
167-
model.setLoraAdapters({" ": 0.0})
167+
# model.setLoraAdapters({" ": 0.0})
168168

169169
pipeline = Pipeline().setStages([document_assembler, model])
170170
results = pipeline.fit(data).transform(data)

src/main/scala/com/johnsnowlabs/ml/gguf/GGUFWrapper.scala

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -15,8 +15,8 @@
1515
*/
1616
package com.johnsnowlabs.ml.gguf
1717

18-
import com.johnsnowlabs.nlp.llama.{LlamaModel, ModelParameters}
1918
import com.johnsnowlabs.nlp.util.io.ResourceHelper
19+
import de.kherud.llama.{LlamaModel, ModelParameters}
2020
import org.apache.hadoop.fs.{FileSystem, Path}
2121
import org.apache.spark.SparkFiles
2222
import org.apache.spark.sql.SparkSession
@@ -42,7 +42,7 @@ class GGUFWrapper(var modelFileName: String, var modelFolder: String) extends Se
4242
val modelFilePath = SparkFiles.get(modelFileName)
4343

4444
if (Paths.get(modelFilePath).toFile.exists()) {
45-
modelParameters.setModelFilePath(modelFilePath)
45+
modelParameters.setModel(modelFilePath)
4646
llamaModel = GGUFWrapper.withSafeGGUFModelLoader(modelParameters)
4747
} else
4848
throw new IllegalStateException(

src/main/scala/com/johnsnowlabs/ml/gguf/GGUFWrapperMultiModal.scala

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -15,7 +15,7 @@
1515
*/
1616
package com.johnsnowlabs.ml.gguf
1717

18-
import com.johnsnowlabs.nlp.llama.{LlamaModel, ModelParameters}
18+
import de.kherud.llama.{LlamaModel, ModelParameters}
1919
import com.johnsnowlabs.nlp.util.io.ResourceHelper
2020
import org.apache.hadoop.fs.{FileSystem, Path}
2121
import org.apache.spark.SparkFiles
@@ -44,8 +44,8 @@ class GGUFWrapperMultiModal(var modelFileName: String, var mmprojFileName: Strin
4444
Paths.get(modelFilePath).toFile.exists() && Paths.get(mmprojFilePath).toFile.exists()
4545

4646
if (filesExist) {
47-
modelParameters.setModelFilePath(modelFilePath)
48-
modelParameters.setMMProj(mmprojFilePath)
47+
modelParameters.setModel(modelFilePath)
48+
// modelParameters.setMMProj(mmprojFilePath) // TODO: Vision models implementation
4949
llamaModel = GGUFWrapperMultiModal.withSafeGGUFModelLoader(modelParameters)
5050
} else
5151
throw new IllegalStateException(

src/main/scala/com/johnsnowlabs/nlp/HasLlamaCppInferenceProperties.scala

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,8 @@
11
package com.johnsnowlabs.nlp
22

33
import com.johnsnowlabs.nlp.annotators.seq2seq.AutoGGUFModel
4-
import com.johnsnowlabs.nlp.llama.InferenceParameters
5-
import com.johnsnowlabs.nlp.llama.args._
4+
import de.kherud.llama.InferenceParameters
5+
import de.kherud.llama.args._
66
import com.johnsnowlabs.nlp.serialization.StructFeature
77
import org.apache.spark.ml.param._
88

0 commit comments

Comments
 (0)