1
#pragma once

/*
 * Platform / calling-convention detection.
 *
 * API_CALL is the calling convention applied to every exported function:
 * __stdcall on Windows, the platform default elsewhere.
 */
#ifdef _WIN32
#define API_CALL __stdcall
/* only 64-bit Windows is supported */
#ifndef _WIN64
#error unsupported target OS
#endif
#elif defined(__linux__)
#define API_CALL
/* only x86-64 and AArch64 Linux are supported */
#if (!defined __x86_64__ ) && (!defined __aarch64__ )
#error unsupported target OS
#endif
#else
#define API_CALL
#warning OS not supported, maybe
#endif
17
+
18
/* DLL_DECL may be predefined by the build system (presumably to an
   export/import attribute such as __declspec(dllexport) on Windows —
   confirm against the build scripts); it defaults to nothing. */
#ifndef DLL_DECL
#define DLL_DECL
#endif
21
+
22
/* expose a plain C ABI so the API is usable from C and FFI bindings */
#ifdef __cplusplus
extern "C"
{
#endif
26
+
27
/**
 * @brief kind of text handed to the `f_chatllm_print` callback via its
 *        `print_type` argument
 */
enum PrintType
{
    PRINT_CHAT_CHUNK        = 0,    // a chunk of streamed chat output
    // below items share the same value with BaseStreamer::TextType
    PRINTLN_META            = 1,    // print a whole line: general information
    PRINTLN_ERROR           = 2,    // print a whole line: error message
    PRINTLN_REF             = 3,    // print a whole line: reference
    PRINTLN_REWRITTEN_QUERY = 4,    // print a whole line: rewritten query
    PRINTLN_HISTORY_USER    = 5,    // print a whole line: user input history
    PRINTLN_HISTORY_AI      = 6,    // print a whole line: AI output history
    PRINTLN_TOOL_CALLING    = 7,    // print a whole line: tool calling (supported by only a few models)
    PRINTLN_EMBEDDING       = 8,    // print a whole line: embedding (example: "0.1, 0.3, ...")
    PRINTLN_RANKING         = 9,    // print a whole line: ranking (example: "0.8")
    PRINTLN_TOKEN_IDS       = 10,   // print a whole line: token ids (example: "1, 3, 5, 8, ...")
};
42
+
43
/**
 * @brief callback receiving text produced by the library
 *
 * @param user_data    opaque pointer supplied to `chatllm_start`
 * @param print_type   one of `enum PrintType`, identifying what `utf8_str` contains
 * @param utf8_str     UTF-8 encoded text
 */
typedef void (*f_chatllm_print)(void *user_data, int print_type, const char *utf8_str);

/**
 * @brief callback invoked when model generation ends
 *
 * @param user_data    opaque pointer supplied to `chatllm_start`
 */
typedef void (*f_chatllm_end)(void *user_data);

// opaque handle to a ChatLLM instance; its layout is internal to the library
struct chatllm_obj;
47
+
48
/**
 * Usage:
 *
 * ```c
 * obj = chatllm_create();
 * chatllm_append_param(obj, ...);
 * // ...
 * chatllm_append_param(obj, ...);
 *
 * chatllm_start(obj, ...);
 * while (true)
 * {
 *     chatllm_user_input(obj, ...);
 * }
 * ```
 */

/**
 * @brief create ChatLLM object
 *
 * Note: no matching destroy/free function is declared in this header, so the
 * object's lifetime presumably lasts for the process — confirm with the
 * implementation.
 *
 * @return the object
 */
DLL_DECL struct chatllm_obj *API_CALL chatllm_create(void);
71
+
72
/**
 * @brief append a command line option
 *
 * Options accumulate on the object and presumably take effect when
 * `chatllm_start` is called — confirm against the implementation.
 *
 * @param[in] obj        model object
 * @param[in] utf8_str   a command line option
 */
DLL_DECL void API_CALL chatllm_append_param(struct chatllm_obj *obj, const char *utf8_str);
79
+
80
/**
 * @brief start the model
 *
 * @param[in] obj         model object
 * @param[in] f_print     callback function for printing
 * @param[in] f_end       callback function called when model generation ends
 * @param[in] user_data   user data passed back to both callback functions
 * @return                0 if succeeded
 */
DLL_DECL int API_CALL chatllm_start(struct chatllm_obj *obj, f_chatllm_print f_print, f_chatllm_end f_end, void *user_data);
90
+
91
/**
 * @brief set max number of generated tokens in a new round of conversation
 *
 * @param[in] obj              model object
 * @param[in] gen_max_tokens   maximum number of tokens to generate; -1 for as many as possible
 */
DLL_DECL void API_CALL chatllm_set_gen_max_tokens(struct chatllm_obj *obj, int gen_max_tokens);
98
+
99
/**
 * @brief restart (i.e. discard history)
 *
 * * When a session has been loaded, the model is restarted to the point that the session is loaded;
 *
 *   Note: this would not work if `--extending` is not `none` or the model uses SWA.
 *
 * * Otherwise, it is restarted from the very beginning.
 *
 * @param[in] obj               model object
 * @param[in] utf8_sys_prompt   update to a new system prompt;
 *                              if NULL, then system prompt is kept unchanged
 */
DLL_DECL void API_CALL chatllm_restart(struct chatllm_obj *obj, const char *utf8_sys_prompt);
113
+
114
/**
 * @brief user input
 *
 * This function is synchronous, i.e. it returns only after model generation
 * ends and `f_end` has been called.
 *
 * @param[in] obj        model object
 * @param[in] utf8_str   user input
 * @return               0 if succeeded
 */
DLL_DECL int API_CALL chatllm_user_input(struct chatllm_obj *obj, const char *utf8_str);
124
+
125
/**
 * @brief set prefix for AI generation
 *
 * This prefix is used in all following rounds.
 *
 * @param[in] obj        model object
 * @param[in] utf8_str   prefix
 * @return               0 if succeeded
 */
DLL_DECL int API_CALL chatllm_set_ai_prefix(struct chatllm_obj *obj, const char *utf8_str);
135
+
136
/**
 * @brief tool input
 *
 * - If this function is called before `chatllm_user_input` returns, it is asynchronous;
 * - If this function is called after `chatllm_user_input` returns, it is equivalent to
 *   `chatllm_user_input`.
 *
 * @param[in] obj        model object
 * @param[in] utf8_str   tool input
 * @return               0 if succeeded
 */
DLL_DECL int API_CALL chatllm_tool_input(struct chatllm_obj *obj, const char *utf8_str);
148
+
149
+ /**
150
+ * @brief feed in text generated by external tools
151
+ *
152
+ * This text is treated as part of AI's generation. After this is called, LLM generation
153
+ * is continued.
154
+ *
155
+ * Example:
156
+ *
157
+ * ```c
158
+ * // in `f_print` callback:
159
+ * chatllm_abort_generation();
160
+ * chatllm_tool_completion(...);
161
+ * ```
162
+ *
163
+ * @param[in] obj model object
164
+ * @param[in] utf8_str text
165
+ * @return 0 if succeeded
166
+ */
167
+ DLL_DECL int chatllm_tool_completion (struct chatllm_obj * obj , const char * utf8_str );
168
+
169
+ /**
170
+ * @brief tokenize
171
+ *
172
+ * embedding is emit through `PRINTLN_TOKEN_IDS`.
173
+ *
174
+ * @param[in] obj model object
175
+ * @param[in] utf8_str text
176
+ * @return 0 if succeeded
177
+ */
178
+ DLL_DECL int chatllm_text_tokenize (struct chatllm_obj * obj , const char * utf8_str );
179
+
180
+ /**
181
+ * @brief text embedding
182
+ *
183
+ * embedding is emit through `PRINTLN_EMBEDDING`.
184
+ *
185
+ * @param[in] obj model object
186
+ * @param[in] utf8_str text
187
+ * @return 0 if succeeded
188
+ */
189
+ DLL_DECL int chatllm_text_embedding (struct chatllm_obj * obj , const char * utf8_str );
190
+
191
+ /**
192
+ * @brief question & answer ranking
193
+ *
194
+ * embedding is emit through `PRINTLN_RANKING`.
195
+ *
196
+ * @param[in] obj model object
197
+ * @param[in] utf8_str_q question
198
+ * @param[in] utf8_str_q answer
199
+ * @return 0 if succeeded
200
+ */
201
+ DLL_DECL int chatllm_qa_rank (struct chatllm_obj * obj , const char * utf8_str_q , const char * utf8_str_a );
202
+
203
+ /**
204
+ * @brief switching RAG vector store
205
+ *
206
+ * @param[in] obj model object
207
+ * @param[in] name vector store name
208
+ * @return 0 if succeeded
209
+ */
210
+ DLL_DECL int chatllm_rag_select_store (struct chatllm_obj * obj , const char * name );
211
+
212
/**
 * @brief abort generation
 *
 * This function is asynchronous, i.e. it returns immediately without waiting
 * for generation to stop.
 *
 * @param[in] obj   model object
 */
DLL_DECL void API_CALL chatllm_abort_generation(struct chatllm_obj *obj);
220
+
221
/**
 * @brief show timing statistics
 *
 * Result is sent to `f_print`.
 *
 * @param[in] obj   model object
 */
DLL_DECL void API_CALL chatllm_show_statistics(struct chatllm_obj *obj);
229
+
230
/**
 * @brief save current session on demand
 *
 * Note: Call this from the same thread as `chatllm_user_input()`.
 *
 * If chat history is empty, then system prompt is evaluated and saved.
 *
 * @param[in] obj        model object
 * @param[in] utf8_str   file full name
 * @return               0 if succeeded
 */
DLL_DECL int API_CALL chatllm_save_session(struct chatllm_obj *obj, const char *utf8_str);
242
+
243
/**
 * @brief load a session on demand
 *
 * Note: Call this from the same thread as `chatllm_user_input()`.
 *
 * @param[in] obj        model object
 * @param[in] utf8_str   file full name
 * @return               0 if succeeded
 */
DLL_DECL int API_CALL chatllm_load_session(struct chatllm_obj *obj, const char *utf8_str);
253
+
254
#ifdef __cplusplus
}   // extern "C"
#endif
0 commit comments