1
#pragma once

/*
 * Platform / calling-convention detection.
 *
 * API_CALL is the calling convention applied to every exported function:
 * __stdcall on Windows, the platform default elsewhere.
 */
#ifdef _WIN32
#define API_CALL __stdcall
/* only 64-bit Windows is supported */
#ifndef _WIN64
#error unsupported target OS
#endif
#elif defined(__linux__)
#define API_CALL
/* only x86-64 and AArch64 Linux are supported */
#if (!defined __x86_64__ ) && (!defined __aarch64__ )
#error unsupported target OS
#endif
#else
#define API_CALL
#warning OS not supported, maybe
#endif
17
+
18
/* DLL_DECL may be predefined by the build system (presumably to an
   export/import attribute such as __declspec(dllexport) on Windows —
   confirm against the build scripts); it defaults to nothing. */
#ifndef DLL_DECL
#define DLL_DECL
#endif
21
+
22
/* expose a plain C ABI so the API is usable from C and FFI bindings */
#ifdef __cplusplus
extern "C"
{
#endif
26
+
27
/**
 * @brief kind of text handed to the `f_chatllm_print` callback via its
 *        `print_type` argument
 */
enum PrintType
{
    PRINT_CHAT_CHUNK        = 0,    // a chunk of streamed chat output
    // below items share the same value with BaseStreamer::TextType
    PRINTLN_META            = 1,    // print a whole line: general information
    PRINTLN_ERROR           = 2,    // print a whole line: error message
    PRINTLN_REF             = 3,    // print a whole line: reference
    PRINTLN_REWRITTEN_QUERY = 4,    // print a whole line: rewritten query
    PRINTLN_HISTORY_USER    = 5,    // print a whole line: user input history
    PRINTLN_HISTORY_AI      = 6,    // print a whole line: AI output history
    PRINTLN_TOOL_CALLING    = 7,    // print a whole line: tool calling (supported by only a few models)
    PRINTLN_EMBEDDING       = 8,    // print a whole line: embedding (example: "0.1, 0.3, ...")
    PRINTLN_RANKING         = 9,    // print a whole line: ranking (example: "0.8")
    PRINTLN_TOKEN_IDS       = 10,   // print a whole line: token ids (example: "1, 3, 5, 8, ...")
};
42
+
43
/**
 * @brief callback receiving text produced by the library
 *
 * @param user_data    opaque pointer supplied to `chatllm_start`
 * @param print_type   one of `enum PrintType`, identifying what `utf8_str` contains
 * @param utf8_str     UTF-8 encoded text
 */
typedef void (*f_chatllm_print)(void *user_data, int print_type, const char *utf8_str);

/**
 * @brief callback invoked when model generation ends
 *
 * @param user_data    opaque pointer supplied to `chatllm_start`
 */
typedef void (*f_chatllm_end)(void *user_data);

// opaque handle to a ChatLLM instance; its layout is internal to the library
struct chatllm_obj;
47
+
48
/**
 * Usage:
 *
 * ```c
 * obj = chatllm_create();
 * chatllm_append_param(obj, ...);
 * // ...
 * chatllm_append_param(obj, ...);
 *
 * chatllm_start(obj, ...);
 * while (true)
 * {
 *     chatllm_user_input(obj, ...);
 * }
 * ```
 */

/**
 * @brief create ChatLLM object
 *
 * Note: no matching destroy/free function is declared in this header, so the
 * object's lifetime presumably lasts for the process — confirm with the
 * implementation.
 *
 * @return the object
 */
DLL_DECL struct chatllm_obj *API_CALL chatllm_create(void);
71
+
72
/**
 * @brief append a command line option
 *
 * Options accumulate on the object and presumably take effect when
 * `chatllm_start` is called — confirm against the implementation.
 *
 * @param[in] obj        model object
 * @param[in] utf8_str   a command line option
 */
DLL_DECL void API_CALL chatllm_append_param(struct chatllm_obj *obj, const char *utf8_str);
79
+
80
/**
 * @brief start the model
 *
 * @param[in] obj         model object
 * @param[in] f_print     callback function for printing
 * @param[in] f_end       callback function called when model generation ends
 * @param[in] user_data   user data passed back to both callback functions
 * @return                0 if succeeded
 */
DLL_DECL int API_CALL chatllm_start(struct chatllm_obj *obj, f_chatllm_print f_print, f_chatllm_end f_end, void *user_data);
90
+
91
/**
 * @brief set max number of generated tokens in a new round of conversation
 *
 * @param[in] obj              model object
 * @param[in] gen_max_tokens   maximum number of tokens to generate; -1 for as many as possible
 */
DLL_DECL void API_CALL chatllm_set_gen_max_tokens(struct chatllm_obj *obj, int gen_max_tokens);
98
+
99
/**
 * @brief restart (i.e. discard history)
 *
 * * When a session has been loaded, the model is restarted to the point that the session is loaded;
 *
 *   Note: this would not work if `--extending` is not `none` or the model uses SWA.
 *
 * * Otherwise, it is restarted from the very beginning.
 *
 * @param[in] obj               model object
 * @param[in] utf8_sys_prompt   update to a new system prompt;
 *                              if NULL, then system prompt is kept unchanged
 */
DLL_DECL void API_CALL chatllm_restart(struct chatllm_obj *obj, const char *utf8_sys_prompt);
113
+
114
/**
 * @brief user input
 *
 * This function is synchronous, i.e. it returns only after model generation
 * ends and `f_end` has been called.
 *
 * @param[in] obj        model object
 * @param[in] utf8_str   user input
 * @return               0 if succeeded
 */
DLL_DECL int API_CALL chatllm_user_input(struct chatllm_obj *obj, const char *utf8_str);
124
+
125
/**
 * @brief set prefix for AI generation
 *
 * This prefix is used in all following rounds.
 *
 * @param[in] obj        model object
 * @param[in] utf8_str   prefix
 * @return               0 if succeeded
 */
DLL_DECL int API_CALL chatllm_set_ai_prefix(struct chatllm_obj *obj, const char *utf8_str);
135
+
136
/**
 * @brief tool input
 *
 * - If this function is called before `chatllm_user_input` returns, it is asynchronous;
 * - If this function is called after `chatllm_user_input` returns, it is equivalent to
 *   `chatllm_user_input`.
 *
 * @param[in] obj        model object
 * @param[in] utf8_str   tool input
 * @return               0 if succeeded
 */
DLL_DECL int API_CALL chatllm_tool_input(struct chatllm_obj *obj, const char *utf8_str);
148
+
149
+ /**
150
+ * @brief feed in text generated by external tools
151
+ *
152
+ * This text is treated as part of AI's generation. After this is called, LLM generation
153
+ * is continued.
154
+ *
155
+ * Example:
156
+ *
157
+ * ```c
158
+ * // in `f_print` callback:
159
+ * chatllm_abort_generation();
160
+ * chatllm_tool_completion(...);
161
+ * ```
162
+ *
163
+ * @param[in] obj model object
164
+ * @param[in] utf8_str text
165
+ * @return 0 if succeeded
166
+ */
167
+ DLL_DECL int chatllm_tool_completion (struct chatllm_obj * obj , const char * utf8_str );
168
+
169
+ /**
170
+ * @brief tokenize
171
+ *
172
+ * embedding is emit through `PRINTLN_TOKEN_IDS`.
173
+ *
174
+ * @param[in] obj model object
175
+ * @param[in] utf8_str text
176
+ * @return 0 if succeeded
177
+ */
178
+ DLL_DECL int chatllm_text_tokenize (struct chatllm_obj * obj , const char * utf8_str );
179
+
180
+ /**
181
+ * @brief text embedding
182
+ *
183
+ * embedding is emit through `PRINTLN_EMBEDDING`.
184
+ *
185
+ * @param[in] obj model object
186
+ * @param[in] utf8_str text
187
+ * @return 0 if succeeded
188
+ */
189
+ DLL_DECL int chatllm_text_embedding (struct chatllm_obj * obj , const char * utf8_str );
190
+
191
+ /**
192
+ * @brief question & answer ranking
193
+ *
194
+ * embedding is emit through `PRINTLN_RANKING`.
195
+ *
196
+ * @param[in] obj model object
197
+ * @param[in] utf8_str_q question
198
+ * @param[in] utf8_str_q answer
199
+ * @return 0 if succeeded
200
+ */
201
+ DLL_DECL int chatllm_qa_rank (struct chatllm_obj * obj , const char * utf8_str_q , const char * utf8_str_a );
202
+
203
+ /**
204
+ * @brief switching RAG vector store
205
+ *
206
+ * @param[in] obj model object
207
+ * @param[in] name vector store name
208
+ * @return 0 if succeeded
209
+ */
210
+ DLL_DECL int chatllm_rag_select_store (struct chatllm_obj * obj , const char * name );
211
+
212
/**
 * @brief abort generation
 *
 * This function is asynchronous, i.e. it returns immediately without waiting
 * for generation to stop.
 *
 * @param[in] obj   model object
 */
DLL_DECL void API_CALL chatllm_abort_generation(struct chatllm_obj *obj);
220
+
221
/**
 * @brief show timing statistics
 *
 * Result is sent to `f_print`.
 *
 * @param[in] obj   model object
 */
DLL_DECL void API_CALL chatllm_show_statistics(struct chatllm_obj *obj);
229
+
230
/**
 * @brief save current session on demand
 *
 * Note: Call this from the same thread as `chatllm_user_input()`.
 *
 * If chat history is empty, then system prompt is evaluated and saved.
 *
 * @param[in] obj        model object
 * @param[in] utf8_str   file full name
 * @return               0 if succeeded
 */
DLL_DECL int API_CALL chatllm_save_session(struct chatllm_obj *obj, const char *utf8_str);
242
+
243
/**
 * @brief load a session on demand
 *
 * Note: Call this from the same thread as `chatllm_user_input()`.
 *
 * @param[in] obj        model object
 * @param[in] utf8_str   file full name
 * @return               0 if succeeded
 */
DLL_DECL int API_CALL chatllm_load_session(struct chatllm_obj *obj, const char *utf8_str);
253
+
254
#ifdef __cplusplus
}   // extern "C"
#endif
0 commit comments