@@ -99,3 +99,132 @@ def __call__(self, text: str) -> NDArray[np.float32]:
99
99
assert self ._model is not None
100
100
result : NDArray [np .float32 ] = self ._model .encode (text , convert_to_numpy = True )
101
101
return result
102
+
103
+
104
# Cache of loaded ColPali models, keyed by model name:
# {model_name: {"model": ..., "processor": ..., "dimension": ...}}
_colpali_model_cache = {}


def get_colpali_model(model: str):
    """Get or load a ColPali model, its processor, and its embedding dimension.

    Results are memoized in the module-level ``_colpali_model_cache`` so each
    model name is loaded from disk at most once per process.

    Args:
        model: Hugging Face model name or path of the ColPali checkpoint.

    Returns:
        A ``(model_instance, processor_instance, dimension)`` tuple.

    Raises:
        ImportError: If ``colpali_engine`` is not installed.
        ValueError: If the embedding dimension cannot be determined.
    """
    if model not in _colpali_model_cache:
        try:
            from colpali_engine.models import ColPali, ColPaliProcessor
        except ImportError as e:
            raise ImportError(
                "ColPali is not available. Make sure cocoindex is installed with ColPali support."
            ) from e

        model_instance = ColPali.from_pretrained(model)
        processor_instance = ColPaliProcessor.from_pretrained(model)

        _colpali_model_cache[model] = {
            "model": model_instance,
            "processor": processor_instance,
            "dimension": _detect_colpali_dimension(model, model_instance),
        }

    entry = _colpali_model_cache[model]
    return entry["model"], entry["processor"], entry["dimension"]


def _detect_colpali_dimension(model: str, model_instance) -> int:
    """Determine the per-token embedding dimension for a ColPali model.

    Tries the FastEmbed model registry first, then falls back to inspecting
    the loaded model's config.

    Raises:
        ValueError: If no hidden size can be found in the model config.
    """
    # Try FastEmbed's registry first.
    # NOTE(review): this looks up a fixed reference model rather than the
    # requested one, on the assumption that all ColPali variants share the
    # same embedding dimension — confirm before relying on new variants.
    try:
        from fastembed import LateInteractionMultimodalEmbedding

        standard_colpali_model = "Qdrant/colpali-v1.3-fp16"
        supported_models = LateInteractionMultimodalEmbedding.list_supported_models()
        for supported_model in supported_models:
            if supported_model["model"] == standard_colpali_model:
                return supported_model["dim"]
    except Exception:
        # FastEmbed probing is best-effort; fall back to the model config.
        pass

    # Fallback: read the hidden size straight from the model config.
    if not hasattr(model_instance, "config"):
        raise ValueError(
            f"Model {model} has no config attribute. Model attributes: {dir(model_instance)}"
        )
    config = model_instance.config
    # Different ColPali variants expose the hidden size in different places.
    if hasattr(config, "hidden_size"):
        return config.hidden_size
    if hasattr(config, "text_config") and hasattr(config.text_config, "hidden_size"):
        return config.text_config.hidden_size
    if hasattr(config, "vision_config") and hasattr(config.vision_config, "hidden_size"):
        return config.vision_config.hidden_size
    raise ValueError(
        f"Could not find hidden_size in model config for {model}. Config attributes: {dir(config)}"
    )
163
+
164
+
165
def get_colpali_dimension(model: str) -> int:
    """Return the embedding dimension produced by the given ColPali model."""
    return get_colpali_model(model)[2]
169
+
170
+
171
def colpali_embed_image(img_bytes: bytes, model: str) -> list[list[float]]:
    """Embed an image with a ColPali model, returning multi-vector output.

    Args:
        img_bytes: Raw image bytes (any format PIL can decode).
        model: ColPali model name, passed to ``get_colpali_model``.

    Returns:
        Patch-level embeddings as a list of per-patch float lists, i.e. shape
        ``[patches, hidden_dim]`` with the batch dimension removed.

    Raises:
        ImportError: If PIL or torch is not installed.
        ValueError: If the model output is not a 3D tensor.
    """
    try:
        import io

        import torch
        from PIL import Image
    except ImportError as e:
        raise ImportError(
            "Required dependencies (PIL, torch) are missing for ColPali image embedding."
        ) from e

    colpali_model, processor, _ = get_colpali_model(model)
    pil_image = Image.open(io.BytesIO(img_bytes)).convert("RGB")
    inputs = processor.process_images([pil_image])
    with torch.no_grad():
        embeddings = colpali_model(**inputs)

    # The model must produce [batch, patches, hidden_dim].
    if len(embeddings.shape) != 3:
        raise ValueError(
            f"Expected 3D tensor [batch, patches, hidden_dim], got shape {embeddings.shape}"
        )

    # Drop the batch dimension and convert the whole [patches, hidden_dim]
    # tensor in one call instead of a per-patch Python loop.
    return embeddings[0].cpu().numpy().tolist()
201
+
202
+
203
def colpali_embed_query(query: str, model: str) -> list[list[float]]:
    """Embed a text query with a ColPali model, returning multi-vector output.

    Args:
        query: The query text to embed.
        model: ColPali model name, passed to ``get_colpali_model``.

    Returns:
        Token-level embeddings as a list of per-token float lists, i.e. shape
        ``[tokens, hidden_dim]`` with the batch dimension removed.

    Raises:
        ImportError: If torch or numpy is not installed.
        ValueError: If the model output is not a 3D tensor.
    """
    try:
        import torch
        # numpy is not referenced by name below, but Tensor.numpy() requires
        # it at runtime; importing here surfaces a clear error up front.
        import numpy as np
    except ImportError as e:
        raise ImportError(
            "Required dependencies (torch, numpy) are missing for ColPali query embedding."
        ) from e

    colpali_model, processor, _ = get_colpali_model(model)
    inputs = processor.process_queries([query])
    with torch.no_grad():
        embeddings = colpali_model(**inputs)

    # The model must produce [batch, tokens, hidden_dim].
    if len(embeddings.shape) != 3:
        raise ValueError(
            f"Expected 3D tensor [batch, tokens, hidden_dim], got shape {embeddings.shape}"
        )

    # Drop the batch dimension and convert the whole [tokens, hidden_dim]
    # tensor in one call instead of a per-token Python loop.
    return embeddings[0].cpu().numpy().tolist()
0 commit comments