@@ -1,9 +1,7 @@
 import datetime
-import functools
-import io
 import os
 from contextlib import asynccontextmanager
-from typing import Any, Literal
+from typing import Any

 import cocoindex
 import numpy as np
@@ -13,7 +11,6 @@
 from fastapi.staticfiles import StaticFiles
 from PIL import Image
 from qdrant_client import QdrantClient
-from colpali_engine.models import ColPali, ColPaliProcessor


 # --- Config ---
@@ -29,76 +26,24 @@
 OLLAMA_URL = os.getenv("OLLAMA_URL", "http://localhost:11434/")
 QDRANT_COLLECTION = "ImageSearchColpali"
 COLPALI_MODEL_NAME = os.getenv("COLPALI_MODEL", "vidore/colpali-v1.2")
-COLPALI_MODEL_DIMENSION = 1031  # Set to match ColPali's output
+print(f"📐 Using ColPali model {COLPALI_MODEL_NAME}")

-# --- ColPali model cache and embedding functions ---
-_colpali_model_cache = {}

+# Create ColPali embedding function using the class-based pattern
+colpali_embed = cocoindex.functions.ColPaliEmbedImage(model=COLPALI_MODEL_NAME)

-def get_colpali_model(model: str = COLPALI_MODEL_NAME):
-    global _colpali_model_cache
-    if model not in _colpali_model_cache:
-        print(f"Loading ColPali model: {model}")
-        _colpali_model_cache[model] = {
-            "model": ColPali.from_pretrained(model),
-            "processor": ColPaliProcessor.from_pretrained(model),
-        }
-    return _colpali_model_cache[model]["model"], _colpali_model_cache[model][
-        "processor"
-    ]
-
-
-def colpali_embed_image(
-    img_bytes: bytes, model: str = COLPALI_MODEL_NAME
-) -> list[float]:
-    from PIL import Image
-    import torch
-    import io
-
-    colpali_model, processor = get_colpali_model(model)
-    pil_image = Image.open(io.BytesIO(img_bytes)).convert("RGB")
-    inputs = processor.process_images([pil_image])
-    with torch.no_grad():
-        embeddings = colpali_model(**inputs)
-    pooled_embedding = embeddings.mean(dim=-1)
-    result = pooled_embedding[0].cpu().numpy()  # [1031]
-    return result.tolist()
-
-
-def colpali_embed_query(query: str, model: str = COLPALI_MODEL_NAME) -> list[float]:
-    import torch
-    import numpy as np
-
-    colpali_model, processor = get_colpali_model(model)
-    inputs = processor.process_queries([query])
-    with torch.no_grad():
-        embeddings = colpali_model(**inputs)
-    pooled_embedding = embeddings.mean(dim=-1)
-    query_tokens = pooled_embedding[0].cpu().numpy()  # [15]
-    target_length = COLPALI_MODEL_DIMENSION
-    result = np.zeros(target_length, dtype=np.float32)
-    result[: min(len(query_tokens), target_length)] = query_tokens[:target_length]
-    return result.tolist()
-
-
-# --- End ColPali embedding functions ---

-
-def embed_query(text: str) -> list[float]:
-    """
-    Embed the caption using ColPali model.
-    """
-    return colpali_embed_query(text, model=COLPALI_MODEL_NAME)
-
-
-@cocoindex.op.function(cache=True, behavior_version=1, gpu=True)
-def embed_image(
-    img_bytes: bytes,
-) -> cocoindex.Vector[cocoindex.Float32, Literal[COLPALI_MODEL_DIMENSION]]:
+@cocoindex.transform_flow()
+def text_to_colpali_embedding(
+    text: cocoindex.DataSlice[str],
+) -> cocoindex.DataSlice[list[list[float]]]:
     """
-    Convert image to embedding using ColPali model.
+    Embed text using a ColPali model, returning multi-vector format.
+    This is shared logic between indexing and querying, ensuring consistent embeddings.
     """
-    return colpali_embed_image(img_bytes, model=COLPALI_MODEL_NAME)
+    return text.transform(
+        cocoindex.functions.ColPaliEmbedQuery(model=COLPALI_MODEL_NAME)
+    )


 @cocoindex.flow_def(name="ImageObjectEmbeddingColpali")
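Because the flow now stores per-token (multi-vector) embeddings rather than a single pooled vector, the Qdrant collection has to be configured with a MaxSim multivector comparator. That setup is not part of this hunk; the sketch below is only an illustration of what such a collection could look like if created by hand. The collection name and the "embedding" vector field come from this diff; the 128-dim per-token width, the cosine distance, and the Qdrant URL are assumptions about this particular setup.

```python
# Hypothetical collection setup for multi-vector MaxSim search (not part of this change).
from qdrant_client import QdrantClient, models

client = QdrantClient(url="http://localhost:6333")  # assumed local Qdrant instance
client.create_collection(
    collection_name="ImageSearchColpali",
    vectors_config={
        "embedding": models.VectorParams(
            size=128,  # assumed per-token projection width of the ColPali model
            distance=models.Distance.COSINE,  # assumption; dot product also works for normalized vectors
            multivector_config=models.MultiVectorConfig(
                comparator=models.MultiVectorComparator.MAX_SIM
            ),
        )
    },
)
```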
@@ -131,7 +76,7 @@ def image_object_embedding_flow(
             ),
             image=img["content"],
         )
-        img["embedding"] = img["content"].transform(embed_image)
+        img["embedding"] = img["content"].transform(colpali_embed)

         collect_fields = {
             "id": cocoindex.GeneratedField.UUID,
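The one-line change above swaps the old pooled embedding for the class-based image embedder. For intuition, here is roughly what the stored multi-vector looks like when computed directly with colpali_engine, as the removed helper did before it collapsed the token axis with `.mean(dim=-1)`. It is a sketch only; the assumption is that `ColPaliEmbedImage` keeps the full `[n_patches, dim]` matrix instead of pooling it.

```python
# Illustrative sketch of a per-image ColPali multi-vector (not the code path used by the flow).
import io

import torch
from colpali_engine.models import ColPali, ColPaliProcessor
from PIL import Image

model = ColPali.from_pretrained("vidore/colpali-v1.2")
processor = ColPaliProcessor.from_pretrained("vidore/colpali-v1.2")


def image_multivector(img_bytes: bytes) -> list[list[float]]:
    pil_image = Image.open(io.BytesIO(img_bytes)).convert("RGB")
    inputs = processor.process_images([pil_image])
    with torch.no_grad():
        embeddings = model(**inputs)  # shape [1, n_patches, dim]
    # Keep one vector per image patch instead of pooling them into a single vector.
    return embeddings[0].cpu().float().numpy().tolist()
```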
@@ -189,24 +134,30 @@ def search(
     q: str = Query(..., description="Search query"),
     limit: int = Query(5, description="Number of results"),
 ) -> Any:
-    # Get the embedding for the query
-    query_embedding = embed_query(q)
+    # Get the multi-vector embedding for the query
+    query_embedding = text_to_colpali_embedding.eval(q)
+    print(
+        f"🔍 Query multi-vector shape: {len(query_embedding)} tokens x {len(query_embedding[0]) if query_embedding else 0} dims"
+    )

-    # Search in Qdrant
-    search_results = app.state.qdrant_client.search(
+    # Search in Qdrant with multi-vector MaxSim scoring using query_points API
+    search_results = app.state.qdrant_client.query_points(
         collection_name=QDRANT_COLLECTION,
-        query_vector=("embedding", query_embedding),
+        query=query_embedding,  # Multi-vector format: list[list[float]]
+        using="embedding",  # Specify the vector field name
         limit=limit,
         with_payload=True,
     )

+    print(f"📈 Found {len(search_results.points)} results with MaxSim scoring")
+
     return {
         "results": [
             {
                 "filename": result.payload["filename"],
                 "score": result.score,
                 "caption": result.payload.get("caption"),
             }
-            for result in search_results
+            for result in search_results.points
         ]
     }
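For reference, the MaxSim late-interaction score that `query_points` applies when the collection's comparator is MAX_SIM can be sketched in a few lines of numpy: for each query token vector, take its best similarity against the document's patch vectors, then sum over query tokens. This is an illustration of the scoring idea, not the code path the example runs.

```python
# Rough sketch of MaxSim scoring between a query multi-vector and a document multi-vector.
import numpy as np


def maxsim(query_vecs: list[list[float]], doc_vecs: list[list[float]]) -> float:
    q = np.asarray(query_vecs)  # [n_query_tokens, dim]
    d = np.asarray(doc_vecs)    # [n_patches, dim]
    sim = q @ d.T               # token-to-patch similarity matrix
    return float(sim.max(axis=1).sum())  # best patch per query token, summed
```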