Support using ColPali library to compute embedding #796

Merged: 10 commits, Aug 1, 2025

2 changes: 1 addition & 1 deletion examples/image_search/.env
@@ -1 +1 @@
COCOINDEX_DATABASE_URL="postgresql://cocoindex:cocoindex@127.0.0.1:5432/cocoindex"
export COCOINDEX_DATABASE_URL="postgres://cocoindex:cocoindex@localhost/cocoindex"
73 changes: 63 additions & 10 deletions examples/image_search/README.md
@@ -1,25 +1,40 @@
# Image Search with CocoIndex
[![GitHub](https://img.shields.io/github/stars/cocoindex-io/cocoindex?color=5B5BD6)](https://github.com/cocoindex-io/cocoindex)

We will build live image search and query it with natural language, using multimodal embedding model. We are going use CocoIndex to build real-time indexing flow. During running, you can add new files to the folder and it only process changed files and will be indexed within a minute.
We will build live image search and query it with natural language, using multimodal embedding models. We use CocoIndex to build a real-time indexing flow; while it is running, you can add new files to the folder and only changed files are processed and indexed within a minute.

We appreciate a star ⭐ at [CocoIndex Github](https://github.com/cocoindex-io/cocoindex) if this is helpful.

<img width="1105" alt="cover" src="https://github.com/user-attachments/assets/544fb80d-c085-4150-84b6-b6e62c4a12b9" />

## Two Implementation Options

This example provides two different image search implementations:

### 1. CLIP-based Search (`main.py`)
- **Model**: CLIP ViT-L/14 (OpenAI)
- **Embedding**: Single-vector embeddings (768 dimensions)
- **Search**: Standard cosine similarity

### 2. ColPali-based Search (`colpali_main.py`)
- **Model**: ColPali (Contextual Late-interaction over Patches)
- **Embedding**: Multi-vector embeddings with late interaction
- **Search**: MaxSim scoring for optimal patch-level matching
- **Performance**: Better for document/text-in-image search
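
The MaxSim (late-interaction) scoring used by ColPali can be sketched in a few lines. This is purely illustrative and assumes NumPy; when the collection is configured for multi-vectors, Qdrant performs the equivalent scoring server-side:

```python
import numpy as np

def maxsim_score(query_vecs: np.ndarray, doc_vecs: np.ndarray) -> float:
    """For each query token vector, take its best-matching document patch
    vector, then sum those maxima over all query tokens."""
    sims = query_vecs @ doc_vecs.T        # (num_query_tokens, num_patches)
    return float(sims.max(axis=1).sum())  # best patch per token, summed

# Toy example: 2 query-token vectors against 3 patch vectors of dimension 4.
query = np.random.rand(2, 4)
patches = np.random.rand(3, 4)
print(maxsim_score(query, patches))
```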

## Technologies
- CocoIndex for ETL and live update
- CLIP ViT-L/14 - Embeddings Model for images and query
- Qdrant for Vector Storage
- FastApi for backend
- Ollama (Optional) for generating image captions using `gemma3`.
- **CLIP ViT-L/14** OR **ColPali** - Multimodal embedding models
- Qdrant for Vector Storage (with multi-vector support for ColPali)
- FastAPI for backend
- Ollama (Optional) for generating image captions

## Setup
- Make sure Postgres and Qdrant are running
- [Install Postgres](https://cocoindex.io/docs/getting_started/installation#-install-postgres) if you don't have one.

- Make sure Qdrant is running
```
docker run -d -p 6334:6334 -p 6333:6333 qdrant/qdrant
export COCOINDEX_DATABASE_URL="postgres://cocoindex:cocoindex@localhost/cocoindex"
```
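
Depending on your cocoindex version, the flow setup may create and configure the ColPali collection for you. If you want to pre-create it yourself, the sketch below shows what a multi-vector collection looks like in Qdrant; the collection name `ImageSearchColpali` and vector name `embedding` follow `colpali_main.py`, and the size of 128 assumes the per-token dimension of `vidore/colpali-v1.2` (adjust for other models):

```python
from qdrant_client import QdrantClient, models

client = QdrantClient(url="localhost:6334", prefer_grpc=True)
client.create_collection(
    collection_name="ImageSearchColpali",
    vectors_config={
        "embedding": models.VectorParams(
            size=128,  # assumed per-token dimension of vidore/colpali-v1.2
            distance=models.Distance.COSINE,
            multivector_config=models.MultiVectorConfig(
                comparator=models.MultiVectorComparator.MAX_SIM
            ),
        )
    },
)
```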

## (Optional) Run Ollama
@@ -32,21 +47,59 @@ export OLLAMA_MODEL="gemma3" # Optional, for caption generation
```

## Run the App

### Option 1: CLIP-based Search
- Install dependencies:
```
pip install -e .
```

- Run Backend
- Run CLIP Backend:
```
uvicorn main:app --reload --host 0.0.0.0 --port 8000
```

- Run Frontend
### Option 2: ColPali-based Search
- Install dependencies:
```
pip install -e .
pip install 'cocoindex[colpali]' # Adds ColPali support
```

- Configure model (optional):
```sh
export COLPALI_MODEL="vidore/colpali-v1.2" # Default model
```

- Run ColPali Backend:
```
uvicorn colpali_main:app --reload --host 0.0.0.0 --port 8000
```

Note that recent NVIDIA GPUs (e.g. the RTX 5090) will not work with stable PyTorch releases up to 2.7.1.

If you get this error:

```
The current PyTorch install supports CUDA capabilities sm_37 sm_50 sm_60 sm_61 sm_70 sm_75 sm_80 sm_86 sm_90 compute_37.
```

You can install a nightly PyTorch build instead (see https://pytorch.org/get-started/locally/):

```sh
pip install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cu129
```
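
To check whether the installed build actually covers your GPU, a quick sanity check (assumes `torch` imports in your environment):

```python
import torch

print(torch.__version__, torch.version.cuda)
if torch.cuda.is_available():
    # The capability printed here must be among the ones your PyTorch build
    # supports (see the error message above); if not, install a newer build.
    print(torch.cuda.get_device_capability())
else:
    print("CUDA not available")
```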

### Frontend (same for both)
- Run Frontend:
```
cd frontend
npm install
npm run dev
```

Go to `http://localhost:5174` to search.
Go to `http://localhost:5173` to search. The frontend works with both backends identically.
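
You can also query either backend directly over HTTP: the `/search` endpoint takes `q` and `limit` parameters and returns filenames, scores, and optional captions. A minimal client using only the Python standard library (assumes a backend running on port 8000):

```python
import json
import urllib.parse
import urllib.request

params = urllib.parse.urlencode({"q": "a dog playing in the snow", "limit": 3})
with urllib.request.urlopen(f"http://localhost:8000/search?{params}") as resp:
    results = json.load(resp)["results"]

for hit in results:
    print(f'{hit["score"]:.3f}  {hit["filename"]}  {hit.get("caption") or ""}')
```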

## Performance Notes
- **CLIP**: Faster, good for general image-text matching
- **ColPali**: More accurate for document images and text-heavy content, supports multi-vector late interaction for better precision
161 changes: 161 additions & 0 deletions examples/image_search/colpali_main.py
@@ -0,0 +1,161 @@
import datetime
import os
from contextlib import asynccontextmanager
from typing import Any

import cocoindex
from dotenv import load_dotenv
from fastapi import FastAPI, Query
from fastapi.middleware.cors import CORSMiddleware
from fastapi.staticfiles import StaticFiles
from qdrant_client import QdrantClient


# --- Config ---

# Use GRPC
QDRANT_URL = os.getenv("QDRANT_URL", "localhost:6334")
PREFER_GRPC = os.getenv("QDRANT_PREFER_GRPC", "true").lower() == "true"

# Use HTTP
# QDRANT_URL = os.getenv("QDRANT_URL", "localhost:6333")
# PREFER_GRPC = os.getenv("QDRANT_PREFER_GRPC", "false").lower() == "true"

OLLAMA_URL = os.getenv("OLLAMA_URL", "http://localhost:11434/")
QDRANT_COLLECTION = "ImageSearchColpali"
COLPALI_MODEL_NAME = os.getenv("COLPALI_MODEL", "vidore/colpali-v1.2")
print(f"📐 Using ColPali model {COLPALI_MODEL_NAME}")


# Create ColPali embedding function using the class-based pattern
colpali_embed = cocoindex.functions.ColPaliEmbedImage(model=COLPALI_MODEL_NAME)


@cocoindex.transform_flow()
def text_to_colpali_embedding(
    text: cocoindex.DataSlice[str],
) -> cocoindex.DataSlice[list[list[float]]]:
    """
    Embed text using a ColPali model, returning multi-vector format.
    This is shared logic between indexing and querying, ensuring consistent embeddings.
    """
    return text.transform(
        cocoindex.functions.ColPaliEmbedQuery(model=COLPALI_MODEL_NAME)
    )


@cocoindex.flow_def(name="ImageObjectEmbeddingColpali")
def image_object_embedding_flow(
    flow_builder: cocoindex.FlowBuilder, data_scope: cocoindex.DataScope
) -> None:
    data_scope["images"] = flow_builder.add_source(
        cocoindex.sources.LocalFile(
            path="img", included_patterns=["*.jpg", "*.jpeg", "*.png"], binary=True
        ),
        refresh_interval=datetime.timedelta(minutes=1),
    )
    img_embeddings = data_scope.add_collector()
    with data_scope["images"].row() as img:
        ollama_model_name = os.getenv("OLLAMA_MODEL")
        if ollama_model_name is not None:
            # If an Ollama model is specified, generate an image caption
            img["caption"] = flow_builder.transform(
                cocoindex.functions.ExtractByLlm(
                    llm_spec=cocoindex.llm.LlmSpec(
                        api_type=cocoindex.LlmApiType.OLLAMA, model=ollama_model_name
                    ),
                    instruction=(
                        "Describe the image in one detailed sentence. "
                        "Name all visible animal species, objects, and the main scene. "
                        "Be specific about type, color, and notable features. "
                        "Mention what each animal is doing."
                    ),
                    output_type=str,
                ),
                image=img["content"],
            )
        img["embedding"] = img["content"].transform(colpali_embed)

        collect_fields = {
            "id": cocoindex.GeneratedField.UUID,
            "filename": img["filename"],
            "embedding": img["embedding"],
        }

        if ollama_model_name is not None:
            print(f"Using Ollama model '{ollama_model_name}' for captioning.")
            collect_fields["caption"] = img["caption"]
        else:
            print("No Ollama model specified; skipping captioning.")

        img_embeddings.collect(**collect_fields)

    img_embeddings.export(
        "img_embeddings",
        cocoindex.targets.Qdrant(collection_name=QDRANT_COLLECTION),
        primary_key_fields=["id"],
    )


@asynccontextmanager
async def lifespan(app: FastAPI) -> None:
    load_dotenv()
    cocoindex.init()
    image_object_embedding_flow.setup(report_to_stdout=True)

    app.state.qdrant_client = QdrantClient(url=QDRANT_URL, prefer_grpc=PREFER_GRPC)

    # Start updater
    app.state.live_updater = cocoindex.FlowLiveUpdater(image_object_embedding_flow)
    app.state.live_updater.start()

    yield


# --- FastAPI app for web API ---
app = FastAPI(lifespan=lifespan)

app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
# Serve images from the 'img' directory at /img
app.mount("/img", StaticFiles(directory="img"), name="img")


# --- Search API ---
@app.get("/search")
def search(
    q: str = Query(..., description="Search query"),
    limit: int = Query(5, description="Number of results"),
) -> Any:
    # Get the multi-vector embedding for the query
    query_embedding = text_to_colpali_embedding.eval(q)
    print(
        f"🔍 Query multi-vector shape: {len(query_embedding)} tokens x {len(query_embedding[0]) if query_embedding else 0} dims"
    )

    # Search in Qdrant with multi-vector MaxSim scoring using query_points API
    search_results = app.state.qdrant_client.query_points(
        collection_name=QDRANT_COLLECTION,
        query=query_embedding,  # Multi-vector format: list[list[float]]
        using="embedding",  # Specify the vector field name
        limit=limit,
        with_payload=True,
    )

    print(f"📈 Found {len(search_results.points)} results with MaxSim scoring")

    return {
        "results": [
            {
                "filename": result.payload["filename"],
                "score": result.score,
                "caption": result.payload.get("caption"),
            }
            for result in search_results.points
        ]
    }
4 changes: 2 additions & 2 deletions examples/image_search/frontend/src/App.jsx
@@ -1,6 +1,6 @@
import React, { useState } from 'react';

const API_URL = 'http://localhost:8000/search'; // Adjust this to your backend search endpoint
const API_URL = `http://${window.location.hostname}:8000/search`;

export default function App() {
const [query, setQuery] = useState('');
@@ -42,7 +42,7 @@ export default function App() {
{results.length === 0 && !loading && <div>No results</div>}
{results.map((result, idx) => (
  <div key={idx} className="result-card">
    <img src={`http://localhost:8000/img/${result.filename}`} alt={result.filename} className="result-img" />
    <img src={`http://${window.location.hostname}:8000/img/${result.filename}`} alt={result.filename} className="result-img" />
    <div className="score">Score: {result.score?.toFixed(3)}</div>
  </div>
))}
1 change: 1 addition & 0 deletions examples/image_search/frontend/vite.config.js
@@ -4,6 +4,7 @@ import react from '@vitejs/plugin-react';
export default defineConfig({
  plugins: [react()],
  server: {
    host: true, // Allow LAN access
    port: 5173,
    open: true,
  },
6 changes: 3 additions & 3 deletions examples/image_search/pyproject.toml
@@ -1,14 +1,14 @@
[project]
name = "image-search"
version = "0.1.0"
description = "Simple example for cocoindex: build embedding index based on images."
description = "Image search examples for cocoindex: CLIP and ColPali-based embedding."
requires-python = ">=3.11"
dependencies = [
"cocoindex>=0.1.75",
"cocoindex[colpali]>=0.1.75",
"python-dotenv>=1.0.1",
"fastapi>=0.100.0",
"torch>=2.0.0",
"transformers>=4.29.0",
"transformers>=4.29.0", # For CLIP model in main.py
"qdrant-client>=1.14.2",
"uvicorn>=0.34.3",
]
3 changes: 2 additions & 1 deletion pyproject.toml
@@ -32,10 +32,11 @@ features = ["pyo3/extension-module"]
dev = ["pytest", "pytest-asyncio", "ruff", "mypy", "pre-commit"]

embeddings = ["sentence-transformers>=3.3.1"]
colpali = ["colpali-engine"]

# We need to repeat the dependency above to make it available for the `all` feature.
# Indirect dependencies such as "cocoindex[embeddings]" will not work for local development.
all = ["sentence-transformers>=3.3.1"]
all = ["sentence-transformers>=3.3.1", "colpali-engine"]

[tool.mypy]
python_version = "3.11"