
Add csv-delimiter #19


Closed · wants to merge 3 commits
63 changes: 32 additions & 31 deletions Cargo.lock

Some generated files are not rendered by default.

17 changes: 11 additions & 6 deletions Cargo.toml
@@ -7,15 +7,15 @@ license = "MIT"
 edition = "2021"

 [dependencies]
-anyhow = "1.0.81"
+anyhow = "1.0.86"
 byte-unit = { version = "5.1.4", features = ["byte", "serde"] }
-clap = { version = "4.5.3", features = ["derive"] }
+clap = { version = "4.5.17", features = ["derive"] }
 csv = "1.3.0"
 exponential-backoff = "1.2.0"
-flate2 = "1.0"
+flate2 = "1.0.33"
 indicatif = "0.17.8"
-serde_json = { version = "1.0.114", features = ["preserve_order"] }
-ureq = "2.9.6"
+serde_json = { version = "1.0.128", features = ["preserve_order"] }
+ureq = "2.10.1"

 # The profile that 'cargo dist' will build with
 [profile.dist]
@@ -29,7 +29,12 @@ cargo-dist-version = "0.11.1"
 # The installers to generate for each app
 installers = []
 # Target platforms to build apps for (Rust target-triple syntax)
-targets = ["aarch64-apple-darwin", "x86_64-apple-darwin", "x86_64-unknown-linux-gnu", "x86_64-pc-windows-msvc"]
+targets = [
+    "aarch64-apple-darwin",
+    "x86_64-apple-darwin",
+    "x86_64-unknown-linux-gnu",
+    "x86_64-pc-windows-msvc",
+]
 # CI backends to support
 ci = ["github"]
 # Publish jobs to run in CI
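These version strings are Cargo caret requirements, so each bump raises only the minimum compatible version; `flate2 = "1.0.33"`, for example, still allows any 1.x release at or above 1.0.33. A quick way to preview what the bumped requirements would resolve to, assuming a local checkout of this branch:

```bash
# Show what would be updated without writing Cargo.lock.
cargo update --dry-run
```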
119 changes: 96 additions & 23 deletions README.md
@@ -1,47 +1,120 @@
 # Meilisearch Importer

-The most efficient CLI tool to import massive CSVs, NSJSON or JSON (array of objects) into Meilisearch.
+The most efficient CLI tool to import massive CSVs, NDJSON, or JSON (array of objects) into Meilisearch.

-This tool has been tested with multiple datasets from hundreds of thousand documents to some with more than forty millions documents. The progress bar is very handy in this case.
+This tool has been tested with datasets ranging from hundreds of thousands to over forty million documents. The progress bar is particularly useful for monitoring large imports.

 ## Features

-- Uploads millions of documents to Meilisearch.
-- Automatically retries on error.
-- Shows the upload progress along with the estimated time of arrival (ETA).
-- [Works on the Cloud](https://www.meilisearch.com/pricing) and on self-hosted instances.
+- Uploads millions of documents to Meilisearch
+- Automatically retries on error with exponential backoff
+- Shows upload progress with estimated time of arrival (ETA)
+- Works with [Meilisearch Cloud](https://www.meilisearch.com/cloud) and self-hosted instances
+- Supports CSV, NDJSON, and JSON file formats
+- Configurable batch size for optimized imports
+- Optional CSV delimiter specification
+- Ability to skip batches for resuming interrupted imports
+- Support for both "add or replace" and "add or update" operations

 ## Installation

-You can download the latest version of this tool [on the release page](https://github.com/meilisearch/meilisearch-importer/releases).
+Download the latest version of this tool from the [releases page](https://github.com/meilisearch/meilisearch-importer/releases).

-## Example Usage
+## Command-line Options

-### Send Documents to the Cloud
+- `--url`: Meilisearch instance URL (required)
+- `--index`: Index name to send documents to (required)
+- `--files`: List of file paths to import (required, supports multiple files)
+- `--primary-key`: Field to use as the primary key
+- `--api-key`: API key for authentication
+- `--batch-size`: Size of document batches (default: 20 MiB)
+- `--csv-delimiter`: Custom delimiter for CSV files
+- `--skip-batches`: Number of batches to skip (for resuming imports)
+- `--upload-operation`: Choose between `add-or-replace` (default) and `add-or-update`

-It's straightforward to [create a project on the Cloud](https://www.meilisearch.com/pricing) and send your documents into it.
-
-If you cannot send your dataset directly from the website by drag-and-dropping it, this tool is perfect for you. You can send them by running the following command:
+## Usage Examples
+
+### Import to Meilisearch Cloud

 ```bash
 meilisearch-importer \
+  --url 'https://ms-************.sfo.meilisearch.io' \
+  --index products \
+  --primary-key uuid \
+  --api-key 'D2jkS***************' \
+  --files products.csv
+```
+
+### Import Large CSV File with Custom Delimiter
+
+```bash
+meilisearch-importer \
-  --url 'https://ms-************.sfo.meilisearch.io'
-  --index crunchbase \
-  --primary-key uuid \
-  --api-key 'D2jkS***************' \
-  --files ./dataset/organizations.csv
+  --url 'https://ms-************.sfo.meilisearch.io' \
+  --api-key 'D2jkS***************' \
+  --index products \
+  --primary-key uuid \
+  --files large_product_list.csv \
+  --csv-delimiter ';' \
+  --batch-size 50MB
 ```
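For context on the new `--csv-delimiter` flag: semicolon-separated CSVs are common in locales where the comma serves as the decimal separator. A hypothetical input file matching the example above (field names and records invented for illustration):

```bash
# Create a small ';'-delimited CSV to try the flag locally.
cat > large_product_list.csv <<'EOF'
uuid;name;price
3e5dc410-0e8f-4c29-9a5a-54b3c0b1c0de;Mechanical keyboard;89,90
7f2a6f9e-1b7b-4f34-8a2a-2d4f0a4f1b11;USB-C cable;9,99
EOF
```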

-### Send Documents to a Local Instance
+### Import Multiple Files

-This tool is also useful when you want to test Meilisearch locally. The only mandatory parameters to define are the URL, the index name and your dataset.
+```bash
+meilisearch-importer \
+  --url 'https://ms-************.sfo.meilisearch.io' \
+  --api-key 'D2jkS***************' \
+  --index library \
+  --files books.json authors.json publishers.ndjson
+```

+### Use Add or Update Operation
+
+```bash
+meilisearch-importer \
+  --url 'https://ms-************.sfo.meilisearch.io' \
+  --api-key 'D2jkS***************' \
+  --index users \
+  --files users_update.json \
+  --upload-operation add-or-update
+```

-However, you can also increase the batch size to make meilisearch index faster.
+### Resume Interrupted Import

 ```bash
 meilisearch-importer \
-  --url 'http://localhost:7700'
-  --index movies \
-  --files movies.json \
-  --batch-size 100MB
+  --url 'https://ms-************.sfo.meilisearch.io' \
+  --api-key 'D2jkS***************' \
+  --index large_dataset \
+  --files huge_file.ndjson \
+  --skip-batches 100
 ```
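The skip count of 100 in the example above is illustrative. Assuming `--skip-batches` counts batches that were already accepted, and given the default 20 MiB batch size, a rough estimate from the volume already imported might look like this (the 2 GiB figure is hypothetical):

```bash
# ~2 GiB already accepted / 20 MiB per batch ≈ 102 batches to skip.
uploaded_bytes=$((2 * 1024 * 1024 * 1024))
batch_bytes=$((20 * 1024 * 1024))
echo $((uploaded_bytes / batch_bytes))
```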

+## Error Handling and Retries
+
+The importer automatically retries on errors using an exponential backoff strategy:
+- Starts with a 100ms delay
+- Increases delay up to a maximum of 1 hour
+- Makes up to 20 retry attempts before giving up
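As a rough illustration of that schedule (not the importer's actual code, which pulls in the `exponential-backoff` crate per the Cargo.toml above, and assuming the usual doubling factor, which the README text does not pin down):

```bash
# Print the delay before each of the 20 retry attempts:
# start at 100 ms, double each time, cap at one hour.
delay_ms=100
max_ms=$((60 * 60 * 1000))
for attempt in $(seq 1 20); do
  echo "attempt ${attempt}: wait ${delay_ms} ms"
  delay_ms=$((delay_ms * 2))
  [ "$delay_ms" -gt "$max_ms" ] && delay_ms=$max_ms
done
```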

+## Supported File Formats
+
+- JSON: Must contain an array of objects
+- NDJSON: Each line should be a valid JSON object
+- CSV: Can specify custom delimiters with `--csv-delimiter`
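To make the NDJSON/JSON distinction concrete, here is a hypothetical pair of files carrying the same three documents (file names and records invented for illustration):

```bash
# NDJSON: one JSON object per line, no surrounding array.
cat > movies.ndjson <<'EOF'
{"id": 1, "title": "Carol"}
{"id": 2, "title": "Wonder Woman"}
{"id": 3, "title": "Life of Pi"}
EOF

# JSON: the same documents as a single array of objects.
cat > movies.json <<'EOF'
[
  {"id": 1, "title": "Carol"},
  {"id": 2, "title": "Wonder Woman"},
  {"id": 3, "title": "Life of Pi"}
]
EOF
```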

+## Troubleshooting
+
+- "Too many errors": Check your network connection and Meilisearch instance status.
+- "File does not exist": Verify file paths and permissions.
+- "Failed to read CSV headers": Ensure your CSV file is properly formatted and uses the correct delimiter.
+- If uploads are slow, try increasing the `--batch-size` or check your network speed.
+
+## Contributing
+
+We welcome contributions to the Meilisearch Importer! Please check out our [Contributing Guide](CONTRIBUTING.md) for more information on how to get started.
+
+## License
+
+Meilisearch Importer is released under the MIT License. See the [LICENSE](LICENSE) file for details.