Skip to content

feat(PocketIC): PocketIC can recover from a state directory after being killed #5921

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 22 commits into from
Jul 21, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion packages/pocket-ic/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## Unreleased

### Added
- The function `PocketIc::start_or_reuse_server` to manually start or reuse a PocketIC server.
- The function `start_server` and its input type `StartServerParams` to manually start a PocketIC server.
- The function `PocketIcBuilder::with_all_icp_features` to specify that all ICP features (supported by PocketIC) should be enabled.


Expand Down
1 change: 1 addition & 0 deletions packages/pocket-ic/src/common/rest.rs
Original file line number Diff line number Diff line change
Expand Up @@ -565,6 +565,7 @@ pub struct InstanceConfig {
pub log_level: Option<String>,
pub bitcoind_addr: Option<Vec<SocketAddr>>,
pub icp_features: Option<IcpFeatures>,
pub allow_incomplete_state: Option<bool>,
}

#[derive(Debug, Clone, Eq, Hash, PartialEq, Serialize, Deserialize, Default, JsonSchema)]
Expand Down
47 changes: 34 additions & 13 deletions packages/pocket-ic/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -81,14 +81,14 @@ use std::{
fs::OpenOptions,
net::{IpAddr, SocketAddr},
path::PathBuf,
process::Command,
process::{Child, Command},
sync::{mpsc::channel, Arc},
thread,
thread::JoinHandle,
time::{Duration, SystemTime, UNIX_EPOCH},
};
use strum_macros::EnumIter;
use tempfile::TempDir;
use tempfile::{NamedTempFile, TempDir};
use thiserror::Error;
use tokio::runtime::Runtime;
use tracing::{instrument, warn};
Expand Down Expand Up @@ -617,8 +617,14 @@ impl PocketIc {
let runtime = tokio::runtime::Builder::new_current_thread()
.build()
.unwrap();
let url = runtime
.block_on(async { start_or_reuse_server(None).await.join("instances").unwrap() });
let url = runtime.block_on(async {
let (_, server_url) = start_server(StartServerParams {
reuse: true,
..Default::default()
})
.await;
server_url.join("instances").unwrap()
});
let instances: Vec<String> = reqwest::blocking::Client::new()
.get(url)
.send()
Expand Down Expand Up @@ -1819,12 +1825,19 @@ async fn download_pocketic_server(
Ok(())
}

/// Attempt to start a new PocketIC server if it's not already running.
pub async fn start_or_reuse_server(server_binary: Option<PathBuf>) -> Url {
#[derive(Default)]
pub struct StartServerParams {
pub server_binary: Option<PathBuf>,
/// Reuse an existing PocketIC server spawned by this process.
pub reuse: bool,
}

/// Attempt to start a new PocketIC server.
pub async fn start_server(params: StartServerParams) -> (Child, Url) {
let default_bin_dir =
std::env::temp_dir().join(format!("pocket-ic-server-{}", EXPECTED_SERVER_VERSION));
let default_bin_path = default_bin_dir.join("pocket-ic");
let mut bin_path: PathBuf = server_binary.unwrap_or_else(|| {
let mut bin_path: PathBuf = params.server_binary.unwrap_or_else(|| {
std::env::var_os("POCKET_IC_BIN")
.unwrap_or_else(|| default_bin_path.clone().into())
.into()
Expand Down Expand Up @@ -1868,10 +1881,14 @@ pub async fn start_or_reuse_server(server_binary: Option<PathBuf>) -> Url {
}
}

// We use the test driver's process ID to share the PocketIC server between multiple tests
// launched by the same test driver.
let test_driver_pid = std::process::id();
let port_file_path = std::env::temp_dir().join(format!("pocket_ic_{}.port", test_driver_pid));
let port_file_path = if params.reuse {
// We use the test driver's process ID to share the PocketIC server between multiple tests
// launched by the same test driver.
let test_driver_pid = std::process::id();
std::env::temp_dir().join(format!("pocket_ic_{}.port", test_driver_pid))
} else {
NamedTempFile::new().unwrap().into_temp_path().to_path_buf()
};
let mut cmd = pocket_ic_server_cmd(&bin_path);
cmd.arg("--port-file");
#[cfg(windows)]
Expand All @@ -1895,7 +1912,8 @@ pub async fn start_or_reuse_server(server_binary: Option<PathBuf>) -> Url {

// TODO: SDK-1936
#[allow(clippy::zombie_processes)]
cmd.spawn()
let child = cmd
.spawn()
.unwrap_or_else(|_| panic!("Failed to start PocketIC binary ({})", bin_path.display()));

loop {
Expand All @@ -1905,7 +1923,10 @@ pub async fn start_or_reuse_server(server_binary: Option<PathBuf>) -> Url {
.trim_end()
.parse()
.expect("Failed to parse port to number");
break Url::parse(&format!("http://{}:{}/", LOCALHOST, port)).unwrap();
break (
child,
Url::parse(&format!("http://{}:{}/", LOCALHOST, port)).unwrap(),
);
}
}
std::thread::sleep(Duration::from_millis(20));
Expand Down
19 changes: 15 additions & 4 deletions packages/pocket-ic/src/nonblocking.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@ use crate::common::rest::{
use crate::wsl_path;
pub use crate::DefaultEffectiveCanisterIdError;
use crate::{
copy_dir, start_or_reuse_server, IngressStatusResult, PocketIcBuilder, PocketIcState,
RejectResponse, Time,
copy_dir, start_server, IngressStatusResult, PocketIcBuilder, PocketIcState, RejectResponse,
StartServerParams, Time,
};
use backoff::backoff::Backoff;
use backoff::{ExponentialBackoff, ExponentialBackoffBuilder};
Expand Down Expand Up @@ -144,7 +144,12 @@ impl PocketIc {
let server_url = if let Some(server_url) = server_url {
server_url
} else {
start_or_reuse_server(server_binary).await
let (_, server_url) = start_server(StartServerParams {
server_binary,
reuse: true,
})
.await;
server_url
};

let subnet_config_set = subnet_config_set
Expand Down Expand Up @@ -200,6 +205,7 @@ impl PocketIc {
log_level: log_level.map(|l| l.to_string()),
bitcoind_addr,
icp_features: Some(icp_features),
allow_incomplete_state: Some(false),
};

let test_driver_pid = std::process::id();
Expand Down Expand Up @@ -314,7 +320,12 @@ impl PocketIc {
/// List all instances and their status.
#[instrument(ret)]
pub async fn list_instances() -> Vec<String> {
let url = start_or_reuse_server(None).await.join("instances").unwrap();
let (_, server_url) = start_server(StartServerParams {
reuse: true,
..Default::default()
})
.await;
let url = server_url.join("instances").unwrap();
let instances: Vec<String> = reqwest::Client::new()
.get(url)
.send()
Expand Down
7 changes: 5 additions & 2 deletions packages/pocket-ic/tests/icp_features.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
use candid::{CandidType, Principal};
use pocket_ic::common::rest::{ExtendedSubnetConfigSet, IcpFeatures, InstanceConfig, SubnetSpec};
use pocket_ic::{start_or_reuse_server, update_candid, PocketIc, PocketIcBuilder, PocketIcState};
use pocket_ic::{
start_server, update_candid, PocketIc, PocketIcBuilder, PocketIcState, StartServerParams,
};
use reqwest::StatusCode;
use serde::Deserialize;
use std::collections::BTreeMap;
Expand Down Expand Up @@ -375,7 +377,7 @@ async fn with_all_icp_features_and_nns_subnet_state() {
.unwrap()
.into();

let url = start_or_reuse_server(None).await;
let (_, url) = start_server(StartServerParams::default()).await;
let client = reqwest::Client::new();
let instance_config = InstanceConfig {
subnet_config_set: ExtendedSubnetConfigSet {
Expand All @@ -387,6 +389,7 @@ async fn with_all_icp_features_and_nns_subnet_state() {
log_level: None,
bitcoind_addr: None,
icp_features: Some(IcpFeatures::all_icp_features()),
allow_incomplete_state: None,
};
let response = client
.post(url.join("instances").unwrap())
Expand Down
132 changes: 131 additions & 1 deletion packages/pocket-ic/tests/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,14 +18,24 @@ use pocket_ic::{
query_candid, update_candid, DefaultEffectiveCanisterIdError, ErrorCode, IngressStatusResult,
PocketIc, PocketIcBuilder, PocketIcState, RejectCode, Time,
};
#[cfg(not(windows))]
use pocket_ic::{
common::rest::{CreateInstanceResponse, ExtendedSubnetConfigSet, InstanceConfig},
nonblocking::PocketIc as PocketIcAsync,
start_server, StartServerParams,
};
use reqwest::blocking::Client;
use reqwest::header::CONTENT_LENGTH;
use reqwest::{Method, StatusCode};
use serde::Serialize;
use sha2::{Digest, Sha256};
#[cfg(windows)]
use std::net::{IpAddr, Ipv4Addr, SocketAddr};
use std::{io::Read, sync::OnceLock, time::SystemTime};
use std::{
io::Read,
sync::OnceLock,
time::{Duration, SystemTime},
};
use tempfile::{NamedTempFile, TempDir};
#[cfg(windows)]
use wslpath::windows_to_wsl;
Expand Down Expand Up @@ -426,6 +436,126 @@ fn set_time_into_past() {
pic.set_time(now.into());
}

#[test]
fn time_on_resumed_instance() {
let state = PocketIcState::new();

let pic = PocketIcBuilder::new()
.with_application_subnet()
.with_state(state)
.build();

let now = SystemTime::now();
pic.set_certified_time(now.into());

let time = pic.get_time();
assert_eq!(time, now.into());
let state = pic.drop_and_take_state().unwrap();

let pic = PocketIcBuilder::new().with_state(state).build();

// The time on the resumed instances increases by 2ns:
// - 1ns due to executing a checkpointed round before dropping the original instance;
// - 1ns due to bumping time when creating a new instance to ensure strict time monotonicity.
let resumed_time = pic.get_time();
assert_eq!(resumed_time, time + Duration::from_nanos(2));
}

// Killing the PocketIC server inside WSL is challenging => skipping this test on Windows.
#[cfg(not(windows))]
async fn resume_killed_instance_impl(allow_incomplete_state: Option<bool>) -> Result<(), String> {
let (mut server, server_url) = start_server(StartServerParams::default()).await;
let temp_dir = TempDir::new().unwrap();

let state = PocketIcState::new_from_path(temp_dir.path().to_path_buf());
let pic = PocketIcBuilder::new()
.with_application_subnet()
.with_server_url(server_url)
.with_state(state)
.build_async()
.await;

let canister_id = pic.create_canister().await;

// Execute many rounds to trigger a checkpoint.
for _ in 0..600 {
pic.tick().await;
}

// The following (most recent) changes will be lost after killing the instance.
let now = SystemTime::now();
pic.set_certified_time(now.into()).await;
let another_canister_id = pic.create_canister().await;

assert!(pic.canister_exists(canister_id).await);
assert!(pic.canister_exists(another_canister_id).await);
let time = pic.get_time().await;
assert!(time >= now.into());

server.kill().unwrap();

let (_, server_url) = start_server(StartServerParams::default()).await;
let client = reqwest::Client::new();
let instance_config = InstanceConfig {
subnet_config_set: ExtendedSubnetConfigSet::default(),
state_dir: Some(temp_dir.path().to_path_buf()),
nonmainnet_features: false,
log_level: None,
bitcoind_addr: None,
icp_features: None,
allow_incomplete_state,
};
let response = client
.post(server_url.join("instances").unwrap())
.json(&instance_config)
.send()
.await
.unwrap();
if !response.status().is_success() {
return Err(response.text().await.unwrap());
}
let instance_id = match response.json::<CreateInstanceResponse>().await.unwrap() {
CreateInstanceResponse::Created { instance_id, .. } => instance_id,
CreateInstanceResponse::Error { message } => panic!("Unexpected error: {}", message),
};
let pic = PocketIcAsync::new_from_existing_instance(server_url, instance_id, None);

// Only the first canister (created before the last checkpoint) is preserved,
// the other canister and time change are lost.
assert!(pic.canister_exists(canister_id).await);
assert!(!pic.canister_exists(another_canister_id).await);
let resumed_time = pic.get_time().await;
assert!(resumed_time < now.into());

// Drop instance explicitly to prevent data races in the StateManager.
pic.drop().await;

Ok(())
}

// Killing the PocketIC server inside WSL is challenging => skipping this test on Windows.
#[cfg(not(windows))]
#[tokio::test]
async fn resume_killed_instance_default() {
let err = resume_killed_instance_impl(None).await.unwrap_err();
assert!(err.contains("The state of subnet with seed 7712b2c09cb96b3aa3fbffd4034a21a39d5d13f80e043161d1d71f4c593434af is incomplete."));
}

// Killing the PocketIC server inside WSL is challenging => skipping this test on Windows.
#[cfg(not(windows))]
#[tokio::test]
async fn resume_killed_instance_strict() {
let err = resume_killed_instance_impl(Some(false)).await.unwrap_err();
assert!(err.contains("The state of subnet with seed 7712b2c09cb96b3aa3fbffd4034a21a39d5d13f80e043161d1d71f4c593434af is incomplete."));
}

// Killing the PocketIC server inside WSL is challenging => skipping this test on Windows.
#[cfg(not(windows))]
#[tokio::test]
async fn resume_killed_instance() {
resume_killed_instance_impl(Some(true)).await.unwrap();
}

#[test]
fn test_get_set_cycle_balance() {
let pic = PocketIc::new();
Expand Down
1 change: 1 addition & 0 deletions rs/pocket_ic_server/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- The endpoint `/instances/<instance_id>/update/await_ingress_message` (execute rounds on the PocketIc instance until the message is executed):
to fix a performance regression when using the two endpoints `/instances/<instance_id>/update/tick` and `/instances/<instance_id>/read/ingress_status` in a loop.
- The argument of the endpoint `/instances/` takes an additional optional field `icp_features` specifying ICP features (implemented by system canisters) to be enabled in the newly created PocketIC instance.
- The argument of the endpoint `/instances/` takes an additional optional field `allow_incomplete_state` specifying if incomplete state (e.g., resulting from not deleting a PocketIC instance gracefully) is allowed.



Expand Down
Loading
Loading