diff --git a/src-tauri/src/cmd/profile_switch/driver.rs b/src-tauri/src/cmd/profile_switch/driver.rs index 00b967d4..d8eecc01 100644 --- a/src-tauri/src/cmd/profile_switch/driver.rs +++ b/src-tauri/src/cmd/profile_switch/driver.rs @@ -1,12 +1,16 @@ use super::{ CmdResult, state::{SwitchCancellation, SwitchManager, SwitchRequest, manager}, - workflow, + workflow::{self, SwitchPanicInfo}, }; use crate::{logging, utils::logging::Type}; +use futures::FutureExt; use once_cell::sync::OnceCell; use smartstring::alias::String as SmartString; -use std::collections::{HashMap, VecDeque}; +use std::{ + collections::{HashMap, VecDeque}, + panic::AssertUnwindSafe, +}; use tokio::sync::{ mpsc::{self, error::TrySendError}, oneshot, @@ -30,10 +34,16 @@ enum SwitchDriverMessage { }, Completion { request: SwitchRequest, - success: bool, + outcome: SwitchJobOutcome, }, } +#[derive(Debug)] +enum SwitchJobOutcome { + Completed { success: bool }, + Panicked { info: SwitchPanicInfo }, +} + pub(super) async fn switch_profile( profile_index: impl Into, notify_success: bool, @@ -139,8 +149,8 @@ fn switch_driver_sender() -> &'static mpsc::Sender { } => { handle_enqueue(&mut state, request, respond_to, driver_tx.clone(), manager); } - SwitchDriverMessage::Completion { request, success } => { - handle_completion(&mut state, request, success, driver_tx.clone(), manager); + SwitchDriverMessage::Completion { request, outcome } => { + handle_completion(&mut state, request, outcome, driver_tx.clone(), manager); } } } @@ -203,17 +213,31 @@ fn handle_enqueue( fn handle_completion( state: &mut SwitchDriverState, request: SwitchRequest, - success: bool, + outcome: SwitchJobOutcome, driver_tx: mpsc::Sender, manager: &'static SwitchManager, ) { - logging!( - info, - Type::Cmd, - "Switch task {} completed (success={})", - request.task_id(), - success - ); + match &outcome { + SwitchJobOutcome::Completed { success } => { + logging!( + info, + Type::Cmd, + "Switch task {} completed (success={})", + request.task_id(), + success + ); + } + SwitchJobOutcome::Panicked { info } => { + logging!( + error, + Type::Cmd, + "Switch task {} panicked at stage {:?}: {}", + request.task_id(), + info.stage, + info.detail + ); + } + } if let Some(active) = state.active.as_ref() && active.task_id() == request.task_id() @@ -242,11 +266,23 @@ fn start_switch_job( ) { let completion_request = request.clone(); tokio::spawn(async move { - let success = workflow::run_switch_job(manager, request).await; + let job_result = match AssertUnwindSafe(workflow::run_switch_job(manager, request)) + .catch_unwind() + .await + { + Ok(Ok(success)) => SwitchJobOutcome::Completed { success }, + Ok(Err(info)) => SwitchJobOutcome::Panicked { info }, + Err(payload) => SwitchJobOutcome::Panicked { + info: SwitchPanicInfo::driver_task(workflow::describe_panic_payload( + payload.as_ref(), + )), + }, + }; + if let Err(err) = driver_tx .send(SwitchDriverMessage::Completion { request: completion_request, - success, + outcome: job_result, }) .await { diff --git a/src-tauri/src/cmd/profile_switch/workflow.rs b/src-tauri/src/cmd/profile_switch/workflow.rs index 859401e6..7b577dad 100644 --- a/src-tauri/src/cmd/profile_switch/workflow.rs +++ b/src-tauri/src/cmd/profile_switch/workflow.rs @@ -19,12 +19,13 @@ use tokio::{fs as tokio_fs, time}; mod state_machine; +pub(super) use state_machine::SwitchPanicInfo; use state_machine::SwitchStateMachine; pub(super) async fn run_switch_job( manager: &'static SwitchManager, request: SwitchRequest, -) -> bool { +) -> Result { if request.cancel_token().is_cancelled() { logging!( info, @@ -38,7 +39,7 @@ pub(super) async fn run_switch_job( request.notify(), request.task_id(), ); - return false; + return Ok(false); } let profile_id = request.profile_id().clone(); @@ -56,7 +57,7 @@ pub(super) async fn run_switch_job( ); handle::Handle::notice_message("config_validate::error", err.clone()); handle::Handle::notify_profile_switch_finished(profile_id.clone(), false, notify, task_id); - return false; + return Ok(false); } logging!( @@ -107,7 +108,7 @@ pub(super) async fn run_switch_job( notify, task_id, ); - false + Ok(false) } Ok(Err(panic_payload)) => { let panic_message = describe_panic_payload(panic_payload.as_ref()); @@ -129,54 +130,86 @@ pub(super) async fn run_switch_job( notify, task_id, ); - false + Err(SwitchPanicInfo::workflow_root(panic_message)) } - Ok(Ok(result)) => match result { - Ok(success) => { - handle::Handle::notify_profile_switch_finished( - profile_id.clone(), - success, - notify, - task_id, - ); - close_connections_after_switch(&profile_id).await; - if notify && success { - handle::Handle::notice_message("info", "Profile Switched"); + Ok(Ok(machine_result)) => match machine_result { + Ok(cmd_result) => match cmd_result { + Ok(success) => { + handle::Handle::notify_profile_switch_finished( + profile_id.clone(), + success, + notify, + task_id, + ); + close_connections_after_switch(&profile_id).await; + if notify && success { + handle::Handle::notice_message("info", "Profile Switched"); + } + logging!( + info, + Type::Cmd, + "Profile switch task finished: {} (success={})", + profile_id, + success + ); + Ok(success) } - logging!( - info, - Type::Cmd, - "Profile switch task finished: {} (success={})", - profile_id, - success - ); - success - } - Err(err) => { + Err(err) => { + logging!( + error, + Type::Cmd, + "Profile switch failed ({}): {}", + profile_id, + err + ); + handle::Handle::notice_message("config_validate::error", err.clone()); + handle::Handle::notify_profile_switch_finished( + profile_id.clone(), + false, + notify, + task_id, + ); + Ok(false) + } + }, + Err(panic_info) => { logging!( error, Type::Cmd, - "Profile switch failed ({}): {}", + "State machine panic during profile switch task {} ({} {:?}): {}", + task_id, profile_id, - err + panic_info.stage, + panic_info.detail + ); + handle::Handle::notice_message( + "config_validate::panic", + format!("profile switch panic: {}", profile_id), ); - handle::Handle::notice_message("config_validate::error", err.clone()); handle::Handle::notify_profile_switch_finished( profile_id.clone(), false, notify, task_id, ); - false + Err(panic_info) } }, } } pub(super) async fn patch_profiles_config(profiles: IProfiles) -> CmdResult { - SwitchStateMachine::new(manager(), None, profiles) + match SwitchStateMachine::new(manager(), None, profiles) .run() .await + { + Ok(result) => result, + Err(panic_info) => Err(format!( + "profile switch panic ({:?}): {}", + panic_info.stage, panic_info.detail + ) + .into()), + } } pub(super) async fn validate_profile_yaml(profile: &SmartString) -> CmdResult { @@ -336,7 +369,7 @@ async fn close_connections_after_switch(profile_id: &SmartString) { } } -fn describe_panic_payload(payload: &(dyn Any + Send)) -> String { +pub(super) fn describe_panic_payload(payload: &(dyn Any + Send)) -> String { if let Some(message) = payload.downcast_ref::<&str>() { (*message).to_string() } else if let Some(message) = payload.downcast_ref::() { diff --git a/src-tauri/src/cmd/profile_switch/workflow/state_machine.rs b/src-tauri/src/cmd/profile_switch/workflow/state_machine.rs index 7283a468..ee12ff31 100644 --- a/src-tauri/src/cmd/profile_switch/workflow/state_machine.rs +++ b/src-tauri/src/cmd/profile_switch/workflow/state_machine.rs @@ -1,4 +1,4 @@ -use super::{CmdResult, restore_previous_profile, validate_profile_yaml}; +use super::{CmdResult, describe_panic_payload, restore_previous_profile, validate_profile_yaml}; use crate::{ cmd::profile_switch::state::{SwitchManager, SwitchRequest, SwitchScope}, config::{Config, IProfiles, profiles::profiles_save_file_safe}, @@ -6,8 +6,9 @@ use crate::{ logging, utils::logging::Type, }; +use futures::FutureExt; use smartstring::alias::String as SmartString; -use std::{mem, time::Duration}; +use std::{mem, panic::AssertUnwindSafe, time::Duration}; use tokio::{sync::MutexGuard, time}; /// Explicit state machine for profile switching so we can reason about @@ -17,6 +18,39 @@ pub(super) struct SwitchStateMachine { state: SwitchState, } +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub(crate) enum SwitchStage { + Start, + AcquireCore, + Prepare, + ValidateTarget, + PatchDraft, + UpdateCore, + Finalize, + Workflow, + DriverTask, +} + +#[derive(Debug, Clone)] +pub(crate) struct SwitchPanicInfo { + pub(crate) stage: SwitchStage, + pub(crate) detail: String, +} + +impl SwitchPanicInfo { + pub(crate) fn new(stage: SwitchStage, detail: String) -> Self { + Self { stage, detail } + } + + pub(crate) fn workflow_root(detail: String) -> Self { + Self::new(SwitchStage::Workflow, detail) + } + + pub(crate) fn driver_task(detail: String) -> Self { + Self::new(SwitchStage::DriverTask, detail) + } +} + impl SwitchStateMachine { pub(super) fn new( manager: &'static SwitchManager, @@ -29,23 +63,88 @@ impl SwitchStateMachine { } } - pub(super) async fn run(mut self) -> CmdResult { + pub(super) async fn run(mut self) -> Result, SwitchPanicInfo> { loop { let current_state = mem::replace(&mut self.state, SwitchState::Complete(false)); - let next = match current_state { - SwitchState::Start => self.handle_start(), - SwitchState::AcquireCore => self.handle_acquire_core().await, - SwitchState::Prepare => self.handle_prepare().await, - SwitchState::ValidateTarget => self.handle_validate_target().await, - SwitchState::PatchDraft => self.handle_patch_draft().await, - SwitchState::UpdateCore => self.handle_update_core().await, - SwitchState::Finalize(outcome) => self.handle_finalize(outcome).await, - SwitchState::Complete(result) => return Ok(result), - }?; - self.state = next; + match current_state { + SwitchState::Complete(result) => return Ok(Ok(result)), + _ => match self.run_state(current_state).await? { + Ok(state) => self.state = state, + Err(err) => return Ok(Err(err)), + }, + } } } + async fn run_state( + &mut self, + current: SwitchState, + ) -> Result, SwitchPanicInfo> { + match current { + SwitchState::Start => { + self.with_stage( + SwitchStage::Start, + |this| async move { this.handle_start() }, + ) + .await + } + SwitchState::AcquireCore => { + self.with_stage(SwitchStage::AcquireCore, |this| async move { + this.handle_acquire_core().await + }) + .await + } + SwitchState::Prepare => { + self.with_stage(SwitchStage::Prepare, |this| async move { + this.handle_prepare().await + }) + .await + } + SwitchState::ValidateTarget => { + self.with_stage(SwitchStage::ValidateTarget, |this| async move { + this.handle_validate_target().await + }) + .await + } + SwitchState::PatchDraft => { + self.with_stage(SwitchStage::PatchDraft, |this| async move { + this.handle_patch_draft().await + }) + .await + } + SwitchState::UpdateCore => { + self.with_stage(SwitchStage::UpdateCore, |this| async move { + this.handle_update_core().await + }) + .await + } + SwitchState::Finalize(outcome) => { + self.with_stage(SwitchStage::Finalize, |this| async move { + this.handle_finalize(outcome).await + }) + .await + } + SwitchState::Complete(result) => Ok(Ok(SwitchState::Complete(result))), + } + } + + async fn with_stage<'a, F, Fut>( + &'a mut self, + stage: SwitchStage, + f: F, + ) -> Result, SwitchPanicInfo> + where + F: FnOnce(&'a mut Self) -> Fut, + Fut: std::future::Future> + 'a, + { + AssertUnwindSafe(f(self)) + .catch_unwind() + .await + .map_err(|payload| { + SwitchPanicInfo::new(stage, describe_panic_payload(payload.as_ref())) + }) + } + fn handle_start(&mut self) -> CmdResult { if self.ctx.manager.is_switching() { logging!( @@ -137,10 +236,7 @@ impl SwitchStateMachine { self.ctx.sequence() ); - let patch = self - .ctx - .take_profiles_patch() - .ok_or_else(|| "profiles patch already consumed".to_string())?; + let patch = self.ctx.take_profiles_patch()?; self.ctx.new_profile_for_event = patch.current.clone(); let _ = Config::profiles().await.draft_mut().patch_config(patch); @@ -323,8 +419,10 @@ impl SwitchContext { } } - fn take_profiles_patch(&mut self) -> Option { - self.profiles_patch.take() + fn take_profiles_patch(&mut self) -> CmdResult { + self.profiles_patch + .take() + .ok_or_else(|| "profiles patch already consumed".into()) } fn cancelled(&self) -> bool {