fix: modify network event handling and watermark tracking

This commit is contained in:
Jamie Pine 2025-10-14 21:58:24 -07:00
parent c2a490891e
commit ffb187ba47
5 changed files with 2314 additions and 17 deletions

View File

@ -127,7 +127,7 @@ pub async fn broadcast_bulk_state(
let compressed = compress_batch(&batch)?;
// Broadcast to all peers
for peer in self.get_sync_partners().await? {
for peer in self.get_connected_sync_partners().await? {
self.send_to_peer(peer, compressed.clone()).await?;
}

View File

@ -19,18 +19,36 @@ pub struct Device {
/// Operating system
pub os: OperatingSystem,
/// Operating system version
pub os_version: Option<String>,
/// Hardware model (e.g., "MacBook Pro", "iPhone 15")
pub hardware_model: Option<String>,
/// Network addresses for P2P connections
pub network_addresses: Vec<String>,
/// Device capabilities (indexing, P2P, volume detection, etc.)
pub capabilities: serde_json::Value,
/// Whether this device is currently online
pub is_online: bool,
/// Last time this device was seen
pub last_seen_at: DateTime<Utc>,
/// Whether sync is enabled for this device
pub sync_enabled: bool,
/// Last time this device synced
pub last_sync_at: Option<DateTime<Utc>>,
/// Watermark for device-owned data (locations, entries)
pub last_state_watermark: Option<DateTime<Utc>>,
/// Watermark for shared resources (tags, albums) as JSON-serialized HLC
pub last_shared_watermark: Option<String>,
/// When this device was first added
pub created_at: DateTime<Utc>,
@ -57,10 +75,20 @@ impl Device {
id: Uuid::new_v4(),
name,
os: detect_operating_system(),
os_version: None,
hardware_model: detect_hardware_model(),
network_addresses: Vec::new(),
capabilities: serde_json::json!({
"indexing": true,
"p2p": true,
"volume_detection": true
}),
is_online: true,
last_seen_at: now,
sync_enabled: true,
last_sync_at: None,
last_state_watermark: None,
last_shared_watermark: None,
created_at: now,
updated_at: now,
}
@ -175,19 +203,17 @@ impl From<Device> for entities::device::ActiveModel {
uuid: Set(device.id),
name: Set(device.name),
os: Set(device.os.to_string()),
os_version: Set(None), // TODO: Add to domain model if needed
os_version: Set(device.os_version),
hardware_model: Set(device.hardware_model),
network_addresses: Set(serde_json::json!(device.network_addresses)),
is_online: Set(device.is_online),
last_seen_at: Set(device.last_seen_at),
capabilities: Set(serde_json::json!({
"indexing": true,
"p2p": true,
"volume_detection": true
})),
capabilities: Set(device.capabilities),
created_at: Set(device.created_at),
sync_enabled: Set(true), // Enable sync by default
last_sync_at: Set(None),
sync_enabled: Set(device.sync_enabled),
last_sync_at: Set(device.last_sync_at),
last_state_watermark: Set(device.last_state_watermark),
last_shared_watermark: Set(device.last_shared_watermark),
updated_at: Set(device.updated_at),
}
}
@ -203,10 +229,16 @@ impl TryFrom<entities::device::Model> for Device {
id: model.uuid,
name: model.name,
os: parse_operating_system(&model.os),
os_version: model.os_version,
hardware_model: model.hardware_model,
network_addresses,
capabilities: model.capabilities,
is_online: model.is_online,
last_seen_at: model.last_seen_at,
sync_enabled: model.sync_enabled,
last_sync_at: model.last_sync_at,
last_state_watermark: model.last_state_watermark,
last_shared_watermark: model.last_shared_watermark,
created_at: model.created_at,
updated_at: model.updated_at,
})

View File

@ -23,6 +23,8 @@ pub struct Model {
// Sync coordination fields (added in m20251009_000001_add_sync_to_devices)
pub sync_enabled: bool,
pub last_sync_at: Option<DateTimeUtc>,
pub last_state_watermark: Option<DateTimeUtc>,
pub last_shared_watermark: Option<String>,
}
#[derive(Copy, Clone, Debug, EnumIter, DeriveRelation)]
@ -126,6 +128,8 @@ impl crate::infra::sync::Syncable for Model {
updated_at: Set(chrono::Utc::now().into()),
sync_enabled: Set(true),
last_sync_at: Set(None),
last_state_watermark: Set(device.last_state_watermark),
last_shared_watermark: Set(device.last_shared_watermark),
};
// Idempotent upsert by UUID
@ -141,6 +145,8 @@ impl crate::infra::sync::Syncable for Model {
Column::IsOnline,
Column::LastSeenAt,
Column::Capabilities,
Column::LastStateWatermark,
Column::LastSharedWatermark,
Column::UpdatedAt,
])
.to_owned(),

File diff suppressed because it is too large Load Diff

View File

@ -19,7 +19,7 @@ use std::sync::{
atomic::{AtomicBool, Ordering},
Arc,
};
use tokio::sync::{broadcast, RwLock};
use tokio::sync::{broadcast, mpsc, RwLock};
use tracing::{debug, error, info, warn};
use uuid::Uuid;
@ -64,6 +64,9 @@ pub struct PeerSync {
/// Whether the service is running
is_running: Arc<AtomicBool>,
/// Network event receiver (optional - if provided, enables connection event handling)
network_events: Arc<tokio::sync::Mutex<Option<mpsc::UnboundedReceiver<crate::service::network::core::NetworkEvent>>>>,
}
impl PeerSync {
@ -94,9 +97,15 @@ impl PeerSync {
event_bus: library.event_bus().clone(),
retry_queue: Arc::new(RetryQueue::new()),
is_running: Arc::new(AtomicBool::new(false)),
network_events: Arc::new(tokio::sync::Mutex::new(None)),
})
}
/// Install the network event receiver used for connection tracking.
///
/// The receiver is parked in `network_events` until
/// `start_network_event_listener` takes it out. If a receiver is already
/// parked there, it is dropped and replaced by the new one.
pub async fn set_network_events(
    &self,
    receiver: mpsc::UnboundedReceiver<crate::service::network::core::NetworkEvent>,
) {
    // Hold the async mutex only for the duration of the assignment.
    let mut slot = self.network_events.lock().await;
    *slot = Some(receiver);
}
/// Get database connection
pub fn db(&self) -> &Arc<DatabaseConnection> {
&self.db
@ -112,16 +121,68 @@ impl PeerSync {
self.device_id
}
/// Get watermarks for heartbeat and reconnection sync
///
/// Returns `(state_watermark, shared_watermark)` read from this device's row
/// in the devices table.
/// State watermark tracks device-owned data (locations, entries).
/// Shared watermark (HLC) tracks shared resources (tags, albums) and is
/// stored as a JSON string that is deserialized here.
///
/// This is best-effort and never fails: if the row is missing or the query
/// errors, a warning is logged and `(None, None)` is returned. If the stored
/// shared watermark cannot be deserialized, a warning is logged and only the
/// shared watermark is reported as `None`.
pub async fn get_watermarks(&self) -> (Option<chrono::DateTime<chrono::Utc>>, Option<HLC>) {
    use crate::infra::db::entities;
    use sea_orm::{ColumnTrait, EntityTrait, QueryFilter};

    // Query devices table for this device's watermarks
    let lookup = entities::device::Entity::find()
        .filter(entities::device::Column::Uuid.eq(self.device_id))
        .one(self.db.as_ref())
        .await;

    match lookup {
        Ok(Some(device)) => {
            // Deserialize shared watermark from JSON. A malformed value is
            // reported instead of being silently treated as "never synced".
            let shared_watermark = match device.last_shared_watermark.as_deref() {
                Some(raw) => match serde_json::from_str::<HLC>(raw) {
                    Ok(hlc) => Some(hlc),
                    Err(e) => {
                        warn!(
                            device_id = %self.device_id,
                            error = %e,
                            "Failed to deserialize shared watermark, treating it as unset"
                        );
                        None
                    }
                },
                None => None,
            };

            (device.last_state_watermark, shared_watermark)
        }
        Ok(None) => {
            warn!(
                device_id = %self.device_id,
                "Device not found in devices table, returning None watermarks"
            );
            (None, None)
        }
        Err(e) => {
            warn!(
                device_id = %self.device_id,
                error = %e,
                "Failed to query watermarks from devices table"
            );
            (None, None)
        }
    }
}
/// Exchange watermarks with a peer and trigger catch-up if needed
///
/// Currently a placeholder: it only emits a debug log naming the peer and
/// returns `Ok(())`. No message is sent and no state is changed.
///
/// TODO: Full implementation requires:
/// 1. Add WatermarkExchange message type to SyncMessage enum
/// 2. Send our watermarks to the peer
/// 3. Receive peer's watermarks
/// 4. Compare timestamps/HLC to determine divergence
/// 5. Trigger StateRequest/SharedChangeRequest for incremental sync
/// 6. Update devices table with peer's watermarks after sync
pub async fn exchange_watermarks_and_catchup(&self, peer_id: Uuid) -> Result<()> {
    // TODO: Implement watermark exchange protocol (LSYNC-010 Priority 3)
    // The parameter is used for logging, so it carries no `_` prefix.
    debug!(
        peer = %peer_id,
        "Watermark exchange not yet implemented - full sync will occur via backfill instead"
    );

    Ok(())
}
/// Start the sync service
@ -142,6 +203,9 @@ impl PeerSync {
// Start event listener for TransactionManager events
self.start_event_listener();
// Start network event listener for connection tracking
self.start_network_event_listener().await;
// Start background task for retry queue processing
self.start_retry_processor();
@ -305,6 +369,137 @@ impl PeerSync {
});
}
/// Start network event listener for connection tracking
///
/// Takes the receiver out of `network_events` and spawns a background task
/// that updates the devices table on `ConnectionEstablished` and
/// `ConnectionLost` events. All other network events are ignored.
///
/// If no receiver has been set, this logs at debug level and returns
/// without spawning anything. Because the receiver is moved into the task,
/// a later call finds the slot empty unless a new receiver has been set.
async fn start_network_event_listener(&self) {
    // Take the receiver from the mutex (if available). The guard is a
    // temporary and is released at the end of this statement.
    let receiver = self.network_events.lock().await.take();
    let mut rx = match receiver {
        Some(rx) => rx,
        None => {
            debug!("No network event receiver available - connection tracking disabled");
            return;
        }
    };

    let db = self.db.clone();
    let is_running = self.is_running.clone();

    tokio::spawn(async move {
        use crate::service::network::core::NetworkEvent;

        info!("PeerSync network event listener started");

        // NOTE(review): `is_running` is only re-checked between events. After
        // it flips to false the task stays parked in `recv()` until the next
        // event arrives or the channel closes.
        while is_running.load(Ordering::SeqCst) {
            let event = match rx.recv().await {
                Some(event) => event,
                None => {
                    info!("Network event channel closed, stopping listener");
                    break;
                }
            };

            match event {
                NetworkEvent::ConnectionEstablished { device_id, node_id } => {
                    info!(
                        device_id = %device_id,
                        node_id = %node_id,
                        "Device connected - updating devices table"
                    );

                    // Best-effort: a failed update must not stop the listener.
                    if let Err(e) = Self::handle_peer_connected(device_id, &db).await {
                        warn!(
                            device_id = %device_id,
                            error = %e,
                            "Failed to handle peer connected event"
                        );
                    }
                }
                NetworkEvent::ConnectionLost { device_id, node_id } => {
                    info!(
                        device_id = %device_id,
                        node_id = %node_id,
                        "Device disconnected - updating devices table"
                    );

                    // Best-effort: a failed update must not stop the listener.
                    if let Err(e) = Self::handle_peer_disconnected(device_id, &db).await {
                        warn!(
                            device_id = %device_id,
                            error = %e,
                            "Failed to handle peer disconnected event"
                        );
                    }
                }
                _ => {
                    // Ignore other network events
                }
            }
        }

        info!("PeerSync network event listener stopped");
    });
}
/// Handle peer connected event (static for spawned task)
async fn handle_peer_connected(device_id: Uuid, db: &DatabaseConnection) -> Result<()> {
use crate::infra::db::entities;
use sea_orm::{EntityTrait, QueryFilter, ColumnTrait, Set};
// Update devices table: set is_online=true, last_seen_at=now
let now = Utc::now();
entities::device::Entity::update_many()
.col_expr(
entities::device::Column::IsOnline,
sea_orm::sea_query::Expr::value(true),
)
.col_expr(
entities::device::Column::LastSeenAt,
sea_orm::sea_query::Expr::value(now),
)
.col_expr(
entities::device::Column::UpdatedAt,
sea_orm::sea_query::Expr::value(now),
)
.filter(entities::device::Column::Uuid.eq(device_id))
.exec(db)
.await?;
info!(device_id = %device_id, "Device marked as online in devices table");
// TODO: Trigger watermark exchange for reconnection sync (Priority 3)
Ok(())
}
/// Handle peer disconnected event (static for spawned task)
async fn handle_peer_disconnected(device_id: Uuid, db: &DatabaseConnection) -> Result<()> {
use crate::infra::db::entities;
use sea_orm::{EntityTrait, QueryFilter, ColumnTrait, Set};
// Update devices table: set is_online=false, last_seen_at=now
let now = Utc::now();
entities::device::Entity::update_many()
.col_expr(
entities::device::Column::IsOnline,
sea_orm::sea_query::Expr::value(false),
)
.col_expr(
entities::device::Column::LastSeenAt,
sea_orm::sea_query::Expr::value(now),
)
.col_expr(
entities::device::Column::UpdatedAt,
sea_orm::sea_query::Expr::value(now),
)
.filter(entities::device::Column::Uuid.eq(device_id))
.exec(db)
.await?;
info!(device_id = %device_id, "Device marked as offline in devices table");
Ok(())
}
/// Handle state change event from TransactionManager (static version for spawned task)
async fn handle_state_change_event_static(
library_id: Uuid,