From 8b5a16ffb7be3e5ddfef200f7b9f31fc5fb2961a Mon Sep 17 00:00:00 2001 From: Brendan Allan Date: Wed, 19 Oct 2022 14:44:09 +0800 Subject: [PATCH] sync docs --- crates/sync/docs/HLC.md | 73 ----------------------- docs/developers/architecture/sync.md | 86 +++++++++++++++++++++++++--- 2 files changed, 78 insertions(+), 81 deletions(-) delete mode 100644 crates/sync/docs/HLC.md diff --git a/crates/sync/docs/HLC.md b/crates/sync/docs/HLC.md deleted file mode 100644 index f4a947511..000000000 --- a/crates/sync/docs/HLC.md +++ /dev/null @@ -1,73 +0,0 @@ -```rust -pub fn update_with_timestamp(&self, timestamp: &Timestamp) -> Result<(), String> { - let mut now = (self.clock)(); - now.0 &= LMASK; - let msg_time = timestamp.get_time(); - if *msg_time > now && *msg_time - now > self.delta { - let err_msg = format!( - "incoming timestamp from {} exceeding delta {}ms is rejected: {} vs. now: {}", - timestamp.get_id(), - self.delta.to_duration().as_millis(), - msg_time, - now - ); - warn!("{}", err_msg); - Err(err_msg) - } else { - let mut last_time = lock!(self.last_time); - let max_time = cmp::max(cmp::max(now, *msg_time), *last_time); - if max_time == now { - *last_time = now; - } else if max_time == *msg_time { - *last_time = *msg_time + 1; - } else { - *last_time += 1; - } - Ok(()) - } -} -``` - -```javascript -Timestamp.recv = function (msg) { - if (!clock) { - return null; - } - - var now = Date.now(); - - var msg_time = msg.millis(); - var msg_time = msg.counter(); - - if (msg_time - now > config.maxDrift) { - throw new Timestamp.ClockDriftError(); - } - - var last_time = clock.timestamp.millis(); - var last_time = clock.timestamp.counter(); - - var max_time = Math.max(Math.max(last_time, now), msg_time); - - var last_time = - max_time === last_time && lNew === msg_time - ? Math.max(last_time, msg_time) + 1 - : max_time === last_time - ? last_time + 1 - : max_time === msg_time - ? msg_time + 1 - : 0; - - // 3. 
- if (max_time - phys > config.maxDrift) { - throw new Timestamp.ClockDriftError(); - } - if (last_time > MAX_COUNTER) { - throw new Timestamp.OverflowError(); - } - - clock.timestamp.setMillis(max_time); - clock.timestamp.setCounter(last_time); - - return new Timestamp(clock.timestamp.millis(), clock.timestamp.counter(), clock.timestamp.node()); -}; -``` diff --git a/docs/developers/architecture/sync.md b/docs/developers/architecture/sync.md index 50e05ee41..38fb25500 100644 --- a/docs/developers/architecture/sync.md +++ b/docs/developers/architecture/sync.md @@ -4,15 +4,85 @@ index: 12 # Sync -Spacedrive synchronizes library data in realtime across the distributed network of Nodes. +Spacedrive synchronizes data using a combination of master-slave replication and last-write-wins CRDTs, +with the synchronization method encoded into the Prisma schema using [record type attributes](#record-types). -Using a Unique Hybrid Logical Clock for distributed time synchronization. +In the cases where LWW CRDTs are used, +conflicts are resolved using a [Hybrid Logical Clock](https://github.com/atolab/uhlc-rs) +to determine the ordering of events. -A combination of several property level CRDT types: +We would be remiss to not credit [Actual Budget](https://actualbudget.com/) +with many of the CRDT concepts used in Spacedrive's sync system. -- **Local data** - migrations, statistics, sync events -- **Owned data** - locations, paths, volumes -- **Shared data** - objects, tags, spaces, jobs -- **Relationship data** - many to many tables +## Record Types -Built in Rust on top of Prisma, it uses the schema file to determine these sync rules. +All data in a library conforms to one of the following types. +Each type uses a different strategy for syncing. + +### Local Records + +Local records exist entirely outside of the sync system. +They don't have Sync IDs and never leave the node they were created on. + +Used for Nodes, Statistics, and Sync Events. 
+ +`@local` + +### Owned Records + +Owned records are only ever modified by the node they are created by, +so they can be synced in a master-slave fashion. +The creator of an owned record dictates the state of the record to other nodes, +who will simply accept new changes without considering conflicts. + +File paths are owned records since they only exist on one node, +and that node can inform all other nodes about the correct state of the paths. + +Used for Locations, Paths, and Volumes. + +`@owned(owner: String, id?: String)` +- `owner` - Field that identifies the owner of this model. + If a scalar, will directly use that value in sync operations. + If a relation, the Sync ID of the related model will be resolved for sync operations. +- `id` - Scalar field to override the default Sync ID. + +### Shared Records + +Shared records encompass most data synced in the CRDT fashion. +Updates are applied per-field using a last-write-wins strategy. + +Used for Objects, Tags, Spaces, and Jobs. + +`@shared(create: SharedCreateType, id?: String)` +- `id` - Scalar field to override the default Sync ID. +- `create` - How the model should be created. + - `Unique` (default): Model can be created with many required arguments, + but ID provided _must_ be unique across all nodes. + Useful for Tags since their IDs are non-deterministic. + - `Atomic`: Require the model to have no required arguments apart from ID and apply all create arguments as atomic updates. + Necessary for models with the same ID that can be created on multiple nodes. + Useful for Objects since their ID is dependent on their content, + and could be the same across nodes. + +### Relation Records + +Similar to shared records, but represent a many-to-many relation between two records. +Sync ID is the combination of `item` and `group` Sync IDs. + +Used for TagOnFile and FileInSpace. + +`@relation(item: String, group: String)` +- `item` - Field that identifies the item that the relation is connecting. 
+ Similar to the `owner` argument of `@owned`. +- `group` - Field that identifies the group that the item should be connected to. + Similar to the `owner` argument of `@owned`. + + +## Other Prisma Attributes + +`@node` + +Indicates that a relation field should be set to the current node. +This could be done manually, +but `@node` allows `node_id` fields to be resolved from the `node_id` field of a `CRDTOperation`, +saving on bandwidth