You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
293 lines
9.6 KiB
Rust
293 lines
9.6 KiB
Rust
//! # Chronofold
|
|
//!
|
|
//! Chronofold is a conflict-free replicated data structure (a.k.a. *CRDT*) for
|
|
//! versioned text.
|
|
//!
|
|
//! This crate aims to offer a fast implementation with an easy-to-use
|
|
//! `Vec`-like API. It should be near impossible to shoot yourself in the foot
|
|
//! and end up with corrupted or lost data.
|
|
//!
|
|
//! **Note:** We are not there yet! While this implementation should be
|
|
//! correct, it is not yet optimized for speed and memory usage. The API might
|
|
//! see some changes as we continue to explore different use cases.
|
|
//!
|
|
//! This implementation is based on ideas published in the paper ["Chronofold:
|
|
//! a data structure for versioned text"][paper] by Victor Grishchenko and
|
|
//! Mikhail Patrakeev. If you look for a formal introduction to what a
|
|
//! chronofold is, reading that excellent paper is highly recommended!
|
|
//!
|
|
//! [paper]: https://arxiv.org/abs/2002.09511
|
|
//!
|
|
//! # Example usage
|
|
//!
|
|
//! ```rust
|
|
//! use chronofold::{Chronofold, LogIndex, Op};
|
|
//!
|
|
//! type AuthorId = &'static str;
|
|
//!
|
|
//! // Alice creates a chronofold on her machine, makes some initial changes
|
|
//! // and sends a copy to Bob.
|
|
//! let mut cfold_a = Chronofold::<AuthorId, char>::default();
|
|
//! cfold_a.session("alice").extend("Hello chronfold!".chars());
|
|
//! let mut cfold_b = cfold_a.clone();
|
|
//!
|
|
//! // Alice adds some more text, ...
|
|
//! let ops_a: Vec<Op<AuthorId, char>> = {
|
|
//! let mut session = cfold_a.session("alice");
|
|
//! session.splice(
|
|
//! LogIndex(16)..LogIndex(16),
|
|
//! " - a data structure for versioned text".chars(),
|
|
//! );
|
|
//! session.iter_ops().map(Op::cloned).collect()
|
|
//! };
|
|
//!
|
|
//! // ... while Bob fixes a typo.
|
|
//! let ops_b: Vec<Op<AuthorId, char>> = {
|
|
//! let mut session = cfold_b.session("bob");
|
|
//! session.insert_after(LogIndex(11), 'o');
|
|
//! session.iter_ops().map(Op::cloned).collect()
|
|
//! };
|
|
//!
|
|
//! // Now their respective states have diverged.
|
|
//! assert_eq!(
|
|
//! "Hello chronfold - a data structure for versioned text!",
|
|
//! format!("{cfold_a}"),
|
|
//! );
|
|
//! assert_eq!("Hello chronofold!", format!("{cfold_b}"));
|
|
//!
|
|
//! // As soon as both have seen all ops, their states have converged.
|
|
//! for op in ops_a {
|
|
//! cfold_b.apply(op).unwrap();
|
|
//! }
|
|
//! for op in ops_b {
|
|
//! cfold_a.apply(op).unwrap();
|
|
//! }
|
|
//! let final_text = "Hello chronofold - a data structure for versioned text!";
|
|
//! assert_eq!(final_text, format!("{cfold_a}"));
|
|
//! assert_eq!(final_text, format!("{cfold_b}"));
|
|
//! ```
|
|
|
|
// As we only have a handful of public items, we've decided to re-export
|
|
// everything in the crate root and keep our internal module structure
|
|
// private. This keeps things simple for our users and gives us more
|
|
// flexibility in restructuring the crate.
|
|
mod change;
|
|
mod debug;
|
|
mod distributed;
|
|
mod error;
|
|
mod fmt;
|
|
mod index;
|
|
mod internal;
|
|
mod iter;
|
|
mod offsetmap;
|
|
mod rangemap;
|
|
mod session;
|
|
mod version;
|
|
|
|
pub use crate::change::*;
|
|
pub use crate::distributed::*;
|
|
pub use crate::error::*;
|
|
pub use crate::fmt::*;
|
|
pub use crate::index::*;
|
|
pub use crate::iter::*;
|
|
pub use crate::session::*;
|
|
pub use crate::version::*;
|
|
|
|
use crate::index::{IndexShift, RelativeNextIndex, RelativeReference};
|
|
use crate::offsetmap::OffsetMap;
|
|
use crate::rangemap::RangeFromMap;
|
|
|
|
#[cfg(feature = "serde")]
|
|
#[macro_use]
|
|
extern crate serde;
|
|
|
|
/// A conflict-free replicated data structure for versioned sequences.
|
|
///
|
|
/// # Terminology
|
|
///
|
|
/// A chronofold can be regarded either as a log of changes or as a sequence of
|
|
/// elements. These two viewpoints require distinct terminology:
|
|
///
|
|
/// - A *log index* is a 0-based index in the log of changes. This indices are
|
|
/// stable (i.e. they stay the same after edits), but are subjective for
|
|
/// each author.
|
|
/// - An *element* is a visible (not yet deleted) value of type `T`.
|
|
/// - *Log order* refers to the chronological order in which changes were
|
|
/// added to the log. This order is subjective for each author.
|
|
/// - *Causal order* refers to the order of the linked list.
|
|
///
|
|
/// # Editing
|
|
///
|
|
/// You can edit a chronofold in two ways: Either by applying [`Op`]s, or by
|
|
/// creating a [`Session`] which has a `Vec`-like API.
|
|
///
|
|
/// # Indexing
|
|
///
|
|
/// Like [`Vec`], the `Chronofold` type allows to access values by index,
|
|
/// because it implements the [`Index`] trait. The same rules apply:
|
|
/// out-of-bound indexes cause panics, and you can use `get` to check whether
|
|
/// the index exists.
|
|
///
|
|
/// [`Vec`]: https://doc.rust-lang.org/std/vec/struct.Vec.html
|
|
/// [`Index`]: https://doc.rust-lang.org/std/ops/trait.Index.html
|
|
#[derive(PartialEq, Eq, Clone, Debug)]
|
|
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
|
pub struct Chronofold<A, T> {
|
|
log: Vec<(Change<T>, EarliestDeletion)>,
|
|
root: LogIndex,
|
|
#[cfg_attr(
|
|
feature = "serde",
|
|
serde(bound(
|
|
serialize = "Version<A>: serde::Serialize",
|
|
deserialize = "Version<A>: serde::Deserialize<'de>"
|
|
))
|
|
)]
|
|
version: Version<A>,
|
|
|
|
next_indices: OffsetMap<LogIndex, RelativeNextIndex>,
|
|
references: OffsetMap<LogIndex, RelativeReference>,
|
|
authors: RangeFromMap<LogIndex, A>,
|
|
index_shifts: RangeFromMap<LogIndex, IndexShift>,
|
|
}
|
|
|
|
pub type EarliestDeletion = Option<LogIndex>;
|
|
|
|
impl<A: Author, T> Chronofold<A, T> {
|
|
/// Constructs a new, empty chronofold.
|
|
pub fn new(author: A) -> Self {
|
|
let root_idx = LogIndex(0);
|
|
let mut version = Version::default();
|
|
version.inc(&Timestamp(root_idx, author));
|
|
let mut next_indices = OffsetMap::default();
|
|
next_indices.set(root_idx, None);
|
|
let mut authors = RangeFromMap::default();
|
|
authors.set(root_idx, author);
|
|
let mut index_shifts = RangeFromMap::default();
|
|
index_shifts.set(root_idx, IndexShift(0));
|
|
let mut references = OffsetMap::default();
|
|
references.set(root_idx, None);
|
|
Self {
|
|
log: vec![(Change::Root, None)],
|
|
root: LogIndex(0),
|
|
version,
|
|
next_indices,
|
|
authors,
|
|
index_shifts,
|
|
references,
|
|
}
|
|
}
|
|
|
|
pub fn empty() -> Self {
|
|
Self {
|
|
log: vec![],
|
|
root: LogIndex(0),
|
|
version: Version::default(),
|
|
next_indices: OffsetMap::default(),
|
|
authors: RangeFromMap::default(),
|
|
index_shifts: RangeFromMap::default(),
|
|
references: OffsetMap::default(),
|
|
}
|
|
}
|
|
|
|
/// Returns `true` if the chronofold contains no elements.
|
|
pub fn is_empty(&self) -> bool {
|
|
self.len() == 0
|
|
}
|
|
|
|
/// Returns the number of elements in the chronofold.
|
|
pub fn len(&self) -> usize {
|
|
self.iter().count()
|
|
}
|
|
|
|
/// Returns a reference to a change in the chronofold's log.
|
|
///
|
|
/// If `index` is out of bounds, `None` is returned.
|
|
pub fn get(&self, index: LogIndex) -> Option<&Change<T>> {
|
|
self.log.get(index.0).map(|e| &e.0)
|
|
}
|
|
|
|
/// Creates an editing session for a single author.
|
|
pub fn session(&mut self, author: A) -> Session<'_, A, T> {
|
|
Session::new(author, self)
|
|
}
|
|
|
|
pub fn log_index(&self, timestamp: &Timestamp<A>) -> Option<LogIndex> {
|
|
for i in (timestamp.0).0..self.log.len() {
|
|
if self.timestamp(LogIndex(i)).unwrap() == *timestamp {
|
|
return Some(LogIndex(i));
|
|
}
|
|
}
|
|
None
|
|
}
|
|
|
|
pub fn timestamp(&self, index: LogIndex) -> Option<Timestamp<A>> {
|
|
if let (Some(shift), Some(author)) =
|
|
(self.index_shifts.get(&index), self.authors.get(&index))
|
|
{
|
|
Some(Timestamp(&index - shift, *author))
|
|
} else {
|
|
None
|
|
}
|
|
}
|
|
|
|
/// Applies an op to the chronofold.
|
|
pub fn apply<V>(&mut self, op: Op<A, V>) -> Result<(), ChronofoldError<A, V>>
|
|
where
|
|
V: IntoLocalValue<A, T>,
|
|
{
|
|
// Check if an op with the same id was applied already.
|
|
// TODO: Consider adding an `apply_unchecked` variant to skip this
|
|
// check.
|
|
if self.log_index(&op.id).is_some() {
|
|
return Err(ChronofoldError::ExistingTimestamp(op));
|
|
}
|
|
|
|
// We rely on indices in timestamps being greater or equal than their
|
|
// indices in every local log. This means we cannot apply an op not
|
|
// matching this constraint, even if we know the reference.
|
|
if op.id.0 .0 > self.log.len() {
|
|
return Err(ChronofoldError::FutureTimestamp(op));
|
|
}
|
|
|
|
use OpPayload::*;
|
|
match op.payload {
|
|
Root => {
|
|
self.apply_change(op.id, None, Change::Root);
|
|
Ok(())
|
|
}
|
|
Insert(Some(t), value) => match self.log_index(&t) {
|
|
Some(reference) => {
|
|
self.apply_change(
|
|
op.id,
|
|
Some(reference),
|
|
Change::Insert(value.into_local_value(self)),
|
|
);
|
|
Ok(())
|
|
}
|
|
None => Err(ChronofoldError::UnknownReference(Op::insert(
|
|
op.id,
|
|
Some(t),
|
|
value,
|
|
))),
|
|
},
|
|
Insert(None, value) => {
|
|
self.apply_change(op.id, None, Change::Insert(value.into_local_value(self)));
|
|
Ok(())
|
|
}
|
|
Delete(t) => match self.log_index(&t) {
|
|
Some(reference) => {
|
|
self.apply_change(op.id, Some(reference), Change::Delete);
|
|
Ok(())
|
|
}
|
|
None => Err(ChronofoldError::UnknownReference(op)),
|
|
},
|
|
}
|
|
}
|
|
}
|
|
|
|
impl<A: Author + Default, T> Default for Chronofold<A, T> {
|
|
fn default() -> Self {
|
|
Self::new(A::default())
|
|
}
|
|
}
|