404 lines
14 KiB
Rust
Raw Normal View History

use crate::rev_queue::{RevCommand, RevCommandSender, RevQueue};
use crate::{
RevisionPersistence, RevisionSnapshot, RevisionSnapshotController, RevisionSnapshotDiskCache,
WSDataProviderDataSource,
};
2022-03-11 21:36:00 +08:00
use bytes::Bytes;
use flowy_error::{internal_error, FlowyError, FlowyResult};
use flowy_http_model::revision::{Revision, RevisionRange};
use flowy_http_model::util::md5;
2021-12-13 13:55:44 +08:00
use lib_infra::future::FutureResult;
use std::sync::atomic::AtomicI64;
use std::sync::atomic::Ordering::SeqCst;
2022-02-19 11:34:31 +08:00
use std::sync::Arc;
use tokio::sync::{mpsc, oneshot};
2022-01-14 15:23:21 +08:00
pub trait RevisionCloudService: Send + Sync {
2022-10-13 23:29:37 +08:00
/// Read the object's revision from remote
/// Returns a list of revisions that used to build the object
/// # Arguments
///
/// * `user_id`: the id of the user
/// * `object_id`: the id of the object
///
2022-01-14 15:23:21 +08:00
fn fetch_object(&self, user_id: &str, object_id: &str) -> FutureResult<Vec<Revision>, FlowyError>;
}
2022-10-13 23:29:37 +08:00
pub trait RevisionObjectDeserializer: Send + Sync {
2022-01-14 15:23:21 +08:00
type Output;
2022-10-13 23:29:37 +08:00
/// Deserialize the list of revisions into an concrete object type.
///
/// # Arguments
///
/// * `object_id`: the id of the object
/// * `revisions`: a list of revisions that represent the object
///
fn deserialize_revisions(object_id: &str, revisions: Vec<Revision>) -> FlowyResult<Self::Output>;
fn recover_operations_from_revisions(revisions: Vec<Revision>) -> Option<Self::Output>;
2022-10-13 23:29:37 +08:00
}
pub trait RevisionObjectSerializer: Send + Sync {
/// Serialize a list of revisions into one in `Bytes` format
2022-10-13 23:29:37 +08:00
///
/// * `revisions`: a list of revisions will be serialized to `Bytes`
///
fn combine_revisions(revisions: Vec<Revision>) -> FlowyResult<Bytes>;
2022-01-14 15:23:21 +08:00
}
2022-10-13 23:29:37 +08:00
/// `RevisionCompress` is used to compress multiple revisions into one revision
///
2022-11-06 09:59:53 +08:00
pub trait RevisionMergeable: Send + Sync {
fn merge_revisions(&self, _user_id: &str, object_id: &str, mut revisions: Vec<Revision>) -> FlowyResult<Revision> {
2022-03-11 21:36:00 +08:00
if revisions.is_empty() {
2022-10-13 23:29:37 +08:00
return Err(FlowyError::internal().context("Can't compact the empty revisions"));
2022-03-11 21:36:00 +08:00
}
if revisions.len() == 1 {
return Ok(revisions.pop().unwrap());
}
let first_revision = revisions.first().unwrap();
let last_revision = revisions.last().unwrap();
let (base_rev_id, rev_id) = first_revision.pair_rev_id();
let md5 = last_revision.md5.clone();
let bytes = self.combine_revisions(revisions)?;
2022-11-02 17:15:27 +08:00
Ok(Revision::new(object_id, base_rev_id, rev_id, bytes, md5))
2022-03-11 21:36:00 +08:00
}
fn combine_revisions(&self, revisions: Vec<Revision>) -> FlowyResult<Bytes>;
2022-01-25 20:37:48 +08:00
}
2022-11-01 18:59:53 +08:00
pub struct RevisionManager<Connection> {
2022-01-14 15:23:21 +08:00
pub object_id: String,
2021-12-09 22:28:11 +08:00
user_id: String,
rev_id_counter: Arc<RevIdCounter>,
2022-11-01 18:59:53 +08:00
rev_persistence: Arc<RevisionPersistence<Connection>>,
rev_snapshot: Arc<RevisionSnapshotController<Connection>>,
2022-11-06 09:59:53 +08:00
rev_compress: Arc<dyn RevisionMergeable>,
2022-01-22 18:48:43 +08:00
#[cfg(feature = "flowy_unit_test")]
2022-02-18 23:04:55 +08:00
rev_ack_notifier: tokio::sync::broadcast::Sender<i64>,
rev_queue: RevCommandSender,
}
2022-11-01 18:59:53 +08:00
impl<Connection: 'static> RevisionManager<Connection> {
2022-07-20 14:07:54 +08:00
pub fn new<SP, C>(
user_id: &str,
object_id: &str,
2022-11-01 18:59:53 +08:00
rev_persistence: RevisionPersistence<Connection>,
2022-10-22 21:57:44 +08:00
rev_compress: C,
2022-06-10 22:27:19 +08:00
snapshot_persistence: SP,
) -> Self
where
2022-06-10 22:27:19 +08:00
SP: 'static + RevisionSnapshotDiskCache,
2022-11-06 09:59:53 +08:00
C: 'static + RevisionMergeable,
{
let rev_id_counter = Arc::new(RevIdCounter::new(0));
2022-10-22 21:57:44 +08:00
let rev_compress = Arc::new(rev_compress);
let rev_persistence = Arc::new(rev_persistence);
let rev_snapshot = RevisionSnapshotController::new(
user_id,
object_id,
snapshot_persistence,
rev_id_counter.clone(),
rev_persistence.clone(),
rev_compress.clone(),
);
let (rev_queue, receiver) = mpsc::channel(1000);
let queue = RevQueue::new(
object_id.to_owned(),
rev_id_counter.clone(),
rev_persistence.clone(),
rev_compress.clone(),
receiver,
);
tokio::spawn(queue.run());
Self {
2022-01-14 15:23:21 +08:00
object_id: object_id.to_string(),
2021-12-09 22:28:11 +08:00
user_id: user_id.to_owned(),
rev_id_counter,
2022-02-25 22:27:44 +08:00
rev_persistence,
rev_snapshot: Arc::new(rev_snapshot),
2022-10-22 21:57:44 +08:00
rev_compress,
2022-01-22 18:48:43 +08:00
#[cfg(feature = "flowy_unit_test")]
2022-11-07 20:22:08 +08:00
rev_ack_notifier: tokio::sync::broadcast::channel(1).0,
rev_queue,
}
}
#[tracing::instrument(name = "revision_manager_initialize", level = "info", skip_all, fields(deserializer, object_id, deserialize_revisions) err)]
pub async fn initialize<B>(&mut self, _cloud: Option<Arc<dyn RevisionCloudService>>) -> FlowyResult<B::Output>
2022-01-14 15:23:21 +08:00
where
2022-10-13 23:29:37 +08:00
B: RevisionObjectDeserializer,
2022-01-14 15:23:21 +08:00
{
let revision_records = self.rev_persistence.load_all_records(&self.object_id)?;
tracing::Span::current().record("object_id", &self.object_id.as_str());
tracing::Span::current().record("deserializer", &std::any::type_name::<B>());
let revisions: Vec<Revision> = revision_records.iter().map(|record| record.revision.clone()).collect();
tracing::Span::current().record("deserialize_revisions", &revisions.len());
let current_rev_id = revisions.last().as_ref().map(|revision| revision.rev_id).unwrap_or(0);
match B::deserialize_revisions(&self.object_id, revisions.clone()) {
Ok(object) => {
self.rev_persistence.sync_revision_records(&revision_records).await?;
self.rev_id_counter.set(current_rev_id);
Ok(object)
}
Err(e) => match self.rev_snapshot.restore_from_snapshot::<B>(current_rev_id) {
None => {
tracing::info!("Restore object from validation revisions");
B::recover_operations_from_revisions(revisions).ok_or(e)
}
Some((object, snapshot_rev)) => {
let snapshot_rev_id = snapshot_rev.rev_id;
let _ = self.rev_persistence.reset(vec![snapshot_rev]).await;
// revision_records.retain(|record| record.revision.rev_id <= snapshot_rev_id);
// let _ = self.rev_persistence.sync_revision_records(&revision_records).await?;
self.rev_id_counter.set(snapshot_rev_id);
Ok(object)
}
},
2021-12-18 00:23:26 +08:00
}
2021-10-07 20:46:29 +08:00
}
/// Asks the persistence layer to compact its lagging revisions, passing this
/// manager's `rev_compress`. Whatever that call returns is discarded, so
/// nothing is reported back to the caller.
pub async fn close(&self) {
let _ = self.rev_persistence.compact_lagging_revisions(&self.rev_compress).await;
}
/// Delegates to the snapshot controller's `generate_snapshot` and waits for
/// it to finish.
pub async fn generate_snapshot(&self) {
self.rev_snapshot.generate_snapshot().await;
}
/// Reads a snapshot from the snapshot controller.
///
/// * `rev_id`: when `Some`, the snapshot for that revision id is requested;
///   when `None`, the last snapshot is requested.
pub async fn read_snapshot(&self, rev_id: Option<i64>) -> FlowyResult<Option<RevisionSnapshot>> {
    if let Some(requested_rev_id) = rev_id {
        self.rev_snapshot.read_snapshot(requested_rev_id)
    } else {
        self.rev_snapshot.read_last_snapshot()
    }
}
2022-10-22 21:57:44 +08:00
/// Loads every stored revision of the object through a [`RevisionLoader`]
/// that has no cloud service attached.
pub async fn load_revisions(&self) -> FlowyResult<Vec<Revision>> {
    let loader = RevisionLoader {
        object_id: self.object_id.clone(),
        user_id: self.user_id.clone(),
        cloud: None,
        rev_persistence: self.rev_persistence.clone(),
    };
    let revisions = loader.load_revisions().await?;
    Ok(revisions)
}
2022-01-01 14:23:58 +08:00
#[tracing::instrument(level = "debug", skip(self, revisions), err)]
2022-11-02 10:21:10 +08:00
pub async fn reset_object(&self, revisions: Vec<Revision>) -> FlowyResult<()> {
2022-01-02 10:34:42 +08:00
let rev_id = pair_rev_id_from_revisions(&revisions).1;
self.rev_persistence.reset(revisions).await?;
2022-01-02 10:34:42 +08:00
self.rev_id_counter.set(rev_id);
Ok(())
2022-01-01 14:23:58 +08:00
}
2022-01-02 10:34:42 +08:00
#[tracing::instrument(level = "debug", skip(self, revision), err)]
2021-12-14 18:04:51 +08:00
pub async fn add_remote_revision(&self, revision: &Revision) -> Result<(), FlowyError> {
if revision.bytes.is_empty() {
2022-10-13 23:29:37 +08:00
return Err(FlowyError::internal().context("Remote revisions is empty"));
2022-01-01 23:09:13 +08:00
}
2022-01-26 23:29:18 +08:00
self.rev_persistence.add_ack_revision(revision).await?;
2022-01-07 17:37:11 +08:00
self.rev_id_counter.set(revision.rev_id);
2021-12-13 22:46:35 +08:00
Ok(())
}
/// Adds the revision that generated by user editing
// #[tracing::instrument(level = "trace", skip_all, err)]
pub async fn add_local_revision(&self, data: Bytes, object_md5: String) -> Result<i64, FlowyError> {
if data.is_empty() {
return Err(FlowyError::internal().context("The data of the revisions is empty"));
2022-01-01 23:09:13 +08:00
}
self.rev_snapshot.generate_snapshot_if_need();
let (ret, rx) = oneshot::channel();
self.rev_queue
.send(RevCommand::RevisionData { data, object_md5, ret })
.await
.map_err(internal_error)?;
rx.await.map_err(internal_error)?
}
2022-01-01 16:16:06 +08:00
#[tracing::instrument(level = "debug", skip(self), err)]
2021-12-16 21:31:36 +08:00
pub async fn ack_revision(&self, rev_id: i64) -> Result<(), FlowyError> {
2022-02-25 22:27:44 +08:00
if self.rev_persistence.ack_revision(rev_id).await.is_ok() {
#[cfg(feature = "flowy_unit_test")]
2022-02-18 23:04:55 +08:00
let _ = self.rev_ack_notifier.send(rev_id);
}
Ok(())
}
2022-11-02 10:23:54 +08:00
/// Returns the current revision id
2022-01-24 17:35:58 +08:00
pub fn rev_id(&self) -> i64 {
self.rev_id_counter.value()
}
2022-11-02 17:15:27 +08:00
/// Returns the persistence layer's `next_sync_rev_id`, or `None` when it
/// reports none.
pub async fn next_sync_rev_id(&self) -> Option<i64> {
self.rev_persistence.next_sync_rev_id().await
}
2022-01-02 10:34:42 +08:00
pub fn next_rev_id_pair(&self) -> (i64, i64) {
let cur = self.rev_id_counter.value();
2022-11-02 10:21:10 +08:00
let next = self.rev_id_counter.next_id();
(cur, next)
}
2022-11-06 09:59:53 +08:00
/// Returns the number of sync records reported by the persistence layer.
pub fn number_of_sync_revisions(&self) -> usize {
self.rev_persistence.number_of_sync_records()
}
/// Returns the number of records the persistence layer reports as stored on
/// disk.
pub fn number_of_revisions_in_disk(&self) -> usize {
self.rev_persistence.number_of_records_in_disk()
}
2021-12-25 21:44:45 +08:00
/// Returns the revisions the persistence layer holds for `range`.
///
/// # Errors
///
/// Returns the error reported by the persistence layer.
pub async fn get_revisions_in_range(&self, range: RevisionRange) -> Result<Vec<Revision>, FlowyError> {
    let revisions = self.rev_persistence.revisions_in_range(&range).await?;
    Ok(revisions)
}
2021-12-08 21:51:06 +08:00
2022-01-25 20:37:48 +08:00
/// Returns the next revision the persistence layer wants to sync, or `None`
/// when it has none.
pub async fn next_sync_revision(&self) -> FlowyResult<Option<Revision>> {
    self.rev_persistence.next_sync_revision().await
}
2021-12-18 18:35:45 +08:00
2021-12-25 21:44:45 +08:00
/// Looks up the record with the given `rev_id` in the persistence layer and
/// returns its revision, or `None` when there is no such record.
pub async fn get_revision(&self, rev_id: i64) -> Option<Revision> {
    self.rev_persistence.get(rev_id).await.map(|record| record.revision)
}
}
2022-11-01 18:59:53 +08:00
impl<Connection: 'static> WSDataProviderDataSource for Arc<RevisionManager<Connection>> {
2022-02-25 22:27:44 +08:00
fn next_revision(&self) -> FutureResult<Option<Revision>, FlowyError> {
let rev_manager = self.clone();
FutureResult::new(async move { rev_manager.next_sync_revision().await })
}
fn ack_revision(&self, rev_id: i64) -> FutureResult<(), FlowyError> {
let rev_manager = self.clone();
FutureResult::new(async move { (*rev_manager).ack_revision(rev_id).await })
}
fn current_rev_id(&self) -> i64 {
self.rev_id()
2021-12-25 21:44:45 +08:00
}
}
2021-12-08 14:17:40 +08:00
2022-01-25 20:37:48 +08:00
/// Helpers that are only compiled with the `flowy_unit_test` feature.
#[cfg(feature = "flowy_unit_test")]
impl<Connection: 'static> RevisionManager<Connection> {
    /// Returns a handle to the persistence layer used by this manager.
    pub async fn revision_cache(&self) -> Arc<RevisionPersistence<Connection>> {
        self.rev_persistence.clone()
    }

    /// Subscribes to the ids of revisions acknowledged via `ack_revision`.
    pub fn ack_notify(&self) -> tokio::sync::broadcast::Receiver<i64> {
        self.rev_ack_notifier.subscribe()
    }

    /// Loads all revision records of the object from the persistence layer.
    pub fn get_all_revision_records(&self) -> FlowyResult<Vec<flowy_revision_persistence::SyncRecord>> {
        self.rev_persistence.load_all_records(&self.object_id)
    }
}
2022-11-01 18:59:53 +08:00
pub struct RevisionLoader<Connection> {
2022-03-01 23:38:26 +08:00
pub object_id: String,
pub user_id: String,
pub cloud: Option<Arc<dyn RevisionCloudService>>,
2022-11-01 18:59:53 +08:00
pub rev_persistence: Arc<RevisionPersistence<Connection>>,
2021-12-18 00:23:26 +08:00
}
2022-11-01 18:59:53 +08:00
impl<Connection: 'static> RevisionLoader<Connection> {
2022-10-22 21:57:44 +08:00
pub async fn load_revisions(&self) -> Result<Vec<Revision>, FlowyError> {
let records = self.rev_persistence.load_all_records(&self.object_id)?;
2022-10-22 21:57:44 +08:00
let revisions = records.into_iter().map(|record| record.revision).collect::<_>();
Ok(revisions)
}
2021-12-18 00:23:26 +08:00
}
2022-11-02 10:21:10 +08:00
/// The md5 of the revision object after a revision has been applied to it.
/// For example, for a document `RevisionMD5` is the md5 of the document
/// content.
///
/// Two values are equal when their md5 strings are equal.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RevisionMD5(String);

impl RevisionMD5 {
    /// Computes the md5 of `bytes`.
    ///
    /// The current implementation never returns `Err`; the `Result` is part
    /// of the existing signature.
    pub fn from_bytes<T: AsRef<[u8]>>(bytes: T) -> Result<Self, FlowyError> {
        Ok(RevisionMD5(md5(bytes)))
    }

    /// Consumes the value and returns the md5 string.
    pub fn into_inner(self) -> String {
        self.0
    }

    /// Returns `true` when the md5 string equals `s`.
    pub fn is_equal(&self, s: &str) -> bool {
        self.0 == s
    }
}

impl std::convert::From<RevisionMD5> for String {
    fn from(value: RevisionMD5) -> Self {
        value.0
    }
}

/// Wraps an already computed md5 string; the input is not hashed.
impl std::convert::From<&str> for RevisionMD5 {
    fn from(s: &str) -> Self {
        Self(s.to_owned())
    }
}

/// Wraps an already computed md5 string; the input is not hashed.
impl std::convert::From<String> for RevisionMD5 {
    fn from(s: String) -> Self {
        Self(s)
    }
}

impl std::ops::Deref for RevisionMD5 {
    type Target = String;

    fn deref(&self) -> &Self::Target {
        &self.0
    }
}
/// Returns `(base_rev_id, rev_id)` for a list of revisions, where `rev_id` is
/// the largest `rev_id` in the list (never less than `0`) and `base_rev_id`
/// is its predecessor. When no revision has a positive id the pair is `(0, 0)`.
fn pair_rev_id_from_revisions(revisions: &[Revision]) -> (i64, i64) {
    let latest = revisions
        .iter()
        .map(|revision| revision.rev_id)
        .fold(0, i64::max);
    match latest {
        id if id > 0 => (id - 1, id),
        id => (0, id),
    }
}
/// A revision id counter that can be shared between threads.
#[derive(Debug)]
pub struct RevIdCounter(pub AtomicI64);

impl RevIdCounter {
    /// Creates a counter whose current value is `n`.
    pub fn new(n: i64) -> Self {
        Self(AtomicI64::new(n))
    }

    /// Increments the counter by one and returns the new value.
    ///
    /// The returned id is derived from the result of the atomic increment
    /// itself rather than from a second load, so concurrent callers always
    /// receive distinct ids.
    pub fn next_id(&self) -> i64 {
        // `fetch_add` yields the previous value and wraps on overflow;
        // `wrapping_add` keeps the returned id consistent with that.
        self.0.fetch_add(1, SeqCst).wrapping_add(1)
    }

    /// Returns the current value.
    pub fn value(&self) -> i64 {
        self.0.load(SeqCst)
    }

    /// Overwrites the current value with `n`.
    pub fn set(&self, n: i64) {
        self.0.store(n, SeqCst);
    }
}