mirror of
https://github.com/AppFlowy-IO/AppFlowy.git
synced 2025-07-03 15:11:43 +00:00
309 lines
9.1 KiB
Rust
309 lines
9.1 KiB
Rust
![]() |
use anyhow::Result;
|
||
|
use flowy_ai_pub::entities::EmbeddedChunk;
|
||
|
use flowy_sqlite_vec::db::VectorSqliteDB;
|
||
|
use flowy_sqlite_vec::init_sqlite_vector_extension;
|
||
|
use std::collections::HashSet;
|
||
|
use tempfile::tempdir;
|
||
|
use uuid::Uuid;
|
||
|
|
||
|
#[tokio::test]
|
||
|
async fn test_vector_sqlite_db_basic_operations() -> Result<()> {
|
||
|
// Initialize sqlite-vec extension
|
||
|
init_sqlite_vector_extension();
|
||
|
|
||
|
// Create a temporary directory for the test database
|
||
|
let temp_dir = tempdir()?;
|
||
|
|
||
|
// Create the VectorSqliteDB
|
||
|
let db = VectorSqliteDB::new(temp_dir.into_path())?;
|
||
|
|
||
|
// Test inserting vector embeddings
|
||
|
let oid = Uuid::new_v4().to_string();
|
||
|
let fragments = vec![
|
||
|
create_test_fragment(&oid, 0, generate_embedding_with_size(768, 0.1)),
|
||
|
create_test_fragment(&oid, 1, generate_embedding_with_size(768, 0.2)),
|
||
|
create_test_fragment(&oid, 2, generate_embedding_with_size(768, 0.3)),
|
||
|
];
|
||
|
let workspace_id = Uuid::new_v4();
|
||
|
db.upsert_collabs_embeddings(&workspace_id.to_string(), &oid, fragments)
|
||
|
.await?;
|
||
|
|
||
|
// Test querying fragment IDs
|
||
|
let result = db.select_collabs_fragment_ids(&[oid.clone()]).await?;
|
||
|
assert_eq!(result.len(), 1);
|
||
|
assert!(result.contains_key(&Uuid::parse_str(&oid)?));
|
||
|
assert_eq!(result.get(&Uuid::parse_str(&oid)?).unwrap().len(), 3);
|
||
|
|
||
|
Ok(())
|
||
|
}
|
||
|
|
||
|
#[tokio::test]
|
||
|
async fn test_upsert_and_remove_fragments() -> Result<()> {
|
||
|
// Initialize sqlite-vec extension
|
||
|
init_sqlite_vector_extension();
|
||
|
|
||
|
// Create a temporary directory for the test database
|
||
|
let temp_dir = tempdir()?;
|
||
|
|
||
|
// Create the VectorSqliteDB
|
||
|
let db = VectorSqliteDB::new(temp_dir.into_path())?;
|
||
|
|
||
|
// Test inserting initial vector embeddings
|
||
|
let oid = Uuid::new_v4().to_string();
|
||
|
let initial_fragments = vec![
|
||
|
create_test_fragment(&oid, 0, generate_embedding_with_size(768, 0.1)),
|
||
|
create_test_fragment(&oid, 1, generate_embedding_with_size(768, 0.2)),
|
||
|
create_test_fragment(&oid, 2, generate_embedding_with_size(768, 0.3)),
|
||
|
];
|
||
|
|
||
|
let workspace_id = Uuid::new_v4();
|
||
|
db.upsert_collabs_embeddings(&workspace_id.to_string(), &oid, initial_fragments)
|
||
|
.await?;
|
||
|
|
||
|
// Verify initial fragments
|
||
|
let result = db.select_collabs_fragment_ids(&[oid.clone()]).await?;
|
||
|
assert_eq!(result.get(&Uuid::parse_str(&oid)?).unwrap().len(), 3);
|
||
|
|
||
|
// Update with a subset of fragments (this should remove the missing one)
|
||
|
let updated_fragments = vec![
|
||
|
create_test_fragment(&oid, 0, generate_embedding_with_size(768, 0.1)),
|
||
|
create_test_fragment(&oid, 2, generate_embedding_with_size(768, 0.3)),
|
||
|
];
|
||
|
|
||
|
db.upsert_collabs_embeddings(&workspace_id.to_string(), &oid, updated_fragments)
|
||
|
.await?;
|
||
|
// Verify fragment count is now 2
|
||
|
let result = db.select_collabs_fragment_ids(&[oid.clone()]).await?;
|
||
|
assert_eq!(result.get(&Uuid::parse_str(&oid)?).unwrap().len(), 2);
|
||
|
|
||
|
let result = db
|
||
|
.search(
|
||
|
&workspace_id.to_string(),
|
||
|
&generate_embedding_with_size(768, 0.1),
|
||
|
1,
|
||
|
)
|
||
|
.await
|
||
|
.unwrap();
|
||
|
assert!(!result.is_empty());
|
||
|
assert_eq!(result[0].oid, Uuid::parse_str(&oid).unwrap());
|
||
|
assert_eq!(result[0].content, "Content for fragment 0".to_string());
|
||
|
dbg!(result);
|
||
|
|
||
|
Ok(())
|
||
|
}
|
||
|
|
||
|
#[tokio::test]
|
||
|
async fn test_empty_fragments_noop_and_select_empty() -> Result<()> {
|
||
|
init_sqlite_vector_extension();
|
||
|
let temp_dir = tempdir()?;
|
||
|
let db = VectorSqliteDB::new(temp_dir.into_path())?;
|
||
|
|
||
|
let oid = Uuid::new_v4().to_string();
|
||
|
let workspace_id = Uuid::new_v4().to_string();
|
||
|
|
||
|
// Upsert with an empty fragments Vec should not error and not insert anything
|
||
|
db.upsert_collabs_embeddings(&workspace_id, &oid, Vec::new())
|
||
|
.await?;
|
||
|
|
||
|
// select_collabs_fragment_ids should return an empty map
|
||
|
let result = db.select_collabs_fragment_ids(&[oid.clone()]).await?;
|
||
|
assert!(
|
||
|
result.is_empty(),
|
||
|
"Expected no fragments stored, got {:?}",
|
||
|
result
|
||
|
);
|
||
|
|
||
|
Ok(())
|
||
|
}
|
||
|
|
||
|
#[tokio::test]
|
||
|
async fn test_duplicate_upsert_idempotent() -> Result<()> {
|
||
|
init_sqlite_vector_extension();
|
||
|
let temp_dir = tempdir()?;
|
||
|
let db = VectorSqliteDB::new(temp_dir.into_path())?;
|
||
|
|
||
|
let oid = Uuid::new_v4().to_string();
|
||
|
let workspace_id = Uuid::new_v4().to_string();
|
||
|
let fragments = vec![
|
||
|
create_test_fragment(&oid, 0, generate_embedding_with_size(768, 0.5)),
|
||
|
create_test_fragment(&oid, 1, generate_embedding_with_size(768, 0.6)),
|
||
|
];
|
||
|
|
||
|
// First upsert
|
||
|
db.upsert_collabs_embeddings(&workspace_id, &oid, fragments.clone())
|
||
|
.await?;
|
||
|
let first = db.select_collabs_fragment_ids(&[oid.clone()]).await?;
|
||
|
let set1: HashSet<_> = first
|
||
|
.get(&Uuid::parse_str(&oid)?)
|
||
|
.unwrap()
|
||
|
.clone()
|
||
|
.into_iter()
|
||
|
.collect();
|
||
|
|
||
|
// Second upsert with the exact same fragments
|
||
|
db.upsert_collabs_embeddings(&workspace_id, &oid, fragments)
|
||
|
.await?;
|
||
|
let second = db.select_collabs_fragment_ids(&[oid.clone()]).await?;
|
||
|
let set2: HashSet<_> = second
|
||
|
.get(&Uuid::parse_str(&oid)?)
|
||
|
.unwrap()
|
||
|
.clone()
|
||
|
.into_iter()
|
||
|
.collect();
|
||
|
|
||
|
assert_eq!(
|
||
|
set1, set2,
|
||
|
"Upserting the same fragments should be idempotent"
|
||
|
);
|
||
|
Ok(())
|
||
|
}
|
||
|
|
||
|
#[tokio::test]
|
||
|
async fn test_search_no_hits() -> Result<()> {
|
||
|
init_sqlite_vector_extension();
|
||
|
let temp_dir = tempdir()?;
|
||
|
let db = VectorSqliteDB::new(temp_dir.into_path())?;
|
||
|
|
||
|
let oid = Uuid::new_v4().to_string();
|
||
|
let workspace_id = Uuid::new_v4().to_string();
|
||
|
// Insert a single fragment at vector [1.0,...]
|
||
|
let frags = vec![create_test_fragment(
|
||
|
&oid,
|
||
|
0,
|
||
|
generate_embedding_with_size(768, 1.0),
|
||
|
)];
|
||
|
db.upsert_collabs_embeddings(&workspace_id, &oid, frags)
|
||
|
.await?;
|
||
|
|
||
|
// Query with a very different vector should return empty
|
||
|
let query = generate_embedding_with_size(768, -1.0);
|
||
|
let results = db.search(&workspace_id, &query, 1).await?;
|
||
|
assert!(
|
||
|
results.is_empty(),
|
||
|
"Expected no near neighbors for orthogonal vector"
|
||
|
);
|
||
|
Ok(())
|
||
|
}
|
||
|
|
||
|
#[tokio::test]
|
||
|
async fn test_multi_workspace_isolation() -> Result<()> {
|
||
|
init_sqlite_vector_extension();
|
||
|
let temp_dir = tempdir()?;
|
||
|
let db = VectorSqliteDB::new(temp_dir.into_path())?;
|
||
|
|
||
|
let oid = Uuid::new_v4().to_string();
|
||
|
let ws1 = Uuid::new_v4().to_string();
|
||
|
let ws2 = Uuid::new_v4().to_string();
|
||
|
|
||
|
// Insert identical fragment into two workspaces but with different embeddings
|
||
|
let frag = create_test_fragment(&oid, 0, generate_embedding_with_size(768, 0.9));
|
||
|
db.upsert_collabs_embeddings(&ws1, &oid, vec![frag.clone()])
|
||
|
.await?;
|
||
|
let frag2 = create_test_fragment(&oid, 0, generate_embedding_with_size(768, -0.9));
|
||
|
db.upsert_collabs_embeddings(&ws2, &oid, vec![frag2.clone()])
|
||
|
.await?;
|
||
|
|
||
|
// Searching in ws1 should not return ws2's fragment
|
||
|
let res1 = db
|
||
|
.search(&ws1, &generate_embedding_with_size(768, 0.9), 1)
|
||
|
.await?;
|
||
|
assert_eq!(res1.len(), 1);
|
||
|
assert_eq!(res1[0].oid, Uuid::parse_str(&oid)?);
|
||
|
|
||
|
// Searching in ws2 should not return ws1's fragment
|
||
|
let res2 = db
|
||
|
.search(&ws2, &generate_embedding_with_size(768, -0.9), 1)
|
||
|
.await?;
|
||
|
assert_eq!(res2.len(), 1);
|
||
|
assert_eq!(res2[0].oid, Uuid::parse_str(&oid)?);
|
||
|
|
||
|
Ok(())
|
||
|
}
|
||
|
|
||
|
#[tokio::test]
|
||
|
async fn test_select_multiple_oids() -> Result<()> {
|
||
|
init_sqlite_vector_extension();
|
||
|
let temp_dir = tempdir()?;
|
||
|
let db = VectorSqliteDB::new(temp_dir.into_path())?;
|
||
|
|
||
|
let ws = Uuid::new_v4().to_string();
|
||
|
let oid1 = Uuid::new_v4().to_string();
|
||
|
let oid2 = Uuid::new_v4().to_string();
|
||
|
|
||
|
db.upsert_collabs_embeddings(
|
||
|
&ws,
|
||
|
&oid1,
|
||
|
vec![create_test_fragment(
|
||
|
&oid1,
|
||
|
0,
|
||
|
generate_embedding_with_size(768, 0.1),
|
||
|
)],
|
||
|
)
|
||
|
.await?;
|
||
|
db.upsert_collabs_embeddings(
|
||
|
&ws,
|
||
|
&oid2,
|
||
|
vec![create_test_fragment(
|
||
|
&oid2,
|
||
|
0,
|
||
|
generate_embedding_with_size(768, 0.2),
|
||
|
)],
|
||
|
)
|
||
|
.await?;
|
||
|
|
||
|
let map = db
|
||
|
.select_collabs_fragment_ids(&[oid1.clone(), oid2.clone()])
|
||
|
.await?;
|
||
|
assert_eq!(map.len(), 2);
|
||
|
assert!(map.contains_key(&Uuid::parse_str(&oid1)?));
|
||
|
assert!(map.contains_key(&Uuid::parse_str(&oid2)?));
|
||
|
assert_eq!(map[&Uuid::parse_str(&oid1)?].len(), 1);
|
||
|
assert_eq!(map[&Uuid::parse_str(&oid2)?].len(), 1);
|
||
|
|
||
|
Ok(())
|
||
|
}
|
||
|
|
||
|
#[tokio::test]
|
||
|
async fn test_skip_missing_content() -> Result<()> {
|
||
|
init_sqlite_vector_extension();
|
||
|
let temp_dir = tempdir()?;
|
||
|
let db = VectorSqliteDB::new(temp_dir.into_path())?;
|
||
|
|
||
|
let ws = Uuid::new_v4().to_string();
|
||
|
let oid = Uuid::new_v4().to_string();
|
||
|
|
||
|
// One fragment with no content (should be skipped), one with content
|
||
|
let mut bad = create_test_fragment(&oid, 0, generate_embedding_with_size(768, 0.1));
|
||
|
bad.content = None;
|
||
|
let good = create_test_fragment(&oid, 1, generate_embedding_with_size(768, 0.2));
|
||
|
|
||
|
db.upsert_collabs_embeddings(&ws, &oid, vec![bad, good.clone()])
|
||
|
.await?;
|
||
|
|
||
|
let map = db.select_collabs_fragment_ids(&[oid.clone()]).await?;
|
||
|
let frags = &map[&Uuid::parse_str(&oid)?];
|
||
|
assert_eq!(frags.len(), 1);
|
||
|
assert_eq!(frags[0], good.fragment_id);
|
||
|
|
||
|
Ok(())
|
||
|
}
|
||
|
|
||
|
/// Builds an embedding of `size` elements, every one of them equal to `value`.
fn generate_embedding_with_size(size: usize, value: f32) -> Vec<f32> {
  std::iter::repeat(value).take(size).collect()
}
|
||
|
|
||
|
fn create_test_fragment(oid: &str, index: i32, embeddings: Vec<f32>) -> EmbeddedChunk {
|
||
|
let fragment_id = format!("fragment_{}", index);
|
||
|
|
||
|
EmbeddedChunk {
|
||
|
fragment_id,
|
||
|
object_id: oid.to_string(),
|
||
|
content_type: 1,
|
||
|
content: Some(format!("Content for fragment {}", index)),
|
||
|
metadata: Some(format!("Metadata for fragment {}", index)),
|
||
|
fragment_index: index,
|
||
|
embedder_type: 1,
|
||
|
embeddings: Some(embeddings),
|
||
|
}
|
||
|
}
|