Nathan.fooo 612d652320
feat: support local ai embedding, local ai search, local document content search (#7839)
* chore: generate embeddings

* chore: save embedding

* chore: vec sqlite

* chore: clippy

* chore: init vector

* chore: create vector db

* chore: periodically write embedding

* chore: fix compile

* chore: skip write

* chore: impl search

* fix: test

* fix: stop scheduler

* fix: search

* chore: add test

* chore: update schema

* chore: index all

* chore: index

* chore: search document content

* chore: index document content and title

* chore: index all view

* chore: rename trait

* refactor: remove folder index manager

* chore: index folder changes

* chore: delete folder search

* chore: update logs

* chore: update logs

* chore: search

* chore: add search test

* chore: fmt

* chore: fix test

* chore: fix ios build
2025-05-02 08:21:42 +08:00

53 lines
1.7 KiB
Rust

use tantivy::collector::TopDocs;
use tantivy::query::QueryParser;
use tantivy::schema::*;
use tantivy::{doc, DocAddress, Index, Score};
/// Indexes a single document and checks that a misspelled query still finds it
/// through fuzzy matching on the `title` field.
///
/// The query `"sewhals"` is two edits away from the indexed token `"seawhale"`
/// (one insertion, one substitution), which is within the fuzzy distance of 2
/// configured on the query parser below.
#[test]
fn search_folder_test() {
  // `id` is indexed only; `title` is indexed and stored so it can be read back
  // from the search results.
  let mut schema_builder = Schema::builder();
  let id = schema_builder.add_text_field("id", TEXT);
  let title = schema_builder.add_text_field("title", TEXT | STORED);
  let schema = schema_builder.build();

  // # Indexing
  let index = Index::create_from_tempdir(schema.clone()).unwrap();
  // The argument is the writer's memory budget in bytes, which tantivy splits
  // between its indexing threads.
  let mut index_writer = index.writer(15_000_000).unwrap();
  // Index exactly one document.
  index_writer
    .add_document(doc!(
      id => "123456789",
      title => "The Old Man and the Seawhale",
    ))
    .unwrap();
  // We need to call .commit() explicitly to force the index_writer to finish
  // processing the documents in the queue, flush the current index to disk,
  // and advertise the existence of new documents.
  index_writer.commit().unwrap();

  // # Searching
  let reader = index.reader().unwrap();
  let searcher = reader.searcher();
  let mut query_parser = QueryParser::for_index(&index, vec![title]);
  // Fuzzy prefix matching on `title`: up to 2 edits, a transposition counts as 1.
  query_parser.set_field_fuzzy(title, true, 2, true);
  let query = query_parser.parse_query("sewhals").unwrap();

  // `top_docs` holds at most the 10 most relevant hits, sorted by decreasing score.
  let top_docs: Vec<(Score, DocAddress)> =
    searcher.search(&query, &TopDocs::with_limit(10)).unwrap();
  assert_eq!(
    top_docs.len(),
    1,
    "fuzzy query should match exactly the one indexed document"
  );

  for (_score, doc_address) in top_docs {
    // Retrieve the stored content of the document at `doc_address`.
    let retrieved_doc: TantivyDocument = searcher.doc(doc_address).unwrap();
    let json = retrieved_doc.to_json(&schema);
    println!("{}", json);
    assert!(
      json.contains("The Old Man and the Seawhale"),
      "unexpected document returned: {}",
      json
    );
  }
}