// spacedrive/core/examples/indexing_demo.rs
//! Desktop Indexing Demo - Production Indexer Showcase
//!
//! This example demonstrates:
//! 1. Starting up Spacedrive Core
//! 2. Creating/opening a library
//! 3. Adding the user's desktop as a location
//! 4. Running the production indexer with all features:
//!    - Smart filtering (skip system files)
//!    - Incremental indexing with change detection
//!    - Performance metrics and reporting
//!    - Multi-phase processing
//! 5. Showing detailed results and metrics
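//!
//! To try it locally (assuming the standard cargo examples layout, run from
//! the `core` crate): `cargo run --example indexing_demo`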
use sd_core::{
config::{AppConfig, JobLoggingConfig},
infra::{db::entities, event::Event, job::types::JobStatus},
location::{create_location, IndexMode, LocationCreateArgs},
Core,
};
use sea_orm::{
ActiveModelTrait, ColumnTrait, EntityTrait, PaginatorTrait, QueryFilter, QuerySelect,
};
use std::path::PathBuf;
use tokio::time::{sleep, Duration};
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
// Initialize logging with more detail
tracing_subscriber::fmt()
.with_env_filter("sd_core=debug,desktop_indexing_demo=info")
.init();
println!("=== Spacedrive 2 Desktop Indexing Demo ===\n");
// 1. Initialize Spacedrive Core with job logging enabled
println!("1. Initializing Spacedrive Core...");
let data_dir = PathBuf::from("./data/spacedrive-desktop-demo");
// Enable job logging by modifying the config before core initialization
{
let mut config = AppConfig::load_from(&data_dir)
.unwrap_or_else(|_| AppConfig::default_with_dir(data_dir.clone()));
// Enable job logging - hardcoded for demo
config.job_logging = JobLoggingConfig {
enabled: true,
log_directory: "job_logs".to_string(),
max_file_size: 10 * 1024 * 1024, // 10MB
include_debug: true, // Include debug logs for full detail
};
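        // Persisting the config now means Core::new below boots with job
        // logging already enabled.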
config.save()?;
println!(" Job logging enabled (logs stored per-library)");
}
let core = Core::new(data_dir.clone()).await?;
println!(" Core initialized with job logging");
println!(" Device ID: {}", core.device.device_id()?);
println!(" Data directory: {:?}", data_dir);
// 2. Get or create library
println!("2. Setting up library...");
    let mut libs = core.libraries.list().await;
    let library = if libs.is_empty() {
        println!(" Creating new library...");
        let lib = core
            .libraries
            .create_library("Desktop Demo Library", None, core.context.clone())
            .await?;
        println!(" Created library: {}", lib.name().await);
        lib
    } else {
        let lib = libs.remove(0);
        println!(" Using existing library: {}", lib.name().await);
        lib
    };
println!(" Library ID: {}", library.id());
println!(" Library path: {}\n", library.path().display());
// 3. Set up desktop location
println!("3. Adding Desktop as a location...");
let desktop_path = dirs::desktop_dir().ok_or("Could not find desktop directory")?;
println!(" Desktop path: {}", desktop_path.display());
// Register device in the database first
let db = library.db();
let device = core.device.to_device()?;
// Check if device already exists, if not create it
let device_record = match entities::device::Entity::find()
.filter(entities::device::Column::Uuid.eq(device.id))
.one(db.conn())
.await?
{
Some(existing) => {
println!(" Device already registered");
existing
}
None => {
println!(" Registering device...");
let device_model: entities::device::ActiveModel = device.into();
let inserted = device_model.insert(db.conn()).await?;
println!(" Device registered with ID: {}", inserted.id);
inserted
}
};
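    // Locations reference the owning device row, so the device must exist in
    // the library database before create_location below can use its id.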
// Use production location management to create location and dispatch indexer job
println!(" Creating location with production job dispatch...");
let location_args = LocationCreateArgs {
path: desktop_path.clone(),
name: Some("Desktop".to_string()),
index_mode: IndexMode::Deep, // Deep indexing with content analysis
};
let location_db_id = create_location(
library.clone(),
&core.events,
location_args,
device_record.id,
)
.await?;
println!(" Location created with DB ID: {}", location_db_id);
println!(" Indexer job dispatched through production job manager!");
// Add to file watcher (optional - for real-time monitoring)
// Note: location_id here would need to be retrieved from the database record
// For simplicity, we'll skip the file watcher for now since the main demo is indexing
println!(" Production job system is now running indexing...\n");
// 4. Monitor production indexer with new features
println!("4. Production Indexer in Action!");
println!(" New Features Showcase:");
println!(" Smart Filtering - Skips system files, caches, node_modules");
println!(" Incremental Indexing - Detects changes via inode tracking");
println!(" Performance Metrics - Detailed timing and throughput");
println!(" Multi-phase Processing - Discovery → Processing → Content");
println!(" Target: {}", desktop_path.display());
// Set up event monitoring to track job progress
println!(" Setting up real-time job monitoring...");
let mut event_subscriber = core.events.subscribe();
// Spawn event listener to monitor indexing progress
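    // The spawned task drains the core event bus and exits either on a
    // terminal indexing event (completed/failed) or once recv() errors,
    // e.g. because the channel closed.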
let events_handle = tokio::spawn(async move {
while let Ok(event) = event_subscriber.recv().await {
match event {
Event::IndexingStarted { location_id } => {
println!(" Indexing started for location: {}", location_id);
}
Event::IndexingCompleted {
location_id,
total_files,
total_dirs,
} => {
println!(" Indexing completed for location: {}", location_id);
println!(" Files indexed: {}", total_files);
println!(" Directories indexed: {}", total_dirs);
break; // Exit the event loop when indexing is done
}
Event::IndexingFailed { location_id, error } => {
println!(
" Indexing failed for location: {} - {}",
location_id, error
);
break;
}
Event::FilesIndexed { count, .. } => {
println!(" Progress: {} files processed", count);
}
Event::JobProgress {
job_id,
job_type,
progress,
message,
generic_progress: _,
} => {
// Show production indexer progress details
if let Some(msg) = message {
println!(
" Job {} [{}]: {} ({}%)",
job_id,
job_type,
msg,
(progress * 100.0) as u8
);
} else {
println!(
" Job {} [{}]: {}%",
job_id,
job_type,
(progress * 100.0) as u8
);
}
}
_ => {} // Ignore other events
}
}
});
println!(" Waiting for indexing to complete...");
println!(" Production Indexer Features Active:");
println!(" Smart Filtering - Automatically skipping:");
println!(" • Hidden files (.DS_Store, Thumbs.db)");
println!(" • Dev directories (node_modules, .git, target)");
println!(" • Cache folders (__pycache__, .cache)");
println!(" • Large files (>4GB)");
println!(" Change Detection - Using inode tracking for:");
println!(" • Fast incremental updates");
println!(" • Move/rename detection");
println!(" • Modified file tracking");
println!(" Performance Optimization:");
println!(" • Batch processing (1000 items/batch)");
println!(" • Path prefix deduplication");
println!(" • Parallel content processing");
// Let's show what files are actually in the desktop
println!("\n Desktop contents preview:");
let mut file_count = 0;
let mut dir_count = 0;
let mut total_size = 0u64;
if let Ok(entries) = tokio::fs::read_dir(&desktop_path).await {
let mut entries = entries;
while let Ok(Some(entry)) = entries.next_entry().await {
if let Ok(metadata) = entry.metadata().await {
if metadata.is_file() {
file_count += 1;
total_size += metadata.len();
if file_count <= 5 {
// Show first 5 files
println!(" {}", entry.file_name().to_string_lossy());
}
} else if metadata.is_dir() {
dir_count += 1;
if dir_count <= 3 {
// Show first 3 dirs
println!(" {}/", entry.file_name().to_string_lossy());
}
}
}
}
}
if file_count > 5 {
println!(" ... and {} more files", file_count - 5);
}
if dir_count > 3 {
println!(" ... and {} more directories", dir_count - 3);
}
println!("\n Discovery Summary:");
println!(" Files found: {}", file_count);
println!(" Directories found: {}", dir_count);
println!(
" Total size: {:.2} MB",
total_size as f64 / 1024.0 / 1024.0
);
// Smart job completion monitoring with checkpoint-based timeout
println!("\n Monitoring job completion with smart timeout...");
println!(" Will track checkpoint progress and wait for actual completion");
let mut last_checkpoint_size = 0u64;
    let mut last_progress = std::time::Instant::now();
let stall_timeout = Duration::from_secs(120); // Timeout if no progress for 2 minutes
let poll_interval = Duration::from_secs(5); // Check every 5 seconds
// Keep the event listener running but don't block on it
let mut events_completed = false;
loop {
// Check if the event listener got completion
if !events_completed && events_handle.is_finished() {
events_completed = true;
println!(" Event listener detected job completion!");
}
// Poll job status from the job manager
        let all_jobs = library.jobs().list_jobs(None).await?;
let running_jobs = library.jobs().list_jobs(Some(JobStatus::Running)).await?;
let completed_jobs = library.jobs().list_jobs(Some(JobStatus::Completed)).await?;
// Also check how many entries have been created so far
let current_entry_count = entities::entry::Entity::find()
.count(db.conn())
.await
.unwrap_or(0);
println!(
" Job Status: {} running, {} completed, {} total",
running_jobs.len(),
completed_jobs.len(),
            all_jobs.len()
);
println!(" Database entries so far: {}", current_entry_count);
        // Use growth of the jobs database file as a rough proxy for
        // checkpoint progress; this demo doesn't read checkpoint rows
        // directly.
        let checkpoint_estimate = tokio::fs::metadata(data_dir.join("jobs.db"))
            .await
            .map(|m| m.len())
            .unwrap_or(0);
if checkpoint_estimate > last_checkpoint_size {
println!(
" Progress detected: {} bytes checkpoint data",
checkpoint_estimate
);
last_checkpoint_size = checkpoint_estimate;
            last_progress = std::time::Instant::now(); // Reset stall timer
}
// Check completion conditions
if running_jobs.is_empty() && !completed_jobs.is_empty() {
println!(" All jobs completed successfully!");
break;
} else if running_jobs.is_empty() && events_completed {
println!(" No running jobs and events indicate completion!");
break;
        } else if last_progress.elapsed() > stall_timeout {
println!(
" Job appears stalled (no progress for {} seconds)",
stall_timeout.as_secs()
);
println!(" Final checkpoint size: {} bytes", checkpoint_estimate);
break;
}
// Wait before next poll
tokio::time::sleep(poll_interval).await;
}
    // Abort the event listener if it's still running (abort() cancels the
    // task at its next await point)
if !events_handle.is_finished() {
events_handle.abort();
}
// 5. Show production indexer results
println!("\n5. Production Indexer Results:");
// Check database for our location
let location_record = entities::location::Entity::find()
.filter(entities::location::Column::Id.eq(location_db_id))
.one(db.conn())
.await?
.ok_or("Location not found")?;
// Get all entry IDs under the location using closure table
use entities::entry_closure;
let location_entry_id = location_record.entry_id;
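    // An entry_closure row exists for every (ancestor, descendant) pair in
    // the tree, not just direct parent/child links, so a single equality
    // filter on AncestorId pulls the whole subtree without recursive SQL.
    // (Whether the table stores self-referencing rows is an sd_core detail;
    // the location's own entry id is added explicitly below either way.)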
let descendant_ids = entry_closure::Entity::find()
.filter(entry_closure::Column::AncestorId.eq(location_entry_id))
.all(db.conn())
.await?
.into_iter()
.map(|ec| ec.descendant_id)
.collect::<Vec<i32>>();
let mut all_entry_ids = vec![location_entry_id];
all_entry_ids.extend(descendant_ids);
// Get entry statistics for this location
let entry_count = all_entry_ids.len();
let file_count_db = entities::entry::Entity::find()
.filter(entities::entry::Column::Id.is_in(all_entry_ids.clone()))
.filter(entities::entry::Column::Kind.eq(0)) // Files
.count(db.conn())
.await?;
let dir_count_db = entities::entry::Entity::find()
.filter(entities::entry::Column::Id.is_in(all_entry_ids.clone()))
.filter(entities::entry::Column::Kind.eq(1)) // Directories
.count(db.conn())
.await?;
// Check for entries with inodes (change detection feature)
let entries_with_inodes = entities::entry::Entity::find()
.filter(entities::entry::Column::Id.is_in(all_entry_ids.clone()))
.filter(entities::entry::Column::Inode.is_not_null())
.count(db.conn())
.await?;
// Sample some filtered paths to show filtering worked
let sample_entries = entities::entry::Entity::find()
.filter(entities::entry::Column::Id.is_in(all_entry_ids.clone()))
.limit(10)
.all(db.conn())
.await?;
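    // Note: the count below is library-wide (no location filter), which is
    // fine for a demo readout but can overstate per-location numbers on
    // reruns.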
let content_identity_count = entities::content_identity::Entity::find()
.count(db.conn())
.await?;
println!(" Indexing Statistics:");
println!(" Files indexed: {}", file_count_db);
println!(" Directories indexed: {}", dir_count_db);
println!(
" Entries with inode tracking: {} ({:.1}%)",
entries_with_inodes,
(entries_with_inodes as f64 / entry_count.max(1) as f64) * 100.0
);
println!(
" Content identities created: {}",
content_identity_count
);
println!("\n Smart Filtering Validation:");
println!(" Checking indexed files don't include filtered patterns...");
let mut filtered_correctly = true;
for entry in &sample_entries {
// Check if any system files got through
if entry.name == ".DS_Store" || entry.name == "Thumbs.db" {
println!(
" Found system file that should be filtered: {}",
entry.name
);
filtered_correctly = false;
}
if entry.name == "node_modules" || entry.name == ".git" || entry.name == "__pycache__" {
println!(
" Found dev directory that should be filtered: {}",
entry.name
);
filtered_correctly = false;
}
}
if filtered_correctly {
println!(" All sampled entries passed filtering validation!");
}
println!("\n Sample Indexed Entries:");
for (i, entry) in sample_entries.iter().take(5).enumerate() {
        // Entry kinds match the queries above: 0 = file, 1 = directory;
        // anything else is grouped as "other" here.
        let kind = match entry.kind {
            0 => "file",
            1 => "dir",
            _ => "other",
        };
println!(
" {} {} {} ({})",
i + 1,
kind,
entry.name,
            entry.extension.as_deref().unwrap_or("no ext")
);
}
// Check job status
let running_jobs = library.jobs().list_jobs(Some(JobStatus::Running)).await?;
let completed_jobs = library.jobs().list_jobs(Some(JobStatus::Completed)).await?;
println!("\n Job System Status:");
println!(" Running jobs: {}", running_jobs.len());
println!(" Completed jobs: {}", completed_jobs.len());
println!("\n Production Indexer Features Demonstrated:");
println!(" Smart Filtering - Automatically skipped system/cache files");
println!(
" Incremental Ready - {} entries have inode tracking",
entries_with_inodes
);
println!(" Batch Processing - Efficient memory usage");
println!(" Multi-phase - Discovery → Processing → Content");
println!(
" Content Deduplication - {} unique content IDs",
content_identity_count
);
// 6. Show volume integration
println!("\n6. Volume Management:");
println!(" Volume detection: Active");
println!(" Volume tracking: Ready");
println!(" Speed testing: Available");
println!(" Mount monitoring: Active");
// 7. Event system demo
println!("\n7. Event System:");
println!(" Event subscribers: {}", core.events.subscriber_count());
println!(" Events ready for:");
println!(" - File operations (copy, move, delete)");
println!(" - Library changes");
println!(" - Volume events");
println!(" - Indexing progress");
println!(" - Job status updates");
// 8. Production indexer achievements
println!("\n8. Production Indexer Achievements:");
println!(" This demo showcased the new production indexer:");
println!(" Smart filtering skipped system files automatically");
println!(" Inode tracking enabled incremental indexing");
println!(" Multi-phase processing with detailed progress");
println!(" Performance metrics and batch optimization");
println!(" Path prefix deduplication for storage efficiency");
println!(" Content identity generation for deduplication");
println!(" Full resumability with checkpoint support");
println!(" Non-critical error collection and reporting");
// Show example of what would happen on re-index
println!("\n Incremental Indexing Preview:");
println!(" Next run would:");
println!(" • Use inode tracking to detect moved/renamed files");
println!(" • Only process modified files (compare timestamps)");
println!(" • Skip unchanged files entirely");
println!(" • Detect and remove deleted entries");
// Final job status check
let final_running = library.jobs().list_jobs(Some(JobStatus::Running)).await?;
let final_completed = library.jobs().list_jobs(Some(JobStatus::Completed)).await?;
println!("\n Final Job Summary:");
println!(" Still running: {}", final_running.len());
println!(" Completed: {}", final_completed.len());
if !final_running.is_empty() {
println!(" Remaining jobs will continue in background");
println!(" Run the demo again to see persisted results!");
}
// Brief pause to see final status
sleep(Duration::from_secs(2)).await;
// 9. Show job logs created during the demo
println!("\n9. Job Logs Created:");
let job_logs_dir = data_dir.join("job_logs");
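    // The config earlier says logs are stored per-library; this demo checks
    // the app data dir and reports gracefully if nothing turns up here.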
if let Ok(mut entries) = tokio::fs::read_dir(&job_logs_dir).await {
let mut log_files = Vec::new();
while let Ok(Some(entry)) = entries.next_entry().await {
if let Some(name) = entry.file_name().to_str() {
if name.ends_with(".log") {
log_files.push(name.to_string());
}
}
}
if !log_files.is_empty() {
println!(" Found {} job log file(s):", log_files.len());
for (i, log_file) in log_files.iter().enumerate() {
let log_path = job_logs_dir.join(log_file);
if let Ok(metadata) = tokio::fs::metadata(&log_path).await {
println!(" {} {} ({} bytes)", i + 1, log_file, metadata.len());
// Show first few lines of the first log
if i == 0 {
if let Ok(contents) = tokio::fs::read_to_string(&log_path).await {
let lines: Vec<&str> = contents.lines().take(5).collect();
println!("\n First {} lines of {}:", lines.len(), log_file);
for line in lines {
println!(" > {}", line);
}
                            let total_lines = contents.lines().count();
                            if total_lines > 5 {
                                println!(" ... and {} more lines", total_lines - 5);
                            }
}
}
}
}
println!("\n Full logs available at: {:?}", job_logs_dir);
} else {
println!(" No job logs found (jobs may have completed too quickly)");
}
}
// 10. Graceful shutdown
println!("\n10. Shutting down gracefully...");
core.shutdown().await?;
println!("\n=== Desktop Indexing Demo Complete! ===");
println!("Spacedrive 2 Production Job System Working!");
println!();
println!("Demo data stored at: {:?}", data_dir);
println!("Job logs stored at: {:?}", job_logs_dir);
println!("Run again to see library auto-loading and job persistence!");
println!();
println!("Production system achievements:");
println!(" Full core lifecycle with real job dispatch");
println!(" Database integration with actual file indexing");
println!(" Production job manager dispatching real jobs");
println!(" Real-time progress monitoring via events");
println!(" Event system with live job status updates");
println!(" File watching integration ready");
println!(" User metadata innovation (every file taggable)");
println!(" Content deduplication with CAS IDs");
println!(" Path optimization for efficient storage");
println!(" Production-ready architecture patterns");
Ok(())
}