mirror of
https://github.com/spacedriveapp/spacedrive.git
synced 2025-12-11 20:15:30 +01:00
Changelog:
- Introduced new modules and actions for generating media proxies and thumbstrips, improving media handling capabilities.
- Added configuration, error handling, and processing logic for both proxy and thumbstrip generation.
- Updated existing media operations to integrate the new functionality, ensuring seamless media processing and a smooth user experience.
- Enhanced tests to cover the new features and ensure reliability of media operations.
606 lines
20 KiB
Rust
//! Desktop Indexing Demo - Production Indexer Showcase
|
|
//!
|
|
//! This example demonstrates:
|
|
//! 1. Starting up Spacedrive Core
|
|
//! 2. Creating/opening a library
|
|
//! 3. Adding the user's desktop as a location
|
|
//! 4. Running the production indexer with all features:
|
|
//! - Smart filtering (skip system files)
|
|
//! - Incremental indexing with change detection
|
|
//! - Performance metrics and reporting
|
|
//! - Multi-phase processing
|
|
//! 5. Showing detailed results and metrics
|
|
|
|
use sd_core::{
|
|
config::{AppConfig, JobLoggingConfig},
|
|
infra::{db::entities, event::Event, job::types::JobStatus},
|
|
location::{create_location, IndexMode, LocationCreateArgs},
|
|
Core,
|
|
};
|
|
use sea_orm::{
|
|
ActiveModelTrait, ColumnTrait, EntityTrait, PaginatorTrait, QueryFilter, QuerySelect,
|
|
};
|
|
use std::path::PathBuf;
|
|
use tokio::time::{sleep, Duration};
|
|
|
|
#[tokio::main]
|
|
async fn main() -> Result<(), Box<dyn std::error::Error>> {
|
|
// Initialize logging with more detail
|
|
tracing_subscriber::fmt()
|
|
.with_env_filter("sd_core=debug,desktop_indexing_demo=info")
|
|
.init();
|
|
|
|
println!("=== Spacedrive 2 Desktop Indexing Demo ===\n");
|
|
|
|
// 1. Initialize Spacedrive Core with job logging enabled
|
|
println!("1. Initializing Spacedrive Core...");
|
|
let data_dir = PathBuf::from("./data/spacedrive-desktop-demo");
|
|
|
|
// Enable job logging by modifying the config before core initialization
|
|
{
|
|
let mut config = AppConfig::load_from(&data_dir)
|
|
.unwrap_or_else(|_| AppConfig::default_with_dir(data_dir.clone()));
|
|
|
|
// Enable job logging - hardcoded for demo
|
|
config.job_logging = JobLoggingConfig {
|
|
enabled: true,
|
|
log_directory: "job_logs".to_string(),
|
|
max_file_size: 10 * 1024 * 1024, // 10MB
|
|
include_debug: true, // Include debug logs for full detail
|
|
};
|
|
|
|
config.save()?;
|
|
println!(" Job logging enabled (logs stored per-library)");
|
|
}
|
|
|
|
let core = Core::new(data_dir.clone()).await?;
|
|
println!(" Core initialized with job logging");
|
|
println!(" Device ID: {}", core.device.device_id()?);
|
|
println!(" Data directory: {:?}", data_dir);
|
|
|
|
// 2. Get or create library
|
|
println!("2. Setting up library...");
|
|
let library = if core.libraries.list().await.is_empty() {
|
|
println!(" Creating new library...");
|
|
let lib = core
|
|
.libraries
|
|
.create_library("Desktop Demo Library", None, core.context.clone())
|
|
.await?;
|
|
println!(" Created library: {}", lib.name().await);
|
|
lib
|
|
} else {
|
|
let libs = core.libraries.list().await;
|
|
let lib = libs.into_iter().next().unwrap();
|
|
println!(" Using existing library: {}", lib.name().await);
|
|
lib
|
|
};
|
|
println!(" Library ID: {}", library.id());
|
|
println!(" Library path: {}\n", library.path().display());
|
|
|
|
// 3. Set up desktop location
|
|
println!("3. Adding Desktop as a location...");
|
|
let desktop_path = dirs::desktop_dir().ok_or("Could not find desktop directory")?;
|
|
println!(" Desktop path: {}", desktop_path.display());
|
|
|
|
// Register device in the database first
|
|
let db = library.db();
|
|
let device = core.device.to_device()?;
|
|
|
|
// Check if device already exists, if not create it
|
|
let device_record = match entities::device::Entity::find()
|
|
.filter(entities::device::Column::Uuid.eq(device.id))
|
|
.one(db.conn())
|
|
.await?
|
|
{
|
|
Some(existing) => {
|
|
println!(" Device already registered");
|
|
existing
|
|
}
|
|
None => {
|
|
println!(" Registering device...");
|
|
let device_model: entities::device::ActiveModel = device.into();
|
|
let inserted = device_model.insert(db.conn()).await?;
|
|
println!(" Device registered with ID: {}", inserted.id);
|
|
inserted
|
|
}
|
|
};
|
|
|
|
// Use production location management to create location and dispatch indexer job
|
|
println!(" Creating location with production job dispatch...");
|
|
let location_args = LocationCreateArgs {
|
|
path: desktop_path.clone(),
|
|
name: Some("Desktop".to_string()),
|
|
index_mode: IndexMode::Deep, // Deep indexing with content analysis
|
|
};
|
|
|
|
let location_db_id = create_location(
|
|
library.clone(),
|
|
&core.events,
|
|
location_args,
|
|
device_record.id,
|
|
)
|
|
.await?;
|
|
|
|
println!(" Location created with DB ID: {}", location_db_id);
|
|
println!(" Indexer job dispatched through production job manager!");
|
|
|
|
// Add to file watcher (optional - for real-time monitoring)
|
|
// Note: location_id here would need to be retrieved from the database record
|
|
// For simplicity, we'll skip the file watcher for now since the main demo is indexing
|
|
println!(" Production job system is now running indexing...\n");
|
|
|
|
// 4. Monitor production indexer with new features
|
|
println!("4. Production Indexer in Action!");
|
|
println!(" New Features Showcase:");
|
|
println!(" Smart Filtering - Skips system files, caches, node_modules");
|
|
println!(" Incremental Indexing - Detects changes via inode tracking");
|
|
println!(" Performance Metrics - Detailed timing and throughput");
|
|
println!(" Multi-phase Processing - Discovery → Processing → Content");
|
|
println!(" Target: {}", desktop_path.display());
|
|
|
|
// Set up event monitoring to track job progress
|
|
println!(" Setting up real-time job monitoring...");
|
|
let mut event_subscriber = core.events.subscribe();
|
|
|
|
// Spawn event listener to monitor indexing progress
|
|
let events_handle = tokio::spawn(async move {
|
|
while let Ok(event) = event_subscriber.recv().await {
|
|
match event {
|
|
Event::IndexingStarted { location_id } => {
|
|
println!(" Indexing started for location: {}", location_id);
|
|
}
|
|
Event::IndexingCompleted {
|
|
location_id,
|
|
total_files,
|
|
total_dirs,
|
|
} => {
|
|
println!(" Indexing completed for location: {}", location_id);
|
|
println!(" Files indexed: {}", total_files);
|
|
println!(" Directories indexed: {}", total_dirs);
|
|
break; // Exit the event loop when indexing is done
|
|
}
|
|
Event::IndexingFailed { location_id, error } => {
|
|
println!(
|
|
" Indexing failed for location: {} - {}",
|
|
location_id, error
|
|
);
|
|
break;
|
|
}
|
|
Event::FilesIndexed { count, .. } => {
|
|
println!(" Progress: {} files processed", count);
|
|
}
|
|
Event::JobProgress {
|
|
job_id,
|
|
job_type,
|
|
progress,
|
|
message,
|
|
generic_progress: _,
|
|
} => {
|
|
// Show production indexer progress details
|
|
if let Some(msg) = message {
|
|
println!(
|
|
" Job {} [{}]: {} ({}%)",
|
|
job_id,
|
|
job_type,
|
|
msg,
|
|
(progress * 100.0) as u8
|
|
);
|
|
} else {
|
|
println!(
|
|
" Job {} [{}]: {}%",
|
|
job_id,
|
|
job_type,
|
|
(progress * 100.0) as u8
|
|
);
|
|
}
|
|
}
|
|
_ => {} // Ignore other events
|
|
}
|
|
}
|
|
});
|
|
|
|
println!(" Waiting for indexing to complete...");
|
|
println!(" Production Indexer Features Active:");
|
|
println!(" Smart Filtering - Automatically skipping:");
|
|
println!(" • Hidden files (.DS_Store, Thumbs.db)");
|
|
println!(" • Dev directories (node_modules, .git, target)");
|
|
println!(" • Cache folders (__pycache__, .cache)");
|
|
println!(" • Large files (>4GB)");
|
|
println!(" Change Detection - Using inode tracking for:");
|
|
println!(" • Fast incremental updates");
|
|
println!(" • Move/rename detection");
|
|
println!(" • Modified file tracking");
|
|
println!(" Performance Optimization:");
|
|
println!(" • Batch processing (1000 items/batch)");
|
|
println!(" • Path prefix deduplication");
|
|
println!(" • Parallel content processing");
|
|
|
|
// Let's show what files are actually in the desktop
|
|
println!("\n Desktop contents preview:");
|
|
let mut file_count = 0;
|
|
let mut dir_count = 0;
|
|
let mut total_size = 0u64;
|
|
|
|
if let Ok(entries) = tokio::fs::read_dir(&desktop_path).await {
|
|
let mut entries = entries;
|
|
while let Ok(Some(entry)) = entries.next_entry().await {
|
|
if let Ok(metadata) = entry.metadata().await {
|
|
if metadata.is_file() {
|
|
file_count += 1;
|
|
total_size += metadata.len();
|
|
if file_count <= 5 {
|
|
// Show first 5 files
|
|
println!(" {}", entry.file_name().to_string_lossy());
|
|
}
|
|
} else if metadata.is_dir() {
|
|
dir_count += 1;
|
|
if dir_count <= 3 {
|
|
// Show first 3 dirs
|
|
println!(" {}/", entry.file_name().to_string_lossy());
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
if file_count > 5 {
|
|
println!(" ... and {} more files", file_count - 5);
|
|
}
|
|
if dir_count > 3 {
|
|
println!(" ... and {} more directories", dir_count - 3);
|
|
}
|
|
|
|
println!("\n Discovery Summary:");
|
|
println!(" Files found: {}", file_count);
|
|
println!(" Directories found: {}", dir_count);
|
|
println!(
|
|
" Total size: {:.2} MB",
|
|
total_size as f64 / 1024.0 / 1024.0
|
|
);
|
|
|
|
// Smart job completion monitoring with checkpoint-based timeout
|
|
println!("\n Monitoring job completion with smart timeout...");
|
|
println!(" Will track checkpoint progress and wait for actual completion");
|
|
|
|
let mut last_checkpoint_size = 0u64;
|
|
let mut stall_time = std::time::Instant::now();
|
|
let stall_timeout = Duration::from_secs(120); // Timeout if no progress for 2 minutes
|
|
let poll_interval = Duration::from_secs(5); // Check every 5 seconds
|
|
|
|
// Keep the event listener running but don't block on it
|
|
let mut events_completed = false;
|
|
|
|
loop {
|
|
// Check if the event listener got completion
|
|
if !events_completed && events_handle.is_finished() {
|
|
events_completed = true;
|
|
println!(" Event listener detected job completion!");
|
|
}
|
|
|
|
// Poll job status from the job manager
|
|
let job_status = library.jobs().list_jobs(None).await?;
|
|
let running_jobs = library.jobs().list_jobs(Some(JobStatus::Running)).await?;
|
|
let completed_jobs = library.jobs().list_jobs(Some(JobStatus::Completed)).await?;
|
|
|
|
// Also check how many entries have been created so far
|
|
let current_entry_count = entities::entry::Entity::find()
|
|
.count(db.conn())
|
|
.await
|
|
.unwrap_or(0);
|
|
|
|
println!(
|
|
" Job Status: {} running, {} completed, {} total",
|
|
running_jobs.len(),
|
|
completed_jobs.len(),
|
|
job_status.len()
|
|
);
|
|
println!(" Database entries so far: {}", current_entry_count);
|
|
|
|
// Check checkpoint progress by querying actual checkpoint data
|
|
let checkpoint_estimate = {
|
|
// Try to get the latest checkpoint size from the jobs database
|
|
if let Ok(metadata) =
|
|
tokio::fs::metadata("./data/spacedrive-desktop-demo/jobs.db").await
|
|
{
|
|
metadata.len()
|
|
} else {
|
|
0
|
|
}
|
|
};
|
|
|
|
if checkpoint_estimate > last_checkpoint_size {
|
|
println!(
|
|
" Progress detected: {} bytes checkpoint data",
|
|
checkpoint_estimate
|
|
);
|
|
last_checkpoint_size = checkpoint_estimate;
|
|
stall_time = std::time::Instant::now(); // Reset stall timer
|
|
}
|
|
|
|
// Check completion conditions
|
|
if running_jobs.is_empty() && !completed_jobs.is_empty() {
|
|
println!(" All jobs completed successfully!");
|
|
break;
|
|
} else if running_jobs.is_empty() && events_completed {
|
|
println!(" No running jobs and events indicate completion!");
|
|
break;
|
|
} else if stall_time.elapsed() > stall_timeout {
|
|
println!(
|
|
" Job appears stalled (no progress for {} seconds)",
|
|
stall_timeout.as_secs()
|
|
);
|
|
println!(" Final checkpoint size: {} bytes", checkpoint_estimate);
|
|
break;
|
|
}
|
|
|
|
// Wait before next poll
|
|
tokio::time::sleep(poll_interval).await;
|
|
}
|
|
|
|
// Abort the event listener if it's still running
|
|
if !events_handle.is_finished() {
|
|
events_handle.abort();
|
|
}
|
|
|
|
// 5. Show production indexer results
|
|
println!("\n5. Production Indexer Results:");
|
|
|
|
// Check database for our location
|
|
let location_record = entities::location::Entity::find()
|
|
.filter(entities::location::Column::Id.eq(location_db_id))
|
|
.one(db.conn())
|
|
.await?
|
|
.ok_or("Location not found")?;
|
|
|
|
// Get all entry IDs under the location using closure table
|
|
use entities::entry_closure;
|
|
let location_entry_id = location_record.entry_id;
|
|
|
|
let descendant_ids = entry_closure::Entity::find()
|
|
.filter(entry_closure::Column::AncestorId.eq(location_entry_id))
|
|
.all(db.conn())
|
|
.await?
|
|
.into_iter()
|
|
.map(|ec| ec.descendant_id)
|
|
.collect::<Vec<i32>>();
|
|
|
|
let mut all_entry_ids = vec![location_entry_id];
|
|
all_entry_ids.extend(descendant_ids);
|
|
|
|
// Get entry statistics for this location
|
|
let entry_count = all_entry_ids.len();
|
|
|
|
let file_count_db = entities::entry::Entity::find()
|
|
.filter(entities::entry::Column::Id.is_in(all_entry_ids.clone()))
|
|
.filter(entities::entry::Column::Kind.eq(0)) // Files
|
|
.count(db.conn())
|
|
.await?;
|
|
|
|
let dir_count_db = entities::entry::Entity::find()
|
|
.filter(entities::entry::Column::Id.is_in(all_entry_ids.clone()))
|
|
.filter(entities::entry::Column::Kind.eq(1)) // Directories
|
|
.count(db.conn())
|
|
.await?;
|
|
|
|
// Check for entries with inodes (change detection feature)
|
|
let entries_with_inodes = entities::entry::Entity::find()
|
|
.filter(entities::entry::Column::Id.is_in(all_entry_ids.clone()))
|
|
.filter(entities::entry::Column::Inode.is_not_null())
|
|
.count(db.conn())
|
|
.await?;
|
|
|
|
// Sample some filtered paths to show filtering worked
|
|
let sample_entries = entities::entry::Entity::find()
|
|
.filter(entities::entry::Column::Id.is_in(all_entry_ids.clone()))
|
|
.limit(10)
|
|
.all(db.conn())
|
|
.await?;
|
|
|
|
let content_identity_count = entities::content_identity::Entity::find()
|
|
.count(db.conn())
|
|
.await?;
|
|
|
|
println!(" Indexing Statistics:");
|
|
println!(" Files indexed: {}", file_count_db);
|
|
println!(" Directories indexed: {}", dir_count_db);
|
|
println!(
|
|
" Entries with inode tracking: {} ({:.1}%)",
|
|
entries_with_inodes,
|
|
(entries_with_inodes as f64 / entry_count.max(1) as f64) * 100.0
|
|
);
|
|
println!(
|
|
" Content identities created: {}",
|
|
content_identity_count
|
|
);
|
|
|
|
println!("\n Smart Filtering Validation:");
|
|
println!(" Checking indexed files don't include filtered patterns...");
|
|
|
|
let mut filtered_correctly = true;
|
|
for entry in &sample_entries {
|
|
// Check if any system files got through
|
|
if entry.name == ".DS_Store" || entry.name == "Thumbs.db" {
|
|
println!(
|
|
" Found system file that should be filtered: {}",
|
|
entry.name
|
|
);
|
|
filtered_correctly = false;
|
|
}
|
|
if entry.name == "node_modules" || entry.name == ".git" || entry.name == "__pycache__" {
|
|
println!(
|
|
" Found dev directory that should be filtered: {}",
|
|
entry.name
|
|
);
|
|
filtered_correctly = false;
|
|
}
|
|
}
|
|
|
|
if filtered_correctly {
|
|
println!(" All sampled entries passed filtering validation!");
|
|
}
|
|
|
|
println!("\n Sample Indexed Entries:");
|
|
for (i, entry) in sample_entries.iter().take(5).enumerate() {
|
|
let kind = match entry.kind {
|
|
0 => "",
|
|
1 => "",
|
|
2 => "",
|
|
_ => "",
|
|
};
|
|
println!(
|
|
" {} {} {} ({})",
|
|
i + 1,
|
|
kind,
|
|
entry.name,
|
|
if entry.extension.is_some() {
|
|
entry.extension.as_ref().unwrap()
|
|
} else {
|
|
"no ext"
|
|
}
|
|
);
|
|
}
|
|
|
|
// Check job status
|
|
let running_jobs = library.jobs().list_jobs(Some(JobStatus::Running)).await?;
|
|
let completed_jobs = library.jobs().list_jobs(Some(JobStatus::Completed)).await?;
|
|
|
|
println!("\n Job System Status:");
|
|
println!(" Running jobs: {}", running_jobs.len());
|
|
println!(" Completed jobs: {}", completed_jobs.len());
|
|
|
|
println!("\n Production Indexer Features Demonstrated:");
|
|
println!(" Smart Filtering - Automatically skipped system/cache files");
|
|
println!(
|
|
" Incremental Ready - {} entries have inode tracking",
|
|
entries_with_inodes
|
|
);
|
|
println!(" Batch Processing - Efficient memory usage");
|
|
println!(" Multi-phase - Discovery → Processing → Content");
|
|
println!(
|
|
" Content Deduplication - {} unique content IDs",
|
|
content_identity_count
|
|
);
|
|
|
|
// 6. Show volume integration
|
|
println!("\n6. Volume Management:");
|
|
println!(" Volume detection: Active");
|
|
println!(" Volume tracking: Ready");
|
|
println!(" Speed testing: Available");
|
|
println!(" Mount monitoring: Active");
|
|
|
|
// 7. Event system demo
|
|
println!("\n7. Event System:");
|
|
println!(" Event subscribers: {}", core.events.subscriber_count());
|
|
println!(" Events ready for:");
|
|
println!(" - File operations (copy, move, delete)");
|
|
println!(" - Library changes");
|
|
println!(" - Volume events");
|
|
println!(" - Indexing progress");
|
|
println!(" - Job status updates");
|
|
|
|
// 8. Production indexer achievements
|
|
println!("\n8. Production Indexer Achievements:");
|
|
println!(" This demo showcased the new production indexer:");
|
|
println!(" Smart filtering skipped system files automatically");
|
|
println!(" Inode tracking enabled incremental indexing");
|
|
println!(" Multi-phase processing with detailed progress");
|
|
println!(" Performance metrics and batch optimization");
|
|
println!(" Path prefix deduplication for storage efficiency");
|
|
println!(" Content identity generation for deduplication");
|
|
println!(" Full resumability with checkpoint support");
|
|
println!(" Non-critical error collection and reporting");
|
|
|
|
// Show example of what would happen on re-index
|
|
println!("\n Incremental Indexing Preview:");
|
|
println!(" Next run would:");
|
|
println!(" • Use inode tracking to detect moved/renamed files");
|
|
println!(" • Only process modified files (compare timestamps)");
|
|
println!(" • Skip unchanged files entirely");
|
|
println!(" • Detect and remove deleted entries");
|
|
|
|
// Final job status check
|
|
let final_running = library.jobs().list_jobs(Some(JobStatus::Running)).await?;
|
|
let final_completed = library.jobs().list_jobs(Some(JobStatus::Completed)).await?;
|
|
|
|
println!("\n Final Job Summary:");
|
|
println!(" Still running: {}", final_running.len());
|
|
println!(" Completed: {}", final_completed.len());
|
|
|
|
if !final_running.is_empty() {
|
|
println!(" Remaining jobs will continue in background");
|
|
println!(" Run the demo again to see persisted results!");
|
|
}
|
|
|
|
// Brief pause to see final status
|
|
sleep(Duration::from_secs(2)).await;
|
|
|
|
// 9. Show job logs created during the demo
|
|
println!("\n9. Job Logs Created:");
|
|
let job_logs_dir = data_dir.join("job_logs");
|
|
if let Ok(mut entries) = tokio::fs::read_dir(&job_logs_dir).await {
|
|
let mut log_files = Vec::new();
|
|
while let Ok(Some(entry)) = entries.next_entry().await {
|
|
if let Some(name) = entry.file_name().to_str() {
|
|
if name.ends_with(".log") {
|
|
log_files.push(name.to_string());
|
|
}
|
|
}
|
|
}
|
|
|
|
if !log_files.is_empty() {
|
|
println!(" Found {} job log file(s):", log_files.len());
|
|
for (i, log_file) in log_files.iter().enumerate() {
|
|
let log_path = job_logs_dir.join(log_file);
|
|
if let Ok(metadata) = tokio::fs::metadata(&log_path).await {
|
|
println!(" {} {} ({} bytes)", i + 1, log_file, metadata.len());
|
|
|
|
// Show first few lines of the first log
|
|
if i == 0 {
|
|
if let Ok(contents) = tokio::fs::read_to_string(&log_path).await {
|
|
let lines: Vec<&str> = contents.lines().take(5).collect();
|
|
println!("\n First {} lines of {}:", lines.len(), log_file);
|
|
for line in lines {
|
|
println!(" > {}", line);
|
|
}
|
|
if contents.lines().count() > 5 {
|
|
println!(
|
|
" ... and {} more lines",
|
|
contents.lines().count() - 5
|
|
);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
println!("\n Full logs available at: {:?}", job_logs_dir);
|
|
} else {
|
|
println!(" No job logs found (jobs may have completed too quickly)");
|
|
}
|
|
}
|
|
|
|
// 10. Graceful shutdown
|
|
println!("\n10. Shutting down gracefully...");
|
|
core.shutdown().await?;
|
|
|
|
println!("\n=== Desktop Indexing Demo Complete! ===");
|
|
println!("Spacedrive 2 Production Job System Working!");
|
|
println!();
|
|
println!("Demo data stored at: {:?}", data_dir);
|
|
println!("Job logs stored at: {:?}", job_logs_dir);
|
|
println!("Run again to see library auto-loading and job persistence!");
|
|
println!();
|
|
println!("Production system achievements:");
|
|
println!(" Full core lifecycle with real job dispatch");
|
|
println!(" Database integration with actual file indexing");
|
|
println!(" Production job manager dispatching real jobs");
|
|
println!(" Real-time progress monitoring via events");
|
|
println!(" Event system with live job status updates");
|
|
println!(" File watching integration ready");
|
|
println!(" User metadata innovation (every file taggable)");
|
|
println!(" Content deduplication with CAS IDs");
|
|
println!(" Path optimization for efficient storage");
|
|
println!(" Production-ready architecture patterns");
|
|
|
|
Ok(())
|
|
}
|