diff --git a/Cargo.lock b/Cargo.lock
index 960cb9c..b4028ac 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -140,6 +140,7 @@ dependencies = [
  "memmap2",
  "regex",
  "serde",
+ "serde_json",
  "walkdir",
 ]
 
@@ -168,6 +169,7 @@ dependencies = [
  "iana-time-zone",
  "js-sys",
  "num-traits",
+ "serde",
  "wasm-bindgen",
  "windows-link",
 ]
@@ -305,6 +307,12 @@ version = "1.70.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf"
 
+[[package]]
+name = "itoa"
+version = "1.0.15"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c"
+
 [[package]]
 name = "js-sys"
 version = "0.3.77"
@@ -457,6 +465,12 @@ version = "1.0.21"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "8a0d197bd2c9dc6e53b84da9556a69ba4cdfab8619eb41a8bd1cc2027a0f6b1d"
 
+[[package]]
+name = "ryu"
+version = "1.0.20"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f"
+
 [[package]]
 name = "same-file"
 version = "1.0.6"
@@ -486,6 +500,18 @@ dependencies = [
  "syn",
 ]
 
+[[package]]
+name = "serde_json"
+version = "1.0.141"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "30b9eff21ebe718216c6ec64e1d9ac57087aad11efc64e32002bce4a0d4c03d3"
+dependencies = [
+ "itoa",
+ "memchr",
+ "ryu",
+ "serde",
+]
+
 [[package]]
 name = "shlex"
 version = "1.3.0"
diff --git a/Cargo.toml b/Cargo.toml
index cdbf4e3..b9d1ad3 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -8,6 +8,7 @@ clap = { version = "4.0", features = ["derive"] }
 memmap2 = "0.9"
 inotify = "0.10"
 serde = { version = "1.0", features = ["derive"] }
+serde_json = "1.0"
 walkdir = "2.0"
 regex = "1.0"
-chrono = "0.4"
\ No newline at end of file
+chrono = { version = "0.4", features = ["serde"] }
\ No newline at end of file
diff --git a/src/cli/mod.rs b/src/cli/mod.rs
index 2dbfde0..f9a9b1f 100644
--- a/src/cli/mod.rs
+++ b/src/cli/mod.rs
@@ -1,4 +1,5 @@
 pub mod tree;
+pub mod scan;
 
 use clap::{Parser, Subcommand};
 
diff --git a/src/cli/scan.rs b/src/cli/scan.rs
new file mode 100644
index 0000000..78cd12b
--- /dev/null
+++ b/src/cli/scan.rs
@@ -0,0 +1,459 @@
+//! Orphan-file scanning: walks directory trees, asks the package managers who
+//! owns each regular file, and reports the files nobody owns.
+
+use crate::{CartoError, PackageManager};
+use crate::database::format::{OrphanCategory, ScanSummary};
+use crate::cli::OutputFormat;
+use crate::package_managers::find_file_owner;
+use std::path::{Path, PathBuf};
+use std::collections::HashMap;
+use walkdir::WalkDir;
+use serde::{Serialize, Deserialize};
+use chrono::{DateTime, Utc};
+
+/// A regular file that no package manager claims.
+#[derive(Debug, Clone, Serialize)]
+pub struct OrphanFile {
+    pub path: PathBuf,
+    pub category: OrphanCategory,
+    /// Size in bytes.
+    pub size: u64,
+    /// Modification time in seconds since the Unix epoch (0 if unavailable).
+    pub modified: u64,
+    /// Creation time in seconds since the Unix epoch (0 if unavailable).
+    pub created: u64,
+}
+
+/// Aggregated outcome of one scan run.
+#[derive(Debug, Serialize)]
+pub struct ScanResults {
+    pub scan_time: DateTime<Utc>,
+    pub scanned_paths: Vec<String>,
+    pub total_files_scanned: u64,
+    /// Number of orphans listed in `orphans_by_category`.
+    pub total_orphans: usize,
+    /// Reported orphans, keyed by `OrphanCategory::name()`.
+    pub orphans_by_category: HashMap<String, Vec<OrphanFile>>,
+    /// Combined size in bytes of the orphans in `orphans_by_category`.
+    pub total_orphan_size: u64,
+    pub scan_duration_seconds: f64,
+}
+
+impl ScanResults {
+    /// Creates an empty result set stamped with the current UTC time.
+    pub fn new(scanned_paths: Vec<String>) -> Self {
+        Self {
+            scan_time: Utc::now(),
+            scanned_paths,
+            total_files_scanned: 0,
+            total_orphans: 0,
+            orphans_by_category: HashMap::new(),
+            total_orphan_size: 0,
+            scan_duration_seconds: 0.0,
+        }
+    }
+}
+
+/// Walks directory trees and collects the files that have no owning package.
+pub struct Scanner<'a> {
+    // NOTE(review): stored but never read in this file; ownership lookups go
+    // through `find_file_owner` instead.
+    managers: &'a [Box<dyn PackageManager>],
+    exclude_paths: Vec<PathBuf>,
+}
+
+impl<'a> Scanner<'a> {
+    /// Creates a scanner with the built-in list of excluded path prefixes.
+    pub fn new(managers: &'a [Box<dyn PackageManager>]) -> Self {
+        let exclude_paths = vec![
+            PathBuf::from("/proc"),
+            PathBuf::from("/sys"),
+            PathBuf::from("/dev"),
+            PathBuf::from("/run"),
+            PathBuf::from("/var/run"),
+            PathBuf::from("/tmp/.X11-unix"),
+            PathBuf::from("/tmp/.ICE-unix"),
+        ];
+
+        Self {
+            managers,
+            exclude_paths,
+        }
+    }
+
+    /// Scans `paths` (or `/` when empty) and returns the categorized orphans.
+    ///
+    /// Orphans categorized as `Expected` are left out of the results, and of
+    /// the totals, unless `include_expected` is set. Paths that do not exist
+    /// are skipped with a warning.
+    pub fn scan_paths(
+        &self,
+        paths: &[String],
+        include_expected: bool,
+    ) -> Result<ScanResults, CartoError> {
+        let start_time = std::time::Instant::now();
+        let scan_paths = if paths.is_empty() {
+            vec!["/".to_string()]
+        } else {
+            paths.to_vec()
+        };
+
+        let mut results = ScanResults::new(scan_paths.clone());
+        let mut orphans: Vec<OrphanFile> = Vec::new();
+
+        for path_str in &scan_paths {
+            let path = Path::new(path_str);
+            if !path.exists() {
+                eprintln!("Warning: Path {} does not exist, skipping", path_str);
+                continue;
+            }
+
+            // Status goes to stderr so stdout carries only the report and JSON
+            // output stays parseable.
+            eprintln!("Scanning: {}", path_str);
+            self.scan_directory(path, &mut orphans, &mut results)?;
+        }
+
+        self.categorize_orphans(&mut orphans, include_expected, &mut results);
+
+        // Derive the totals from what is actually reported so they always agree
+        // with `orphans_by_category`, even when expected files are hidden.
+        results.total_orphans = results.orphans_by_category.values().map(Vec::len).sum();
+        results.total_orphan_size = results
+            .orphans_by_category
+            .values()
+            .flatten()
+            .map(|orphan| orphan.size)
+            .sum();
+        results.scan_duration_seconds = start_time.elapsed().as_secs_f64();
+
+        Ok(results)
+    }
+
+    /// Walks `root` without following symlinks, counts every regular file and
+    /// appends the ones without an owner to `orphans`.
+    fn scan_directory(
+        &self,
+        root: &Path,
+        orphans: &mut Vec<OrphanFile>,
+        results: &mut ScanResults,
+    ) -> Result<(), CartoError> {
+        let walker = WalkDir::new(root)
+            .follow_links(false)
+            .into_iter()
+            .filter_entry(|e| !self.should_exclude_path(e.path()));
+
+        for entry in walker {
+            let entry = match entry {
+                Ok(entry) => entry,
+                Err(e) => {
+                    eprintln!("Warning: Error accessing {}: {}", e.path().unwrap_or(Path::new("unknown")).display(), e);
+                    continue;
+                }
+            };
+
+            if !entry.file_type().is_file() {
+                continue;
+            }
+
+            results.total_files_scanned += 1;
+
+            let path = entry.path();
+            if !self.is_file_owned(path)? {
+                match entry.metadata() {
+                    Ok(metadata) => orphans.push(OrphanFile {
+                        path: path.to_path_buf(),
+                        category: OrphanCategory::Unknown, // Assigned in categorize_orphans
+                        size: metadata.len(),
+                        modified: epoch_secs(metadata.modified()),
+                        created: epoch_secs(metadata.created()),
+                    }),
+                    // Report the failure instead of silently dropping the file.
+                    Err(e) => eprintln!("Warning: Error reading metadata of {}: {}", path.display(), e),
+                }
+            }
+
+            // Progress indicator for large scans (stderr, see scan_paths)
+            if results.total_files_scanned % 10000 == 0 {
+                eprintln!("Scanned {} files, found {} orphans",
+                    results.total_files_scanned, orphans.len());
+            }
+        }
+
+        Ok(())
+    }
+
+    /// Returns true when `path` must not be scanned or descended into.
+    ///
+    /// Matching is done on whole path components, so an excluded directory is
+    /// pruned at its own entry rather than only at its children.
+    fn should_exclude_path(&self, path: &Path) -> bool {
+        if self.exclude_paths.iter().any(|excluded| path.starts_with(excluded)) {
+            return true;
+        }
+
+        // Additional exclusions for common mount points and special directories
+        if path.starts_with("/media") || path.starts_with("/mnt") {
+            return true;
+        }
+
+        let names: Vec<_> = path.components().map(|c| c.as_os_str()).collect();
+        names.iter().any(|name| *name == ".git" || *name == ".cache")
+            || names
+                .windows(3)
+                .any(|w| w[0] == ".local" && w[1] == "share" && w[2] == "Trash")
+    }
+
+    /// Reports whether any package owns `path`.
+    ///
+    /// Lookup errors are logged and treated as "not owned" so that a single
+    /// unreadable file cannot abort the whole scan.
+    fn is_file_owned(&self, path: &Path) -> Result<bool, CartoError> {
+        match find_file_owner(path) {
+            Ok(owner) => Ok(owner.is_some()),
+            Err(e) => {
+                eprintln!("Warning: Error checking ownership of {}: {}", path.display(), e);
+                Ok(false)
+            }
+        }
+    }
+
+    /// Assigns a category to every orphan and groups the reportable ones by
+    /// category name in `results`.
+    fn categorize_orphans(
+        &self,
+        orphans: &mut [OrphanFile],
+        include_expected: bool,
+        results: &mut ScanResults,
+    ) {
+        for orphan in orphans.iter_mut() {
+            orphan.category = self.categorize_file(&orphan.path);
+
+            if !include_expected && orphan.category == OrphanCategory::Expected {
+                continue;
+            }
+
+            results
+                .orphans_by_category
+                .entry(orphan.category.name().to_string())
+                .or_default()
+                .push(orphan.clone());
+        }
+    }
+
+    /// Classifies an orphan from its location, extension and file name.
+    /// The first matching rule wins.
+    fn categorize_file(&self, path: &Path) -> OrphanCategory {
+        let path_str = path.to_string_lossy();
+
+        // Expected orphans in common temporary/log locations
+        if path_str.starts_with("/tmp/") ||
+           path_str.starts_with("/var/tmp/") ||
+           path_str.starts_with("/var/log/") ||
+           path_str.starts_with("/var/cache/") ||
+           path_str.starts_with("/var/spool/") {
+            return OrphanCategory::Expected;
+        }
+
+        // User data in home directories
+        if path_str.starts_with("/home/") ||
+           path_str.starts_with("/root/") {
+            return OrphanCategory::UserData;
+        }
+
+        // System generated files
+        if path_str.starts_with("/var/lib/") ||
+           path_str.starts_with("/var/run/") ||
+           path_str.contains("/.cache/") ||
+           path_str.contains("/cache/") {
+            return OrphanCategory::SystemGenerated;
+        }
+
+        // Configuration backups
+        if let Some(extension) = path.extension() {
+            let ext = extension.to_string_lossy().to_lowercase();
+            if ext == "bak" || ext == "orig" || ext == "old" || ext == "backup" {
+                return OrphanCategory::ConfigBackup;
+            }
+        }
+
+        // Temporary files in unusual locations
+        if let Some(filename) = path.file_name() {
+            let name = filename.to_string_lossy().to_lowercase();
+            if name.starts_with("tmp") ||
+               name.starts_with(".tmp") ||
+               name.ends_with(".tmp") ||
+               name.ends_with("~") {
+                return OrphanCategory::Temporary;
+            }
+        }
+
+        OrphanCategory::Unknown
+    }
+}
+
+/// Converts a filesystem timestamp into whole seconds since the Unix epoch,
+/// yielding 0 when the timestamp is unavailable or predates the epoch.
+fn epoch_secs(time: std::io::Result<std::time::SystemTime>) -> u64 {
+    time.unwrap_or(std::time::UNIX_EPOCH)
+        .duration_since(std::time::UNIX_EPOCH)
+        .unwrap_or_default()
+        .as_secs()
+}
+
+/// Returns the categories sorted by name so that output order is stable;
+/// `HashMap` iteration order is unspecified.
+fn sorted_categories(results: &ScanResults) -> Vec<(&String, &Vec<OrphanFile>)> {
+    let mut categories: Vec<_> = results.orphans_by_category.iter().collect();
+    categories.sort_unstable_by(|a, b| a.0.cmp(b.0));
+    categories
+}
+
+/// Prints `results` to stdout in the requested `format`.
+///
+/// `detailed` upgrades the `Summary` format to the detailed listing; the other
+/// formats ignore it. `show_sizes` adds file sizes where the format has room.
+pub fn print_scan_results(
+    results: &ScanResults,
+    format: &OutputFormat,
+    detailed: bool,
+    show_sizes: bool,
+) -> Result<(), CartoError> {
+    match format {
+        OutputFormat::Json => print_json_results(results),
+        OutputFormat::Summary if detailed => print_detailed_results(results, show_sizes),
+        OutputFormat::Summary => print_summary_results(results, show_sizes),
+        OutputFormat::Detailed => print_detailed_results(results, show_sizes),
+        OutputFormat::Simple => print_simple_results(results),
+        OutputFormat::Tree => print_tree_results(results, show_sizes),
+    }
+}
+
+/// Prints the results as pretty-printed JSON.
+fn print_json_results(results: &ScanResults) -> Result<(), CartoError> {
+    let json = serde_json::to_string_pretty(results)
+        .map_err(|e| CartoError::CommandFailed(format!("JSON serialization failed: {}", e)))?;
+    println!("{}", json);
+    Ok(())
+}
+
+/// Prints scan metadata followed by per-category counts.
+fn print_summary_results(results: &ScanResults, show_sizes: bool) -> Result<(), CartoError> {
+    println!("Scan Summary");
+    println!("============");
+    println!("Scan completed: {}", results.scan_time.format("%Y-%m-%d %H:%M:%S UTC"));
+    println!("Duration: {:.2} seconds", results.scan_duration_seconds);
+    println!("Scanned paths: {}", results.scanned_paths.join(", "));
+    println!("Total files scanned: {}", results.total_files_scanned);
+    println!("Total orphan files: {}", results.total_orphans);
+
+    if show_sizes {
+        println!("Total orphan size: {}", format_size(results.total_orphan_size));
+    }
+
+    println!("\nOrphans by Category:");
+    for (category, orphans) in sorted_categories(results) {
+        if show_sizes {
+            let category_size: u64 = orphans.iter().map(|o| o.size).sum();
+            println!(" {}: {} files ({})", category, orphans.len(), format_size(category_size));
+        } else {
+            println!(" {}: {} files", category, orphans.len());
+        }
+    }
+
+    Ok(())
+}
+
+/// Prints the summary followed by up to 20 file paths per category.
+fn print_detailed_results(results: &ScanResults, show_sizes: bool) -> Result<(), CartoError> {
+    print_summary_results(results, show_sizes)?;
+
+    println!("\nDetailed File Listings:");
+    for (category, orphans) in sorted_categories(results) {
+        println!("\n{} ({} files):", category.to_uppercase(), orphans.len());
+        println!("{}", "=".repeat(50));
+
+        for orphan in orphans.iter().take(20) { // Limit to first 20 per category
+            if show_sizes {
+                println!(" {} ({})", orphan.path.display(), format_size(orphan.size));
+            } else {
+                println!(" {}", orphan.path.display());
+            }
+        }
+
+        if orphans.len() > 20 {
+            println!(" ... and {} more files", orphans.len() - 20);
+        }
+    }
+
+    Ok(())
+}
+
+/// Prints a one-line total followed by one count per category.
+fn print_simple_results(results: &ScanResults) -> Result<(), CartoError> {
+    println!("Found {} orphan files in {:.1}s", results.total_orphans, results.scan_duration_seconds);
+    for (category, orphans) in sorted_categories(results) {
+        println!("{}: {}", category, orphans.len());
+    }
+    Ok(())
+}
+
+/// Prints orphans grouped by parent directory, showing at most 10 directories
+/// per category and 5 files per directory.
+fn print_tree_results(results: &ScanResults, show_sizes: bool) -> Result<(), CartoError> {
+    use std::collections::BTreeMap;
+
+    println!("Orphan Files by Directory Tree");
+    println!("==============================");
+
+    for (category, orphans) in sorted_categories(results) {
+        println!("\n{} ({} files):", category.to_uppercase(), orphans.len());
+
+        // Group files by their parent directory
+        let mut dir_map: BTreeMap<PathBuf, Vec<&OrphanFile>> = BTreeMap::new();
+        for orphan in orphans {
+            if let Some(parent) = orphan.path.parent() {
+                dir_map.entry(parent.to_path_buf()).or_default().push(orphan);
+            }
+        }
+
+        for (dir, files) in dir_map.iter().take(10) { // Limit directories shown
+            println!(" {}/", dir.display());
+            for file in files.iter().take(5) { // Limit files per directory
+                let filename = file.path.file_name().unwrap_or_default().to_string_lossy();
+                if show_sizes {
+                    println!(" ├── {} ({})", filename, format_size(file.size));
+                } else {
+                    println!(" ├── {}", filename);
+                }
+            }
+            if files.len() > 5 {
+                println!(" └── ... and {} more files", files.len() - 5);
+            }
+        }
+
+        if dir_map.len() > 10 {
+            println!(" ... and {} more directories", dir_map.len() - 10);
+        }
+    }
+
+    Ok(())
+}
+
+/// Formats a byte count with binary (1024-based) units from B up to TB.
+fn format_size(bytes: u64) -> String {
+    const UNITS: &[&str] = &["B", "KB", "MB", "GB", "TB"];
+    let mut size = bytes as f64;
+    let mut unit_idx = 0;
+
+    while size >= 1024.0 && unit_idx < UNITS.len() - 1 {
+        size /= 1024.0;
+        unit_idx += 1;
+    }
+
+    if unit_idx == 0 {
+        format!("{} {}", bytes, UNITS[unit_idx])
+    } else {
+        format!("{:.1} {}", size, UNITS[unit_idx])
+    }
+}
\ No newline at end of file
diff --git a/src/database/format.rs b/src/database/format.rs
index 1386bf6..e00625b 100644
--- a/src/database/format.rs
+++ b/src/database/format.rs
@@ -2,6 +2,7 @@ use memmap2::Mmap;
 use std::fs::File;
 use std::path::Path;
 use crate::{CartoError, PackageSource};
+use serde::Serialize;
 
 #[repr(C, packed)]
 pub struct DatabaseHeader {
@@ -64,7 +65,7 @@ pub struct ScanRecord {
     scan_duration: u32, // Duration in seconds
 }
 
-#[derive(Debug, Clone, Copy, PartialEq)]
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize)]
 pub enum OrphanCategory {
     Unknown = 0, // Files with no clear ownership
     Expected = 1, // Files in /tmp, /var/tmp, /var/log, etc.
diff --git a/src/database/mod.rs b/src/database/mod.rs
new file mode 100644
index 0000000..8da6115
--- /dev/null
+++ b/src/database/mod.rs
@@ -0,0 +1 @@
+pub mod format;
\ No newline at end of file
diff --git a/src/main.rs b/src/main.rs
index 5f958d9..c830b21 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -3,6 +3,7 @@
 
 pub mod package_managers;
 pub mod cli;
+pub mod database;
 
 use std::path::{self, PathBuf};
 use package_managers::{detect_available_managers, PackageManager, PackageSource};
@@ -80,7 +81,7 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
             handle_find_command(&managers, name, package, size)?;
         }
         Commands::Scan { force, paths , format, detailed, sizes, include_expected } => {
-            handle_scan_command(&managers, force, paths, format, detailed, sizes, include_expected)?;
+            handle_scan_command(&managers, paths, format, force, detailed, sizes, include_expected)?;
         }
     }
 
@@ -221,14 +222,19 @@ fn handle_find_command(
 }
 
+/// Runs an orphan-file scan over `paths` and prints the results in `format`.
+///
+/// `_force` is passed through from the CLI but is not consulted here yet.
 fn handle_scan_command(
-    _managers: &[Box<dyn PackageManager>],
+    managers: &[Box<dyn PackageManager>],
+    paths: Vec<String>,
+    format: OutputFormat,
     _force: bool,
-    _paths: Vec<String>,
-    _format: OutputFormat,
-    _detailed: bool,
-    _sizes: bool,
-    _include_expected: bool
+    detailed: bool,
+    sizes: bool,
+    include_expected: bool,
 ) -> Result<(), Box<dyn std::error::Error>> {
-    println!("Scan functionality not yet implemented");
+    let scanner = cli::scan::Scanner::new(managers);
+    let results = scanner.scan_paths(&paths, include_expected)?;
+    cli::scan::print_scan_results(&results, &format, detailed, sizes)?;
     Ok(())
 }
\ No newline at end of file