Updated to have scan command now

This commit is contained in:
Russell 2025-07-28 18:30:53 +02:00
parent 6b4d342f3f
commit 8108221bd0
7 changed files with 451 additions and 10 deletions

26
Cargo.lock generated
View File

@ -140,6 +140,7 @@ dependencies = [
"memmap2",
"regex",
"serde",
"serde_json",
"walkdir",
]
@ -168,6 +169,7 @@ dependencies = [
"iana-time-zone",
"js-sys",
"num-traits",
"serde",
"wasm-bindgen",
"windows-link",
]
@ -305,6 +307,12 @@ version = "1.70.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf"
[[package]]
name = "itoa"
version = "1.0.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c"
[[package]]
name = "js-sys"
version = "0.3.77"
@ -457,6 +465,12 @@ version = "1.0.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8a0d197bd2c9dc6e53b84da9556a69ba4cdfab8619eb41a8bd1cc2027a0f6b1d"
[[package]]
name = "ryu"
version = "1.0.20"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f"
[[package]]
name = "same-file"
version = "1.0.6"
@ -486,6 +500,18 @@ dependencies = [
"syn",
]
[[package]]
name = "serde_json"
version = "1.0.141"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "30b9eff21ebe718216c6ec64e1d9ac57087aad11efc64e32002bce4a0d4c03d3"
dependencies = [
"itoa",
"memchr",
"ryu",
"serde",
]
[[package]]
name = "shlex"
version = "1.3.0"

View File

@ -8,6 +8,7 @@ clap = { version = "4.0", features = ["derive"] }
memmap2 = "0.9"
inotify = "0.10"
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
walkdir = "2.0"
regex = "1.0"
chrono = "0.4"
chrono = { version = "0.4", features = ["serde"] }

View File

@ -1,4 +1,5 @@
pub mod tree;
pub mod scan;
use clap::{Parser, Subcommand};

408
src/cli/scan.rs Normal file
View File

@ -0,0 +1,408 @@
use crate::{CartoError, PackageManager};
use crate::database::format::{OrphanCategory, ScanSummary};
use crate::cli::OutputFormat;
use crate::package_managers::find_file_owner;
use std::path::{Path, PathBuf};
use std::collections::HashMap;
use walkdir::WalkDir;
use serde::{Serialize, Deserialize};
use chrono::{DateTime, Utc};
/// A regular file for which the ownership lookup found no owner during a scan.
///
/// Instances are created by `Scanner::scan_directory` with a placeholder
/// category and re-labelled afterwards by `Scanner::categorize_orphans`.
#[derive(Debug, Clone, Serialize)]
pub struct OrphanFile {
/// Path of the file as yielded by the directory walk.
pub path: PathBuf,
/// Heuristic classification assigned by `Scanner::categorize_file`.
pub category: OrphanCategory,
/// File length in bytes, taken from the file's metadata.
pub size: u64,
/// Last-modification time in whole seconds since the Unix epoch
/// (0 when the timestamp is unavailable or predates the epoch).
pub modified: u64,
/// Creation time in whole seconds since the Unix epoch
/// (0 when the timestamp is unavailable or predates the epoch).
pub created: u64,
}
/// Aggregated outcome of one `Scanner::scan_paths` run; serialized as-is for JSON output.
#[derive(Debug, Serialize)]
pub struct ScanResults {
/// Wall-clock UTC time captured when the results object was created.
pub scan_time: DateTime<Utc>,
/// The root paths this scan was asked to cover.
pub scanned_paths: Vec<String>,
/// Count of regular files visited by the directory walk.
pub total_files_scanned: u64,
/// Orphan count as filled in by `Scanner::scan_paths`.
pub total_orphans: usize,
/// Orphans grouped under the name of their `OrphanCategory`.
pub orphans_by_category: HashMap<String, Vec<OrphanFile>>,
/// Combined size in bytes of orphans, as filled in by `Scanner::scan_paths`.
pub total_orphan_size: u64,
/// Elapsed scan time in (fractional) seconds.
pub scan_duration_seconds: f64,
}
impl ScanResults {
pub fn new(scanned_paths: Vec<String>) -> Self {
Self {
scan_time: Utc::now(),
scanned_paths,
total_files_scanned: 0,
total_orphans: 0,
orphans_by_category: HashMap::new(),
total_orphan_size: 0,
scan_duration_seconds: 0.0,
}
}
}
/// Walks the filesystem looking for files that no package manager owns.
pub struct Scanner<'a> {
/// Package managers supplied by the caller.
/// NOTE(review): none of the methods visible in this file read this field;
/// ownership checks go through `find_file_owner` instead — confirm intent.
managers: &'a [Box<dyn PackageManager>],
/// Path prefixes that are never descended into (populated in `Scanner::new`).
exclude_paths: Vec<PathBuf>,
}
impl<'a> Scanner<'a> {
/// Builds a scanner over `managers` with the built-in exclusion list.
///
/// The exclusion list covers kernel/virtual filesystems (`/proc`, `/sys`,
/// `/dev`), runtime state (`/run`, `/var/run`) and the X11/ICE socket
/// directories under `/tmp`.
pub fn new(managers: &'a [Box<dyn PackageManager>]) -> Self {
    const DEFAULT_EXCLUDES: [&str; 7] = [
        "/proc",
        "/sys",
        "/dev",
        "/run",
        "/var/run",
        "/tmp/.X11-unix",
        "/tmp/.ICE-unix",
    ];

    Self {
        managers,
        exclude_paths: DEFAULT_EXCLUDES.iter().map(PathBuf::from).collect(),
    }
}
/// Scans every path in `paths` (or `/` when `paths` is empty) for orphan files.
///
/// Paths that do not exist are skipped with a warning. After walking, each
/// orphan is categorized and grouped; orphans in the `Expected` category are
/// left out of the results unless `include_expected` is set.
///
/// Status messages are written to stderr so that stdout carries nothing but
/// the rendered report (important for machine-readable formats such as JSON).
///
/// `total_orphans` and `total_orphan_size` describe exactly the orphans
/// retained in `orphans_by_category`, so the totals always agree with the
/// per-category breakdown.
///
/// # Errors
///
/// Propagates any `CartoError` returned by `scan_directory`.
pub fn scan_paths(
    &self,
    paths: &[String],
    include_expected: bool,
) -> Result<ScanResults, CartoError> {
    let start_time = std::time::Instant::now();

    let scan_paths = if paths.is_empty() {
        vec!["/".to_string()]
    } else {
        paths.to_vec()
    };

    let mut results = ScanResults::new(scan_paths.clone());
    let mut orphans: Vec<OrphanFile> = Vec::new();

    for path_str in &scan_paths {
        let path = Path::new(path_str);
        if !path.exists() {
            eprintln!("Warning: Path {} does not exist, skipping", path_str);
            continue;
        }
        // Status goes to stderr: stdout is reserved for the final report.
        eprintln!("Scanning: {}", path_str);
        self.scan_directory(path, &mut orphans, &mut results)?;
    }

    self.categorize_orphans(&mut orphans, include_expected, &mut results);

    // Derive the totals from what was actually retained, so filtered-out
    // `Expected` orphans do not inflate the headline numbers.
    results.total_orphans = results.orphans_by_category.values().map(Vec::len).sum();
    results.total_orphan_size = results
        .orphans_by_category
        .values()
        .flatten()
        .map(|orphan| orphan.size)
        .sum();
    results.scan_duration_seconds = start_time.elapsed().as_secs_f64();

    Ok(results)
}
/// Walks `root` (without following symlinks) and appends every unowned
/// regular file to `orphans`.
///
/// Entries rejected by `should_exclude_path` are pruned from the walk.
/// `results.total_files_scanned` is incremented for each regular file seen.
/// Walk errors and metadata failures are reported on stderr and the affected
/// entry is skipped; they never abort the scan. Progress is likewise written
/// to stderr so stdout stays clean for the final report.
///
/// New orphans are recorded with `OrphanCategory::Unknown`; categorization
/// happens in a later pass.
///
/// # Errors
///
/// Propagates any `CartoError` returned by `is_file_owned`.
fn scan_directory(
    &self,
    root: &Path,
    orphans: &mut Vec<OrphanFile>,
    results: &mut ScanResults,
) -> Result<(), CartoError> {
    /// Number of scanned files between two progress reports.
    const PROGRESS_INTERVAL: u64 = 10_000;

    // Converts a (possibly unavailable) timestamp to whole seconds since the
    // Unix epoch, falling back to 0 when it is missing or predates the epoch.
    let epoch_secs = |stamp: std::io::Result<std::time::SystemTime>| -> u64 {
        stamp
            .unwrap_or(std::time::UNIX_EPOCH)
            .duration_since(std::time::UNIX_EPOCH)
            .unwrap_or_default()
            .as_secs()
    };

    let walker = WalkDir::new(root)
        .follow_links(false)
        .into_iter()
        .filter_entry(|e| !self.should_exclude_path(e.path()));

    for entry in walker {
        let entry = match entry {
            Ok(entry) => entry,
            Err(e) => {
                eprintln!(
                    "Warning: Error accessing {}: {}",
                    e.path().unwrap_or(Path::new("unknown")).display(),
                    e
                );
                continue;
            }
        };

        if !entry.file_type().is_file() {
            continue;
        }
        results.total_files_scanned += 1;

        let path = entry.path();
        if !self.is_file_owned(path)? {
            match entry.metadata() {
                Ok(metadata) => orphans.push(OrphanFile {
                    path: path.to_path_buf(),
                    // Placeholder; the real category is assigned afterwards.
                    category: OrphanCategory::Unknown,
                    size: metadata.len(),
                    modified: epoch_secs(metadata.modified()),
                    created: epoch_secs(metadata.created()),
                }),
                // Previously dropped silently; surface it so a missing orphan
                // can be explained.
                Err(e) => eprintln!(
                    "Warning: Could not read metadata for {}: {}",
                    path.display(),
                    e
                ),
            }
        }

        // Progress indicator for large scans (stderr keeps stdout parseable).
        if results.total_files_scanned % PROGRESS_INTERVAL == 0 {
            eprintln!(
                "Scanned {} files, found {} orphans",
                results.total_files_scanned,
                orphans.len()
            );
        }
    }

    Ok(())
}
/// Reports whether `path` must be left out of the scan.
///
/// A path is excluded when it lies under one of `self.exclude_paths`
/// (`Path::starts_with` comparison), when its lossy string form contains one
/// of a few well-known directory fragments, or when that string begins with
/// a removable-media / mount-point prefix.
fn should_exclude_path(&self, path: &Path) -> bool {
    // Fragments that exclude a path wherever they occur in it.
    const EXCLUDED_FRAGMENTS: [&str; 3] = ["/.git/", "/.cache/", "/.local/share/Trash/"];
    // Common mount points, matched only at the start of the path.
    const EXCLUDED_MOUNT_PREFIXES: [&str; 2] = ["/media/", "/mnt/"];

    if self
        .exclude_paths
        .iter()
        .any(|excluded| path.starts_with(excluded))
    {
        return true;
    }

    let text = path.to_string_lossy();
    EXCLUDED_FRAGMENTS
        .iter()
        .any(|fragment| text.contains(*fragment))
        || EXCLUDED_MOUNT_PREFIXES
            .iter()
            .any(|prefix| text.starts_with(*prefix))
}
/// Reports whether `find_file_owner` finds an owner for `path`.
///
/// A lookup failure is logged to stderr and treated as "not owned" so that
/// one problematic file cannot abort the whole scan; as written, this method
/// therefore always returns `Ok`.
///
/// NOTE(review): the lookup does not receive `self.managers` — confirm that
/// `find_file_owner` consults the intended package managers on its own.
fn is_file_owned(&self, path: &Path) -> Result<bool, CartoError> {
    let owned = find_file_owner(path)
        .map(|owner| owner.is_some())
        .unwrap_or_else(|err| {
            // Don't fail the entire scan for individual file errors
            eprintln!(
                "Warning: Error checking ownership of {}: {}",
                path.display(),
                err
            );
            false
        });

    Ok(owned)
}
/// Assigns a category to every orphan and files a copy of it under that
/// category's name in `results.orphans_by_category`.
///
/// Every element of `orphans` gets its `category` field updated. Orphans
/// categorized as `Expected` are only added to `results` when
/// `include_expected` is true.
fn categorize_orphans(
    &self,
    orphans: &mut [OrphanFile],
    include_expected: bool,
    results: &mut ScanResults,
) {
    for entry in orphans.iter_mut() {
        entry.category = self.categorize_file(&entry.path);

        let suppressed = !include_expected && entry.category == OrphanCategory::Expected;
        if suppressed {
            continue;
        }

        let bucket_key = entry.category.name().to_string();
        results
            .orphans_by_category
            .entry(bucket_key)
            .or_default()
            .push(entry.clone());
    }
}
/// Classifies an orphan purely from its path, first matching rule wins:
///
/// 1. temp / log / cache / spool trees under `/tmp` and `/var` → `Expected`
/// 2. `/home/` or `/root/` → `UserData`
/// 3. `/var/lib/`, `/var/run/`, or any `/.cache/` / `/cache/` segment →
///    `SystemGenerated`
/// 4. backup-style extension (`bak`, `orig`, `old`, `backup`) → `ConfigBackup`
/// 5. temp-style file name (`tmp*`, `.tmp*`, `*.tmp`, `*~`) → `Temporary`
/// 6. anything else → `Unknown`
///
/// Extension and file-name checks are case-insensitive.
fn categorize_file(&self, path: &Path) -> OrphanCategory {
    const EXPECTED_ROOTS: &[&str] = &[
        "/tmp/",
        "/var/tmp/",
        "/var/log/",
        "/var/cache/",
        "/var/spool/",
    ];
    const USER_ROOTS: &[&str] = &["/home/", "/root/"];
    const SYSTEM_ROOTS: &[&str] = &["/var/lib/", "/var/run/"];
    const CACHE_FRAGMENTS: &[&str] = &["/.cache/", "/cache/"];
    const BACKUP_EXTENSIONS: &[&str] = &["bak", "orig", "old", "backup"];

    /// True when `text` begins with any of `roots`.
    fn begins_with_any(text: &str, roots: &[&str]) -> bool {
        roots.iter().any(|root| text.starts_with(*root))
    }

    let text = path.to_string_lossy();

    if begins_with_any(&text, EXPECTED_ROOTS) {
        return OrphanCategory::Expected;
    }

    if begins_with_any(&text, USER_ROOTS) {
        return OrphanCategory::UserData;
    }

    let looks_system_generated = begins_with_any(&text, SYSTEM_ROOTS)
        || CACHE_FRAGMENTS
            .iter()
            .any(|fragment| text.contains(*fragment));
    if looks_system_generated {
        return OrphanCategory::SystemGenerated;
    }

    let lowered_ext = path
        .extension()
        .map(|ext| ext.to_string_lossy().to_lowercase());
    if let Some(ext) = lowered_ext {
        if BACKUP_EXTENSIONS.contains(&ext.as_str()) {
            return OrphanCategory::ConfigBackup;
        }
    }

    let lowered_name = path
        .file_name()
        .map(|name| name.to_string_lossy().to_lowercase());
    if let Some(name) = lowered_name {
        let looks_temporary = name.starts_with("tmp")
            || name.starts_with(".tmp")
            || name.ends_with(".tmp")
            || name.ends_with("~");
        if looks_temporary {
            return OrphanCategory::Temporary;
        }
    }

    OrphanCategory::Unknown
}
}
/// Writes `results` to stdout using the renderer selected by `format`.
///
/// `show_sizes` is passed on to the summary, detailed and tree renderers;
/// the JSON and simple renderers do not take it.
///
/// NOTE(review): `detailed` is accepted but not consulted by this function —
/// the level of detail is chosen by `format` alone. Confirm whether the flag
/// is meant to have an effect.
///
/// # Errors
///
/// Returns whatever error the selected renderer reports.
pub fn print_scan_results(
    results: &ScanResults,
    format: &OutputFormat,
    detailed: bool,
    show_sizes: bool,
) -> Result<(), CartoError> {
    match *format {
        // Renderers that honour `show_sizes`.
        OutputFormat::Summary => print_summary_results(results, show_sizes),
        OutputFormat::Detailed => print_detailed_results(results, show_sizes),
        OutputFormat::Tree => print_tree_results(results, show_sizes),
        // Renderers with a fixed layout.
        OutputFormat::Simple => print_simple_results(results),
        OutputFormat::Json => print_json_results(results),
    }
}
/// Prints `results` to stdout as pretty-printed JSON.
///
/// # Errors
///
/// Returns `CartoError::CommandFailed` when serialization fails.
fn print_json_results(results: &ScanResults) -> Result<(), CartoError> {
    let rendered = serde_json::to_string_pretty(results).map_err(|err| {
        CartoError::CommandFailed(format!("JSON serialization failed: {}", err))
    })?;

    println!("{}", rendered);
    Ok(())
}
/// Prints the scan header (time, duration, paths, totals) followed by one
/// line per orphan category.
///
/// Categories are listed in alphabetical order of their name. The backing
/// `HashMap` iterates in arbitrary order, so sorting is what makes the
/// report identical from one run to the next.
///
/// When `show_sizes` is true the overall and per-category byte totals are
/// included, formatted with `format_size`.
///
/// This function currently always returns `Ok(())`.
fn print_summary_results(results: &ScanResults, show_sizes: bool) -> Result<(), CartoError> {
    println!("Scan Summary");
    println!("============");
    println!("Scan completed: {}", results.scan_time.format("%Y-%m-%d %H:%M:%S UTC"));
    println!("Duration: {:.2} seconds", results.scan_duration_seconds);
    println!("Scanned paths: {}", results.scanned_paths.join(", "));
    println!("Total files scanned: {}", results.total_files_scanned);
    println!("Total orphan files: {}", results.total_orphans);
    if show_sizes {
        println!("Total orphan size: {}", format_size(results.total_orphan_size));
    }

    // Sort by category name for a deterministic listing.
    let mut categories: Vec<_> = results.orphans_by_category.iter().collect();
    categories.sort_unstable_by(|a, b| a.0.cmp(b.0));

    println!("\nOrphans by Category:");
    for (category, orphans) in categories {
        if show_sizes {
            let category_size: u64 = orphans.iter().map(|o| o.size).sum();
            println!(" {}: {} files ({})", category, orphans.len(), format_size(category_size));
        } else {
            println!(" {}: {} files", category, orphans.len());
        }
    }

    Ok(())
}
/// Prints the summary followed by a per-category file listing.
///
/// Categories are listed in alphabetical order of their name (the backing
/// `HashMap` iterates in arbitrary order, so they are sorted first to keep
/// the report stable between runs). At most the first 20 files of each
/// category are shown; the remainder is reported as a count.
///
/// When `show_sizes` is true each file is followed by its formatted size.
///
/// # Errors
///
/// Propagates any error from `print_summary_results`.
fn print_detailed_results(results: &ScanResults, show_sizes: bool) -> Result<(), CartoError> {
    /// Maximum number of files listed per category.
    const MAX_LISTED: usize = 20;

    print_summary_results(results, show_sizes)?;

    // Sort by category name for a deterministic listing.
    let mut categories: Vec<_> = results.orphans_by_category.iter().collect();
    categories.sort_unstable_by(|a, b| a.0.cmp(b.0));

    println!("\nDetailed File Listings:");
    for (category, orphans) in categories {
        println!("\n{} ({} files):", category.to_uppercase(), orphans.len());
        println!("{}", "=".repeat(50));

        for orphan in orphans.iter().take(MAX_LISTED) {
            if show_sizes {
                println!(" {} ({})", orphan.path.display(), format_size(orphan.size));
            } else {
                println!(" {}", orphan.path.display());
            }
        }

        if orphans.len() > MAX_LISTED {
            println!(" ... and {} more files", orphans.len() - MAX_LISTED);
        }
    }

    Ok(())
}
/// Prints a one-line headline followed by `category: count` lines.
///
/// Categories are listed in alphabetical order of their name; the backing
/// `HashMap` iterates in arbitrary order, so they are sorted first to keep
/// the output stable between runs (useful when it is diffed or grepped).
///
/// This function currently always returns `Ok(())`.
fn print_simple_results(results: &ScanResults) -> Result<(), CartoError> {
    println!(
        "Found {} orphan files in {:.1}s",
        results.total_orphans, results.scan_duration_seconds
    );

    // Sort by category name for a deterministic listing.
    let mut categories: Vec<_> = results.orphans_by_category.iter().collect();
    categories.sort_unstable_by(|a, b| a.0.cmp(b.0));

    for (category, orphans) in categories {
        println!("{}: {}", category, orphans.len());
    }

    Ok(())
}
fn print_tree_results(results: &ScanResults, show_sizes: bool) -> Result<(), CartoError> {
use std::collections::BTreeMap;
println!("Orphan Files by Directory Tree");
println!("==============================");
for (category, orphans) in &results.orphans_by_category {
println!("\n{} ({} files):", category.to_uppercase(), orphans.len());
// Group files by their parent directory
let mut dir_map: BTreeMap<PathBuf, Vec<&OrphanFile>> = BTreeMap::new();
for orphan in orphans {
if let Some(parent) = orphan.path.parent() {
dir_map.entry(parent.to_path_buf()).or_insert_with(Vec::new).push(orphan);
}
}
for (dir, files) in dir_map.iter().take(10) { // Limit directories shown
println!(" {}/", dir.display());
for file in files.iter().take(5) { // Limit files per directory
let filename = file.path.file_name().unwrap_or_default().to_string_lossy();
if show_sizes {
println!(" ├── {} ({})", filename, format_size(file.size));
} else {
println!(" ├── {}", filename);
}
}
if files.len() > 5 {
println!(" └── ... and {} more files", files.len() - 5);
}
}
if dir_map.len() > 10 {
println!(" ... and {} more directories", dir_map.len() - 10);
}
}
Ok(())
}
/// Formats a byte count for display using binary (1024-based) steps.
///
/// Values below 1024 are printed as an exact integer with the `B` unit.
/// Larger values are scaled to `KB`, `MB`, `GB` or `TB` and printed with one
/// decimal place; `TB` is the largest unit, so bigger values keep growing
/// numerically instead of switching unit.
fn format_size(bytes: u64) -> String {
    const UNITS: [&str; 5] = ["B", "KB", "MB", "GB", "TB"];
    const STEP: f64 = 1024.0;

    // Sub-kilobyte values are shown exactly, without a fractional part.
    if bytes < 1024 {
        return format!("{} {}", bytes, UNITS[0]);
    }

    let mut scaled = bytes as f64;
    let mut unit = 0usize;
    while scaled >= STEP && unit + 1 < UNITS.len() {
        scaled /= STEP;
        unit += 1;
    }

    format!("{:.1} {}", scaled, UNITS[unit])
}

View File

@ -2,6 +2,7 @@ use memmap2::Mmap;
use std::fs::File;
use std::path::Path;
use crate::{CartoError, PackageSource};
use serde::Serialize;
#[repr(C, packed)]
pub struct DatabaseHeader {
@ -64,7 +65,7 @@ pub struct ScanRecord {
scan_duration: u32, // Duration in seconds
}
#[derive(Debug, Clone, Copy, PartialEq)]
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize)]
pub enum OrphanCategory {
Unknown = 0, // Files with no clear ownership
Expected = 1, // Files in /tmp, /var/tmp, /var/log, etc.

1
src/database/mod.rs Normal file
View File

@ -0,0 +1 @@
pub mod format;

View File

@ -3,6 +3,7 @@
pub mod package_managers;
pub mod cli;
pub mod database;
use std::path::{self, PathBuf};
use package_managers::{detect_available_managers, PackageManager, PackageSource};
@ -80,7 +81,7 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
handle_find_command(&managers, name, package, size)?;
}
Commands::Scan { force, paths , format, detailed, sizes, include_expected } => {
handle_scan_command(&managers, force, paths, format, detailed, sizes, include_expected)?;
handle_scan_command(&managers, paths, format, force, detailed, sizes, include_expected)?;
}
}
@ -221,14 +222,16 @@ fn handle_find_command(
}
fn handle_scan_command(
_managers: &[Box<dyn PackageManager>],
managers: &[Box<dyn PackageManager>],
paths: Vec<String>,
format: OutputFormat,
_force: bool,
_paths: Vec<String>,
_format: OutputFormat,
_detailed: bool,
_sizes: bool,
_include_expected: bool
detailed: bool,
sizes: bool,
include_expected: bool,
) -> Result<(), Box<dyn std::error::Error>> {
println!("Scan functionality not yet implemented");
let scanner = cli::scan::Scanner::new(managers);
let results = scanner.scan_paths(&paths, include_expected)?;
cli::scan::print_scan_results(&results, &format, detailed, sizes)?;
Ok(())
}