Updated to have scan command now
This commit is contained in:
parent
6b4d342f3f
commit
8108221bd0
|
@ -140,6 +140,7 @@ dependencies = [
|
|||
"memmap2",
|
||||
"regex",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"walkdir",
|
||||
]
|
||||
|
||||
|
@ -168,6 +169,7 @@ dependencies = [
|
|||
"iana-time-zone",
|
||||
"js-sys",
|
||||
"num-traits",
|
||||
"serde",
|
||||
"wasm-bindgen",
|
||||
"windows-link",
|
||||
]
|
||||
|
@ -305,6 +307,12 @@ version = "1.70.1"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf"
|
||||
|
||||
[[package]]
|
||||
name = "itoa"
|
||||
version = "1.0.15"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c"
|
||||
|
||||
[[package]]
|
||||
name = "js-sys"
|
||||
version = "0.3.77"
|
||||
|
@ -457,6 +465,12 @@ version = "1.0.21"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8a0d197bd2c9dc6e53b84da9556a69ba4cdfab8619eb41a8bd1cc2027a0f6b1d"
|
||||
|
||||
[[package]]
|
||||
name = "ryu"
|
||||
version = "1.0.20"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f"
|
||||
|
||||
[[package]]
|
||||
name = "same-file"
|
||||
version = "1.0.6"
|
||||
|
@ -486,6 +500,18 @@ dependencies = [
|
|||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde_json"
|
||||
version = "1.0.141"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "30b9eff21ebe718216c6ec64e1d9ac57087aad11efc64e32002bce4a0d4c03d3"
|
||||
dependencies = [
|
||||
"itoa",
|
||||
"memchr",
|
||||
"ryu",
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "shlex"
|
||||
version = "1.3.0"
|
||||
|
|
|
@ -8,6 +8,7 @@ clap = { version = "4.0", features = ["derive"] }
|
|||
memmap2 = "0.9"
|
||||
inotify = "0.10"
|
||||
serde = { version = "1.0", features = ["derive"] }
|
||||
serde_json = "1.0"
|
||||
walkdir = "2.0"
|
||||
regex = "1.0"
|
||||
chrono = "0.4"
|
||||
chrono = { version = "0.4", features = ["serde"] }
|
|
@ -1,4 +1,5 @@
|
|||
pub mod tree;
|
||||
pub mod scan;
|
||||
|
||||
use clap::{Parser, Subcommand};
|
||||
|
||||
|
|
|
@ -0,0 +1,408 @@
|
|||
use crate::{CartoError, PackageManager};
|
||||
use crate::database::format::{OrphanCategory, ScanSummary};
|
||||
use crate::cli::OutputFormat;
|
||||
use crate::package_managers::find_file_owner;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::collections::HashMap;
|
||||
use walkdir::WalkDir;
|
||||
use serde::{Serialize, Deserialize};
|
||||
use chrono::{DateTime, Utc};
|
||||
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
pub struct OrphanFile {
|
||||
pub path: PathBuf,
|
||||
pub category: OrphanCategory,
|
||||
pub size: u64,
|
||||
pub modified: u64,
|
||||
pub created: u64,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize)]
|
||||
pub struct ScanResults {
|
||||
pub scan_time: DateTime<Utc>,
|
||||
pub scanned_paths: Vec<String>,
|
||||
pub total_files_scanned: u64,
|
||||
pub total_orphans: usize,
|
||||
pub orphans_by_category: HashMap<String, Vec<OrphanFile>>,
|
||||
pub total_orphan_size: u64,
|
||||
pub scan_duration_seconds: f64,
|
||||
}
|
||||
|
||||
impl ScanResults {
|
||||
pub fn new(scanned_paths: Vec<String>) -> Self {
|
||||
Self {
|
||||
scan_time: Utc::now(),
|
||||
scanned_paths,
|
||||
total_files_scanned: 0,
|
||||
total_orphans: 0,
|
||||
orphans_by_category: HashMap::new(),
|
||||
total_orphan_size: 0,
|
||||
scan_duration_seconds: 0.0,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub struct Scanner<'a> {
|
||||
managers: &'a [Box<dyn PackageManager>],
|
||||
exclude_paths: Vec<PathBuf>,
|
||||
}
|
||||
impl<'a> Scanner<'a> {
|
||||
pub fn new(managers: &'a [Box<dyn PackageManager>]) -> Self {
|
||||
let exclude_paths = vec![
|
||||
PathBuf::from("/proc"),
|
||||
PathBuf::from("/sys"),
|
||||
PathBuf::from("/dev"),
|
||||
PathBuf::from("/run"),
|
||||
PathBuf::from("/var/run"),
|
||||
PathBuf::from("/tmp/.X11-unix"),
|
||||
PathBuf::from("/tmp/.ICE-unix"),
|
||||
];
|
||||
|
||||
Self {
|
||||
managers,
|
||||
exclude_paths,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn scan_paths(
|
||||
&self,
|
||||
paths: &[String],
|
||||
include_expected: bool,
|
||||
) -> Result<ScanResults, CartoError> {
|
||||
let start_time = std::time::Instant::now();
|
||||
let scan_paths = if paths.is_empty() {
|
||||
vec!["/".to_string()]
|
||||
} else {
|
||||
paths.to_vec()
|
||||
};
|
||||
|
||||
let mut results = ScanResults::new(scan_paths.clone());
|
||||
let mut orphans: Vec<OrphanFile> = Vec::new();
|
||||
|
||||
for path_str in &scan_paths {
|
||||
let path = Path::new(path_str);
|
||||
if !path.exists() {
|
||||
eprintln!("Warning: Path {} does not exist, skipping", path_str);
|
||||
continue;
|
||||
}
|
||||
|
||||
println!("Scanning: {}", path_str);
|
||||
self.scan_directory(path, &mut orphans, &mut results)?;
|
||||
}
|
||||
|
||||
// Categorize orphans
|
||||
self.categorize_orphans(&mut orphans, include_expected, &mut results);
|
||||
|
||||
results.total_orphans = orphans.len();
|
||||
results.total_orphan_size = orphans.iter().map(|o| o.size).sum();
|
||||
results.scan_duration_seconds = start_time.elapsed().as_secs_f64();
|
||||
|
||||
Ok(results)
|
||||
}
|
||||
|
||||
fn scan_directory(
|
||||
&self,
|
||||
root: &Path,
|
||||
orphans: &mut Vec<OrphanFile>,
|
||||
results: &mut ScanResults,
|
||||
) -> Result<(), CartoError> {
|
||||
let walker = WalkDir::new(root)
|
||||
.follow_links(false)
|
||||
.into_iter()
|
||||
.filter_entry(|e| !self.should_exclude_path(e.path()));
|
||||
|
||||
for entry in walker {
|
||||
let entry = match entry {
|
||||
Ok(entry) => entry,
|
||||
Err(e) => {
|
||||
eprintln!("Warning: Error accessing {}: {}", e.path().unwrap_or(Path::new("unknown")).display(), e);
|
||||
continue;
|
||||
}
|
||||
};
|
||||
|
||||
if !entry.file_type().is_file() {
|
||||
continue;
|
||||
}
|
||||
|
||||
results.total_files_scanned += 1;
|
||||
|
||||
// Check if file is owned by any package manager
|
||||
let path = entry.path();
|
||||
let is_owned = self.is_file_owned(path)?;
|
||||
|
||||
if !is_owned {
|
||||
if let Ok(metadata) = entry.metadata() {
|
||||
let orphan = OrphanFile {
|
||||
path: path.to_path_buf(),
|
||||
category: OrphanCategory::Unknown, // Will be categorized later
|
||||
size: metadata.len(),
|
||||
modified: metadata.modified()
|
||||
.unwrap_or(std::time::UNIX_EPOCH)
|
||||
.duration_since(std::time::UNIX_EPOCH)
|
||||
.unwrap_or_default()
|
||||
.as_secs(),
|
||||
created: metadata.created()
|
||||
.unwrap_or(std::time::UNIX_EPOCH)
|
||||
.duration_since(std::time::UNIX_EPOCH)
|
||||
.unwrap_or_default()
|
||||
.as_secs(),
|
||||
};
|
||||
orphans.push(orphan);
|
||||
}
|
||||
}
|
||||
|
||||
// Progress indicator for large scans
|
||||
if results.total_files_scanned % 10000 == 0 {
|
||||
println!("Scanned {} files, found {} orphans",
|
||||
results.total_files_scanned, orphans.len());
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn should_exclude_path(&self, path: &Path) -> bool {
|
||||
for exclude in &self.exclude_paths {
|
||||
if path.starts_with(exclude) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
// Additional exclusions for common mount points and special directories
|
||||
let path_str = path.to_string_lossy();
|
||||
if path_str.contains("/.git/") ||
|
||||
path_str.contains("/.cache/") ||
|
||||
path_str.contains("/.local/share/Trash/") ||
|
||||
path_str.starts_with("/media/") ||
|
||||
path_str.starts_with("/mnt/") {
|
||||
return true;
|
||||
}
|
||||
|
||||
false
|
||||
}
|
||||
|
||||
fn is_file_owned(&self, path: &Path) -> Result<bool, CartoError> {
|
||||
// Check all package managers to see if any owns this file
|
||||
match find_file_owner(path) {
|
||||
Ok(Some(_)) => Ok(true),
|
||||
Ok(None) => Ok(false),
|
||||
Err(e) => {
|
||||
// Don't fail the entire scan for individual file errors
|
||||
eprintln!("Warning: Error checking ownership of {}: {}", path.display(), e);
|
||||
Ok(false)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn categorize_orphans(
|
||||
&self,
|
||||
orphans: &mut [OrphanFile],
|
||||
include_expected: bool,
|
||||
results: &mut ScanResults,
|
||||
) {
|
||||
for orphan in orphans.iter_mut() {
|
||||
orphan.category = self.categorize_file(&orphan.path);
|
||||
}
|
||||
|
||||
// Group by category
|
||||
for orphan in orphans.iter() {
|
||||
if !include_expected && orphan.category == OrphanCategory::Expected {
|
||||
continue;
|
||||
}
|
||||
|
||||
let category_name = orphan.category.name().to_string();
|
||||
results.orphans_by_category
|
||||
.entry(category_name)
|
||||
.or_insert_with(Vec::new)
|
||||
.push(orphan.clone());
|
||||
}
|
||||
}
|
||||
|
||||
fn categorize_file(&self, path: &Path) -> OrphanCategory {
|
||||
let path_str = path.to_string_lossy();
|
||||
|
||||
// Expected orphans in common temporary/log locations
|
||||
if path_str.starts_with("/tmp/") ||
|
||||
path_str.starts_with("/var/tmp/") ||
|
||||
path_str.starts_with("/var/log/") ||
|
||||
path_str.starts_with("/var/cache/") ||
|
||||
path_str.starts_with("/var/spool/") {
|
||||
return OrphanCategory::Expected;
|
||||
}
|
||||
|
||||
// User data in home directories
|
||||
if path_str.starts_with("/home/") ||
|
||||
path_str.starts_with("/root/") {
|
||||
return OrphanCategory::UserData;
|
||||
}
|
||||
|
||||
// System generated files
|
||||
if path_str.starts_with("/var/lib/") ||
|
||||
path_str.starts_with("/var/run/") ||
|
||||
path_str.contains("/.cache/") ||
|
||||
path_str.contains("/cache/") {
|
||||
return OrphanCategory::SystemGenerated;
|
||||
}
|
||||
|
||||
// Configuration backups
|
||||
if let Some(extension) = path.extension() {
|
||||
let ext = extension.to_string_lossy().to_lowercase();
|
||||
if ext == "bak" || ext == "orig" || ext == "old" || ext == "backup" {
|
||||
return OrphanCategory::ConfigBackup;
|
||||
}
|
||||
}
|
||||
|
||||
// Temporary files in unusual locations
|
||||
if let Some(filename) = path.file_name() {
|
||||
let name = filename.to_string_lossy().to_lowercase();
|
||||
if name.starts_with("tmp") ||
|
||||
name.starts_with(".tmp") ||
|
||||
name.ends_with(".tmp") ||
|
||||
name.ends_with("~") {
|
||||
return OrphanCategory::Temporary;
|
||||
}
|
||||
}
|
||||
|
||||
OrphanCategory::Unknown
|
||||
}
|
||||
}
|
||||
|
||||
pub fn print_scan_results(
|
||||
results: &ScanResults,
|
||||
format: &OutputFormat,
|
||||
detailed: bool,
|
||||
show_sizes: bool,
|
||||
) -> Result<(), CartoError> {
|
||||
match format {
|
||||
OutputFormat::Json => print_json_results(results),
|
||||
OutputFormat::Summary => print_summary_results(results, show_sizes),
|
||||
OutputFormat::Detailed => print_detailed_results(results, show_sizes),
|
||||
OutputFormat::Simple => print_simple_results(results),
|
||||
OutputFormat::Tree => print_tree_results(results, show_sizes),
|
||||
}
|
||||
}
|
||||
|
||||
fn print_json_results(results: &ScanResults) -> Result<(), CartoError> {
|
||||
match serde_json::to_string_pretty(results) {
|
||||
Ok(json) => {
|
||||
println!("{}", json);
|
||||
Ok(())
|
||||
}
|
||||
Err(e) => Err(CartoError::CommandFailed(format!("JSON serialization failed: {}", e))),
|
||||
}
|
||||
}
|
||||
|
||||
fn print_summary_results(results: &ScanResults, show_sizes: bool) -> Result<(), CartoError> {
|
||||
println!("Scan Summary");
|
||||
println!("============");
|
||||
println!("Scan completed: {}", results.scan_time.format("%Y-%m-%d %H:%M:%S UTC"));
|
||||
println!("Duration: {:.2} seconds", results.scan_duration_seconds);
|
||||
println!("Scanned paths: {}", results.scanned_paths.join(", "));
|
||||
println!("Total files scanned: {}", results.total_files_scanned);
|
||||
println!("Total orphan files: {}", results.total_orphans);
|
||||
|
||||
if show_sizes {
|
||||
println!("Total orphan size: {}", format_size(results.total_orphan_size));
|
||||
}
|
||||
|
||||
println!("\nOrphans by Category:");
|
||||
for (category, orphans) in &results.orphans_by_category {
|
||||
let category_size: u64 = orphans.iter().map(|o| o.size).sum();
|
||||
if show_sizes {
|
||||
println!(" {}: {} files ({})", category, orphans.len(), format_size(category_size));
|
||||
} else {
|
||||
println!(" {}: {} files", category, orphans.len());
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn print_detailed_results(results: &ScanResults, show_sizes: bool) -> Result<(), CartoError> {
|
||||
print_summary_results(results, show_sizes)?;
|
||||
|
||||
println!("\nDetailed File Listings:");
|
||||
for (category, orphans) in &results.orphans_by_category {
|
||||
println!("\n{} ({} files):", category.to_uppercase(), orphans.len());
|
||||
println!("{}", "=".repeat(50));
|
||||
|
||||
for orphan in orphans.iter().take(20) { // Limit to first 20 per category
|
||||
if show_sizes {
|
||||
println!(" {} ({})", orphan.path.display(), format_size(orphan.size));
|
||||
} else {
|
||||
println!(" {}", orphan.path.display());
|
||||
}
|
||||
}
|
||||
|
||||
if orphans.len() > 20 {
|
||||
println!(" ... and {} more files", orphans.len() - 20);
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn print_simple_results(results: &ScanResults) -> Result<(), CartoError> {
|
||||
println!("Found {} orphan files in {:.1}s", results.total_orphans, results.scan_duration_seconds);
|
||||
for (category, orphans) in &results.orphans_by_category {
|
||||
println!("{}: {}", category, orphans.len());
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn print_tree_results(results: &ScanResults, show_sizes: bool) -> Result<(), CartoError> {
|
||||
use std::collections::BTreeMap;
|
||||
|
||||
println!("Orphan Files by Directory Tree");
|
||||
println!("==============================");
|
||||
|
||||
for (category, orphans) in &results.orphans_by_category {
|
||||
println!("\n{} ({} files):", category.to_uppercase(), orphans.len());
|
||||
|
||||
// Group files by their parent directory
|
||||
let mut dir_map: BTreeMap<PathBuf, Vec<&OrphanFile>> = BTreeMap::new();
|
||||
for orphan in orphans {
|
||||
if let Some(parent) = orphan.path.parent() {
|
||||
dir_map.entry(parent.to_path_buf()).or_insert_with(Vec::new).push(orphan);
|
||||
}
|
||||
}
|
||||
|
||||
for (dir, files) in dir_map.iter().take(10) { // Limit directories shown
|
||||
println!(" {}/", dir.display());
|
||||
for file in files.iter().take(5) { // Limit files per directory
|
||||
let filename = file.path.file_name().unwrap_or_default().to_string_lossy();
|
||||
if show_sizes {
|
||||
println!(" ├── {} ({})", filename, format_size(file.size));
|
||||
} else {
|
||||
println!(" ├── {}", filename);
|
||||
}
|
||||
}
|
||||
if files.len() > 5 {
|
||||
println!(" └── ... and {} more files", files.len() - 5);
|
||||
}
|
||||
}
|
||||
|
||||
if dir_map.len() > 10 {
|
||||
println!(" ... and {} more directories", dir_map.len() - 10);
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Renders a byte count as a human-readable string using 1024-based units.
///
/// Values below 1024 are printed exactly (`"512 B"`); larger values are
/// printed with one decimal place in the largest fitting unit, from KB up to
/// EB, which covers the full `u64` range. A value that would round up to
/// `1024.0` in one unit is shown as `1.0` of the next unit instead.
fn format_size(bytes: u64) -> String {
    const UNITS: &[&str] = &["B", "KB", "MB", "GB", "TB", "PB", "EB"];

    // The value as it will appear once formatted with one decimal place.
    let displayed = |value: f64| (value * 10.0).round() / 10.0;

    let mut size = bytes as f64;
    let mut unit_idx = 0;

    // Compare the *displayed* value so that e.g. 1023.999 KB moves on to MB
    // rather than being printed as "1024.0 KB".
    while displayed(size) >= 1024.0 && unit_idx < UNITS.len() - 1 {
        size /= 1024.0;
        unit_idx += 1;
    }

    if unit_idx == 0 {
        format!("{} {}", bytes, UNITS[unit_idx])
    } else {
        format!("{:.1} {}", size, UNITS[unit_idx])
    }
}
|
|
@ -2,6 +2,7 @@ use memmap2::Mmap;
|
|||
use std::fs::File;
|
||||
use std::path::Path;
|
||||
use crate::{CartoError, PackageSource};
|
||||
use serde::Serialize;
|
||||
|
||||
#[repr(C, packed)]
|
||||
pub struct DatabaseHeader {
|
||||
|
@ -64,7 +65,7 @@ pub struct ScanRecord {
|
|||
scan_duration: u32, // Duration in seconds
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq)]
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize)]
|
||||
pub enum OrphanCategory {
|
||||
Unknown = 0, // Files with no clear ownership
|
||||
Expected = 1, // Files in /tmp, /var/tmp, /var/log, etc.
|
||||
|
|
|
@ -0,0 +1 @@
|
|||
pub mod format;
|
19
src/main.rs
19
src/main.rs
|
@ -3,6 +3,7 @@
|
|||
|
||||
pub mod package_managers;
|
||||
pub mod cli;
|
||||
pub mod database;
|
||||
|
||||
use std::path::{self, PathBuf};
|
||||
use package_managers::{detect_available_managers, PackageManager, PackageSource};
|
||||
|
@ -80,7 +81,7 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
|
|||
handle_find_command(&managers, name, package, size)?;
|
||||
}
|
||||
Commands::Scan { force, paths , format, detailed, sizes, include_expected } => {
|
||||
handle_scan_command(&managers, force, paths, format, detailed, sizes, include_expected)?;
|
||||
handle_scan_command(&managers, paths, format, force, detailed, sizes, include_expected)?;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -221,14 +222,16 @@ fn handle_find_command(
|
|||
}
|
||||
|
||||
fn handle_scan_command(
|
||||
_managers: &[Box<dyn PackageManager>],
|
||||
managers: &[Box<dyn PackageManager>],
|
||||
paths: Vec<String>,
|
||||
format: OutputFormat,
|
||||
_force: bool,
|
||||
_paths: Vec<String>,
|
||||
_format: OutputFormat,
|
||||
_detailed: bool,
|
||||
_sizes: bool,
|
||||
_include_expected: bool
|
||||
detailed: bool,
|
||||
sizes: bool,
|
||||
include_expected: bool,
|
||||
) -> Result<(), Box<dyn std::error::Error>> {
|
||||
println!("Scan functionality not yet implemented");
|
||||
let scanner = cli::scan::Scanner::new(managers);
|
||||
let results = scanner.scan_paths(&paths, include_expected)?;
|
||||
cli::scan::print_scan_results(&results, &format, detailed, sizes)?;
|
||||
Ok(())
|
||||
}
|
Loading…
Reference in New Issue