diff --git a/src/cli/mod.rs b/src/cli/mod.rs
index 69dff2b..2dbfde0 100644
--- a/src/cli/mod.rs
+++ b/src/cli/mod.rs
@@ -9,14 +9,46 @@ pub struct Cli {
     pub command: Commands,
 }
 
+/// Output style selectable with `--format`.
+#[derive(clap::ValueEnum, Clone, Debug)]
+pub enum OutputFormat {
+    Simple,
+    Detailed,
+    Summary,
+    Json,
+    Tree,
+}
+
 #[derive(Subcommand)]
 pub enum Commands {
     /// Scan system and build/update database
     Scan {
-        #[arg(short, long, help = "Force full rescan")]
+        /// Directories to scan (defaults to /)
+        paths: Vec<String>,
+
+        /// Output format
+        // Long-only: `-f` is already taken by `--force` below, and clap's
+        // debug assertions panic when two arguments share a short flag.
+        #[arg(long, value_enum, default_value = "summary")]
+        format: OutputFormat,
+
+        /// Show detailed file information
+        #[arg(long)]
+        detailed: bool,
+
+        /// Show file sizes
+        #[arg(long)]
+        sizes: bool,
+
+        /// Include expected orphans (/tmp, /var/log)
+        #[arg(long)]
+        include_expected: bool,
+
+        /// Force full rescan
+        #[arg(short, long)]
         force: bool,
     },
-    
+
     /// Show file information
     File {
         path: String,
diff --git a/src/database/format.rs b/src/database/format.rs
index 0963e8f..1386bf6 100644
--- a/src/database/format.rs
+++ b/src/database/format.rs
@@ -1,15 +1,21 @@
 use memmap2::Mmap;
 use std::fs::File;
+use std::path::Path;
+use crate::{CartoError, PackageSource};
 
 #[repr(C, packed)]
 pub struct DatabaseHeader {
-    magic: [u8; 8],        // "WHEREDB\0"
+    magic: [u8; 8],        // "CARTODB\0"
     version: u32,
     created: u64,          // Unix timestamp
     file_count: u64,
     package_count: u32,
+    orphan_count: u64,     // Count of orphaned files
+    scan_count: u32,       // Number of scans performed
     files_offset: u64,
     packages_offset: u64,
+    orphans_offset: u64,   // Offset to orphan records
+    scans_offset: u64,     // Offset to scan metadata
     strings_offset: u64,
     index_offset: u64,
 }
@@ -36,14 +42,121 @@ pub struct PackageRecord {
     first_file_idx: u32,
 }
 
-pub struct WhereDatabase {
+/// On-disk record describing one file that no package owns.
+#[repr(C, packed)]
+pub struct OrphanRecord {
+    path_hash: u64,        // FNV-1a hash for quick lookups
+    category: u8,          // OrphanCategory as u8
+    permissions: u16,
+    size: u64,
+    mtime: u64,
+    // Exposed as `OrphanInfo::created`. NOTE(review): Unix st_ctime is the
+    // inode-change time, not creation time - confirm what the scanner stores.
+    ctime: u64,
+    path_offset: u32,      // offset into string pool
+    scan_id: u32,          // Which scan discovered this orphan
+}
+
+/// On-disk record describing one scan.
+#[repr(C, packed)]
+pub struct ScanRecord {
+    id: u32,
+    scan_time: u64,        // Unix timestamp when scan was performed
+    scanned_paths_offset: u32,  // Comma-separated list of scanned paths
+    orphans_found: u32,    // Number of orphans found in this scan
+    total_files_scanned: u64,
+    scan_duration: u32,    // Duration in seconds
+}
+
+/// Classification of an orphaned file, stored on disk as a `u8`.
+// `Eq` and `Hash` are required because `get_scan_summary` keys a `HashMap`
+// by category.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
+pub enum OrphanCategory {
+    Unknown = 0,           // Files with no clear ownership
+    Expected = 1,          // Files in /tmp, /var/tmp, /var/log, etc.
+    UserData = 2,          // Files in /home directories
+    SystemGenerated = 3,   // Runtime files, caches
+    ConfigBackup = 4,      // .bak, .orig config files
+    Temporary = 5,         // Temp files in unusual locations
+}
+
+impl OrphanCategory {
+    /// Decodes a stored category byte; unrecognised values map to `Unknown`.
+    pub fn from_u8(value: u8) -> Self {
+        match value {
+            0 => OrphanCategory::Unknown,
+            1 => OrphanCategory::Expected,
+            2 => OrphanCategory::UserData,
+            3 => OrphanCategory::SystemGenerated,
+            4 => OrphanCategory::ConfigBackup,
+            5 => OrphanCategory::Temporary,
+            _ => OrphanCategory::Unknown,
+        }
+    }
+
+    /// Short machine-friendly label for the category.
+    pub fn name(&self) -> &'static str {
+        match self {
+            OrphanCategory::Unknown => "unknown",
+            OrphanCategory::Expected => "expected",
+            OrphanCategory::UserData => "user-data",
+            OrphanCategory::SystemGenerated => "system-generated",
+            OrphanCategory::ConfigBackup => "config-backup",
+            OrphanCategory::Temporary => "temporary",
+        }
+    }
+
+    /// One-line human-readable description of the category.
+    pub fn description(&self) -> &'static str {
+        match self {
+            OrphanCategory::Unknown => "Files with no clear package ownership",
+            OrphanCategory::Expected => "Files in temporary/log directories",
+            OrphanCategory::UserData => "Files in user home directories",
+            OrphanCategory::SystemGenerated => "Runtime and cache files",
+            OrphanCategory::ConfigBackup => "Configuration backup files",
+            OrphanCategory::Temporary => "Temporary files in unusual locations",
+        }
+    }
+}
+
+/// Lookup result for a single path; `package` and `source` are `None` for orphans.
+pub struct FileInfo {
+    pub path: String,
+    pub package: Option<String>,
+    pub source: Option<PackageSource>,
+    pub size: u64,
+    pub modified: u64,
+}
+
+/// Owned view of one `OrphanRecord`.
+pub struct OrphanInfo {
+    pub path: String,
+    pub category: OrphanCategory,
+    pub size: u64,
+    pub modified: u64,
+    pub created: u64,
+    pub scan_id: u32,
+}
+
+/// Read-only, memory-mapped view of a Carto database file.
+pub struct CartoDatabase {
     _file: File,
     mmap: Mmap,
     header: &'static DatabaseHeader,
 }
 
-impl WhereDatabase {
-    pub fn open<P: AsRef<Path>>(path: P) -> Result<Self, WhereError> {
+impl CartoDatabase {
+    /// Memory-maps the database at `path` and validates its header.
+    ///
+    /// # Errors
+    /// Returns `CartoError::InvalidDatabase` if the file is too short to hold
+    /// a header or its magic number is wrong; I/O errors propagate via `?`.
+    pub fn open<P: AsRef<Path>>(path: P) -> Result<Self, CartoError> {
         let file = File::open(path)?;
         let mmap = unsafe { memmap2::MmapOptions::new().map(&file)? };
+        // Refuse files too short to contain a header before it is read below.
+        if mmap.len() < std::mem::size_of::<DatabaseHeader>() {
+            return Err(CartoError::InvalidDatabase);
+        }
 
@@ -52,11 +165,11 @@ impl WhereDatabase {
         };
 
         // Validate magic number
-        if &header.magic != b"WHEREDB\0" {
-            return Err(WhereError::InvalidDatabase);
+        if &header.magic != b"CARTODB\0" {
+            return Err(CartoError::InvalidDatabase);
         }
 
-        Ok(WhereDatabase {
+        Ok(CartoDatabase {
             _file: file,
             mmap,
             header,
@@ -64,17 +177,70 @@ impl WhereDatabase {
     }
 
     pub fn find_file(&self, path: &str) -> Option<FileInfo> {
-        let hash = fnv1a_hash(path.as_bytes());
+        let hash = self.fnv1a_hash(path.as_bytes());
 
-        // Binary search through sorted file records
+        // First check package-owned files
+        if let Some(file_info) = self.find_package_file(hash, path) {
+            return Some(file_info);
+        }
+
+        // Then check orphaned files
+        self.find_orphan_file(hash, path)
+    }
+
+    /// Collects every orphan whose stored category equals `category`.
+    pub fn find_orphans_by_category(&self, category: OrphanCategory) -> Vec<OrphanInfo> {
+        self.get_orphan_records()
+            .iter()
+            .filter(|record| OrphanCategory::from_u8(record.category) == category)
+            .map(|record| OrphanInfo {
+                path: self.get_string(record.path_offset),
+                category,
+                size: record.size,
+                modified: record.mtime,
+                created: record.ctime,
+                scan_id: record.scan_id,
+            })
+            .collect()
+    }
+
+    /// Aggregates orphan counts per category, their combined size, and the
+    /// time of the most recent scan.
+    pub fn get_scan_summary(&self) -> ScanSummary {
+        let orphans = self.get_orphan_records();
+        let mut category_counts = std::collections::HashMap::new();
+        let mut total_size = 0u64;
+
+        for record in orphans {
+            let category = OrphanCategory::from_u8(record.category);
+            *category_counts.entry(category).or_insert(0) += 1;
+            // Sizes come from an untrusted file; saturate rather than overflow.
+            total_size = total_size.saturating_add(record.size);
+        }
+
+        ScanSummary {
+            total_orphans: orphans.len(),
+            category_counts,
+            total_size,
+            last_scan: self.get_last_scan_time(),
+        }
+    }
+
+    /// Looks up a package-owned file by path hash.
+    ///
+    /// NOTE(review): a hit is accepted on hash equality alone; `_path` should
+    /// be compared with the stored path once `get_string` is implemented, and
+    /// the binary search presumably relies on records being sorted by
+    /// `path_hash` - confirm against the writer.
+    fn find_package_file(&self, hash: u64, _path: &str) -> Option<FileInfo> {
         let files = self.get_file_records();
         match files.binary_search_by_key(&hash, |record| record.path_hash) {
             Ok(idx) => {
                 let record = &files[idx];
                 Some(FileInfo {
                     path: self.get_string(record.path_offset),
-                    package: self.get_package_name(record.package_id),
-                    source: PackageSource::from_u8(record.source),
+                    package: Some(self.get_package_name(record.package_id)),
+                    source: Some(PackageSource::from_u8(record.source)),
                     size: record.size,
                     modified: record.mtime,
                 })
@@ -82,4 +248,99 @@ impl WhereDatabase {
             Err(_) => None,
         }
     }
+
+    /// Looks up an orphaned file by path hash; a hit is accepted on hash
+    /// equality alone, `_path` is not yet compared (NOTE(review): see above).
+    fn find_orphan_file(&self, hash: u64, _path: &str) -> Option<FileInfo> {
+        let orphans = self.get_orphan_records();
+        match orphans.binary_search_by_key(&hash, |record| record.path_hash) {
+            Ok(idx) => {
+                let record = &orphans[idx];
+                Some(FileInfo {
+                    path: self.get_string(record.path_offset),
+                    package: None,
+                    source: None,
+                    size: record.size,
+                    modified: record.mtime,
+                })
+            }
+            Err(_) => None,
+        }
+    }
+
+    /// Returns `count` records of type `T` that start `offset` bytes into the
+    /// mapping, or an empty slice if that range is not fully inside it.
+    ///
+    /// Offsets and counts come straight from the file header, so they are
+    /// validated here rather than trusted.
+    fn records<T>(&self, offset: u64, count: u64) -> &[T] {
+        let map_len = self.mmap.len() as u64;
+        let fits = count
+            .checked_mul(std::mem::size_of::<T>() as u64)
+            .and_then(|bytes| offset.checked_add(bytes))
+            .map_or(false, |end| end <= map_len);
+        if !fits {
+            return &[];
+        }
+        // In range, so `offset` fits in `usize` and this cast cannot truncate.
+        let start = self.mmap.as_ptr().wrapping_add(offset as usize);
+        if (start as usize) % std::mem::align_of::<T>() != 0 {
+            return &[];
+        }
+        // SAFETY: `start..start + count * size_of::<T>()` was checked to lie
+        // inside the mapping (so neither cast truncates for the non-zero-sized
+        // record types used here) and `start` is aligned for `T`. Assumes every
+        // bit pattern is a valid `T`, as for plain-integer records - confirm.
+        unsafe { std::slice::from_raw_parts(start as *const T, count as usize) }
+    }
+
+    /// File records located via the header's `files_offset` / `file_count`.
+    fn get_file_records(&self) -> &[FileRecord] {
+        self.records(self.header.files_offset, self.header.file_count)
+    }
+
+    /// Orphan records located via the header's `orphans_offset` / `orphan_count`.
+    fn get_orphan_records(&self) -> &[OrphanRecord] {
+        self.records(self.header.orphans_offset, self.header.orphan_count)
+    }
+
+    fn get_string(&self, _offset: u32) -> String {
+        // Implementation for reading from string pool
+        // This is a placeholder - actual implementation depends on string storage format
+        String::new()
+    }
+
+    fn get_package_name(&self, _package_id: u32) -> String {
+        // Implementation for getting package name by ID
+        // This is a placeholder - actual implementation depends on package storage
+        String::new()
+    }
+
+    /// Timestamp of the last entry in the scan table, or `None` if it is empty.
+    fn get_last_scan_time(&self) -> Option<u64> {
+        let scans: &[ScanRecord] =
+            self.records(self.header.scans_offset, u64::from(self.header.scan_count));
+        scans.last().map(|scan| scan.scan_time)
+    }
+
+    /// 64-bit FNV-1a hash of `bytes`.
+    fn fnv1a_hash(&self, bytes: &[u8]) -> u64 {
+        const FNV_OFFSET_BASIS: u64 = 14695981039346656037;
+        const FNV_PRIME: u64 = 1099511628211;
+
+        let mut hash = FNV_OFFSET_BASIS;
+        for byte in bytes {
+            hash ^= *byte as u64;
+            hash = hash.wrapping_mul(FNV_PRIME);
+        }
+        hash
+    }
 }
+
+/// Aggregate statistics returned by `CartoDatabase::get_scan_summary`.
+pub struct ScanSummary {
+    pub total_orphans: usize,
+    pub category_counts: std::collections::HashMap<OrphanCategory, usize>,
+    pub total_size: u64,
+    pub last_scan: Option<u64>,
+}
diff --git a/src/main.rs b/src/main.rs
index 46cb413..5f958d9 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -4,11 +4,13 @@ pub mod package_managers;
 
 pub mod cli;
 
 use std::path::PathBuf;
 use package_managers::{detect_available_managers, PackageManager, PackageSource};
 use cli::{Cli, Commands};
 use clap::Parser;
 
+use crate::cli::OutputFormat;
+
 // Define the types that your modules need
 #[derive(Debug, Clone)]
 pub struct PackageInfo {
@@ -77,8 +79,8 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
         Commands::Find { name, package, size } => {
             handle_find_command(&managers, name, package, size)?;
         }
-        Commands::Scan { force } => {
-            handle_scan_command(&managers, force)?;
+        Commands::Scan { force, paths, format, detailed, sizes, include_expected } => {
+            handle_scan_command(&managers, force, paths, format, detailed, sizes, include_expected)?;
         }
     }
 
@@ -220,7 +222,12 @@ fn handle_find_command(
 
 fn handle_scan_command(
     _managers: &[Box<dyn PackageManager>],
-    _force: bool
+    _force: bool,
+    _paths: Vec<String>,
+    _format: OutputFormat,
+    _detailed: bool,
+    _sizes: bool,
+    _include_expected: bool,
 ) -> Result<(), Box<dyn std::error::Error>> {
     println!("Scan functionality not yet implemented");
     Ok(())