Creating the scan command framework

This commit is contained in:
Russell 2025-07-28 16:51:07 +02:00
parent cec1d5409c
commit 6b4d342f3f
3 changed files with 268 additions and 18 deletions

View File

@ -9,14 +9,42 @@ pub struct Cli {
pub command: Commands,
}
// Output formats selectable through the `format` argument of the `Scan`
// subcommand, which declares "summary" as its default value.
// How each format is rendered is not implemented in the visible code.
// NOTE(review): plain `//` comments are used here on purpose; clap can pick up
// `///` doc comments as help text, which would change the CLI output.
#[derive(clap::ValueEnum, Clone, Debug)]
pub enum OutputFormat {
Simple,
Detailed,
Summary,
Json,
Tree,
}
#[derive(Subcommand)]
pub enum Commands {
/// Scan system and build/update database
Scan {
#[arg(short, long, help = "Force full rescan")]
/// Directories to scan (defaults to /)
paths: Vec<String>,
/// Output format
#[arg(short, long, value_enum, default_value = "summary")]
format: OutputFormat,
/// Show detailed file information
#[arg(long)]
detailed: bool,
/// Show file sizes
#[arg(long)]
sizes: bool,
/// Include expected orphans (/tmp, /var/log)
#[arg(long)]
include_expected: bool,
/// Force full rescan
#[arg(short, long)]
force: bool,
},
/// Show file information
File {
path: String,

View File

@ -1,15 +1,21 @@
use memmap2::Mmap;
use std::fs::File;
use std::path::Path;
use crate::{CartoError, PackageSource};
/// Fixed-layout header of the database file.
///
/// The struct is `#[repr(C, packed)]`, so it has no padding and an alignment
/// of 1. The `*_offset` fields that are used in this file are added to the
/// base pointer of the memory map, i.e. they are byte offsets from the start
/// of the file.
#[repr(C, packed)]
pub struct DatabaseHeader {
    /// File signature, `b"CARTODB\0"`; opening a database rejects any other value.
    magic: [u8; 8],
    version: u32,
    /// Unix timestamp.
    created: u64,
    /// Number of `FileRecord` entries found at `files_offset`.
    file_count: u64,
    package_count: u32,
    /// Count of orphaned files (`OrphanRecord` entries at `orphans_offset`).
    orphan_count: u64,
    /// Number of scans performed (`ScanRecord` entries at `scans_offset`).
    scan_count: u32,
    /// Byte offset of the file record array.
    files_offset: u64,
    packages_offset: u64,
    /// Offset to orphan records.
    orphans_offset: u64,
    /// Offset to scan metadata.
    scans_offset: u64,
    strings_offset: u64,
    index_offset: u64,
}
@ -36,14 +42,99 @@ pub struct PackageRecord {
first_file_idx: u32,
}
pub struct WhereDatabase {
/// Packed record describing one orphaned file, i.e. a file that is looked up
/// separately from the package-owned file records.
///
/// `#[repr(C, packed)]` removes all padding, so fields may be unaligned:
/// read them by value rather than taking references to them.
#[repr(C, packed)]
pub struct OrphanRecord {
    /// FNV-1a hash for quick lookups.
    path_hash: u64,
    /// `OrphanCategory` stored as a `u8`.
    category: u8,
    permissions: u16,
    size: u64,
    mtime: u64,
    /// Creation time for cleanup planning.
    ctime: u64,
    /// Offset into the string pool.
    path_offset: u32,
    /// Which scan discovered this orphan.
    scan_id: u32,
}
/// Packed record holding the metadata of one scan.
///
/// `#[repr(C, packed)]` removes all padding, so fields may be unaligned:
/// read them by value rather than taking references to them.
#[repr(C, packed)]
pub struct ScanRecord {
    id: u32,
    /// Unix timestamp when the scan was performed.
    scan_time: u64,
    /// Comma-separated list of scanned paths.
    /// NOTE(review): presumably an offset into the string pool; confirm.
    scanned_paths_offset: u32,
    /// Number of orphans found in this scan.
    orphans_found: u32,
    total_files_scanned: u64,
    /// Duration in seconds.
    scan_duration: u32,
}
/// Classification assigned to an orphaned file.
///
/// The explicit discriminants are the values stored in the `u8` category
/// field of an orphan record. `Eq` and `Hash` are derived because the
/// category is used as a `HashMap` key when orphans are counted per category.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum OrphanCategory {
    /// Files with no clear ownership.
    Unknown = 0,
    /// Files in /tmp, /var/tmp, /var/log, etc.
    Expected = 1,
    /// Files in /home directories.
    UserData = 2,
    /// Runtime files, caches.
    SystemGenerated = 3,
    /// .bak, .orig config files.
    ConfigBackup = 4,
    /// Temp files in unusual locations.
    Temporary = 5,
}
impl OrphanCategory {
    /// Decodes a stored category byte.
    ///
    /// Bytes 0 through 5 map to the variant with that discriminant; every
    /// other value falls back to [`OrphanCategory::Unknown`].
    pub fn from_u8(value: u8) -> Self {
        // Indexed by discriminant, so position `i` holds the variant whose value is `i`.
        const BY_DISCRIMINANT: [OrphanCategory; 6] = [
            OrphanCategory::Unknown,
            OrphanCategory::Expected,
            OrphanCategory::UserData,
            OrphanCategory::SystemGenerated,
            OrphanCategory::ConfigBackup,
            OrphanCategory::Temporary,
        ];
        BY_DISCRIMINANT
            .get(usize::from(value))
            .copied()
            .unwrap_or(OrphanCategory::Unknown)
    }

    /// Short kebab-case identifier of the category.
    pub fn name(&self) -> &'static str {
        self.labels().0
    }

    /// One-line human-readable description of the category.
    pub fn description(&self) -> &'static str {
        self.labels().1
    }

    /// Returns the `(name, description)` pair for this category.
    fn labels(&self) -> (&'static str, &'static str) {
        match *self {
            Self::Unknown => ("unknown", "Files with no clear package ownership"),
            Self::Expected => ("expected", "Files in temporary/log directories"),
            Self::UserData => ("user-data", "Files in user home directories"),
            Self::SystemGenerated => ("system-generated", "Runtime and cache files"),
            Self::ConfigBackup => ("config-backup", "Configuration backup files"),
            Self::Temporary => ("temporary", "Temporary files in unusual locations"),
        }
    }
}
/// Result of looking up a single path in the database.
pub struct FileInfo {
    /// Path string read from the string pool for the matching record.
    pub path: String,
    /// Owning package name; `None` when the match came from the orphan records.
    pub package: Option<String>,
    /// Package source of the owner; `None` when the match came from the orphan records.
    pub source: Option<PackageSource>,
    /// Value of the record's `size` field.
    pub size: u64,
    /// Value of the record's `mtime` field.
    pub modified: u64,
}
/// Owned, decoded view of one orphan record.
pub struct OrphanInfo {
    /// Path string read from the string pool.
    pub path: String,
    /// Category decoded from the record's category byte.
    pub category: OrphanCategory,
    /// Value of the record's `size` field.
    pub size: u64,
    /// Value of the record's `mtime` field.
    pub modified: u64,
    /// Value of the record's `ctime` field.
    pub created: u64,
    /// Identifier of the scan that discovered the orphan.
    pub scan_id: u32,
}
/// Read-only handle to a memory-mapped database file.
pub struct CartoDatabase {
    /// The opened file the mapping was created from. It is stored but never
    /// read in the visible code (hence the leading underscore).
    _file: File,
    /// Memory map of the whole file; record slices are built from its base pointer.
    mmap: Mmap,
    /// Parsed header used for counts and offsets.
    /// NOTE(review): presumably this points into `mmap`, in which case the
    /// `'static` lifetime is only valid while `mmap` is alive; confirm in `open`.
    header: &'static DatabaseHeader,
}
impl WhereDatabase {
pub fn open<P: AsRef<Path>>(path: P) -> Result<Self, WhereError> {
impl CartoDatabase {
pub fn open<P: AsRef<Path>>(path: P) -> Result<Self, CartoError> {
let file = File::open(path)?;
let mmap = unsafe { memmap2::MmapOptions::new().map(&file)? };
@ -52,11 +143,11 @@ impl WhereDatabase {
};
// Validate magic number
if &header.magic != b"WHEREDB\0" {
return Err(WhereError::InvalidDatabase);
if &header.magic != b"CARTODB\0" {
return Err(CartoError::InvalidDatabase);
}
Ok(WhereDatabase {
Ok(CartoDatabase {
_file: file,
mmap,
header,
@ -64,17 +155,60 @@ impl WhereDatabase {
}
pub fn find_file(&self, path: &str) -> Option<FileInfo> {
let hash = fnv1a_hash(path.as_bytes());
let hash = self.fnv1a_hash(path.as_bytes());
// Binary search through sorted file records
// First check package-owned files
if let Some(file_info) = self.find_package_file(hash, path) {
return Some(file_info);
}
// Then check orphaned files
self.find_orphan_file(hash, path)
}
/// Collects every orphan record whose decoded category equals `category`.
///
/// Each match is converted into an owned [`OrphanInfo`]; the result keeps
/// the order of the underlying records.
pub fn find_orphans_by_category(&self, category: OrphanCategory) -> Vec<OrphanInfo> {
    let mut matches = Vec::new();
    for rec in self.get_orphan_records() {
        if OrphanCategory::from_u8(rec.category) != category {
            continue;
        }
        matches.push(OrphanInfo {
            path: self.get_string(rec.path_offset),
            category: OrphanCategory::from_u8(rec.category),
            size: rec.size,
            modified: rec.mtime,
            created: rec.ctime,
            scan_id: rec.scan_id,
        });
    }
    matches
}
pub fn get_scan_summary(&self) -> ScanSummary {
let orphans = self.get_orphan_records();
let mut category_counts = std::collections::HashMap::new();
let mut total_size = 0u64;
for record in orphans {
let category = OrphanCategory::from_u8(record.category);
*category_counts.entry(category).or_insert(0) += 1;
total_size += record.size;
}
ScanSummary {
total_orphans: orphans.len(),
category_counts,
total_size,
last_scan: self.get_last_scan_time(),
}
}
fn find_package_file(&self, hash: u64, path: &str) -> Option<FileInfo> {
let files = self.get_file_records();
match files.binary_search_by_key(&hash, |record| record.path_hash) {
Ok(idx) => {
let record = &files[idx];
Some(FileInfo {
path: self.get_string(record.path_offset),
package: self.get_package_name(record.package_id),
source: PackageSource::from_u8(record.source),
package: Some(self.get_package_name(record.package_id)),
source: Some(PackageSource::from_u8(record.source)),
size: record.size,
modified: record.mtime,
})
@ -82,4 +216,85 @@ impl WhereDatabase {
Err(_) => None,
}
}
/// Searches the orphan records for an entry whose `path_hash` equals `hash`.
///
/// Uses a binary search, so the records are assumed to be sorted by
/// `path_hash` (TODO confirm against the writer). Orphans have no owner, so
/// `package` and `source` are always `None` in the result.
///
/// NOTE(review): the match is made on the hash alone; `path` is not
/// compared with the stored path.
fn find_orphan_file(&self, hash: u64, path: &str) -> Option<FileInfo> {
    let records = self.get_orphan_records();
    let idx = records
        .binary_search_by_key(&hash, |rec| rec.path_hash)
        .ok()?;
    let hit = &records[idx];

    Some(FileInfo {
        path: self.get_string(hit.path_offset),
        package: None,
        source: None,
        size: hit.size,
        modified: hit.mtime,
    })
}
/// Returns the file record array described by the header.
///
/// The header's offset and count come from the file itself, so they are
/// validated against the mapping first. If the described region overflows,
/// extends past the end of the mapping, or is misaligned for `FileRecord`,
/// an empty slice is returned instead of reading out of bounds.
fn get_file_records(&self) -> &[FileRecord] {
    let offset = self.header.files_offset;
    let count = self.header.file_count;
    let mapped_len = self.mmap.len() as u64;

    // All arithmetic is done in u64 with overflow checks before any cast to usize.
    let end = count
        .checked_mul(std::mem::size_of::<FileRecord>() as u64)
        .and_then(|byte_len| offset.checked_add(byte_len));

    match end {
        Some(end) if end <= mapped_len => {
            // `offset <= end <= mapped_len`, so the cast to usize is lossless
            // and the pointer stays inside the mapping.
            let start = unsafe { self.mmap.as_ptr().add(offset as usize) } as *const FileRecord;
            // NOTE(review): `FileRecord` is declared outside this view; the
            // check below is trivially true if it is packed like the other records.
            if (start as usize) % std::mem::align_of::<FileRecord>() != 0 {
                return &[];
            }
            // SAFETY: the region `[offset, end)` was checked to lie within the
            // mapping, the pointer is aligned for `FileRecord`, and the slice
            // borrows `self`, which owns the mapping.
            unsafe { std::slice::from_raw_parts(start, count as usize) }
        }
        _ => &[],
    }
}
/// Returns the orphan record array described by the header.
///
/// The header's offset and count come from the file itself, so they are
/// validated against the mapping first. If the described region overflows
/// or extends past the end of the mapping, an empty slice is returned
/// instead of reading out of bounds.
fn get_orphan_records(&self) -> &[OrphanRecord] {
    let offset = self.header.orphans_offset;
    let count = self.header.orphan_count;
    let mapped_len = self.mmap.len() as u64;

    // All arithmetic is done in u64 with overflow checks before any cast to usize.
    let end = count
        .checked_mul(std::mem::size_of::<OrphanRecord>() as u64)
        .and_then(|byte_len| offset.checked_add(byte_len));

    match end {
        Some(end) if end <= mapped_len => {
            // SAFETY: the region `[offset, end)` was checked to lie within the
            // mapping, so both casts to usize are lossless and every record is
            // in bounds. `OrphanRecord` is `repr(C, packed)` (alignment 1), so
            // any address is suitably aligned. The slice borrows `self`, which
            // owns the mapping.
            unsafe {
                std::slice::from_raw_parts(
                    self.mmap.as_ptr().add(offset as usize) as *const OrphanRecord,
                    count as usize,
                )
            }
        }
        _ => &[],
    }
}
/// Reads the string stored at `offset` in the string pool.
///
/// Placeholder: the actual implementation depends on the string storage
/// format. For now the offset is ignored and an empty string is returned.
fn get_string(&self, offset: u32) -> String {
    let _ = offset;
    String::new()
}
/// Resolves a package ID to its name.
///
/// Placeholder: the actual implementation depends on the package storage.
/// For now the ID is ignored and an empty string is returned.
fn get_package_name(&self, package_id: u32) -> String {
    let _ = package_id;
    String::new()
}
/// Returns the `scan_time` of the last scan record, if any.
///
/// Returns `None` when the header reports zero scans. The header's offset
/// and count come from the file itself, so `None` is also returned when the
/// described region overflows or extends past the end of the mapping,
/// rather than reading out of bounds.
fn get_last_scan_time(&self) -> Option<u64> {
    let count = u64::from(self.header.scan_count);
    if count == 0 {
        return None;
    }

    let offset = self.header.scans_offset;
    let end = count
        .checked_mul(std::mem::size_of::<ScanRecord>() as u64)
        .and_then(|byte_len| offset.checked_add(byte_len))?;
    if end > self.mmap.len() as u64 {
        return None;
    }

    // SAFETY: the region `[offset, end)` was checked to lie within the
    // mapping, so both casts to usize are lossless and every record is in
    // bounds. `ScanRecord` is `repr(C, packed)` (alignment 1), so any address
    // is suitably aligned.
    let scans = unsafe {
        std::slice::from_raw_parts(
            self.mmap.as_ptr().add(offset as usize) as *const ScanRecord,
            count as usize,
        )
    };

    scans.last().map(|scan| scan.scan_time)
}
/// Computes the 64-bit FNV-1a hash of `bytes`.
///
/// Starting from the offset basis, each byte is XORed into the state, which
/// is then multiplied by the FNV prime with wrapping arithmetic. An empty
/// input yields the offset basis.
fn fnv1a_hash(&self, bytes: &[u8]) -> u64 {
    const OFFSET_BASIS: u64 = 14_695_981_039_346_656_037;
    const PRIME: u64 = 1_099_511_628_211;

    bytes
        .iter()
        .fold(OFFSET_BASIS, |state, &b| (state ^ u64::from(b)).wrapping_mul(PRIME))
}
}
/// Aggregate statistics over all orphan records in the database.
pub struct ScanSummary {
    /// Number of orphan records.
    pub total_orphans: usize,
    /// Number of orphan records per decoded category.
    pub category_counts: std::collections::HashMap<OrphanCategory, usize>,
    /// Sum of the `size` fields of all orphan records.
    pub total_size: u64,
    /// `scan_time` of the last scan record, or `None` when no scan is recorded.
    pub last_scan: Option<u64>,
}

View File

@ -4,11 +4,13 @@
pub mod package_managers;
pub mod cli;
use std::path::PathBuf;
use std::path::{self, PathBuf};
use package_managers::{detect_available_managers, PackageManager, PackageSource};
use cli::{Cli, Commands};
use clap::Parser;
use crate::cli::OutputFormat;
// Define the types that your modules need
#[derive(Debug, Clone)]
pub struct PackageInfo {
@ -77,8 +79,8 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
Commands::Find { name, package, size } => {
handle_find_command(&managers, name, package, size)?;
}
Commands::Scan { force } => {
handle_scan_command(&managers, force)?;
Commands::Scan { force, paths , format, detailed, sizes, include_expected } => {
handle_scan_command(&managers, force, paths, format, detailed, sizes, include_expected)?;
}
}
@ -220,7 +222,12 @@ fn handle_find_command(
fn handle_scan_command(
_managers: &[Box<dyn PackageManager>],
_force: bool
_force: bool,
_paths: Vec<String>,
_format: OutputFormat,
_detailed: bool,
_sizes: bool,
_include_expected: bool
) -> Result<(), Box<dyn std::error::Error>> {
println!("Scan functionality not yet implemented");
Ok(())