Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1 +1,3 @@
!/Cargo.lock
/target
*~
6 changes: 3 additions & 3 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

69 changes: 47 additions & 22 deletions src/commands/gen_source_db.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,13 +24,28 @@ use walkdir::WalkDir;

use crate::source_map::is_python_file;

/// Checks whether `name` is an ASCII-only Python identifier, which is what
/// each component of a dotted module name must be.
///
/// The empty string is rejected. The first character must be an ASCII letter
/// or `_`; all remaining characters must be ASCII letters, ASCII digits or `_`.
fn is_valid_python_identifier(name: &str) -> bool {
    let is_start = |ch: char| ch == '_' || ch.is_ascii_alphabetic();
    let is_continue = |ch: char| ch == '_' || ch.is_ascii_alphanumeric();

    let mut rest = name.chars();
    let Some(first) = rest.next() else {
        // Nothing to inspect: an empty name is never an identifier.
        return false;
    };
    is_start(first) && rest.all(is_continue)
}

// Command-line arguments consumed by this module's `run` function.
// NOTE(review): the `///` comments on the fields below are presumably picked
// up by the `Parser` derive as user-facing help text, so treat their wording
// as program output rather than as internal documentation — confirm before
// rewording them.
#[derive(Parser)]
pub struct GenSourceDbArgs {
/// Directory containing Python files to scan
input_dir: PathBuf,

/// Path to output JSON file
output_path: PathBuf,

// When present, `run` canonicalizes this path and uses it in place of the
// value it would otherwise load via `load_site_packages`.
/// Path to site-packages directory (overrides pyproject.toml setting)
#[arg(long)]
site_packages: Option<PathBuf>,
}

#[derive(Serialize)]
Expand Down Expand Up @@ -160,8 +175,11 @@ fn load_site_packages(input_dir: &Path) -> Result<Option<PathBuf>> {
pub fn run(args: GenSourceDbArgs) -> Result<()> {
let input_dir = args.input_dir.canonicalize()?;

// Load site_packages from pyproject.toml if present
let site_packages = load_site_packages(&input_dir)?;
// Use CLI arg if provided, otherwise load from pyproject.toml
let site_packages = match args.site_packages {
Some(sp) => Some(sp.canonicalize().context("site_packages path not found")?),
None => load_site_packages(&input_dir)?,
};
if let Some(ref sp) = site_packages {
eprintln!("Using site-packages: {}", sp.display());
}
Expand All @@ -176,11 +194,27 @@ pub fn run(args: GenSourceDbArgs) -> Result<()> {
let mut visited = HashSet::new();
let mut queue = VecDeque::new();

// Seed the queue with all .py files under input_dir
// Seed the queue with all .py files under input_dir, skipping directories
// and files whose names are not valid Python identifiers.
for entry in WalkDir::new(&input_dir)
.into_iter()
.filter_entry(|e| {
if !e.file_type().is_dir() {
return true;
}
e.depth() == 0
|| e.file_name()
.to_str()
.is_some_and(is_valid_python_identifier)
})
.filter_map(|e| e.ok())
.filter(|e| is_python_file(e.path()))
.filter(|e| {
is_python_file(e.path())
&& e.path()
.file_stem()
.and_then(|s| s.to_str())
.is_some_and(is_valid_python_identifier)
})
{
if let Ok(full_path) = entry.path().canonicalize() {
if visited.insert(full_path.clone()) {
Expand Down Expand Up @@ -218,26 +252,17 @@ pub fn run(args: GenSourceDbArgs) -> Result<()> {
Err(_) => continue,
};
if visited.insert(resolved.clone()) {
// Determine the relative key based on which root it's under
let rel_key = if resolved.starts_with(&input_dir) {
resolved
.strip_prefix(&input_dir)
.expect("file should be under input_dir")
.to_string_lossy()
.into_owned()
} else if let Some(ref sp) = site_packages {
if resolved.starts_with(sp) {
resolved
.strip_prefix(sp)
.expect("file should be under site_packages")
.to_string_lossy()
.into_owned()
} else {
continue;
}
} else {
// Determine the relative key based on which root it's under.
// Check most-specific root first (site_packages may be a
// subdirectory of input_dir).
let rel_key = roots
.iter()
.rev()
.find_map(|root| resolved.strip_prefix(root).ok());
let Some(rel_key) = rel_key else {
continue;
};
let rel_key = rel_key.to_string_lossy().into_owned();

build_map.insert(rel_key, resolved.to_string_lossy().into_owned());
queue.push_back(resolved);
Expand Down
32 changes: 30 additions & 2 deletions src/commands/run_tree.rs
Original file line number Diff line number Diff line change
Expand Up @@ -43,12 +43,40 @@ pub struct RunTreeArgs {
sorted_output: bool,
}

/// Recursively find all .py files in a directory
/// Reports whether `name` could serve as one component of a dotted Python
/// module name, i.e. whether it is a valid identifier in the ASCII subset.
///
/// An empty `name` is not an identifier. Otherwise the leading byte must be
/// an ASCII letter or `_`, and every following byte an ASCII letter, ASCII
/// digit or `_`.
fn is_valid_python_identifier(name: &str) -> bool {
    // Working on bytes is equivalent to working on chars here: every byte of
    // a multi-byte UTF-8 sequence is >= 0x80 and therefore fails both ASCII
    // predicates, exactly as the non-ASCII char itself would.
    match name.as_bytes().split_first() {
        None => false,
        Some((&head, tail)) => {
            let head_ok = head == b'_' || head.is_ascii_alphabetic();
            head_ok
                && tail
                    .iter()
                    .all(|&b| b == b'_' || b.is_ascii_alphanumeric())
        }
    }
}

/// Recursively collects the Python files found under `dir`.
///
/// Entries are skipped when their names cannot appear in a dotted module name:
///
/// * a directory below the root whose name is not a valid Python identifier
///   (e.g. `.venv`, `site-packages`) is pruned together with everything
///   beneath it; the root `dir` itself (depth 0) is always traversed;
/// * a file is kept only if `is_python_file` accepts its path and its stem is
///   a valid Python identifier (so `2024-07-23-0813_migration.py` is dropped).
///
/// Names that are not valid UTF-8 are treated as invalid. Entries the walker
/// fails to read are silently ignored.
fn find_python_files(dir: &Path) -> Vec<PathBuf> {
    WalkDir::new(dir)
        .into_iter()
        .filter_entry(|e| {
            // Only directories are pruned at this stage; files always pass
            // through and are vetted by the `filter` below.
            !e.file_type().is_dir()
                || e.depth() == 0
                || e.file_name()
                    .to_str()
                    .is_some_and(is_valid_python_identifier)
        })
        .filter_map(|e| e.ok())
        // Single file-level check: a separate `is_python_file`-only stage
        // ahead of this one would be fully subsumed by it.
        .filter(|e| {
            is_python_file(e.path())
                && e.path()
                    .file_stem()
                    .and_then(|s| s.to_str())
                    .is_some_and(is_valid_python_identifier)
        })
        .map(|e| e.into_path())
        .collect()
}
Expand Down