1- //! Filter Git tree objects by glob patterns or gitattributes .
1+ //! Filter Git tree objects by glob patterns, gitattributes, or a custom predicate.
22//!
33//! This crate exposes the [`FilterTree`] trait, implemented on
44//! [`git2::Repository`], which produces a new tree containing only the entries
5- //! that match either a set of **glob patterns** or a set of **gitattributes**.
5+ //! that match one of: a set of **glob patterns**, a set of **gitattributes**,
6+ //! or an arbitrary **predicate function**.
67//! Trees are walked recursively; patterns are matched against full paths from
78//! the tree root.
89//!
4243//!
4344//! All listed attributes must be set (AND semantics). Entries with an
4445//! attribute explicitly unset (`-export`) or unspecified are excluded.
46+ //!
47+ //! # Filter by Predicate
48+ //!
49+ //! ```no_run
50+ //! use git_filter_tree::FilterTree as _;
51+ //! use std::path::Path;
52+ //!
53+ //! let repo = git2::Repository::open_from_env()?;
54+ //! let tree = repo.head()?.peel_to_tree()?;
55+ //!
56+ //! // Keep only files whose path contains "generated".
57+ //! let filtered = repo.filter_by_predicate(&tree, |_repo, path| {
58+ //! path.to_str().is_some_and(|s| s.contains("generated"))
59+ //! })?;
60+ //! println!("tree sha: {}", filtered.id());
61+ //! # Ok::<(), Box<dyn std::error::Error>>(())
62+ //! ```
63+ //!
64+ //! The predicate receives the repository and the full path of each blob entry
65+ //! relative to the tree root. Subtrees are included as long as at least one
66+ //! descendant matches.
4567pub mod exe;
4668use std:: path:: { Path , PathBuf } ;
4769
@@ -64,13 +86,25 @@ pub trait FilterTree {
6486 /// Filters tree entries by gitattributes and returns a new tree with contents filtered.
6587 /// Recursively walks the tree and matches attributes against full paths from the tree root.
6688 ///
67- /// The `attributes` type is an array of string slices. For attributes which haves values,
89+ /// The `attributes` type is an array of string slices. For attributes which have values,
6890 /// not simply set or unset, use typical `.gitattributes` syntax.
6991 fn filter_by_attributes < ' a > (
7092 & ' a self ,
7193 tree : & ' a git2:: Tree < ' a > ,
7294 attributes : & [ & str ] ,
7395 ) -> Result < git2:: Tree < ' a > , Error > ;
96+
97+ /// Builds a new tree from `tree`, keeping only the entries accepted by `predicate`.
98+ /// Recursively walks the tree; the predicate is called for each blob entry
99+ /// with the repository and the entry's full path relative to the tree root.
100+ /// A subtree is kept only if at least one of its descendants matches; otherwise it is pruned.
101+ fn filter_by_predicate < ' a , F > (
102+ & ' a self ,
103+ tree : & ' a git2:: Tree < ' a > ,
104+ predicate : F ,
105+ ) -> Result < git2:: Tree < ' a > , Error >
106+ where
107+ F : Fn ( & git2:: Repository , & Path ) -> bool ;
74108}
75109
76110impl FilterTree for git2:: Repository {
@@ -109,6 +143,17 @@ impl FilterTree for git2::Repository {
109143 filter_tree_recursive ( self , tree, None , & |_repo, path| matcher. is_match ( path) )
110144 }
111145
146+ fn filter_by_predicate < ' a , F > (
147+ & ' a self ,
148+ tree : & ' a git2:: Tree < ' a > ,
149+ predicate : F ,
150+ ) -> Result < git2:: Tree < ' a > , Error >
151+ where
152+ F : Fn ( & git2:: Repository , & Path ) -> bool ,
153+ {
154+ filter_tree_recursive ( self , tree, None , & predicate)
155+ }
156+
112157 fn filter_by_attributes < ' a > (
113158 & ' a self ,
114159 tree : & ' a git2:: Tree < ' a > ,
@@ -734,4 +779,123 @@ mod tests {
734779 cleanup_test_repo ( temp_path) ;
735780 Ok ( ( ) )
736781 }
782+
/// A predicate that rejects every path must yield a tree with no entries.
#[test]
fn test_filter_by_predicate_always_false_returns_empty_tree() -> Result<(), Error> {
    let (repo, scratch_dir) = setup_test_repo();
    let source_tree = create_test_tree(&repo)?;

    let nothing_kept = repo.filter_by_predicate(&source_tree, |_, _| false)?;
    assert!(nothing_kept.is_empty());

    cleanup_test_repo(scratch_dir);
    Ok(())
}
794+
/// A predicate that accepts every path must keep as many root entries as the input has.
#[test]
fn test_filter_by_predicate_always_true_returns_full_tree() -> Result<(), Error> {
    let (repo, scratch_dir) = setup_test_repo();
    let source_tree = create_test_tree(&repo)?;
    let expected_entries = source_tree.len();

    let everything_kept = repo.filter_by_predicate(&source_tree, |_, _| true)?;
    let actual_entries = everything_kept.len();
    assert_eq!(actual_entries, expected_entries);

    cleanup_test_repo(scratch_dir);
    Ok(())
}
806+
/// The predicate's verdict on the path decides which root entries survive.
#[test]
fn test_filter_by_predicate_matches_on_path() -> Result<(), Error> {
    let (repo, scratch_dir) = setup_test_repo();
    let source_tree = create_test_tree(&repo)?;

    // Retain only the entries whose path mentions "file".
    let filtered = repo.filter_by_predicate(&source_tree, |_, path| {
        matches!(path.to_str(), Some(text) if text.contains("file"))
    })?;

    assert_eq!(filtered.len(), 2);
    for kept in ["file1.txt", "file2.rs"] {
        assert!(filtered.get_name(kept).is_some());
    }
    assert!(filtered.get_name("test.md").is_none());

    cleanup_test_repo(scratch_dir);
    Ok(())
}
825+
/// The predicate must be handed each blob's full path from the tree root,
/// including the directory components of nested entries.
#[test]
fn test_filter_by_predicate_receives_full_nested_path() -> Result<(), Error> {
    let (repo, temp_path) = setup_test_repo();

    let blob = repo.blob(b"content")?;

    // Layout under test: `top.rs` at the root plus `src/{deep.rs, deep.txt}`.
    let mut sub_builder = repo.treebuilder(None)?;
    sub_builder.insert("deep.rs", blob, 0o100644)?;
    sub_builder.insert("deep.txt", blob, 0o100644)?;
    let sub_oid = sub_builder.write()?;

    let mut root_builder = repo.treebuilder(None)?;
    root_builder.insert("top.rs", blob, 0o100644)?;
    root_builder.insert("src", sub_oid, 0o040000)?;
    let tree = repo.find_tree(root_builder.write()?)?;

    // The predicate is `Fn`, so recording the visited paths needs interior mutability.
    let seen_paths = std::cell::RefCell::new(Vec::new());
    // Propagate a filtering failure instead of discarding it; otherwise an
    // error would only surface as a confusing "path not seen" assertion.
    repo.filter_by_predicate(&tree, |_repo, path| {
        seen_paths
            .borrow_mut()
            .push(path.to_string_lossy().into_owned());
        true
    })?;
    let seen_paths = seen_paths.into_inner();

    for expected in ["top.rs", "src/deep.rs", "src/deep.txt"] {
        assert!(
            seen_paths.iter().any(|seen| seen == expected),
            "predicate never saw {expected:?}; saw {seen_paths:?}"
        );
    }

    cleanup_test_repo(temp_path);
    Ok(())
}
858+
/// A subtree in which no blob satisfies the predicate must be absent from the result.
#[test]
fn test_filter_by_predicate_prunes_subtree_when_no_descendants_match() -> Result<(), Error> {
    let (repo, scratch_dir) = setup_test_repo();

    let content = repo.blob(b"content")?;

    // `docs/` holds only .txt files, so nothing inside it can match below.
    let mut docs_builder = repo.treebuilder(None)?;
    docs_builder.insert("a.txt", content, 0o100644)?;
    docs_builder.insert("b.txt", content, 0o100644)?;
    let docs_oid = docs_builder.write()?;

    let mut top_builder = repo.treebuilder(None)?;
    top_builder.insert("keep.rs", content, 0o100644)?;
    top_builder.insert("docs", docs_oid, 0o040000)?;
    let root_oid = top_builder.write()?;
    let source_tree = repo.find_tree(root_oid)?;

    // Accept .rs files only; the docs/ subtree should vanish as a whole.
    let filtered = repo.filter_by_predicate(&source_tree, |_, path| {
        matches!(path.extension(), Some(ext) if ext == "rs")
    })?;

    assert_eq!(filtered.len(), 1);
    assert!(filtered.get_name("keep.rs").is_some());
    assert!(filtered.get_name("docs").is_none());

    cleanup_test_repo(scratch_dir);
    Ok(())
}
887+
/// Filtering an empty tree yields an empty tree, even when the predicate accepts everything.
#[test]
fn test_filter_by_predicate_empty_tree_stays_empty() -> Result<(), Error> {
    let (repo, scratch_dir) = setup_test_repo();

    // Writing a builder with no insertions gives the empty tree.
    let empty_oid = repo.treebuilder(None)?.write()?;
    let empty_tree = repo.find_tree(empty_oid)?;
    assert!(empty_tree.is_empty());

    let filtered = repo.filter_by_predicate(&empty_tree, |_, _| true)?;
    assert!(filtered.is_empty());

    cleanup_test_repo(scratch_dir);
    Ok(())
}
737901}
0 commit comments