Skip to content

Commit e236770

Browse files
committed
class parse optimization, removed log2 values, renamed ignore suspicious & crypto arguments into ignore_keywords
1 parent de228ec commit e236770

7 files changed

Lines changed: 471 additions & 603 deletions

File tree

README.md

Lines changed: 28 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -48,55 +48,54 @@ Download the latest release from the [releases page](https://github.com/Collapse
4848

4949
```bash
5050
# Basic scan of a JAR file
51-
collapsescanner path/to/file.jar
51+
collapsescanner file.jar
5252

5353
# Scan a directory for all JAR and class files
54-
collapsescanner path/to/directory
54+
collapsescanner directory
5555

5656
# Different detection modes
57-
collapsescanner path/to/file.jar --mode network
58-
collapsescanner path/to/file.jar --mode crypto
59-
collapsescanner path/to/file.jar --mode malicious
60-
collapsescanner path/to/file.jar --mode obfuscation
57+
collapsescanner file.jar --mode network
58+
collapsescanner file.jar --mode crypto
59+
collapsescanner file.jar --mode malicious
60+
collapsescanner file.jar --mode obfuscation
6161

6262
# Extract all resources from the JAR
63-
collapsescanner path/to/file.jar --extract
63+
collapsescanner file.jar --extract
6464

6565
# Extract all strings from class files
66-
collapsescanner path/to/file.jar --strings
66+
collapsescanner file.jar --strings
6767

6868
# Specify output directory
69-
collapsescanner path/to/file.jar --extract --output path/to/output/dir
69+
collapsescanner file.jar --extract --output output/dir
7070

7171
# Export analysis to JSON
72-
collapsescanner path/to/file.jar --json
72+
collapsescanner file.jar --json
7373

7474
# Run with 8 processing threads
75-
collapsescanner path/to/file.jar --threads 8
75+
collapsescanner file.jar --threads 8
7676

7777
# Path filtering
78-
collapsescanner path/to/file.jar --exclude "assets/**" --exclude "*.log" --find "com/example/*"
78+
collapsescanner file.jar --exclude "assets/**" --exclude "*.log" --find "com/example/*"
7979

8080
# Skip specific keywords
81-
collapsescanner path/to/file.jar --ignore-suspicious path/to/ignored_keywords.txt --ignore-crypto path/to/ignored_crypto.txt
81+
collapsescanner file.jar --ignore-keywords ignore_keywords.txt
8282
```
8383

8484
## 🔍 Command-line Options
8585

86-
| Option | Description |
87-
| --------------------- | ----------------------------------------------------------------------------------- |
88-
| `path` | Path to a JAR file, class file, or directory to scan |
89-
| `--mode` | Detection mode: `network`, `crypto`, `malicious`, `obfuscation`, or `all` (default) |
90-
| `--extract` | Extract all resources from JAR files |
91-
| `--strings` | Extract all strings from class files |
92-
| `--output` | Specify the output directory (default: ./extracted) |
93-
| `--json` | Export results in JSON format |
94-
| `-v, --verbose` | Enable verbose output (shows size/entropy, etc.) |
95-
| `--threads` | Number of threads to use for parallel processing (0 = automatic based on CPU cores) |
96-
| `--exclude` | Exclude paths matching the wildcard pattern (can be used multiple times) |
97-
| `--find` | Only scan paths matching the wildcard pattern (can be used multiple times) |
98-
| `--ignore-suspicious` | Path to a .txt file with suspicious keywords to ignore (one per line) |
99-
| `--ignore-crypto` | Path to a .txt file with crypto keywords to ignore (one per line) |
86+
| Option | Description |
87+
| ------------------------ | ----------------------------------------------------------------------------------- |
88+
| `path` | Path to a JAR file, class file, or directory to scan |
89+
| `--mode` | Detection mode: `network`, `crypto`, `malicious`, `obfuscation`, or `all` (default) |
90+
| `--extract` | Extract all resources from JAR files |
91+
| `--strings` | Extract all strings from class files |
92+
| `--output` | Specify the output directory (default: ./extracted) |
93+
| `--json` | Export results in JSON format |
94+
| `-v, --verbose` | Enable verbose output (shows size/entropy, etc.) |
95+
| `--threads` | Number of threads to use for parallel processing (0 = automatic based on CPU cores) |
96+
| `--exclude` | Exclude paths matching the wildcard pattern (can be used multiple times) |
97+
| `--find` | Only scan paths matching the wildcard pattern (can be used multiple times) |
98+
| `--ignore-keywords` | Path to a .txt file with keywords to ignore (one per line) |
10099

101100
## 🛡️ Detection Capabilities
102101

@@ -119,7 +118,7 @@ CollapseScanner analyzes Java class files to find:
119118
- Suspicious character sequences
120119
- Unicode characters in identifiers
121120
- High entropy (potentially obfuscated) files
122-
- Custom JVM bytecode detection (0xDEAD magic bytes)
121+
- Custom JVM bytecode detection (unusual magic bytes)
123122

124123
## 🛠️ Tools
125124

@@ -131,7 +130,7 @@ CollapseScanner analyzes Java class files to find:
131130

132131
```bash
133132
# If running from the source directory
134-
cargo run --bin remapper path/to/input.jar path/to/output.jar
133+
cargo run --bin remapper input.jar output.jar
135134
```
136135

137136
#### Example output:

src/config.rs

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -32,16 +32,16 @@ impl SystemConfig {
3232

3333
let result_cache_size = match available_memory {
3434
mem if mem < LOW_MEMORY_THRESHOLD => DEFAULT_RESULT_CACHE_SIZE,
35-
mem if mem < MEDIUM_MEMORY_THRESHOLD => 16384, // 16K entries
36-
mem if mem < HIGH_MEMORY_THRESHOLD => 65536, // 64K entries
37-
_ => 131072, // 128K entries for high-memory systems
35+
mem if mem < MEDIUM_MEMORY_THRESHOLD => 16384, // 16K entries
36+
mem if mem < HIGH_MEMORY_THRESHOLD => 65536, // 64K entries
37+
_ => 131072, // 128K entries for high-memory systems
3838
};
3939

4040
let buffer_size = match available_memory {
4141
mem if mem < LOW_MEMORY_THRESHOLD => DEFAULT_BUFFER_SIZE,
42-
mem if mem < MEDIUM_MEMORY_THRESHOLD => 2 * 1024 * 1024, // 2 MB
43-
mem if mem < HIGH_MEMORY_THRESHOLD => 8 * 1024 * 1024, // 8 MB
44-
_ => 16 * 1024 * 1024, // 16 MB for high-memory systems
42+
mem if mem < MEDIUM_MEMORY_THRESHOLD => 2 * 1024 * 1024, // 2 MB
43+
mem if mem < HIGH_MEMORY_THRESHOLD => 8 * 1024 * 1024, // 8 MB
44+
_ => 16 * 1024 * 1024, // 16 MB for high-memory systems
4545
};
4646

4747
let safe_string_cache_capacity = match available_memory {

src/main.rs

Lines changed: 4 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -49,10 +49,7 @@ struct Args {
4949
mode: DetectionMode,
5050

5151
#[clap(long, value_parser)]
52-
ignore_suspicious: Option<PathBuf>,
53-
54-
#[clap(long, value_parser)]
55-
ignore_crypto: Option<PathBuf>,
52+
ignore_keywords: Option<PathBuf>,
5653

5754
#[clap(long, action = clap::ArgAction::Append, value_parser)]
5855
exclude: Vec<String>,
@@ -76,8 +73,7 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
7673
export_json: args.json,
7774
mode: args.mode,
7875
verbose: args.verbose,
79-
ignore_suspicious_file: args.ignore_suspicious,
80-
ignore_crypto_file: args.ignore_crypto,
76+
ignore_keywords_file: args.ignore_keywords,
8177
exclude_patterns: args.exclude,
8278
find_patterns: args.find,
8379
};
@@ -162,16 +158,9 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
162158
);
163159
}
164160

165-
if let Some(p) = &scanner.options.ignore_suspicious_file {
166-
println!(
167-
"{} Ignoring Suspicious: {}",
168-
"📄".yellow(),
169-
p.display().to_string().dimmed()
170-
);
171-
}
172-
if let Some(p) = &scanner.options.ignore_crypto_file {
161+
if let Some(p) = &scanner.options.ignore_keywords_file {
173162
println!(
174-
"{} Ignoring Crypto: {}",
163+
"{} Ignoring Keywords: {}",
175164
"📄".yellow(),
176165
p.display().to_string().dimmed()
177166
);

0 commit comments

Comments
 (0)