Skip to content

Commit f0f99d4

Browse files
committed
Very large optimization of the code, also refactored code, removed unused code
1 parent a550c06 commit f0f99d4

12 files changed

Lines changed: 2131 additions & 1840 deletions

File tree

Cargo.lock

Lines changed: 27 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -27,6 +27,7 @@ thiserror = "2.0.12"
2727
rayon = "1.10.0"
2828
encoding_rs = "0.8.35"
2929
wildmatch = "2.4.0"
30+
lru = "0.14.0"
3031

3132
[profile.release]
3233
codegen-units = 1

README.md

Lines changed: 14 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -74,9 +74,6 @@ collapsescanner path/to/file.jar --json
7474
# Run with 8 processing threads
7575
collapsescanner path/to/file.jar --threads 8
7676

77-
# Fast scan - stop after finding first suspicious item
78-
collapsescanner path/to/file.jar --fast
79-
8077
# Path filtering
8178
collapsescanner path/to/file.jar --exclude "assets/**" --exclude "*.log" --find "com/example/*"
8279

@@ -86,22 +83,20 @@ collapsescanner path/to/file.jar --ignore-suspicious path/to/ignored_keywords.tx
8683

8784
## 🔍 Command-line Options
8885

89-
| Option | Description |
90-
| --------------------- | ------------------------------------------------------------------------------------ |
91-
| `path` | Path to a JAR file, class file, or directory to scan |
92-
| `--mode` | Detection mode: `network`, `crypto`, `malicious`, `obfuscation`, or `all` (default) |
93-
| `--extract` | Extract all resources from JAR files |
94-
| `--strings` | Extract all strings from class files |
95-
| `--output` | Specify the output directory (default: ./extracted) |
96-
| `--json` | Export results in JSON format |
97-
| `-v, --verbose` | Enable verbose output (shows size/entropy, etc.) |
98-
| `--threads` | Number of threads to use for parallel processing (0 = automatic based on CPU cores) |
99-
| `--fast` | Fast scan mode - stops processing a JAR file after finding the first suspicious item |
100-
| `--max-findings` | Maximum number of findings before stopping (default: 50, 0 = no limit) |
101-
| `--exclude` | Exclude paths matching the wildcard pattern (can be used multiple times) |
102-
| `--find` | Only scan paths matching the wildcard pattern (can be used multiple times) |
103-
| `--ignore-suspicious` | Path to a .txt file with suspicious keywords to ignore (one per line) |
104-
| `--ignore-crypto` | Path to a .txt file with crypto keywords to ignore (one per line) |
86+
| Option | Description |
87+
| --------------------- | ----------------------------------------------------------------------------------- |
88+
| `path` | Path to a JAR file, class file, or directory to scan |
89+
| `--mode` | Detection mode: `network`, `crypto`, `malicious`, `obfuscation`, or `all` (default) |
90+
| `--extract` | Extract all resources from JAR files |
91+
| `--strings` | Extract all strings from class files |
92+
| `--output` | Specify the output directory (default: ./extracted) |
93+
| `--json` | Export results in JSON format |
94+
| `-v, --verbose` | Enable verbose output (shows size/entropy, etc.) |
95+
| `--threads` | Number of threads to use for parallel processing (0 = automatic based on CPU cores) |
96+
| `--exclude` | Exclude paths matching the wildcard pattern (can be used multiple times) |
97+
| `--find` | Only scan paths matching the wildcard pattern (can be used multiple times) |
98+
| `--ignore-suspicious` | Path to a .txt file with suspicious keywords to ignore (one per line) |
99+
| `--ignore-crypto` | Path to a .txt file with crypto keywords to ignore (one per line) |
105100

106101
## 🛡️ Detection Capabilities
107102

scripts/remapper.rs

Lines changed: 8 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -81,10 +81,14 @@ fn process_jar(input_path: &str, output_path: &str) -> ZipResult<()> {
8181
let mut zip_writer = ZipWriter::new(output_writer);
8282

8383
let pb = ProgressBar::new(num_entries as u64);
84-
pb.set_style(ProgressStyle::default_bar()
85-
.template("{spinner:.green} [{elapsed_precise}] [{bar:40.cyan/blue}] {pos}/{len} entries")
86-
.unwrap()
87-
.progress_chars("=>-"));
84+
pb.set_style(
85+
ProgressStyle::default_bar()
86+
.template(
87+
"{spinner:.green} [{elapsed_precise}] [{bar:40.cyan/blue}] {pos}/{len} entries",
88+
)
89+
.unwrap()
90+
.progress_chars("=>-"),
91+
);
8892

8993
println!("🔧 Building fixed JAR file...");
9094

src/database.rs

Lines changed: 28 additions & 28 deletions
Original file line number | Diff line number | Diff line change
@@ -2,42 +2,42 @@ use lazy_static::lazy_static;
22

33
lazy_static! {
44
pub static ref GOOD_LINKS: Vec<String> = vec![
5-
"minecraft.org".to_string(),
6-
"minecraft.net".to_string(),
7-
"netty.io".to_string(),
8-
"optifine.net".to_string(),
9-
"mojang.com".to_string(),
5+
"account.mojang.com".to_string(),
6+
"aka.ms".to_string(),
107
"apache.org".to_string(),
11-
"logging.apache.org".to_string(),
12-
"www.w3.org".to_string(),
13-
"tools.ietf.org".to_string(),
14-
"eclipse.org".to_string(),
15-
"www.openssl.org".to_string(),
16-
"sessionserver.mojang.com".to_string(),
17-
"authserver.mojang.com".to_string(),
188
"api.mojang.com".to_string(),
19-
"shader-tutorial.dev".to_string(),
20-
"s.optifine.net".to_string(),
21-
"snoop.minecraft.net".to_string(),
22-
"account.mojang.com".to_string(),
9+
"api.spiget.org".to_string(),
10+
"authserver.mojang.com".to_string(),
2311
"bugs.mojang.com".to_string(),
24-
"aka.ms".to_string(),
25-
"minotar.net".to_string(),
26-
"dominos.com".to_string(),
2712
"cabaletta/baritone".to_string(),
28-
"yaml.org".to_string(),
29-
"java.sun.org".to_string(),
13+
"ci.viaversion.com".to_string(),
3014
"com/viaversion/".to_string(),
31-
"lwjgl.org".to_string(),
32-
"dump.viaversion.com".to_string(),
3315
"docs.advntr.dev".to_string(),
16+
"dominos.com".to_string(),
17+
"dump.viaversion.com".to_string(),
18+
"eclipse.org".to_string(),
19+
"java.sun.org".to_string(),
3420
"jo0001.github.io".to_string(),
35-
"viaversion.com".to_string(),
36-
"ci.viaversion.com".to_string(),
37-
"paulscode/sound/".to_string(),
38-
"api.spiget.org".to_string(),
21+
"logging.apache.org".to_string(),
3922
"login.live.com".to_string(),
23+
"lwjgl.org".to_string(),
24+
"minecraft.net".to_string(),
25+
"minecraft.org".to_string(),
26+
"minotar.net".to_string(),
27+
"mojang.com".to_string(),
28+
"netty.io".to_string(),
29+
"optifine.net".to_string(),
30+
"paulscode/sound/".to_string(),
31+
"s.optifine.net".to_string(),
32+
"sessionserver.mojang.com".to_string(),
33+
"shader-tutorial.dev".to_string(),
34+
"snoop.minecraft.net".to_string(),
35+
"tools.ietf.org".to_string(),
36+
"viaversion.com".to_string(),
37+
"www.openssl.org".to_string(),
38+
"www.rfc-editor.org".to_string(),
4039
"www.slf4j.org".to_string(),
41-
"www.rfc-editor.org".to_string()
40+
"www.w3.org".to_string(),
41+
"yaml.org".to_string(),
4242
];
4343
}

src/detection.rs

Lines changed: 121 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,121 @@
1+
pub const NAME_LENGTH_THRESHOLD: usize = 100;
2+
pub const ENTROPY_THRESHOLD: f64 = 7.2;
3+
pub const SUSPICIOUS_CHAR_THRESHOLD: usize = 3;
4+
5+
pub const MIN_STRING_LENGTH: usize = 5;
6+
pub const MAX_PATTERN_CHECK_LENGTH: usize = 4096;
7+
pub const RESULT_CACHE_SIZE: usize = 2048;
8+
9+
/// Reports whether `s` contains any substring that hints at network activity.
///
/// The check is a plain, case-sensitive substring search for URL fragments,
/// common top-level domains, and a few private/loopback IPv4 prefixes.
pub fn contains_network_indicators(s: &str) -> bool {
    const NETWORK_MARKERS: [&str; 9] = [
        "http", "www.", "://", ".com", ".net", ".org", "192.168.", "10.0.", "127.0.0",
    ];

    NETWORK_MARKERS.iter().any(|&marker| s.contains(marker))
}
20+
21+
/// Reports whether `s` contains any substring associated with cryptography.
///
/// Matching is a case-sensitive substring search, so short markers such as
/// `"des"` or `"key"` also match inside longer words (e.g. `"description"`).
// NOTE(review): all markers are lowercase; presumably callers lowercase the
// input before calling — confirm against the call sites.
pub fn contains_crypto_indicators(s: &str) -> bool {
    const CRYPTO_MARKERS: [&str; 9] = [
        "aes", "rsa", "des", "sha", "md5", "crypt", "key", "hash", "password",
    ];

    CRYPTO_MARKERS.iter().any(|&marker| s.contains(marker))
}
32+
33+
/// Reports whether `s` contains any substring from the malicious-keyword list.
///
/// Matching is a case-sensitive substring search over a fixed set of markers.
// NOTE(review): all markers are lowercase; presumably callers lowercase the
// input before calling — confirm against the call sites.
pub fn contains_malicious_indicators(s: &str) -> bool {
    const MALICIOUS_MARKERS: [&str; 9] = [
        "backdoor", "exploit", "payload", "inject", "exec", "socket", "download", "jndi", "ldap",
    ];

    MALICIOUS_MARKERS.iter().any(|&marker| s.contains(marker))
}
44+
45+
/// Heuristically decides whether `name` looks like an obfuscated identifier.
///
/// A name is reported as obfuscated when any of the following holds:
/// * it is at most two bytes long and is not one of `of`, `to`, `at`, `id`
///   (this includes the empty string);
/// * it contains the same character three times in a row;
/// * it contains `$_`, `$$`, `III`, `lll` or `OOO`;
/// * it contains more than two `$` characters.
pub fn is_obfuscated_name(name: &str) -> bool {
    const SHORT_NAME_ALLOWLIST: [&str; 4] = ["of", "to", "at", "id"];
    const SUSPICIOUS_FRAGMENTS: [&str; 5] = ["$_", "$$", "III", "lll", "OOO"];

    // `len()` counts bytes, not characters.
    if name.len() <= 2 && !SHORT_NAME_ALLOWLIST.contains(&name) {
        return true;
    }

    // `windows(3)` yields nothing for fewer than three characters, so no
    // separate length guard is needed.
    let symbols: Vec<char> = name.chars().collect();
    let has_triple_run = symbols
        .windows(3)
        .any(|run| run[0] == run[1] && run[1] == run[2]);
    if has_triple_run {
        return true;
    }

    let dollar_count = name.matches('$').count();
    let has_fragment = SUSPICIOUS_FRAGMENTS
        .iter()
        .any(|&fragment| name.contains(fragment));

    has_fragment || dollar_count > 2
}
68+
69+
lazy_static::lazy_static! {
70+
71+
pub static ref SAFE_STRING_CACHE: std::sync::Mutex<std::collections::HashSet<String>> =
72+
std::sync::Mutex::new(std::collections::HashSet::with_capacity(1000));
73+
74+
75+
pub static ref OBFUSCATED_NAME_CACHE: std::sync::Mutex<std::collections::HashSet<String>> =
76+
std::sync::Mutex::new(std::collections::HashSet::with_capacity(500));
77+
}
78+
79+
pub fn is_cached_safe_string(s: &str) -> bool {
80+
if let Ok(cache) = SAFE_STRING_CACHE.lock() {
81+
return cache.contains(s);
82+
}
83+
false
84+
}
85+
86+
pub fn cache_safe_string(s: &str) -> bool {
87+
if let Ok(mut cache) = SAFE_STRING_CACHE.lock() {
88+
return cache.insert(s.to_string());
89+
}
90+
false
91+
}
92+
93+
pub fn should_analyze_string(s: &str) -> bool {
94+
if s.len() < MIN_STRING_LENGTH {
95+
return false;
96+
}
97+
98+
true
99+
}
100+
101+
/// Computes a 64-bit fingerprint of `data` using the standard `DefaultHasher`.
///
/// Inputs of at most 1024 bytes are hashed in full. Larger inputs are sampled:
/// the first 512 bytes, the 512 bytes centred on the midpoint, the last 512
/// bytes, and finally the total length are fed to the hasher. Bytes outside
/// those three windows do not influence the result.
pub fn calculate_detection_hash(data: &[u8]) -> u64 {
    use std::collections::hash_map::DefaultHasher;
    use std::hash::{Hash, Hasher};

    const SAMPLE: usize = 512;

    let total = data.len();
    let mut state = DefaultHasher::new();

    // Small inputs: hash everything.
    if total <= 2 * SAMPLE {
        data.hash(&mut state);
        return state.finish();
    }

    // Large inputs: hash head, middle and tail windows, then the length.
    let centre = total / 2;
    let samples: [&[u8]; 3] = [
        &data[..SAMPLE],
        &data[centre - SAMPLE / 2..centre + SAMPLE / 2],
        &data[total - SAMPLE..],
    ];
    for sample in samples.iter() {
        sample.hash(&mut state);
    }
    (total as u64).hash(&mut state);

    state.finish()
}

src/errors.rs

Lines changed: 21 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,21 @@
1+
use indicatif;
2+
use serde_json;
3+
use std::io;
4+
use thiserror::Error;
5+
use zip;
6+
7+
#[derive(Debug, Error)]
8+
pub enum ScanError {
9+
#[error("IO error: {0}")]
10+
IoError(#[from] io::Error),
11+
#[error("Zip error: {0}")]
12+
ZipError(#[from] zip::result::ZipError),
13+
#[error("Class parse error in '{path}': {msg}")]
14+
ClassParseError { path: String, msg: String },
15+
#[error("Unsupported file type: {0:?}")]
16+
UnsupportedFileType(Option<std::ffi::OsString>),
17+
#[error("JSON serialization/deserialization error: {0}")]
18+
JsonError(#[from] serde_json::Error),
19+
#[error("Template error: {0}")]
20+
TemplateError(#[from] indicatif::style::TemplateError),
21+
}

0 commit comments

Comments
 (0)