forked from 0xKayala/ParamSpider
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathparamspider.py
More file actions
160 lines (133 loc) · 6.33 KB
/
paramspider.py
File metadata and controls
160 lines (133 loc) · 6.33 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
#!/usr/bin/env python3
"""
ParamSpider - Parameter miner for humans
A parameter discovery tool using web archives.
"""
from core import requester, extractor, save_it
from urllib.parse import unquote
from typing import List, Optional
import requests
import re
import argparse
import os
import sys
import time
# Timestamp captured when this module is loaded; main() subtracts it from the
# current time to report the total execution time at the end of a run.
start_time = time.time()
def validate_domain(domain: str) -> bool:
    """Return whether *domain* is a syntactically valid host name.

    A valid name is one or more dot-separated labels. Each label is 1-63
    characters long, consists of ASCII letters, digits and hyphens, and
    neither starts nor ends with a hyphen. The whole name may be at most
    253 characters long.

    Args:
        domain: Domain to validate.

    Returns:
        True if valid, False otherwise.
    """
    # 253 is the longest textual form of a DNS name; anything longer can
    # never resolve, so reject it before running the regex.
    if len(domain) > 253:
        return False

    label = r'[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?'
    domain_regex = rf'{label}(?:\.{label})*'

    # fullmatch (unlike match with a trailing "$") also rejects a trailing
    # newline, which would otherwise end up inside the request URL.
    return re.fullmatch(domain_regex, domain) is not None
def _build_cdx_url(domain: str, subs: str) -> str:
    """Return the Wayback Machine CDX query URL for *domain*.

    Subdomains are included (``*.`` prefix) unless *subs* equals ``"false"``
    in any letter case; every other value falls back to including them.
    """
    target = domain if subs.lower() == 'false' else f"*.{domain}"
    return (
        f"https://web.archive.org/cdx/search/cdx?url={target}/*"
        "&output=txt&fl=original&collapse=urlkey&page=/"
    )


def _parse_excluded_extensions(raw: Optional[str]) -> List[str]:
    """Turn a comma-separated extension list into dot-prefixed entries.

    Surrounding whitespace and any leading dots are removed and empty items
    are dropped, so ``"php, .aspx,"`` becomes ``[".php", ".aspx"]``.
    """
    if not raw:
        return []
    cleaned = (item.strip().lstrip(".") for item in raw.split(","))
    return ["." + item for item in cleaned if item]


def _output_location(output: Optional[str], domain: str) -> str:
    """Return the output path that is reported to the user.

    NOTE(review): this only mirrors where core.save_it is expected to write
    (``output/`` for bare file names) - confirm against save_it.save_func.
    """
    if not output:
        return f"output/{domain}.txt"
    return output if "/" in output else f"output/{output}"


def main() -> None:
    """Run the ParamSpider command-line workflow.

    Steps: print the banner, parse and validate the CLI arguments, query the
    Wayback Machine CDX API through ``requester.connector`` (retrying while
    it asks for a retry), extract parameterised URLs with
    ``extractor.param_extract``, save them with ``save_it.save_func`` and
    print a summary.

    Exits with status 1 when the domain or the retry count is invalid, or
    when saving the output raises ``IOError``. Returns early, without
    exiting, when no response could be retrieved.
    """
    # Delay to ensure NucleiFuzzer banner appears first
    time.sleep(2)  # Wait for NucleiFuzzer banner to complete output

    # The ASCII art contains backslashes and therefore has to stay a raw
    # string; the colour escapes are kept in ordinary strings because a raw
    # string would print "\u001b[36m" literally instead of switching colour.
    banner = (
        "\u001b[36m"
        r"""
___ _ __
/ _ \___ ________ ___ _ ___ ___ (_)__/ /__ ____
/ ___/ _ `/ __/ _ `/ ' \(_-</ _ \/ / _ / -_) __/
/_/ \_,_/_/ \_,_/_/_/_/___/ .__/_/\_,_/\__/_/
/_/
- coded with <3 by Devansh Batham
"""
        "\u001b[0m\n"
    )
    print(banner)

    # Initialize argument parser for CLI arguments
    parser = argparse.ArgumentParser(description='ParamSpider a parameter discovery suite')
    # Required arguments
    parser.add_argument('-d', '--domain', help='Domain name of the target [ex : hackerone.com]', required=True)
    # Optional arguments with defaults
    parser.add_argument('-s', '--subs', help='Set False for no subs [ex : --subs False ]', default='True')
    parser.add_argument('-l', '--level', help='For nested parameters [ex : --level high]')
    parser.add_argument('-e', '--exclude', help='extensions to exclude [ex --exclude php,aspx]')
    parser.add_argument('-o', '--output', help="Output file name [by default it is 'domain.txt']")
    parser.add_argument('-p', '--placeholder', help='The string to add as a placeholder after the parameter name.', default='FUZZ')
    parser.add_argument('-q', '--quiet', help='Do not print the results to the screen', action='store_true')
    parser.add_argument('-r', '--retries', help='Specify number of retries for 4xx and 5xx errors', default=3)
    args = parser.parse_args()

    # Validate domain input
    if not validate_domain(args.domain):
        print(f"\u001b[31;1m[ERROR] Invalid domain format: {args.domain}\u001b[0m")
        sys.exit(1)

    # Validate the retry count on its own so that only a bad --retries value
    # triggers this message (not an unrelated ValueError from the request).
    try:
        max_retries = int(args.retries)
    except ValueError:
        print(f"\u001b[31;1m[ERROR] Invalid retry count: {args.retries}. Must be a number.\u001b[0m")
        sys.exit(1)

    url = _build_cdx_url(args.domain, args.subs)

    # Fetch data from Wayback Machine with retry logic: one initial attempt
    # plus up to max_retries further attempts while the connector asks for
    # a retry. response starts as False so that a negative retry count,
    # which skips the loop entirely, is reported as a failed retrieval.
    response = False
    retry = True
    retries = 0
    while retry and retries <= max_retries:
        response, retry = requester.connector(url)
        retries += 1

    # Check if response was received
    if response is False:
        print("\u001b[31;1m[ERROR] Failed to retrieve data from web archive.\u001b[0m")
        return

    # Decode URL-encoded characters in the response
    response = unquote(response)

    # Process extensions to be excluded from results
    black_list = _parse_excluded_extensions(args.exclude)
    if black_list:
        print(f"\u001b[31m[!] URLs containing these extensions will be excluded from the results: {black_list}\u001b[0m\n")

    # Extract parameters from the response
    final_uris = extractor.param_extract(response, args.level, black_list, args.placeholder)

    if not final_uris:
        print("\u001b[33;1m[!] No parameters found for this domain.\u001b[0m")
    else:
        # Save results to file with error handling
        try:
            save_it.save_func(final_uris, args.output, args.domain)
        except IOError as e:
            print(f"\u001b[31;1m[ERROR] Failed to save output file: {e}\u001b[0m")
            sys.exit(1)

        # Display results to console unless quiet mode is enabled
        if not args.quiet:
            print("\u001b[32;1m")  # Green color
            print('\n'.join(final_uris))
            print("\u001b[0m")  # Reset color

        # Display summary statistics; the first attempt is not a retry
        print(f"\n\u001b[32m[+] Total number of retries: {retries-1}\u001b[31m")
        print(f"\u001b[32m[+] Total unique URLs found: {len(final_uris)}\u001b[31m")

        # Display output file location
        location = _output_location(args.output, args.domain)
        print(f"\u001b[32m[+] Output is saved here:\u001b[31m \u001b[36m{location}\u001b[31m")

    # Display total execution time, measured from module load (start_time).
    # Fixed two-decimal formatting replaces slicing the float's repr, which
    # could produce an empty string for short representations.
    elapsed = time.time() - start_time
    print(f"\n\u001b[31m[!] Total execution time: {elapsed:.2f}s\u001b[0m")


if __name__ == "__main__":
    main()