ParamSpider/paramspider.py at main · VIPHACKER100/ParamSpider · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
#!/usr/bin/env python3
"""
ParamSpider - Parameter miner for humans
A parameter discovery tool using web archives.
"""

from core import requester, extractor, save_it
from urllib.parse import unquote
from typing import List, Optional
import requests
import re
import argparse
import os
import sys
import time

# Wall-clock reference captured when the module is loaded; main() subtracts
# it from the current time to report the total execution time.
start_time = time.time()


def validate_domain(domain: str) -> bool:
    """
    Validate domain format.

    A valid domain is one or more dot-separated labels. Each label is 1-63
    characters long, consists of ASCII letters, digits and hyphens, and
    neither starts nor ends with a hyphen. The whole name may be at most
    253 characters long.

    Args:
        domain: Domain to validate

    Returns:
        True if valid, False otherwise
    """
    # Upper bound on the textual length of a DNS name (RFC 1035 size limits).
    max_domain_length = 253
    if len(domain) > max_domain_length:
        return False

    domain_regex = r'^[a-zA-Z0-9]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(\.[a-zA-Z0-9]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*$'
    # fullmatch rather than match: with match, the trailing `$` also succeeds
    # just before a final newline, so "example.com\n" would be accepted and
    # the newline would end up inside the request URL.
    return re.fullmatch(domain_regex, domain) is not None


def main() -> None:
    """
    Run the ParamSpider command-line workflow.

    Steps, in order: print the banner, parse the CLI arguments, validate the
    target domain, query the Wayback Machine CDX API through
    ``requester.connector`` (retrying while it asks for a retry), extract
    parameterised URLs with ``extractor.param_extract``, save them with
    ``save_it.save_func`` and print a summary.

    Exits with status 1 when the domain is malformed, when the retry count
    is not an integer, or when saving the output raises ``IOError``.
    Returns early (without saving) when no response could be retrieved.
    """
    # Delay to ensure NucleiFuzzer banner appears first
    time.sleep(2)  # Wait for NucleiFuzzer banner to complete output

    # Display the ParamSpider banner. The art must stay a raw string because
    # it is full of backslashes, but escape sequences are not interpreted in
    # raw strings, so the colour codes are added from regular string literals.
    banner_art = r"""


            ___                               _    __
           / _ \___ ________ ___ _  ___ ___  (_)__/ /__ ____
          / ___/ _ `/ __/ _ `/  ' \(_-</ _ \/ / _  / -_) __/
         /_/   \_,_/_/  \_,_/_/_/_/___/ .__/_/\_,_/\__/_/
                                     /_/

                               - coded with <3 by Devansh Batham
       """
    print("\u001b[36m" + banner_art + "\u001b[0m\n")

    # Initialize argument parser for CLI arguments
    parser = argparse.ArgumentParser(description='ParamSpider a parameter discovery suite')
    # Required arguments
    parser.add_argument('-d', '--domain', help='Domain name of the target [ex : hackerone.com]', required=True)
    # Optional arguments with defaults
    parser.add_argument('-s', '--subs', help='Set False for no subs [ex : --subs False ]', default='True')
    parser.add_argument('-l', '--level', help='For nested parameters [ex : --level high]')
    parser.add_argument('-e', '--exclude', help='extensions to exclude [ex --exclude php,aspx]')
    parser.add_argument('-o', '--output', help='Output file name [by default it is \'domain.txt\']')
    parser.add_argument('-p', '--placeholder', help='The string to add as a placeholder after the parameter name.', default='FUZZ')
    parser.add_argument('-q', '--quiet', help='Do not print the results to the screen', action='store_true')
    parser.add_argument('-r', '--retries', help='Specify number of retries for 4xx and 5xx errors', default=3)
    args = parser.parse_args()

    # Validate domain input
    if not validate_domain(args.domain):
        print(f"\u001b[31;1m[ERROR] Invalid domain format: {args.domain}\u001b[0m")
        sys.exit(1)

    # Build the Wayback Machine CDX API URL. A leading "*." includes
    # subdomains; only an explicit "false" (any case) turns that off, every
    # other value keeps the default of including them.
    include_subs = args.subs.lower() != 'false'
    target = f"*.{args.domain}" if include_subs else args.domain
    url = f"https://web.archive.org/cdx/search/cdx?url={target}/*&output=txt&fl=original&collapse=urlkey&page=/"

    # Parse the retry count up front so that a non-numeric value is reported
    # cleanly instead of escaping as an unhandled ValueError. Negative values
    # are clamped to 0 so that at least one request is always attempted.
    try:
        max_retries = max(0, int(args.retries))
    except ValueError:
        print(f"\u001b[31;1m[ERROR] Invalid retry count: {args.retries}. Must be a number.\u001b[0m")
        sys.exit(1)

    # Fetch data from the Wayback Machine: one initial attempt plus up to
    # max_retries further attempts while the connector requests a retry.
    response = False
    retry = True
    retries = 0
    while retry and retries <= max_retries:
        response, retry = requester.connector(url)
        retries += 1

    # Check if response was received
    if response is False:
        print("\u001b[31;1m[ERROR] Failed to retrieve data from web archive.\u001b[0m")
        return
    # Decode URL-encoded characters in the response
    response = unquote(response)

    # Process extensions to be excluded from results. Each entry gets exactly
    # one leading dot; blank entries (e.g. from "php," or ",") are dropped so
    # they cannot turn into a bare "." entry.
    black_list = []
    if args.exclude:
        cleaned = (ext.strip().lstrip(".") for ext in args.exclude.split(","))
        black_list = ["." + ext for ext in cleaned if ext]
    if black_list:
        print(f"\u001b[31m[!] URLs containing these extensions will be excluded from the results: {black_list}\u001b[0m\n")

    # Extract parameters from the response
    final_uris = extractor.param_extract(response, args.level, black_list, args.placeholder)

    # Check if parameters were found and save them
    saved = bool(final_uris)
    if not saved:
        print("\u001b[33;1m[!] No parameters found for this domain.\u001b[0m")
        # Normalise an empty/None result so the summary below can use it.
        final_uris = final_uris or []
    else:
        # Save results to file with error handling
        try:
            save_it.save_func(final_uris, args.output, args.domain)
        except IOError as e:
            print(f"\u001b[31;1m[ERROR] Failed to save output file: {e}\u001b[0m")
            sys.exit(1)

    # Display results to console unless quiet mode is enabled
    if not args.quiet:
        print("\u001b[32;1m")  # Green color
        print('\n'.join(final_uris))
        print("\u001b[0m")  # Reset color

    # Display summary statistics (the first attempt is not a retry)
    print(f"\n\u001b[32m[+] Total number of retries: {retries-1}\u001b[31m")
    print(f"\u001b[32m[+] Total unique URLs found: {len(final_uris)}\u001b[31m")

    # Display output file location, but only when a file was actually written
    if saved:
        if args.output:
            if "/" in args.output:
                # Absolute or relative path provided
                print(f"\u001b[32m[+] Output is saved here:\u001b[31m \u001b[36m{args.output}\u001b[31m")
            else:
                # Filename only - save to output directory
                print(f"\u001b[32m[+] Output is saved here:\u001b[31m \u001b[36moutput/{args.output}\u001b[31m")
        else:
            # Default: save to output/{domain}.txt
            print(f"\u001b[32m[+] Output is saved here:\u001b[31m \u001b[36moutput/{args.domain}.txt\u001b[31m")

    # Display total execution time, measured from module load. Formatting the
    # float directly avoids slicing its string form, which breaks whenever the
    # representation has fewer digits than the slice assumes.
    elapsed = time.time() - start_time
    print(f"\n\u001b[31m[!] Total execution time: {elapsed:.4f}s\u001b[0m")


# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()