WebCloner.py
from bs4 import BeautifulSoup
import os
from urllib.request import urlretrieve
from sys import stderr
import urllib.request
import cssutils
import sys
import logging
import time
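# ANSI escape codes used to colorize the terminal output.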
Bl='\033[30m'
Re='\033[1;31m'
Gr='\033[1;32m'
Ye='\033[1;33m'
Blu='\033[1;34m'
Mage='\033[1;35m'
Cy='\033[1;36m'
Wh='\033[1;37m'
os.system('clear')
stderr.write(f"""{Gr}
{Gr} ██╗ ╔██ █████╗██████╗ {Re} ██████╗██╗ ██████╗ ███╗ ██╗█████╗██████╗
{Gr} ╚██╗ ██ ╔██╝ ██╔══╝██╔══██╗ {Re} ██╔════╝██║ ██╔═══██╗████╗ ██║██╔══╝██╔══██╗
{Gr} ╚██╗ ╔██╝╚██╗ ╔██╝ █████╗██████═╝{Wh}█████╗{Re}██║ ██║ ██║ ██║██╔██╗ ██║█████╗███████║
{Gr} ╚██══██╝ ╚██══██╝ ██╔══╝██╔══██╗{Wh}╚════╝{Re}██║ ██║ ██║ ██║██║╚██╗██║██╔══╝██║ ║██╗
{Gr} ╚████╝ ╚████╝ █████╗██████╔╝ {Re}╚██████╗█████╗╚██████╔╝██║ ╚████║█████╗██║ ╚██║
{Gr} ╚══╝ ╚══╝ ╚════╝╚═════╝ {Re} ╚═════╝╚════╝ ╚═════╝ ╚═╝ ╚═══╝╚════╝╚═╝ ╚═╝
{Wh} <----- {Gr}W E B {Re}C L O N E R {Wh}B Y {Gr}G G A M E S{Wh}----->
""")
URL = input(f"\n [ {Gr}+ {Wh}] SITE URL : {Re}")
def report(count, size, total):
    """Reporthook for urlretrieve: count = blocks transferred so far,
    size = block size in bytes, total = total file size (-1 if unknown)."""
    downloaded = count * size
    # Print roughly once per megabyte so large files do not flood the console.
    if downloaded and downloaded % 1000000 < size:
        print("Downloading {:,}/{:,} ...".format(downloaded, total))
time.sleep(2)
print(f"\n{Wh}[ {Gr}+ {Wh}] Connecting to the server")
time.sleep(2)
cssutils.log.setLevel(logging.CRITICAL)  # silence cssutils parser warnings
directory = ''
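# Install a browser-like User-Agent: some servers refuse requests that
# arrive with urllib's default 'Python-urllib' agent string.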
opener = urllib.request.build_opener()
opener.addheaders = [('User-Agent', 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11'),
('Accept', 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'),
('Connection', 'keep-alive')
]
urllib.request.install_opener(opener)
try:
    html_doc = urllib.request.urlopen(URL).read()
    print(f"\n{Wh}[ {Gr}+ {Wh}] {Gr}Connected to the server")
    time.sleep(2)
except Exception as e:
    print(f"\n{Wh}[ {Re}- {Wh}] {Re}Error: could not open URL ({e})")
    sys.exit()
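# Note: asset URLs below are built by plain concatenation (URL + path), which
# assumes the entered URL ends with '/'. A more robust alternative, shown only
# as a sketch, is urllib.parse.urljoin:
#     from urllib.parse import urljoin
#     urljoin('https://example.com/', 'css/main.css')
#     # -> 'https://example.com/css/main.css'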
try:
    soup = BeautifulSoup(html_doc, 'html.parser')
    with open('index.html', 'w') as f:
        f.write(str(soup))
    print(f"\n{Wh}[ {Gr}+ {Wh}] {Wh}Initializing the index")
    time.sleep(2)
    print(f"\n{Wh}[ {Gr}+ {Wh}] {Wh}Starting the clone")
    time.sleep(2)
    print(f"\n{Wh}[ {Gr}1 {Wh}] {Wh}Images")
    for img in soup.find_all('img'):
        try:
            # Prefer the lazy-loading 'data-src' attribute over plain 'src'.
            if img.get('data-src'):
                directory = img['data-src']
            elif img.get('src'):
                directory = img['src']
            else:
                continue
            # Skip images embedded inline as base64 data URIs.
            if "data:image" in directory:
                continue
            # Recreate the asset's directory tree locally before downloading.
            if os.path.dirname(directory) and not os.path.exists(os.path.dirname(directory)):
                os.makedirs(os.path.dirname(directory))
            testfile, headers = urlretrieve(URL + directory, directory, reporthook=report)
        except Exception as e:
            print(f"\n {Wh}[ {Re}- {Wh}] {Wh}Problem: ", e)
    print(f"\n {Wh}[ {Gr}+ {Wh}] {Gr}Images downloaded")
    time.sleep(2)
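    # The CSS step below re-parses each downloaded stylesheet with cssutils and
    # mirrors every url(...) it references (fonts, background images). The call
    # it relies on, illustrated:
    #     sheet = cssutils.parseString("body { background: url(assets/bg.png); }")
    #     list(cssutils.getUrls(sheet))  # -> ['assets/bg.png']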
    print(f"\n{Wh}[ {Gr}2 {Wh}] {Wh}CSS")
    for link_tag in soup.find_all('link'):
        try:
            directory = link_tag.get('href')
            if not directory or ".css" not in directory:
                continue
            # Skip stylesheets hosted on other servers.
            if "http" in directory:
                continue
            if "/" not in directory:
                print(f"\n {Wh}[ {Gr}DIR {Wh}] {Re}No directory")
            elif not os.path.exists(os.path.dirname(directory)):
                print(f"\n {Wh}[ {Gr}DIR {Wh}] {Wh}Creating the directory")
                os.makedirs(os.path.dirname(directory))
            testfile, headers = urlretrieve(URL + directory, directory, reporthook=report)
            # Font Awesome bundles its own assets; skip scanning it for URLs.
            if "fontawesome" in directory:
                continue
            # Collect every url(...) reference inside the downloaded stylesheet.
            urls = list(cssutils.getUrls(cssutils.parseFile(directory)))
            for link in urls:
                try:
                    if "http" in link or "data:image/" in link:
                        continue
                    # Remap '../'-relative references into the assets/ tree.
                    while "../" in link:
                        if "assets" in link:
                            link = link[3:]
                        else:
                            link = "assets/" + link[3:]
                    if "/" not in link:
                        print(f"\n {Wh}[ {Gr}DIR {Wh}] {Re}No directory")
                    elif not os.path.exists(os.path.dirname(link)):
                        print(f"\n {Wh}[ {Gr}DIR {Wh}] {Wh}Creating the directory")
                        os.makedirs(os.path.dirname(link))
                    testfile, headers = urlretrieve(URL + link, link, reporthook=report)
                except Exception as e:
                    print(f"\n {Wh}[ {Re}- {Wh}] {Wh}Problem: ", e)
        except Exception as e:
            print(f"\n {Wh}[ {Re}- {Wh}] {Wh}CSS problem: ", e)
    print(f"\n {Wh}[ {Gr}+ {Wh}] {Gr}CSS downloaded")
    time.sleep(2)
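    # Sketch (not applied above): os.makedirs(path, exist_ok=True) would fold
    # each exists-check + makedirs pair in these loops into a single call.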
    print(f"\n{Wh}[ {Gr}3 {Wh}] {Wh}JS")
    for script in soup.find_all('script'):
        try:
            if script.get('src'):
                directory = script['src']
            else:
                # Inline <script> blocks are already saved in index.html.
                continue
            # Skip scripts hosted on other servers.
            if "http" in directory:
                continue
            if os.path.dirname(directory) and not os.path.exists(os.path.dirname(directory)):
                print(f"\n {Wh}[ {Gr}DIR {Wh}] {Wh}Creating the directory")
                os.makedirs(os.path.dirname(directory))
            testfile, headers = urlretrieve(URL + directory, directory, reporthook=report)
        except Exception as e:
            print(f"\n {Wh}[ {Re}- {Wh}] {Wh}JS problem: ", e)
    print(f"\n {Wh}[ {Gr}+ {Wh}] {Gr}JS downloaded")
    print(f"\n{Wh}[ {Gr}+ {Wh}] {Gr}All done")
except Exception as e:
    print(f"\n {Wh}[ {Re}- {Wh}] {Wh}Problem: ", e)
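# Typical use (assumption, not documented in the source): run
# `python3 WebCloner.py`, then enter the site's root URL with a trailing
# slash (e.g. https://example.com/). The page is saved as index.html and its
# same-origin images, CSS, and JS are mirrored into matching local folders.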