-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathgui_parser.py
More file actions
87 lines (68 loc) · 3.04 KB
/
gui_parser.py
File metadata and controls
87 lines (68 loc) · 3.04 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
import html
import re
import tkinter as tk
from tkinter import ttk, scrolledtext, messagebox

import requests
class ModernRegexGui:
    """Tkinter window that downloads a web page and lists the links found in it.

    The window contains a URL entry with a "Fetch Links" button, a scrollable
    text area that receives the results, and a status bar along the bottom.
    Links are located with a regular expression, not a real HTML parser, so
    unusual markup (for example unquoted ``href`` values) is not recognised.
    """

    # Matches an <a ... href="...">...</a> element. The href value may be
    # wrapped in double or single quotes, and DOTALL lets the anchor text span
    # several lines of the page source.
    _LINK_PATTERN = re.compile(
        r'<a\s+(?:[^>]*?\s+)?href=(?P<quote>["\'])(?P<href>.*?)(?P=quote)[^>]*>'
        r'(?P<text>.*?)</a>',
        re.IGNORECASE | re.DOTALL,
    )
    # Matches any tag nested inside the anchor text (e.g. <span>, <img>).
    _TAG_PATTERN = re.compile(r'<[^>]+>')
    # Placeholder shown for anchors that contain no visible text.
    _NO_TEXT = "[No Text]"

    def __init__(self, root):
        """Build all widgets inside *root*.

        Args:
            root: The top-level Tk window hosting the UI; its title, size and
                background colour are set here.
        """
        self.root = root
        self.root.title("Web Link Inspector")
        self.root.geometry("800x600")
        self.root.configure(bg="#f0f0f0")

        # Define styles
        style = ttk.Style()
        style.configure("TButton", font=("Segoe UI", 10))
        style.configure(
            "Header.TLabel",
            font=("Segoe UI", 14, "bold"),
            background="#f0f0f0",
        )

        # Main layout container
        main_frame = ttk.Frame(root, padding="20")
        main_frame.pack(fill=tk.BOTH, expand=True)

        # Header
        ttk.Label(
            main_frame, text="URL Link Extractor", style="Header.TLabel"
        ).pack(pady=(0, 15))

        # Input area: URL entry on the left, fetch button on the right
        input_frame = ttk.Frame(main_frame)
        input_frame.pack(fill=tk.X, pady=5)

        self.url_entry = ttk.Entry(input_frame, font=("Segoe UI", 11))
        self.url_entry.insert(0, "https://www.wikipedia.org")
        self.url_entry.pack(side=tk.LEFT, fill=tk.X, expand=True, padx=(0, 10))

        self.btn_fetch = ttk.Button(
            input_frame, text="Fetch Links", command=self.run_parser
        )
        self.btn_fetch.pack(side=tk.LEFT)

        # Results area
        self.result_area = scrolledtext.ScrolledText(
            main_frame,
            font=("Consolas", 10),
            bg="#ffffff",
            fg="#333333",
            padx=10,
            pady=10,
        )
        self.result_area.pack(fill=tk.BOTH, expand=True, pady=15)

        # Status bar
        self.status_var = tk.StringVar(value="Ready")
        status_bar = ttk.Label(
            root, textvariable=self.status_var, relief=tk.SUNKEN, anchor=tk.W
        )
        status_bar.pack(side=tk.BOTTOM, fill=tk.X)

    @classmethod
    def _extract_links(cls, page_source):
        """Return ``(href, text)`` pairs for every anchor in *page_source*.

        Nested tags are removed from the anchor text, HTML entities are decoded
        in both the text and the URL, and runs of whitespace in the text are
        collapsed to single spaces. Anchors with no visible text get the
        ``[No Text]`` placeholder.

        Args:
            page_source: HTML markup as a string.

        Returns:
            A list of ``(href, text)`` tuples in document order.
        """
        links = []
        for match in cls._LINK_PATTERN.finditer(page_source):
            href = html.unescape(match.group("href")).strip()
            # Strip tags before decoding entities so that an escaped "&lt;b&gt;"
            # survives as literal text instead of being removed as a tag.
            text = cls._TAG_PATTERN.sub("", match.group("text"))
            text = " ".join(html.unescape(text).split())
            links.append((href, text or cls._NO_TEXT))
        return links

    @staticmethod
    def _format_links(links):
        """Render ``(href, text)`` pairs as the text block shown in the results area."""
        return "".join(
            f"● {text:<40} \n URL: {href}\n\n" for href, text in links
        )

    def run_parser(self):
        """Fetch the URL from the entry box and list every link on the page.

        Clears the results area, downloads the page, extracts its links and
        writes them to the results area, updating the status bar along the
        way. A blank URL produces a warning dialog; request failures and any
        other error produce an error dialog. Nothing is raised to the caller.

        NOTE: the request is made synchronously from the button callback, so
        the window does not process events while the download is in flight.
        """
        url = self.url_entry.get().strip()
        self.result_area.delete("1.0", tk.END)

        if not url:
            self.status_var.set("Please enter a URL.")
            messagebox.showwarning("Missing URL", "Please enter a URL to inspect.")
            return

        self.status_var.set(f"Connecting to {url}...")
        # Redraw now so the status text is visible before the blocking request.
        self.root.update_idletasks()

        try:
            headers = {'User-Agent': 'Mozilla/5.0'}
            response = requests.get(url, headers=headers, timeout=10)
            response.raise_for_status()
            # Decode using the encoding detected from the body. Done after the
            # status check so detection is not run on error responses.
            response.encoding = response.apparent_encoding
            links = self._extract_links(response.text)
        except requests.RequestException as exc:
            self.status_var.set("Error occurred.")
            messagebox.showerror("Connection Error", str(exc))
            return
        except Exception as exc:
            # UI boundary: report anything unexpected instead of leaving the
            # status bar stuck on "Connecting...".
            self.status_var.set("Error occurred.")
            messagebox.showerror("Unexpected Error", f"{type(exc).__name__}: {exc}")
            return

        # One insert for the whole listing instead of one widget call per link.
        self.result_area.insert(tk.END, self._format_links(links))
        self.status_var.set(f"Done! Found {len(links)} links.")
if __name__ == "__main__":
    # Script entry point: create the top-level Tk window, attach the link
    # inspector UI to it, and hand control to the Tk event loop until the
    # window is closed.
    window = tk.Tk()
    gui = ModernRegexGui(window)
    window.mainloop()