-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathgoop-javascript.go
More file actions
198 lines (161 loc) · 4.84 KB
/
goop-javascript.go
File metadata and controls
198 lines (161 loc) · 4.84 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
package goop
import (
"context"
"fmt"
"time"
"github.com/chromedp/chromedp"
)
// JSOptions defines configuration for JavaScript rendering. A pointer to a
// JSOptions value is accepted by RenderWithJS; DefaultJSOptions holds the
// package-level defaults.
type JSOptions struct {
	Timeout         time.Duration // Maximum time to wait for the page load; applied as a context deadline
	WaitForSelector string        // CSS selector to wait for (empty in DefaultJSOptions)
	WaitForNetwork  bool          // Wait for network to be idle
	Headless        bool          // Run browser in headless mode
	UserAgent       string        // Custom user agent string
	ViewportWidth   int           // Browser viewport width (presumably pixels; TODO confirm)
	ViewportHeight  int           // Browser viewport height (presumably pixels; TODO confirm)
	DisableImages   bool          // Disable image loading for performance
}
// DefaultJSOptions is the package-level fallback configuration. RenderWithJS
// uses it whenever it is called with nil options, and SetJSOptions /
// GetJSOptions replace and read it.
var DefaultJSOptions = JSOptions{
	// Browser setup.
	Headless:       true,
	UserAgent:      "Goop/1.0 (Web Scraper)",
	ViewportWidth:  1920,
	ViewportHeight: 1080,
	DisableImages:  false,

	// Waiting behaviour.
	Timeout:         30 * time.Second,
	WaitForSelector: "",
	WaitForNetwork:  true,
}
// RenderWithJS loads url in a Chrome instance driven by chromedp, lets the
// page's JavaScript run, and parses the resulting document with HTMLParse.
//
// Parameters:
//   - url: the address to navigate to.
//   - options: rendering configuration; nil selects DefaultJSOptions.
//
// The following options are honoured:
//   - Timeout bounds the whole operation. A non-positive value falls back to
//     30 seconds instead of producing an already-expired context.
//   - WaitForSelector, when non-empty, must become visible before the HTML is
//     captured (in addition to the always-applied wait on "body").
//   - UserAgent, when non-empty, overrides the browser's user agent.
//   - ViewportWidth/ViewportHeight, when both positive, set the window size.
//   - DisableImages turns off image loading.
//
// NOTE(review): Headless is deliberately not consulted. Its zero value (false)
// would switch every caller that builds a partial JSOptions literal to a
// visible browser, so chromedp's default allocator options stay in force.
// WaitForNetwork is also not acted on; no network-idle wait is implemented.
//
// Returns the parsed document and a nil error on success. On failure the
// returned Root carries a wrapped error in its Error field (unwrappable with
// errors.Is / errors.As) and the second result is the underlying error.
func RenderWithJS(url string, options *JSOptions) (Root, error) {
	if options == nil {
		options = &DefaultJSOptions
	}

	timer := startTimer("JS Render: "+url, DebugVerbose)
	defer timer.finish()

	// Start from chromedp's defaults and layer the caller's settings on top.
	// Slicing the array gives cap == len, so append copies instead of
	// mutating the shared defaults.
	allocOpts := append(chromedp.DefaultExecAllocatorOptions[:])
	if options.UserAgent != "" {
		allocOpts = append(allocOpts, chromedp.UserAgent(options.UserAgent))
	}
	if options.ViewportWidth > 0 && options.ViewportHeight > 0 {
		allocOpts = append(allocOpts, chromedp.WindowSize(options.ViewportWidth, options.ViewportHeight))
	}
	if options.DisableImages {
		allocOpts = append(allocOpts, chromedp.Flag("blink-settings", "imagesEnabled=false"))
	}

	allocCtx, cancelAlloc := chromedp.NewExecAllocator(context.Background(), allocOpts...)
	defer cancelAlloc()

	// Create a new Chrome context on top of the configured allocator.
	ctx, cancelBrowser := chromedp.NewContext(allocCtx)
	defer cancelBrowser()

	// A zero or negative timeout would expire immediately; use a sane fallback.
	timeout := options.Timeout
	if timeout <= 0 {
		timeout = 30 * time.Second
	}
	ctx, cancelTimeout := context.WithTimeout(ctx, timeout)
	defer cancelTimeout()

	var htmlContent string

	actions := []chromedp.Action{
		chromedp.Navigate(url),
		chromedp.WaitVisible("body", chromedp.ByQuery),
	}
	if options.WaitForSelector != "" {
		actions = append(actions, chromedp.WaitVisible(options.WaitForSelector, chromedp.ByQuery))
	}
	actions = append(actions, chromedp.OuterHTML("html", &htmlContent))

	if err := chromedp.Run(ctx, actions...); err != nil {
		return Root{Error: fmt.Errorf("failed to render with JS: %w", err)}, err
	}

	// Parse the HTML content produced by the browser.
	doc := HTMLParse(htmlContent)
	if doc.Error != nil {
		return Root{Error: fmt.Errorf("failed to parse JS-rendered HTML: %w", doc.Error)}, doc.Error
	}

	debugLog(DebugVerbose, "JS rendering completed for %s (%d chars)", url, len(htmlContent))
	return doc, nil
}
// WaitForElement starts a new chromedp browser context and blocks until an
// element matching selector is visible or timeout elapses.
//
// If r already carries an error, that error is returned unchanged and no
// browser is started. On failure the underlying error is wrapped with %w, so
// callers can inspect the cause with errors.Is / errors.As.
//
// NOTE(review): the browser context created here is brand new and is never
// navigated by this method, so the wait does not run against the page that
// produced r. Confirm whether that is intended.
func (r Root) WaitForElement(selector string, timeout time.Duration) error {
	if r.Error != nil {
		return r.Error
	}

	ctx, cancel := context.WithTimeout(context.Background(), timeout)
	defer cancel()

	// Create a new Chrome context that inherits the deadline.
	chromeCtx, chromeCancel := chromedp.NewContext(ctx)
	defer chromeCancel()

	if err := chromedp.Run(chromeCtx,
		chromedp.WaitVisible(selector, chromedp.ByQuery),
	); err != nil {
		return fmt.Errorf("element not found within timeout: %w", err)
	}
	return nil
}
// WaitForNetworkIdle starts a new chromedp browser context and blocks until
// the "body" element is visible or timeout elapses.
//
// If r already carries an error, that error is returned unchanged and no
// browser is started. On failure the underlying error is wrapped with %w, so
// callers can inspect the cause with errors.Is / errors.As.
//
// NOTE(review): despite its name, this method does not observe network
// activity; body visibility is the only condition checked. The browser
// context is also brand new and never navigated here, so the wait does not
// run against the page that produced r. Confirm whether that is intended.
func (r Root) WaitForNetworkIdle(timeout time.Duration) error {
	if r.Error != nil {
		return r.Error
	}

	ctx, cancel := context.WithTimeout(context.Background(), timeout)
	defer cancel()

	// Create a new Chrome context that inherits the deadline.
	chromeCtx, chromeCancel := chromedp.NewContext(ctx)
	defer chromeCancel()

	if err := chromedp.Run(chromeCtx,
		chromedp.WaitVisible("body", chromedp.ByQuery),
	); err != nil {
		return fmt.Errorf("network did not become idle within timeout: %w", err)
	}
	return nil
}
// ExecuteJS starts a new chromedp browser context, evaluates script in it and
// returns the value chromedp decodes from the evaluation.
//
// If r already carries an error, that error is returned unchanged and no
// browser is started. On failure the underlying error is wrapped with %w, so
// callers can inspect the cause with errors.Is / errors.As.
//
// NOTE(review): the browser context created here is brand new and is never
// navigated by this method, so script does not run inside the page that
// produced r. No deadline is applied either. Confirm whether that is intended.
func (r Root) ExecuteJS(script string) (interface{}, error) {
	if r.Error != nil {
		return nil, r.Error
	}

	// Create a new Chrome context.
	ctx, cancel := chromedp.NewContext(context.Background())
	defer cancel()

	var result interface{}
	if err := chromedp.Run(ctx,
		chromedp.Evaluate(script, &result),
	); err != nil {
		return nil, fmt.Errorf("failed to execute JavaScript: %w", err)
	}
	return result, nil
}
// ScrollToElement starts a new chromedp browser context and asks it to scroll
// the element matching selector into view.
//
// If r already carries an error, that error is returned unchanged and no
// browser is started. The operation is bounded by DefaultJSOptions.Timeout
// when that value is positive; without a deadline the selector query could
// block indefinitely when nothing matches. On failure the underlying error is
// wrapped with %w, so callers can inspect the cause with errors.Is /
// errors.As.
//
// NOTE(review): the browser context created here is brand new and is never
// navigated by this method, so the scroll does not run against the page that
// produced r. Confirm whether that is intended.
func (r Root) ScrollToElement(selector string) error {
	if r.Error != nil {
		return r.Error
	}

	// Create a new Chrome context.
	ctx, cancel := chromedp.NewContext(context.Background())
	defer cancel()

	// Bound the wait so a selector that never matches cannot hang the caller.
	if limit := DefaultJSOptions.Timeout; limit > 0 {
		var cancelTimeout context.CancelFunc
		ctx, cancelTimeout = context.WithTimeout(ctx, limit)
		defer cancelTimeout()
	}

	if err := chromedp.Run(ctx,
		chromedp.ScrollIntoView(selector, chromedp.ByQuery),
	); err != nil {
		return fmt.Errorf("failed to scroll to element: %w", err)
	}
	return nil
}
// TakeScreenshot starts a new chromedp browser context, captures a screenshot
// in it and returns the image bytes produced by chromedp.
//
// If r already carries an error, that error is returned unchanged and no
// browser is started. On failure the underlying error is wrapped with %w, so
// callers can inspect the cause with errors.Is / errors.As.
//
// NOTE(review): the browser context created here is brand new and is never
// navigated by this method, so the capture is not of the page that produced
// r. No deadline is applied either. Confirm whether that is intended.
func (r Root) TakeScreenshot() ([]byte, error) {
	if r.Error != nil {
		return nil, r.Error
	}

	// Create a new Chrome context.
	ctx, cancel := chromedp.NewContext(context.Background())
	defer cancel()

	var screenshot []byte
	if err := chromedp.Run(ctx,
		chromedp.CaptureScreenshot(&screenshot),
	); err != nil {
		return nil, fmt.Errorf("failed to capture screenshot: %w", err)
	}
	return screenshot, nil
}
// SetJSOptions replaces the package-level DefaultJSOptions with options.
// The value is copied; no synchronization is performed here.
func SetJSOptions(options JSOptions) {
	defaults := &DefaultJSOptions
	*defaults = options
}
// GetJSOptions returns a copy of the package-level DefaultJSOptions as it is
// at the time of the call.
func GetJSOptions() JSOptions {
	snapshot := DefaultJSOptions
	return snapshot
}
// IsJSAvailable reports whether a browser can actually be started for
// JavaScript rendering.
//
// chromedp.NewContext only builds a context and always returns a non-nil
// value, so checking that result says nothing about availability. The first
// chromedp.Run on a context is what allocates the browser, so Run is called
// with no actions, under a deadline, and success is reported only when it
// returns without error.
func IsJSAvailable() bool {
	// Upper bound on how long the probe may wait for the browser to start.
	const probeTimeout = 10 * time.Second

	ctx, cancel := chromedp.NewContext(context.Background())
	defer cancel()

	ctx, cancelTimeout := context.WithTimeout(ctx, probeTimeout)
	defer cancelTimeout()

	// Running with no actions just starts the browser and its first tab.
	return chromedp.Run(ctx) == nil
}