-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathuser.go
More file actions
115 lines (98 loc) · 2.87 KB
/
user.go
File metadata and controls
115 lines (98 loc) · 2.87 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
package speakerdeck
import (
"fmt"
"path"
"github.com/gocolly/colly"
"github.com/luxas/speakerdeck-api/scraper"
)
// ScrapeUser returns a User object for the given user handle. In opts, you
// may specify possible scraping extensions, or log levels.
//
// An error is returned if userHandle is empty, if the underlying scrape
// fails, or if the scraper yields a result that is not a *User.
func ScrapeUser(userHandle string, opts *scraper.ScrapeOptions) (*User, error) {
	if len(userHandle) == 0 {
		return nil, fmt.Errorf("userHandle is mandatory!")
	}

	// The page to scrape is addressed as <root URL>/<user handle>.
	fullURL := fmt.Sprintf("%s/%s", speakerdeckRootURL, userHandle)
	data, err := scraper.Scrape(fullURL, &UserScraper{}, opts)
	if err != nil {
		return nil, err
	}

	// Use the checked form of the type assertion so that an unexpected result
	// type surfaces as an error to the caller instead of a panic.
	user, ok := data.(*User)
	if !ok {
		return nil, fmt.Errorf("unexpected scrape result type %T, want *User", data)
	}
	return user, nil
}
// Compile-time check that *UserScraper satisfies scraper.Scraper.
var _ scraper.Scraper = (*UserScraper)(nil)

// UserScraper implements scraper.Scraper. The struct has no fields.
type UserScraper struct{}
// Name reports the identifier of this scraper, which is "UserScraper".
func (s *UserScraper) Name() string {
	const name = "UserScraper"
	return name
}
// Hooks lists the DOM paths of interest in the scraped web pages, each paired
// with the handler function that extracts data out of the matching elements.
func (s *UserScraper) Hooks() []scraper.Hook {
	var (
		author = scraper.Hook{
			DOMPath: ".sd-main > :first-child .row",
			Handler: onUserAuthor,
		}
		abstract = scraper.Hook{
			DOMPath: ".deck-description p",
			Handler: onUserAbstract,
		}
		talk = scraper.Hook{
			DOMPath: ".container a[href][title]",
			Handler: onUserTalkFound,
		}
		nextPage = scraper.Hook{
			DOMPath: ".next .page-link[rel='next']",
			Handler: onUserNextPage,
		}
	)

	// The order matches the original registration order.
	return []scraper.Hook{author, abstract, talk, nextPage}
}
// InitialData creates the value that is passed around between the handler
// functions registered in Hooks(). Every handler receives it as its second
// argument, and can cast it from interface{} to its real type in order to
// modify its data.
func (s *UserScraper) InitialData() interface{} {
	user := NewUser()
	return user
}
// onUserAuthor fills in the Author fields of the *User passed in data, using
// the matched element e: Link is the URL of the request that produced e, while
// Name, Handle and AvatarLink are read from child elements of e.
//
// The returned *string is always nil. An error is returned if data is not a
// non-nil *User.
func onUserAuthor(e *colly.HTMLElement, data interface{}) (*string, error) {
	// Checked assertion: report a wrong data type as an error rather than
	// panicking inside the scraper callback.
	u, ok := data.(*User)
	if !ok || u == nil {
		return nil, fmt.Errorf("onUserAuthor: unexpected data %T, want non-nil *User", data)
	}

	u.Author.Link = e.Request.URL.String()
	u.Author.Name = e.ChildText("h1.m-0")
	u.Author.Handle = e.ChildText("div.text-muted")
	// NOTE(review): presumably the img src is scheme-relative and httpsPrefix
	// completes it — confirm against the httpsPrefix definition.
	u.Author.AvatarLink = httpsPrefix + e.ChildAttr("img", "src")
	return nil, nil
}
// onUserAbstract stores the text of the matched element e as the Abstract of
// the *User passed in data. Each invocation overwrites the previous value.
//
// The returned *string is always nil. An error is returned if data is not a
// non-nil *User.
func onUserAbstract(e *colly.HTMLElement, data interface{}) (*string, error) {
	// Checked assertion: report a wrong data type as an error rather than
	// panicking inside the scraper callback.
	u, ok := data.(*User)
	if !ok || u == nil {
		return nil, fmt.Errorf("onUserAbstract: unexpected data %T, want non-nil *User", data)
	}

	u.Abstract = e.Text
	return nil, nil
}
// onUserTalkFound builds a TalkPreview from the matched link element e and
// appends it to the TalkPreviews of the *User passed in data. Title and Link
// come from the element's own "title" and "href" attributes, DataID from the
// "data-id" attribute of the nested div.deck-preview, and Stars and Views from
// the second and third children of .deck-preview-meta. ID is the last path
// segment of Link.
//
// The returned *string is always nil. An error is returned if data is not a
// non-nil *User, or if the star or view count cannot be parsed; in either
// case no preview is appended.
func onUserTalkFound(e *colly.HTMLElement, data interface{}) (*string, error) {
	// Checked assertion: report a wrong data type as an error rather than
	// panicking inside the scraper callback.
	u, ok := data.(*User)
	if !ok || u == nil {
		return nil, fmt.Errorf("onUserTalkFound: unexpected data %T, want non-nil *User", data)
	}

	stars, err := parseNumber(e.ChildText(".deck-preview-meta > :nth-child(2)"))
	if err != nil {
		return nil, err
	}
	views, err := parseNumber(e.ChildText(".deck-preview-meta > :nth-child(3)"))
	if err != nil {
		return nil, err
	}

	t := TalkPreview{
		Title:  e.Attr("title"),
		Link:   sdPrefix(e.Attr("href")),
		DataID: e.ChildAttr("div.deck-preview", "data-id"),
		Views:  views,
		Stars:  stars,
	}
	// The ID is derived from the final element of the talk link.
	t.ID = path.Base(t.Link)

	u.TalkPreviews = append(u.TalkPreviews, t)
	return nil, nil
}
// onUserNextPage inspects the "href" attribute of the matched pagination
// element e. When the attribute is non-empty, a pointer to the sdPrefix'ed
// link is returned; otherwise the result is nil. The data argument is unused
// and the error is always nil.
//
// NOTE(review): the returned URL is presumably what the scraper visits next —
// confirm against the scraper package.
func onUserNextPage(e *colly.HTMLElement, _ interface{}) (*string, error) {
	link := e.Attr("href")
	if link == "" {
		return nil, nil
	}

	next := sdPrefix(link)
	return &next, nil
}