Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 10 additions & 3 deletions htmlindex/attributes.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,17 +32,21 @@ const (
BackgroundAttribute = "background"
HrefAttribute = "href"

DataSrcAttribute = "data-src"
SrcAttribute = "src"
DataSrcAttribute = "data-src"
SrcAttribute = "src"
DataImageAttribute = "data-image"

DataSrcSetAttribute = "data-srcset"
SrcSetAttribute = "srcset"

DataUrlAttribute = "data-url"
)

// nolint: revive
const (
ATag = "a"
BodyTag = "body"
DivTag = "div"
ImgTag = "img"
LinkTag = "link"
ScriptTag = "script"
Expand All @@ -57,8 +61,11 @@ var Nodes = map[string]Node{
BodyTag: {
Attributes: []string{BackgroundAttribute},
},
DivTag: {
Attributes: []string{DataUrlAttribute},
},
ImgTag: {
Attributes: []string{SrcAttribute, DataSrcAttribute, SrcSetAttribute, DataSrcSetAttribute},
Attributes: []string{SrcAttribute, DataSrcAttribute, SrcSetAttribute, DataSrcSetAttribute, DataImageAttribute},
parser: srcSetValueSplitter,
},
LinkTag: {
Expand Down
5 changes: 4 additions & 1 deletion htmlindex/htmlindex.go
Original file line number Diff line number Diff line change
Expand Up @@ -165,8 +165,11 @@ func srcSetValueSplitter(data nodeAttributeParserData) ([]string, bool) {

for i, value := range values {
value = strings.TrimSpace(value)
// remove extra query param arguments that sometimes remain attached
value = strings.Split(value, "?")[0]
// remove the width in pixels after the url
values[i], _, _ = strings.Cut(value, " ")
value, _, _ = strings.Cut(value, " ")
values[i] = value
}

return values, true
Expand Down
1 change: 1 addition & 0 deletions scraper/download.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ var tagsWithReferences = []string{
htmlindex.ScriptTag,
htmlindex.BodyTag,
htmlindex.StyleTag,
htmlindex.DivTag,
}

func (s *Scraper) downloadReferences(ctx context.Context, index *htmlindex.Index) error {
Expand Down
Loading