@@ -16,6 +16,7 @@ import (
// Record is one entry collected by processCSV: a page and its integer measure
// value, plus the rank that entry held in the full collected set.
1616type Record struct {
 // Page is the page string read from the CSV; the --contains filter matches
 // substrings against it.
1717 Page string
 // MeasureValues is the integer value used as the sort key (highest first)
 // when ranks are assigned.
1818 MeasureValues int
 // Rank is the 1-based position after sorting all collected records by
 // MeasureValues in descending order. It is assigned before the --contains
 // filter runs, so a filtered record keeps its position in the full list.
 // NOTE(review): the ordering comes from sort.Slice, which is not stable, so
 // records with equal MeasureValues may receive their ranks in either order.
 // NOTE(review): skipped/ignored URLs appear to be dropped before ranking and
 // so do not consume a rank — confirm against the read loop.
19+ Rank int // Rank before the --contains filter is applied
1920}
2021
2122type Config struct {
@@ -34,19 +35,32 @@ func main() {
3435func run () error {
3536 // Parse command-line arguments
3637 if len (os .Args ) < 2 {
37- return fmt .Errorf ("usage: %s [--quiet] <csv-file-path> [range] [output-path]" , os .Args [0 ])
38+ return fmt .Errorf ("usage: %s [--quiet] [--contains <substring>] <csv-file-path> [range] [output-path]" , os .Args [0 ])
3839 }
3940
40- // Check for --quiet flag
41+ // Check for --quiet and --contains flags
4142 quiet := false
43+ containsFilter := ""
4244 args := os .Args [1 :]
43- if len (args ) > 0 && args [0 ] == "--quiet" {
44- quiet = true
45- args = args [1 :] // Remove --quiet from args
45+
46+ // Process flags
47+ for len (args ) > 0 && strings .HasPrefix (args [0 ], "--" ) {
48+ if args [0 ] == "--quiet" {
49+ quiet = true
50+ args = args [1 :] // Remove --quiet from args
51+ } else if args [0 ] == "--contains" {
52+ if len (args ) < 2 {
53+ return fmt .Errorf ("--contains flag requires a substring argument" )
54+ }
55+ containsFilter = args [1 ]
56+ args = args [2 :] // Remove --contains and its argument from args
57+ } else {
58+ return fmt .Errorf ("unknown flag: %s" , args [0 ])
59+ }
4660 }
4761
4862 if len (args ) < 1 {
49- return fmt .Errorf ("usage: %s [--quiet] <csv-file-path> [range] [output-path]" , os .Args [0 ])
63+ return fmt .Errorf ("usage: %s [--quiet] [--contains <substring>] <csv-file-path> [range] [output-path]" , os .Args [0 ])
5064 }
5165
5266 inputPath := args [0 ]
@@ -82,7 +96,7 @@ func run() error {
8296 }
8397
8498 // Read and process CSV
85- records , err := processCSV (inputPath , config .IgnoreURLs , quiet )
99+ records , err := processCSV (inputPath , config .IgnoreURLs , containsFilter , quiet )
86100 if err != nil {
87101 return err
88102 }
@@ -145,7 +159,7 @@ func loadConfig(configPath string) (*Config, error) {
145159 return & config , nil
146160}
147161
148- func processCSV (inputPath string , ignoreURLs []string , quiet bool ) ([]Record , error ) {
162+ func processCSV (inputPath string , ignoreURLs []string , containsFilter string , quiet bool ) ([]Record , error ) {
149163 file , err := os .Open (inputPath )
150164 if err != nil {
151165 return nil , fmt .Errorf ("failed to open file: %v" , err )
@@ -184,8 +198,8 @@ func processCSV(inputPath string, ignoreURLs []string, quiet bool) ([]Record, er
184198 ignoreMap [url ] = true
185199 }
186200
187- // Read and collect all Pageviews records
188- var records []Record
201+ // Read and collect all Pageviews records (before filtering by contains)
202+ var allRecords []Record
189203 var skippedURLs []string
190204 var ignoredURLs []string
191205 for {
@@ -223,12 +237,33 @@ func processCSV(inputPath string, ignoreURLs []string, quiet bool) ([]Record, er
223237 continue // Skip non-integer values
224238 }
225239
226- records = append (records , Record {
240+ allRecords = append (allRecords , Record {
227241 Page : page ,
228242 MeasureValues : measureValue ,
229243 })
230244 }
231245
246+ // Sort all records by pageviews (highest to lowest) to establish true ranking
247+ sort .Slice (allRecords , func (i , j int ) bool {
248+ return allRecords [i ].MeasureValues > allRecords [j ].MeasureValues
249+ })
250+
251+ // Assign ranks to all records
252+ for i := range allRecords {
253+ allRecords [i ].Rank = i + 1
254+ }
255+
256+ // Now filter by contains substring if specified
257+ var records []Record
258+ var filteredURLs []string
259+ for _ , record := range allRecords {
260+ if containsFilter != "" && ! strings .Contains (record .Page , containsFilter ) {
261+ filteredURLs = append (filteredURLs , record .Page )
262+ continue
263+ }
264+ records = append (records , record )
265+ }
266+
232267 // Report skipped URLs
233268 if ! quiet && len (skippedURLs ) > 0 {
234269 fmt .Fprintf (os .Stderr , "Warning: Skipped %d URL(s) that do not match expected structure (www.*):\n " , len (skippedURLs ))
@@ -245,33 +280,27 @@ func processCSV(inputPath string, ignoreURLs []string, quiet bool) ([]Record, er
245280 }
246281 }
247282
283+ // Report filtered URLs
284+ if ! quiet && len (filteredURLs ) > 0 {
285+ fmt .Fprintf (os .Stderr , "Info: Filtered out %d URL(s) not containing '%s':\n " , len (filteredURLs ), containsFilter )
286+ for _ , url := range filteredURLs {
287+ fmt .Fprintf (os .Stderr , " - %s\n " , url )
288+ }
289+ }
290+
248291 return records , nil
249292}
250293
251294func writeOutput (records []Record , outputPath , rangeStr string , minRank , maxRank int , showPageviews , showHeaders bool ) (string , error ) {
252- // Sort by Measure Values (highest to lowest) to establish ranking
253- sort .Slice (records , func (i , j int ) bool {
254- return records [i ].MeasureValues > records [j ].MeasureValues
255- })
256-
257- // Slice to get only the entries within the specified rank range
258- // minRank and maxRank are 1-based, so we need to convert to 0-based indices
259- startIdx := minRank - 1
260- endIdx := maxRank
261-
262- // Ensure we don't go out of bounds
263- if startIdx < 0 {
264- startIdx = 0
265- }
266- if endIdx > len (records ) {
267- endIdx = len (records )
268- }
269- if startIdx >= len (records ) {
270- // No records in this range
271- records = []Record {}
272- } else {
273- records = records [startIdx :endIdx ]
295+ // Records are already sorted and have ranks assigned
296+ // Filter to get only the entries within the specified rank range
297+ var filteredRecords []Record
298+ for _ , record := range records {
299+ if record .Rank >= minRank && record .Rank <= maxRank {
300+ filteredRecords = append (filteredRecords , record )
301+ }
274302 }
303+ records = filteredRecords
275304
276305 // Determine output directory and filename
277306 var outputDir , filename string
@@ -317,18 +346,17 @@ func writeOutput(records []Record, outputPath, rangeStr string, minRank, maxRank
317346 }
318347
319348 // Write records with rank number, URL, and optionally pageviews
320- for i , record := range records {
321- rank := startIdx + i + 1 // Calculate the actual rank
349+ for _ , record := range records {
322350 var row []string
323351 if showPageviews {
324352 row = []string {
325- strconv .Itoa (rank ),
353+ strconv .Itoa (record . Rank ),
326354 record .Page ,
327355 strconv .Itoa (record .MeasureValues ),
328356 }
329357 } else {
330358 row = []string {
331- strconv .Itoa (rank ),
359+ strconv .Itoa (record . Rank ),
332360 record .Page ,
333361 }
334362 }
0 commit comments