-
Notifications
You must be signed in to change notification settings - Fork 7
Expand file tree
/
Copy pathgrepword-m.py
More file actions
executable file
·115 lines (97 loc) · 3.95 KB
/
grepword-m.py
File metadata and controls
executable file
·115 lines (97 loc) · 3.95 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
#!/usr/bin/env python3
# Copyright (c) 2008-11 Qtrac Ltd. All rights reserved.
# This program or module is free software: you can redistribute it and/or
# modify it under the terms of the GNU General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version. It is provided for educational
# purposes and is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
import codecs
import multiprocessing
import optparse
import os
# Files are read in chunks of BLOCK_SIZE bytes. Worker.process() carries a
# short tail of already-decoded text from one chunk to the next, so a word
# that straddles a chunk boundary is still found.
BLOCK_SIZE = 8000


class Worker(multiprocessing.Process):
    """Process that searches the files named on a shared queue for a word.

    Each worker repeatedly takes a filename from ``work_queue``, reports
    the file on standard output if it contains ``word``, and then marks the
    queue item as done.
    """

    def __init__(self, work_queue, word, number):
        """Store the job queue, the search word and the output prefix.

        work_queue -- joinable queue of filenames to search
        word       -- the text to look for
        number     -- string printed in front of every line this worker
                      writes (may be empty)
        """
        super().__init__()
        self.work_queue = work_queue
        self.word = word
        self.number = number

    def run(self):
        """Serve filenames from the queue forever.

        The loop never ends on its own; main() marks workers as daemons so
        they are stopped when the main process exits.
        """
        while True:
            # Fetch outside the try block: task_done() must be called
            # exactly once per item actually taken from the queue, and not
            # at all if get() itself fails.
            filename = self.work_queue.get()
            try:
                self.process(filename)
            finally:
                self.work_queue.task_done()

    def process(self, filename):
        """Print ``filename`` (prefixed by ``number``) if it contains the word.

        The file is read as bytes in BLOCK_SIZE chunks and decoded as UTF-8
        with undecodable bytes ignored. At most one line is printed per
        file. If the file cannot be opened or read, the error message is
        printed instead of being raised.
        """
        word = self.word
        # Number of characters that must be carried over so that a match
        # starting in earlier chunks and ending in the current one is seen.
        keep = max(len(word) - 1, 0)
        # An incremental decoder keeps the leading bytes of a multibyte
        # character that is cut by a chunk boundary until the rest arrives,
        # instead of dropping the character.
        decoder = codecs.getincrementaldecoder("utf8")(errors="ignore")
        tail = ""
        try:
            with open(filename, "rb") as fh:
                while True:
                    raw = fh.read(BLOCK_SIZE)
                    if not raw:
                        break
                    window = tail + decoder.decode(raw)
                    if word in window:
                        # Flush at once: a daemon worker is terminated
                        # without flushing its buffers, which would lose
                        # output when stdout is a pipe or a file.
                        print("{0}{1}".format(self.number, filename),
                              flush=True)
                        break
                    # Detect a short (final) read on the raw byte count;
                    # the decoded text is shorter than the chunk whenever
                    # it contains multibyte characters.
                    if len(raw) != BLOCK_SIZE:
                        break
                    tail = window[-keep:] if keep else ""
        except EnvironmentError as err:
            print("{0}{1}".format(self.number, err), flush=True)
def parse_options():
    """Parse the command line and return ``(opts, word, paths)``.

    ``opts`` carries ``count`` (number of processes, 1..20, default 7),
    ``recurse`` and ``debug``; ``word`` is the first positional argument
    and ``paths`` is the list of the remaining ones.

    On invalid input the parser's ``error()`` is called, which prints the
    usage message and exits the program.
    """
    parser = optparse.OptionParser(
        usage=("usage: %prog [options] word name1 "
               "[name2 [... nameN]]\n\n"
               "names are filenames or paths; paths only "
               "make sense with the -r option set"))
    parser.add_option("-p", "--processes", dest="count", default=7,
                      type="int",
                      help=("the number of processes to use (1..20) "
                            "[default %default]"))
    parser.add_option("-r", "--recurse", dest="recurse",
                      default=False, action="store_true",
                      help="recurse into subdirectories")
    parser.add_option("-d", "--debug", dest="debug", default=False,
                      action="store_true")
    opts, args = parser.parse_args()
    if not args:
        parser.error("a word and at least one path must be specified")
    elif len(args) == 1:
        parser.error("at least one path must be specified")
    word, paths = args[0], args[1:]
    # Only the path arguments count here: the search word may happen to be
    # the name of an existing file, and that must not satisfy the check.
    if (not opts.recurse and
            not any(os.path.isfile(path) for path in paths)):
        parser.error("at least one file must be specified; or use -r")
    if not (1 <= opts.count <= 20):
        parser.error("process count must be 1..20")
    return opts, word, paths
def get_files(args, recurse):
    """Return the list of files named by ``args``.

    Every argument that is an existing file is included as given. When
    ``recurse`` is true, every other argument is walked as a directory
    tree and all files found below it are included. Anything else is
    skipped silently.
    """
    found = []
    for entry in args:
        if os.path.isfile(entry):
            found.append(entry)
            continue
        if not recurse:
            continue
        for dirpath, _subdirs, names in os.walk(entry):
            found.extend(os.path.join(dirpath, name) for name in names)
    return found
def main():
    """Search the files named on the command line using worker processes.

    Starts ``opts.count`` daemon Worker processes sharing one joinable
    queue, puts every file to be searched on that queue and waits until
    all of them have been marked as done.
    """
    options, word, paths = parse_options()
    targets = get_files(paths, options.recurse)
    queue = multiprocessing.JoinableQueue()
    for index in range(1, options.count + 1):
        # In debug mode every output line is prefixed by the worker number.
        if options.debug:
            label = "{0}: ".format(index)
        else:
            label = ""
        process = Worker(queue, word, label)
        # Daemon processes are stopped when the main process exits, which
        # is what ends the workers' endless loops.
        process.daemon = True
        process.start()
    for target in targets:
        queue.put(target)
    queue.join()
# Run main() only when this file is executed as a script. On Windows the
# multiprocessing module imports the main module again in each child
# process, so without this guard every child would call main() itself.
if __name__ == "__main__": # This is *vital* on Windows!
    main()