From 57845c07ed4e772abf4ddf353db8d08a2cca6fa0 Mon Sep 17 00:00:00 2001 From: Alan Jenkins Date: Mon, 10 Aug 2015 16:48:19 +0100 Subject: [PATCH 1/4] Clean up fds of segments we delete (during compaction) When we delete a segment, let's close its fd as well. Note as well wasting the fd, this was forcing the filesystem to preserve the deleted file until we exited. I noticed roughly 20 open fds of deleted files when attic saved 10G of data. --- attic/repository.py | 3 +++ 1 file changed, 3 insertions(+) mode change 100644 => 100755 attic/repository.py diff --git a/attic/repository.py b/attic/repository.py old mode 100644 new mode 100755 index eed85dc4..1d2bb0f0 --- a/attic/repository.py +++ b/attic/repository.py @@ -478,6 +478,9 @@ def get_fd(self, segment): return fd def delete_segment(self, segment): + fd = self.fds.pop(segment) + if fd != None: + fd.close() try: os.unlink(self.segment_filename(segment)) except OSError: From 3321a887d34d607fc59e9d2d19f07b5862295908 Mon Sep 17 00:00:00 2001 From: Alan Jenkins Date: Mon, 10 Aug 2015 22:37:32 +0100 Subject: [PATCH 2/4] io.write_commit() already implies io.close_segment() --- attic/repository.py | 1 - 1 file changed, 1 deletion(-) diff --git a/attic/repository.py b/attic/repository.py index 1d2bb0f0..bd10e321 100755 --- a/attic/repository.py +++ b/attic/repository.py @@ -300,7 +300,6 @@ def report_error(msg): report_error('Adding commit tag to segment {}'.format(transaction_id)) self.io.segment = transaction_id + 1 self.io.write_commit() - self.io.close_segment() if current_index and not repair: if len(current_index) != len(self.index): report_error('Index object count mismatch. {} != {}'.format(len(current_index), len(self.index))) From d83b919d52d40af54b5d353e9f408c550f714358 Mon Sep 17 00:00:00 2001 From: Alan Jenkins Date: Wed, 12 Aug 2015 11:20:02 +0100 Subject: [PATCH 3/4] Style fix in added code PEP8 says to prefer "is not None" --- attic/repository.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/attic/repository.py b/attic/repository.py index bd10e321..a926d7a7 100755 --- a/attic/repository.py +++ b/attic/repository.py @@ -478,7 +478,7 @@ def get_fd(self, segment): def delete_segment(self, segment): fd = self.fds.pop(segment) - if fd != None: + if fd is not None: fd.close() try: os.unlink(self.segment_filename(segment)) From 04887439a0261388d0e3088f851299f075a0e4a5 Mon Sep 17 00:00:00 2001 From: Alan Jenkins Date: Wed, 12 Aug 2015 11:32:12 +0100 Subject: [PATCH 4/4] recover_segment(): don't assume we have an fd for segment Suggested by @ThomasWaldmann. Avoiding a complex assumption should make the code easier to understand and maintain. (Technically we do have an fd for the segment, because the only caller opens the segment and checks it before calling for repair.) --- attic/repository.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/attic/repository.py b/attic/repository.py index a926d7a7..ad703165 100755 --- a/attic/repository.py +++ b/attic/repository.py @@ -515,7 +515,9 @@ def iter_objects(self, segment, include_data=False): header = fd.read(self.header_fmt.size) def recover_segment(self, segment, filename): - self.fds.pop(segment).close() + fd = self.fds.pop(segment) + if fd is not None: + fd.close() # FIXME: save a copy of the original file with open(filename, 'rb') as fd: data = memoryview(fd.read())