-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathstats_parser.py
More file actions
192 lines (151 loc) · 5.82 KB
/
stats_parser.py
File metadata and controls
192 lines (151 loc) · 5.82 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import re
from dataclasses import dataclass
from dataclasses import field
from typing import Dict, Set, Tuple
@dataclass
class GitStats:
    """Container for aggregate git statistics.

    Every counter defaults to zero, so an instance can be created empty
    and filled in afterwards.
    """

    added_lines: int = 0
    deleted_lines: int = 0
    total_files_changed: int = 0
    commits_count: int = 0

    @property
    def net_change(self) -> int:
        """Return added lines minus deleted lines."""
        added, deleted = self.added_lines, self.deleted_lines
        return added - deleted

    @property
    def total_activity(self) -> int:
        """Return added lines plus deleted lines."""
        added, deleted = self.added_lines, self.deleted_lines
        return added + deleted
@dataclass
class AuthorStats:
    """Data class for author-specific git statistics.

    Attributes:
        name: Author name stored for this entry.
        commits_count: Number of commits recorded for the author.
        added_lines: Total lines added by the author.
        deleted_lines: Total lines deleted by the author.
        files_changed: Set of file names changed by the author.
    """

    name: str
    commits_count: int = 0
    added_lines: int = 0
    deleted_lines: int = 0
    # default_factory gives every instance its own fresh set and keeps the
    # default consistent with the Set[str] annotation.
    files_changed: Set[str] = field(default_factory=set)

    def __post_init__(self):
        # Backward compatibility: None used to be the default value, so
        # callers may still pass files_changed=None explicitly.
        if self.files_changed is None:
            self.files_changed = set()

    @property
    def total_files_changed(self) -> int:
        """Get count of files changed by this author."""
        return len(self.files_changed)

    @property
    def net_change(self) -> int:
        """Calculate net line change (added - deleted)."""
        return self.added_lines - self.deleted_lines

    @property
    def total_activity(self) -> int:
        """Calculate total line activity (added + deleted)."""
        return self.added_lines + self.deleted_lines
def parse_shortstat_output(shortstat: str) -> Tuple[int, int, int]:
    """
    Extracts the three counters from git diff --shortstat output.

    Args:
        shortstat: Output from git diff --shortstat command

    Returns:
        Tuple of (files_changed, insertions, deletions); a counter that is
        absent from the text (or an empty input) is reported as 0
    """
    if not shortstat:
        return 0, 0, 0

    # One pattern per counter, in the order of the returned tuple.
    patterns = (
        r'(\d+) files? changed',
        r'(\d+) insertions?\(\+\)',
        r'(\d+) deletions?\(-\)',
    )

    counts = []
    for pattern in patterns:
        found = re.search(pattern, shortstat)
        counts.append(int(found.group(1)) if found else 0)

    files_changed, insertions, deletions = counts
    return files_changed, insertions, deletions
def parse_commit_diff_stats(diff_output: str) -> Tuple[int, int, Set[str]]:
    """
    Parses git show --stat output to extract line changes and file names.

    Summary lines are recognised by their "N insertion(s)(+)" /
    "N deletion(s)(-)" parts; their counts are accumulated across all
    summary lines found. Any other line containing '|' is treated as a
    per-file row ("<name> | <changes>") unless it is indented by two or
    more spaces.

    Args:
        diff_output: Output from git show --stat command

    Returns:
        Tuple of (insertions, deletions, files_changed_set)
    """
    insertions = 0
    deletions = 0
    files_changed: Set[str] = set()

    if not diff_output:
        return insertions, deletions, files_changed

    # Compiled once instead of on every line of the output.
    insertions_re = re.compile(r'(\d+) insertions?\(\+\)')
    deletions_re = re.compile(r'(\d+) deletions?\(-\)')

    for line in diff_output.split('\n'):
        insertions_match = insertions_re.search(line)
        deletions_match = deletions_re.search(line)

        # Decide "summary line" by the actual pattern match, not by a
        # substring test: a per-file row such as "insertion_sort.py | 4 ++"
        # contains the word "insertion" but is not a summary.
        if insertions_match or deletions_match:
            if insertions_match:
                insertions += int(insertions_match.group(1))
            if deletions_match:
                deletions += int(deletions_match.group(1))
            continue

        if '|' not in line:
            continue

        # git prefixes each per-file stat row with a single space, while the
        # commit message body is indented further. Accept zero or one
        # leading space and skip deeper-indented lines, so real stat rows are
        # counted and a '|' inside the commit message is not.
        if line.startswith('  '):
            continue

        filename = line.split('|')[0].strip()
        if filename:
            files_changed.add(filename)

    return insertions, deletions, files_changed
def create_author_stats_dict() -> Dict[str, AuthorStats]:
    """Returns a new, empty dictionary for storing author statistics."""
    empty_registry: Dict[str, AuthorStats] = {}
    return empty_registry
def add_author_commit_stats(author_stats: Dict[str, AuthorStats],
                            author_email: str,
                            author_name: str,
                            added: int = 0,
                            deleted: int = 0,
                            files: Set[str] = None) -> None:
    """
    Records one commit for an author, updating author_stats in place.

    Entries are keyed by email. The first commit seen for an email creates
    the entry using author_name; later commits reuse that entry.

    Args:
        author_stats: Dictionary to update
        author_email: Author's email address (dictionary key)
        author_name: Author's name, used only when a new entry is created
        added: Lines added in this commit
        deleted: Lines deleted in this commit
        files: Set of files changed in this commit
    """
    if author_email in author_stats:
        entry = author_stats[author_email]
    else:
        entry = AuthorStats(name=author_name)
        author_stats[author_email] = entry

    entry.commits_count += 1
    entry.added_lines += added
    entry.deleted_lines += deleted

    # A None or empty set contributes no files.
    if files:
        entry.files_changed.update(files)
def finalize_author_stats(author_stats: Dict[str, AuthorStats]) -> Dict[str, dict]:
    """
    Builds a plain-dictionary view of the collected author statistics.

    Args:
        author_stats: Dictionary of AuthorStats objects

    Returns:
        New dictionary with the same keys, where each value is a dict with
        the keys 'name', 'commits_count', 'added_lines', 'deleted_lines'
        and 'total_files_changed'
    """
    return {
        email: {
            'name': entry.name,
            'commits_count': entry.commits_count,
            'added_lines': entry.added_lines,
            'deleted_lines': entry.deleted_lines,
            'total_files_changed': entry.total_files_changed,
        }
        for email, entry in author_stats.items()
    }
def sort_authors_by_activity(author_stats: Dict[str, dict]) -> list:
    """
    Orders authors from most to least active.

    Activity is the sum of the 'added_lines' and 'deleted_lines' values of
    each author's statistics dict. Authors with equal activity keep the
    order in which they appear in author_stats.

    Args:
        author_stats: Dictionary of author statistics

    Returns:
        List of (author_email, stats) tuples sorted by activity
    """
    def activity(entry):
        _, stats = entry
        return stats['added_lines'] + stats['deleted_lines']

    ranked = list(author_stats.items())
    ranked.sort(key=activity, reverse=True)
    return ranked