-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdateRecallFunctions.py
More file actions
243 lines (198 loc) · 8.58 KB
/
dateRecallFunctions.py
File metadata and controls
243 lines (198 loc) · 8.58 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
from datetime import datetime, timezone
from hachoir.metadata import extractMetadata
from hachoir.parser import createParser
import os
from os.path import join, isfile
from PIL import Image
from PIL.ExifTags import TAGS
import json
# import piexif
import re
# import shutil
import win32com.client
import logging
logging.getLogger("hachoir").setLevel(logging.ERROR)
def get_date_from_filename(file_path):
    """Extract a calendar date embedded in a file's name.

    Only the base name of ``file_path`` is inspected. Two layouts are tried
    in order: eight consecutive digits (``YYYYMMDD``) and the dashed form
    ``YYYY-MM-DD``. For each layout only the first occurrence in the name
    is considered.

    Args:
        file_path: Path (or bare name) of the file to inspect.

    Returns:
        A naive ``datetime`` at midnight of the detected day, or ``None``
        when the name contains no plausible date.
    """
    # Patterns to look for
    date_patterns = [
        r'(\d{4})(\d{2})(\d{2})',    # YYYYMMDD
        r'(\d{4})-(\d{2})-(\d{2})',  # YYYY-MM-DD
    ]
    # Both values are identical for every pattern, so compute them once.
    file_name = os.path.basename(file_path)
    current_year = datetime.now().year

    for pattern in date_patterns:
        match = re.search(pattern, file_name)
        if not match:
            continue
        year, month, day = (int(part) for part in match.groups())
        # Cheap plausibility filter: reject digit runs that cannot be a date
        # and years that lie in the future.
        if not (1 <= month <= 12 and 1 <= day <= 31 and year <= current_year):
            continue
        try:
            return datetime(year, month, day)
        except ValueError:
            # The range check above still admits impossible dates such as
            # 31 February or year 0; treat those as "no date here" instead
            # of letting the exception escape to the caller.
            continue
    return None  # If no date found
def get_time_from_filename(file_path):
    """Extract a time of day embedded in a file's name.

    Only the base name of ``file_path`` is inspected. Three layouts are
    tried in order: ``HHMMSS``, ``HH:MM:SS`` and ``HH_MM_SS``. A candidate
    must not be directly preceded or followed by another digit, so the
    leading digits of an eight-digit date such as ``20210315`` are not
    mistaken for a time. Within a layout, candidates are checked from left
    to right and the first one with valid hour/minute/second values wins.

    Digit runs longer than six characters (for example a time with a
    millisecond suffix, or date and time written without a separator) are
    deliberately not interpreted.

    Args:
        file_path: Path (or bare name) of the file to inspect.

    Returns:
        A ``datetime.time`` for the detected time, or ``None`` when the
        name contains no plausible time.
    """
    # Patterns to look for. The lookarounds pin each candidate to digit
    # boundaries so that part of a longer number never matches.
    time_patterns = [
        r'(?<!\d)(\d{2})(\d{2})(\d{2})(?!\d)',    # HHMMSS
        r'(?<!\d)(\d{2}):(\d{2}):(\d{2})(?!\d)',  # HH:MM:SS
        r'(?<!\d)(\d{2})_(\d{2})_(\d{2})(?!\d)',  # HH_MM_SS (common format)
    ]
    file_name = os.path.basename(file_path)

    for pattern in time_patterns:
        for match in re.finditer(pattern, file_name):
            # Get HMS from match
            hour, minute, second = (int(part) for part in match.groups())
            # Quick sanity check for the clock ranges
            if 0 <= hour <= 23 and 0 <= minute <= 59 and 0 <= second <= 59:
                return datetime(1, 1, 1, hour, minute, second).time()
    return None  # If no time found
def get_date_from_creation_date(file_path):
    """Return the file's ``st_ctime`` stamp as a naive local ``datetime``.

    Any failure while reading or converting the stamp is reported on stdout
    and results in ``None``.

    NOTE(review): what ``st_ctime`` means is platform dependent (creation
    time on Windows, last metadata change on most Unix systems) - confirm
    this matches the intended use.
    """
    try:
        stamp = os.stat(file_path).st_ctime
        return datetime.fromtimestamp(stamp)
    except Exception as e:
        print(f" > ERROR @ FileCreation: Cannot get file creation date for {file_path}: {e}")
        return None
def get_date_from_modified_date(file_path):
    """Return the file's last-modification time as a naive local ``datetime``.

    Any failure while reading or converting the stamp is reported on stdout
    and results in ``None``.
    """
    try:
        stamp = os.stat(file_path).st_mtime
        return datetime.fromtimestamp(stamp)
    except Exception as e:
        print(f" > ERROR @ FileModified: Cannot get file modified date for {file_path}: {e}")
        return None
def get_date_from_metadata(file_path):
    """Read the ``creation_date`` metadata field of a file using hachoir.

    Progress and failure messages are printed to stdout. Every exception
    raised while parsing is caught and reported, so the function never
    raises.

    Args:
        file_path: Path of the media file to inspect.

    Returns:
        The value hachoir reports for ``creation_date``, or ``None`` when
        no parser can be created, no metadata is found, the field is
        absent/empty, or extraction fails.
    """
    print("> Metadata: ")
    try:
        parser = createParser(file_path)
        if not parser:
            print(f" > @Metadata: Unable to create parser for {file_path}")
            return None
        # The parser keeps its input stream open; using it as a context
        # manager closes the stream on every exit path so the file handle
        # is not leaked (an open handle can block later moves/renames).
        with parser:
            metadata = extractMetadata(parser)
            if not metadata:
                print(f" > Metadata: No metadata found for {file_path}")
                return None
            # Try to get the creation date from metadata
            create_date = metadata.get('creation_date')
            if create_date:
                return create_date
            print(" > Metadata: Metadata does not contain 'creation_date'")
    except Exception as e:
        print(f" > @Metadata : Metadata extraction failed for {file_path}: {e}")
    return None
def get_date_from_EXIF(file_path):
    """Read the EXIF ``DateTimeOriginal`` tag of an image.

    Progress and failure messages are printed to stdout. Every exception
    (unreadable file, image type without EXIF support, malformed date
    string, ...) is caught and reported, so the function never raises.

    Args:
        file_path: Path of the image file to inspect.

    Returns:
        A naive ``datetime`` parsed from ``DateTimeOriginal`` using the
        ``YYYY:MM:DD HH:MM:SS`` layout, or ``None`` when the tag is absent
        or cannot be read.
    """
    print("> EXIF: ")
    try:
        # Close the image file as soon as the EXIF dictionary has been read.
        with Image.open(file_path) as image:
            # NOTE(review): _getexif() is a private Pillow method that is
            # not available on every image class; when it is missing the
            # AttributeError is reported by the handler below.
            info = image._getexif()
        if info:
            print(" > EXIF Data found ")
            for tag, value in info.items():
                if TAGS.get(tag, tag) == 'DateTimeOriginal':
                    taken = datetime.strptime(value, '%Y:%m:%d %H:%M:%S')
                    # Format via f-string: concatenating str + datetime
                    # raises TypeError, which previously sent every
                    # successful lookup into the error path.
                    print(f" > Date found: {taken}")
                    return taken
    except Exception as e:
        print(f" > ERROR @ EXIF: Error extracting EXIF data from {file_path}: {e}")
    return None
def get_date_from_dateAcquired(filepath, date_acquired_idx=217):
    """
    Retrieves the 'Date acquired' property for a file in Windows Explorer.

    The value is read through the Windows Shell COM object
    (``Shell.Application``), so this only works on Windows with pywin32.

    Args:
        filepath: Path of the file to inspect; relative paths are resolved
            against the current working directory.
        date_acquired_idx: Column index passed to ``GetDetailsOf``. 217 is
            common for "Date acquired", but the index might vary by
            Windows version.

    Returns:
        The text returned by ``GetDetailsOf`` for that column, or ``None``
        when the file does not exist, the shell cannot resolve the folder
        or the file, or the property is empty.
    """
    # Ensure file exists
    if not os.path.exists(filepath):
        print(f" > File acquired: file does not exist: {filepath}")
        return None

    # Use Windows Shell COM object to get file properties
    shell = win32com.client.Dispatch("Shell.Application")

    # Split an absolute path: a bare file name would otherwise produce an
    # empty folder path, which the shell cannot resolve to a folder.
    folder_path, file_name = os.path.split(os.path.abspath(filepath))

    # Get the folder object; bail out instead of failing on None below.
    folder = shell.Namespace(folder_path)
    if folder is None:
        return None

    # Get file item
    item = folder.ParseName(file_name)
    if item is None:
        return None

    # Retrieve the "Date acquired" property; an empty value means "not set".
    date_acquired = folder.GetDetailsOf(item, date_acquired_idx)
    return date_acquired if date_acquired else None
def get_dategeo_from_JSON(file_path):
    """Locate the ``.json`` sidecar of a media file and extract its data.

    The sidecar name is the media file's name with ``.json`` appended,
    after removing duplicate/edit markers (`` (N)`` and ``-edited``) from
    the stem. The expected sidecar name is printed, then the media file's
    directory is searched recursively and the first file with that name is
    passed to ``extract_dategeo_from_JSON``.

    Args:
        file_path: Path of the media file whose sidecar is wanted.

    Returns:
        Whatever ``extract_dategeo_from_JSON`` returns for the sidecar, or
        ``None`` when no sidecar with the expected name is found.
    """
    directory, file_name = os.path.split(file_path)

    # Normalize the file name to handle variations of the same picture
    base_name, ext = os.path.splitext(file_name)
    normalized_base_name = re.sub(r' \(\d+\)|-edited', '', base_name)

    # Reconstruct the file name with ".json" extension
    json_file_name = f"{normalized_base_name}{ext}.json"
    print(json_file_name)

    # A bare file name has an empty directory part and os.walk('') yields
    # nothing, so fall back to the current directory in that case.
    search_root = directory or os.curdir
    for root, _dirs, files in os.walk(search_root):
        if json_file_name in files:
            return extract_dategeo_from_JSON(os.path.join(root, json_file_name))
    return None
def extract_dategeo_from_JSON(file_path):
    """Collect timestamps and coordinates from a JSON sidecar file.

    The ``formatted`` strings under ``creationTime`` and ``photoTakenTime``
    are parsed and tagged as UTC. ``geoData`` and ``geoDataExif`` entries
    are kept unless both latitude and longitude equal 0.0.

    Args:
        file_path: Path of the JSON file to read (opened as UTF-8).

    Returns:
        A dict with the keys ``file_path``, ``dates_times`` (a list of
        ``(field name, aware datetime)`` tuples) and ``locations`` (a list
        of latitude/longitude/altitude dicts). ``None`` is returned, after
        printing a message, when the JSON cannot be decoded or any other
        error occurs while processing it. Errors raised by ``open`` itself
        are not caught.
    """
    # NOTE(review): format1 is never referenced; candidate for removal.
    format1 = '%Y:%m:%d %H:%M:%S'
    format2 = "%b %d, %Y, %I:%M:%S\u202f%p %Z"

    with open(file_path, 'r', encoding='utf-8') as handle:
        try:
            payload = json.load(handle)
            result = {
                "file_path": file_path,
                "dates_times": [],
                "locations": []
            }

            # Date/time fields, in the order they are reported.
            for field in ("creationTime", "photoTakenTime"):
                if field in payload and "formatted" in payload[field]:
                    parsed = datetime.strptime(payload[field]["formatted"], format2)
                    result["dates_times"].append(
                        (field, parsed.replace(tzinfo=timezone.utc))
                    )

            # Location fields; an all-zero coordinate pair is skipped.
            for field in ("geoData", "geoDataExif"):
                if field not in payload:
                    continue
                geo = payload[field]
                if geo["latitude"] != 0.0 or geo["longitude"] != 0.0:
                    result["locations"].append(
                        {axis: geo[axis] for axis in ("latitude", "longitude", "altitude")}
                    )

            return result
        except json.JSONDecodeError:
            print(f"Error decoding JSON from file: {file_path}")
        except Exception as e:
            print(f"An error occurred: {e}")
    return None