This repository was archived by the owner on Oct 2, 2019. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcapture_example.py
More file actions
137 lines (96 loc) · 3.02 KB
/
capture_example.py
File metadata and controls
137 lines (96 loc) · 3.02 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
import pickle
import pymysql as sql
import boto3
import time
import re
# --- Connection configuration ----------------------------------------------
# NOTE(review): hostname/username/password are hard-coded credentials checked
# into source control; these belong in environment variables or a secrets
# manager, not in the script.
db_id = "pi"
hostname = "pi.cwsp4gygmyca.us-east-2.rds.amazonaws.com"
username = "olive"
password = "olivechinos"
database = "CRDB"
access_key = None # Replace with actual keys
secret_key = None
region = "us-east-2"
# Convenience bundle of the AWS settings.  Currently unused below — the boto3
# clients are built from the individual variables instead.
credentials = {
"aws_access_key" : access_key,
"aws_secret_access_key" : secret_key,
"region_name" : region
}
# boto3 clients: S3 for storing the parsed log, RDS for retrieving the raw
# log file.  NOTE(review): with access_key/secret_key left as None, boto3
# presumably falls back to its default credential chain (env vars, ~/.aws,
# instance role) — confirm that is the intent before shipping.
s3_client = boto3.client(
    's3',
    aws_access_key_id = access_key,
    aws_secret_access_key = secret_key,
    region_name = region
)
rds_client = boto3.client(
    'rds',
    aws_access_key_id = access_key,
    aws_secret_access_key = secret_key,
    region_name = region
)
def testConnection(connection):
    """Interactively smoke-test a database connection.

    Reads SQL commands from stdin in a loop, executes each on a cursor of
    *connection*, and prints every row of the result set.  Typing ``exit``
    leaves the loop.  The cursor is always closed on the way out (the
    original version leaked it).

    Parameters:
        connection: an open DB-API connection (here: pymysql).
    """
    cur = connection.cursor()
    try:
        # Original used an always-True `flag` sentinel that was never
        # cleared; the loop only ever exits via break, so say so directly.
        while True:
            command = input("Give command : ")
            if command == "exit":
                break
            # NOTE(review): executes raw user input — fine for an
            # interactive admin/debug tool, never for untrusted callers.
            cur.execute(command)
            for row in cur.fetchall():
                print(row)
    finally:
        cur.close()
# --- Smoke-test the MySQL connection ----------------------------------------
print("Attempt to connect to database.")
myConnection = sql.connect(host = hostname, user = username, passwd = password, db = database)
print("Connection made.")
# Interactive loop: type SQL, see rows; "exit" to continue the script.
testConnection(myConnection)
print("Closing connection.")
myConnection.close()
print("Connection closed.")
print("Accessing log file.")
# Substrings identifying RDS housekeeping queries that should be filtered
# out of the general log before analysis.
# NOTE(review): "rds_hearbeat2" (missing 't') and "NAME< VALUE" look like
# typos of the actual log text ("rds_heartbeat2", "NAME, VALUE") — kept
# byte-for-byte here pending confirmation against a real log file; if they
# are typos, those two filters never match anything.
_BANNED_SUBSTRINGS = (
    "2 Query SELECT 1",
    "2 Query SELECT count(*) from information_schema.TABLES WHERE TABLE_SCHEMA = 'mysql' AND TABLE_NAME = 'rds_heartbeat2'",
    "2 Query SELECT value FROM mysql.rds_hearbeat2",
    "2 Query SELECT NAME< VALUE FROM mysql.rds_configuration",
)

def notBanned(line):
    """Return True if *line* is a client query line worth keeping.

    A line qualifies when "Query" appears within its first 30 characters
    (i.e. it is a query entry, not a Connect/Quit entry) and it is not one
    of the known RDS housekeeping queries listed above.

    Fixes the original, which ended its boolean chain with a dangling
    ``or`` before the closing paren — a syntax error.
    """
    return "Query" in line[:30] and not any(s in line for s in _BANNED_SUBSTRINGS)
# Pull a portion of the MySQL general log from RDS.  The general log must be
# enabled on the instance for "general/mysql-general.log" to exist.
log_file = rds_client.download_db_log_file_portion(
    DBInstanceIdentifier = db_id,
    LogFileName = "general/mysql-general.log")
# Normalise tabs to spaces, split into lines, then keep only real client
# query lines (see notBanned); strip() tidies each surviving line.
log_file_lines = log_file["LogFileData"].replace("\t"," ").split("\n")
log_file_lines = [line.strip() for line in log_file_lines if notBanned(line)]
print("Log file found. Here are some raw lines from it:")
print(log_file_lines[:10])
# Matches the "<yymmdd> <H:M:S>" timestamp prefix the general log puts on the
# first entry of each second.  Raw string fixes the original non-raw literal,
# whose "\d" escapes are invalid and trigger a warning on modern Python.
pattern = re.compile(r"\d+ \d{1,2}:\d{1,2}:\d{1,2}")

def parseLine(line):
    """Split one general-log line into a ``(time, query)`` tuple.

    Returns:
        ("H:M:S", rest)  when *line* starts with a "date time" stamp — the
                         date and the following id token are dropped;
        ("", rest)       when it does not — only the leading id token is
                         dropped.

    Raises IndexError on a line containing no space at all; in this script
    every input has already passed notBanned and therefore contains
    "Query ", so that case cannot occur here.
    """
    match = pattern.match(line)
    if match is None:
        # No timestamp: drop the first space-delimited token, keep the rest.
        return ("", line.split(" ", 1)[1])
    else:
        # group() is "<date> <time>": keep the time part, and drop the two
        # leading tokens (date, time is absent here — date + id) from line.
        return (match.group().split(" ")[1], line.split(" ", 2)[2])
# Parse every surviving log line into a (time, query) tuple.
transactions = [parseLine(line) for line in log_file_lines]
print("Log file parsed. Here is a preview of the data structure:")
print(transactions[:10])
print("Creating S3 bucket.")
# Buckets need a GLOBALLY unique name
bucket_id = "my-crt-test-bucket-olive-chinos"
# NOTE(review): create_bucket raises if the bucket already exists from a
# previous run — presumably acceptable for a one-shot example; confirm.
s3_client.create_bucket(
    Bucket = bucket_id,
    CreateBucketConfiguration = {"LocationConstraint" : region}
)
print("S3 bucket made. Sending object into bucket.")
# Serialise the parsed transactions with pickle and upload as one object.
byte_log = pickle.dumps(transactions)
log_key = "test-log"
s3_client.put_object(
    Bucket = bucket_id,
    Body = byte_log,
    Key = log_key
)
print("Object sent. Attempting to read back object from bucket.")
bucket_obj = s3_client.get_object(
    Bucket = bucket_id,
    Key = log_key
)
# SECURITY: pickle.loads executes arbitrary code from the payload.  Safe here
# only because this script wrote the object itself moments ago — never
# unpickle data that anything else may have written to the bucket.
new_byte_log = pickle.loads(bucket_obj["Body"].read())
print("Object found. Here is the data read:")
print(new_byte_log[:20])
print("Script complete.")