-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathsql_setup.py
More file actions
103 lines (88 loc) · 2.77 KB
/
sql_setup.py
File metadata and controls
103 lines (88 loc) · 2.77 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
import psycopg2
from database_key import db_host, db_name, db_pass, db_user
import pandas as pd
def create_table():
try:
conn = psycopg2.connect(
host=db_host,
database=db_name,
user=db_user,
password=db_pass,
)
except:
print("I am unable to connect to the database")
raise ConnectionError
with conn.cursor() as curs:
curs.execute(f'''
DROP TABLE IF EXISTS dictionary;
CREATE TABLE dictionary (
kana VARCHAR(32) NOT NULL,
kanji VARCHAR(16),
pos VARCHAR(16),
translation VARCHAR(96) NOT NULL,
chapter INTEGER NOT NULL,
PRIMARY KEY (kana, translation)
);
''')
conn.commit()
print("transaction finished")
def insert_into_database(df):
try:
conn = psycopg2.connect(
host=db_host,
database=db_name,
user=db_user,
password=db_pass,
)
except ConnectionError:
print("Error: Unable to connect to the database.")
raise ConnectionError
sql = """
INSERT INTO dictionary VALUES (
%s, %s, %s, %s, %s
);
"""
with conn.cursor() as curs:
data = df.where(pd.notnull(df), None).values.tolist()
curs.executemany(sql, data)
conn.commit()
print("data inserted")
def clean_df() -> pd.DataFrame:
xlsx_file = "genki_words.xlsx"
data_file = pd.ExcelFile(xlsx_file)
read_file = pd.read_excel(data_file, sheet_name="Sheet1")
formatted_df = read_file.drop('No.', axis=1)
def extract_chapter(chapter):
import re
numbers = re.findall(r'\d+', chapter)
if numbers:
return min(map(int, numbers))
if 'G' in str(chapter):
return 0
return chapter
formatted_df['chapter'] = formatted_df['chapter'].apply(extract_chapter)
return formatted_df
def extract_data_from_other_db() -> pd.DataFrame:
try:
conn = psycopg2.connect(
host='localhost',
database='dictionary',
user='postgres',
password='microsoft11',
)
except:
print("Unable to connect to the other database")
raise ConnectionError
query = """
SELECT * FROM dictionary;
"""
df = pd.read_sql_query(query, conn)
conn.close()
return df
if __name__ == "__main__":
create_table()
# dfs = clean_df()
dfs = extract_data_from_other_db()
insert_into_database(dfs)
# other_df = extract_data_from_other_db()
# print(other_df))