bookstack-backup/export_pages.py at master · MfellnerDev/bookstack-backup · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
import sys

from requests.exceptions import HTTPError
from pathlib import Path
from dotenv import load_dotenv
from gather_pages_information import get_and_store_all_page_infos_in_file
import requests
from book import Book
import os

"""
@author MfellnerDev | Manuel Fellner
@version 16.06.2023
"""

# Read the .env file so its entries become available as environment variables
load_dotenv()

# Base URL of the BookStack instance that is exported
BOOKSTACK_URL = os.environ.get('BOOKSTACK_URL')

# API token (id + secret) used to authenticate - the original author uses an admin user's token
TOKEN_ID = os.environ.get('TOKEN_ID')
TOKEN_SECRET = os.environ.get('TOKEN_SECRET')

# Authorization header in the form "Token <id>:<secret>" -> see BookStack API documentation
AUTH_HEADER = {'Authorization': 'Token {}:{}'.format(TOKEN_ID, TOKEN_SECRET)}

# Export format requested from the API. Options: "markdown", "pdf", "plaintext"
EXPORT_TYPE = os.environ.get('EXPORT_TYPE')

# Name (including extension) of the file that holds the page information
INFO_FILE = os.environ.get('INFO_FILE')

# One shared session, so every request carries the authentication header
session = requests.Session()
session.headers.update(AUTH_HEADER)


def _remove_substring_out_of_string(string: str, substring: str):
    """
    Drop every occurrence of a substring from a string (used in this file to get rid of line breaks)
    :param string: the text to clean up
    :param substring: the piece of text that should disappear from string
    :return: copy of string in which no occurrence of substring is left
    """
    without_substring = string.replace(substring, '')
    return without_substring


def _get_export_file_extension():
    """
    Return the suiting file extension for the selected export type (EXPORT_TYPE)
    :return: md, pdf, txt or html; None if EXPORT_TYPE is not one of the known export types
    """
    # Lookup table instead of an if/elif chain, so a new export type is a one-line change
    extension_by_export_type = {
        'markdown': 'md',
        'pdf': 'pdf',
        'plaintext': 'txt',
        # The BookStack page export API also offers "html"; without this entry
        # such exports ended up in files named "[page_slug].None"
        'html': 'html',
    }
    # .get() keeps the previous behaviour of returning None for unknown types
    return extension_by_export_type.get(EXPORT_TYPE)


def _create_necessary_folders_windows(parent_book_slug_slug):
    """
    Creates the needed folders for exporting all pages on Windows OS.
    The folder for the given book is created below "exports" and EXPORT_TYPE inside the current
    working directory; folders that already exist are left untouched
    :param parent_book_slug_slug: slug of the parent book, used as name of the innermost folder
    :return: None
    """
    current_directory = os.getcwd()
    book_export_directory = f'{current_directory}\\exports\\{EXPORT_TYPE}\\{parent_book_slug_slug}'

    # makedirs creates all missing parent folders as well, so one call is enough.
    # exist_ok=True replaces the former "check if it exists, then create" pattern, which
    # could fail if the folder appeared between the check and the creation
    os.makedirs(book_export_directory, exist_ok=True)


def _create_necessary_folders_linux(parent_book_slug_slug):
    """
    Creates the needed folders for exporting all pages on Linux OS.
    The folder for the given book is created below "exports" and EXPORT_TYPE inside the current
    working directory; folders that already exist are left untouched
    :param parent_book_slug_slug: slug of the parent book, used as name of the innermost folder
    :return: None
    """
    current_directory = os.getcwd()
    book_export_directory = f'{current_directory}/exports/{EXPORT_TYPE}/{parent_book_slug_slug}'

    # makedirs creates all missing parent folders as well, so one call is enough.
    # exist_ok=True replaces the former "check if it exists, then create" pattern, which
    # could fail if the folder appeared between the check and the creation
    os.makedirs(book_export_directory, exist_ok=True)


def _build_correct_export_path(parent_book_slug_slug, page_id, page_slug):
    """
    Create the folder for the export and build the complete file path for an individual page
    :param parent_book_slug_slug: slug of the parent book, used as name of the folder of the page
    :param page_id: id of the page; not needed for the path, kept so existing callers keep working
    :param page_slug: slug of the page, used as file name (without extension)
    :return: path in the format
             [current_directory]/exports/[EXPORT_TYPE]/[parent_book_slug_slug]/[page_slug].[file_extension]
    """
    # Folder of the book inside exports/[EXPORT_TYPE]; os.path.join picks the separator of the host OS
    book_export_directory = os.path.join(os.getcwd(), 'exports', EXPORT_TYPE, parent_book_slug_slug)

    # Create the folder on every platform. The folders used to be created only when sys.platform
    # started with "win" or "linux", so on e.g. macOS writing the exported file failed later on.
    # exist_ok=True makes repeated calls for pages of the same book harmless
    os.makedirs(book_export_directory, exist_ok=True)

    # Get the current file extension from the EXPORT_TYPE variable
    file_extension = _get_export_file_extension()
    # build the file_name with [page_slug].[file_extension]
    file_name = f'{page_slug}.{file_extension}'

    return os.path.join(book_export_directory, file_name)


def _read_file_and_store_info_in_list():
    """
    Read the INFO_FILE, convert every line into a Book object and put these objects into an object-list.
    Expected format of a line: [page_id]:[page_slug]:[parent_book_slug]
    Blank lines are skipped. Lines with fewer than three fields are reported on stdout and skipped,
    so that one broken line no longer aborts the whole export with an IndexError
    :return: list of Book objects, one for every valid line of the INFO_FILE
    """
    book_list = []
    with open(INFO_FILE, 'r', encoding='utf-8') as info_file:
        # go through all lines; the line number is only needed for the warning below
        for line_number, current_line in enumerate(info_file, start=1):
            # Remove the "\n" sign once for the whole line instead of once per field
            line = current_line.rstrip('\n')

            # e.g. an empty line at the end of the file
            if not line.strip():
                continue

            # Split the information, format: [page_id]:[page_slug]:[parent_book_slug]
            page_info = line.split(':')
            if len(page_info) < 3:
                print(f'Skipping malformed line {line_number} in "{INFO_FILE}": {line!r}')
                continue

            # Only the first three fields are used, additional ones are ignored (as before)
            page_id, page_slug, parent_book_slug = page_info[:3]

            # build the path, where the page should be stored - filename
            filename = _build_correct_export_path(parent_book_slug, page_id, page_slug)

            # store the values in the object list
            book_list.append(Book(parent_book_slug, page_id, page_slug, filename))
    return book_list


def export_and_store_pages():
    """
    Export all the pages via BookStack API /api/pages/{page_id}/export/{filetype} and store each one
    in the file given by book.filename.
    A page that can not be exported is reported on stdout and does not stop the export of the others
    :return: None
    """
    book_list = _read_file_and_store_info_in_list()

    for book in book_list:
        try:
            # try to download the exported page file in the expected format
            exported_page = session.get(f'{BOOKSTACK_URL}/api/pages/{book.page_id}/export/{EXPORT_TYPE}')
            # requests does not raise for 4xx/5xx answers on its own. Without this call the
            # HTTPError handler below was never reached and the body of an error response
            # (e.g. 401 or 404) was stored as if it were the exported page
            exported_page.raise_for_status()

            filename_object = Path(book.filename)
            # write the page.content (the raw bytes of the response) in the real file
            filename_object.write_bytes(exported_page.content)

            print(f'Successfully exported & stored file "{book.filename}".')
        except HTTPError as http_err:
            print(f'Oh, no! An HTTP Error occurred! Error: {http_err}')
        except Exception as err:
            # Boundary per page: report anything else (connection problems, file system errors, ...)
            # and go on with the next page
            print(f'Oh, no! An Error occurred! Error: {err}')


# Script entry point: both calls sit at module level, so they run whenever this file is
# executed (or imported).
# Step 1: helper imported from gather_pages_information - presumably it writes the INFO_FILE
#         that is read in step 2 (verify in gather_pages_information).
get_and_store_all_page_infos_in_file()
# Step 2: download every page listed in the INFO_FILE and store it below ./exports
export_and_store_pages()