diff --git a/.gitignore b/.gitignore index bc402e3f..029fbf2b 100644 --- a/.gitignore +++ b/.gitignore @@ -2,10 +2,12 @@ _site *.DS_Store node_modules -tools/__pycache__ +*__pycache__ tools/venv tools/files/meetup.ics bin/ lib/ .venv -pyvenv.cfg \ No newline at end of file +pyvenv.cfg +tools/blog_automation/service_account_key.json +tools/blog_automation/venv diff --git a/tools/blog_automation/README.md b/tools/blog_automation/README.md new file mode 100644 index 00000000..8c1727b5 --- /dev/null +++ b/tools/blog_automation/README.md @@ -0,0 +1,82 @@ +# Goals of this Automation + +# Setup +## 🔑 Setting up Google Service Account Credentials + +To allow our scripts to access Google Drive and export documents, you need to create a **Google Cloud Project**, a **service account**, and download its credentials as a JSON key file. + +*Alternatively, you can ask Silke for the service_account_key.json (in which case, you can skip steps 0-3).* + +### 0. Create a Google Cloud Project +1. Go to [Google Cloud Console](https://console.cloud.google.com/). +2. Open the project selector (top bar) and click **New Project**. +3. Name the project **`blog-automation`**. +4. Click **Create**. +5. Make sure the new project is selected in the top bar. + +👉 **Note:** You need the **Project Editor** or **Owner** role on this project to create service accounts and keys. +If you’re the one who created the project, you already have these permissions. + +### 1. Enable the Drive API +1. In the left menu, go to **APIs & Services → Library**. +2. Search for **Google Drive API**. +3. Click **Enable**. + +### 2. Create a Service Account +1. In the left menu, go to **IAM & Admin → Service Accounts**. +2. Click **Create Service Account**. +3. Name it **`blog-exporter`** and add a short description (e.g. *Exports blog content from Google Docs*). +4. Click **Create and Continue**. + - You do **not** need to assign project-wide roles to this service account. 
+ - Just click **Done** once the account is created. + +### 3. Generate and Download the JSON Key +1. Click on the service account you just created. +2. Go to the **Keys** tab. +3. Click **Add Key → Create new key**. +4. Choose **JSON** and click **Create**. +5. A file will be downloaded (e.g. `blog-exporter-123abc.json`). + - Rename it to **`service_account_key.json`**. + - Move this file into the **`blog_automation`** folder of this repo (the same folder where this README lives). + - **Do not commit this file to Git!** It should already be listed in `.gitignore`. + +### 4. Share the Google Drive Folder or Document +1. In Google Drive, right-click the folder (e.g. `blog_automation`) or a specific Google Doc. +2. Click **Share**. +3. Enter the service account’s email (looks like `blog-exporter@blog-automation.iam.gserviceaccount.com`). +4. Give it at least **Viewer** access. +5. Save changes. + - Now the service account can read/export files in that folder or doc. + +--- + +✅ At this point you should have: +- A **Google Cloud Project** called `blog-automation` with the **Drive API enabled**. +- A **service account** called `blog-exporter`. +- A local **`service_account_key.json`** file inside your `blog_automation` folder (same place as this README, ignored by git). +- The service account email added as a **Viewer** to your Google Drive folder or doc. + + +## Set up Virtual Environment & Install Required Package +- Set up venv `python -m venv venv` +- `source venv/bin/activate` +- `pip install -r requirements.txt` + +# Automation Script + +## Find Document ID +Each Google Doc has a unique ID in its URL. +For example, if the URL is: +https://docs.google.com/document/d/1ABCDEFghijklmnopQRS_tUVWXyz1234567890/edit + +Then the **Document ID** is: + +1ABCDEFghijklmnopQRS_tUVWXyz1234567890 + +Use this ID in your scripts when exporting the document. + +## Run Automation +1. Activate virtual environment: `source venv/bin/activate` +2. 
Run the script: `python doc_to_html_conversion.py <DOCUMENT_ID> [--date YYYY-MM-DD]` + + diff --git a/tools/blog_automation/doc_to_html_conversion.py b/tools/blog_automation/doc_to_html_conversion.py new file mode 100644 index 00000000..24f60679 --- /dev/null +++ b/tools/blog_automation/doc_to_html_conversion.py @@ -0,0 +1,109 @@ +from google.oauth2 import service_account +from googleapiclient.discovery import build +import os +import markdown +import argparse +from pathlib import Path +from googleapiclient.errors import HttpError +import datetime as dt + +# --- Configuration --- +SERVICE_ACCOUNT_FILE = 'service_account_key.json' +YAML_HEADER = ''' +--- +layout: post +title: [TITLE] +date: [DATE] +author_name: [AUTHOR] +author_role: [AUTHOR ROLE] +blurb_img: [IMG PATH] +blurb_img_source: [IMG SOURCE (optional)] +description: [BLOG DESCRIPTION] +category: [CATEGORY] +--- +''' + +def _current_directory(): + return Path(__file__).resolve().parent + +def _posts_directory(): + # Path to the directory where the script itself is located + script_dir = _current_directory() + + # Construct the path relative to the script’s location + posts_dir = (script_dir / "../../_posts").resolve() + + return posts_dir + +def _today_date_str(): + return dt.date.today().isoformat() + +def _create_blog_filename_with_date(doc_name, date_str): + formatted_blog_title = doc_name.lower().replace(' ', '-').strip() + filename = f"{date_str}-{formatted_blog_title}.html" + return filename + +def export_blog_as_html(document_id, date=None): + if date is None: + date = _today_date_str() + service_account_path = os.path.join(_current_directory(), SERVICE_ACCOUNT_FILE) + if not os.path.exists(service_account_path): + print(f"ERROR: Service account key file '{service_account_path}' not found.\n" + "Please obtain your own Google service account key and place it at this path.\n" + "(Never commit this file to version control.)") + exit(1) + creds = service_account.Credentials.from_service_account_file( + service_account_path, 
scopes=['https://www.googleapis.com/auth/drive.readonly'] + ) + drive = build('drive', 'v3', credentials=creds) + + try: + # 1. Get document name from Drive + doc_metadata = drive.files().get(fileId=document_id, fields='name').execute() + doc_name = doc_metadata.get('name', 'exported_blog') + blog_filename = _create_blog_filename_with_date(doc_name, date) + + # 2. Export as Markdown + request = drive.files().export_media( + fileId=document_id, + mimeType='text/markdown' + ) + md_bytes = request.execute() + except HttpError as error: + if error.resp.status == 404: + raise FileNotFoundError(f"Document ID '{document_id}' not found.") from error + else: + raise + + # 3. Convert Markdown to HTML and save to local file + import re + html = markdown.markdown(md_bytes.decode('utf-8')) + # Remove tags from inside heading tags (e.g.

Heading

->

Heading

) + html = re.sub(r'(.+?)', r'\2', html) + + # Remove the first heading if present (e.g.

...

or

...

at the start) + html = re.sub(r'^.*?\s*', '', html, flags=re.DOTALL) + + # Wrap the body in
+ html_body = f'
\n{html}\n
' + + # YAML front matter + yaml_header = YAML_HEADER.replace('[TITLE]', doc_name.title()).replace('[DATE]', date) + + final_html = yaml_header + '\n' + html_body + + posts_dir = _posts_directory() + filename = f"{posts_dir}/{blog_filename}.html" + with open(filename, 'w', encoding='utf-8') as f: + f.write(final_html) + + print(f"Saved HTML to: {filename}") + +if __name__ == "__main__": + # To run script: `python doc_to_html_conversion.py <doc_id> --date <YYYY-MM-DD>` + parser = argparse.ArgumentParser(description="Export a Google Doc as HTML with custom formatting.") + parser.add_argument("doc_id", help="The Google Doc ID to export.") + parser.add_argument("--date", help="Date for the blog post (YYYY-MM-DD). If not provided, uses today.", default=None) + args = parser.parse_args() + export_blog_as_html(args.doc_id, args.date) diff --git a/tools/blog_automation/requirements.txt b/tools/blog_automation/requirements.txt new file mode 100644 index 00000000..fa7d8270 --- /dev/null +++ b/tools/blog_automation/requirements.txt @@ -0,0 +1,23 @@ +cachetools==5.5.2 +certifi==2025.8.3 +charset-normalizer==3.4.2 +google-api-core==2.25.1 +google-api-python-client==2.177.0 +google-auth==2.40.3 +google-auth-httplib2==0.2.0 +google-auth-oauthlib==1.2.2 +googleapis-common-protos==1.70.0 +httplib2==0.22.0 +idna==3.10 +Markdown==3.8.2 +oauthlib==3.3.1 +proto-plus==1.26.1 +protobuf==6.31.1 +pyasn1==0.6.1 +pyasn1_modules==0.4.2 +pyparsing==3.2.3 +requests==2.32.4 +requests-oauthlib==2.0.0 +rsa==4.9.1 +uritemplate==4.2.0 +urllib3==2.5.0