Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions etl/extract/fetch_airtable_posts/.edufc.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
name: fetch_airtable_posts
runtime: python3_10
41 changes: 41 additions & 0 deletions etl/extract/fetch_airtable_posts/index.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
import requests

def handler(inputs):
    """Fetch up to ten unprocessed records from an Airtable table.

    Args:
        inputs: Mapping that provides ``airtable_api_key``,
            ``airtable_base_id`` and ``airtable_table_id``.

    Returns:
        ``{"records": [...]}`` on success, otherwise ``{"error": "<message>"}``.
        Every failure, including network failures, is reported through the
        ``error`` key rather than raised.
    """
    # Fetch API Key from inputs to ensure it's passed correctly
    api_key = inputs.get("airtable_api_key")
    if not api_key:
        return {"error": "AIRTABLE_API_KEY is missing. Ensure it is set in Edurata Secrets."}

    # Fetch Airtable Base & Table ID
    airtable_base_id = inputs.get("airtable_base_id")
    airtable_table_id = inputs.get("airtable_table_id")

    if not airtable_base_id:
        return {"error": "airtable_base_id is missing."}

    if not airtable_table_id:
        return {"error": "airtable_table_id is missing."}

    # Airtable API request
    url = f"https://api.airtable.com/v0/{airtable_base_id}/{airtable_table_id}"
    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
    }
    params = {
        "filterByFormula": "AND({Status}='unprocessed')",
        "maxRecords": 10,
    }

    # requests has no default timeout; without one a stalled connection would
    # hang the function indefinitely.
    try:
        response = requests.get(url, headers=headers, params=params, timeout=30)
    except requests.RequestException as exc:
        return {"error": f"Airtable request failed: {exc}"}

    # Error Handling
    if response.status_code == 401:
        return {"error": "Authentication failed. Check if the AIRTABLE_API_KEY is correct and has read access."}
    if response.status_code == 403:
        return {"error": "Access denied. Ensure the API key has proper permissions to read this table."}
    if response.status_code != 200:
        return {"error": f"Airtable API error {response.status_code}: {response.text}"}

    try:
        payload = response.json()
    except ValueError:
        # A 200 with a non-JSON body (e.g. a proxy page) should not crash.
        return {"error": "Airtable returned a response that is not valid JSON."}

    return {"records": payload.get("records", [])}
1 change: 1 addition & 0 deletions etl/extract/fetch_airtable_posts/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
requests
2 changes: 2 additions & 0 deletions etl/extract/get_job_description/.edufc.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
name: get-job-description
runtime: python3_10
39 changes: 39 additions & 0 deletions etl/extract/get_job_description/index.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
import os
import requests

def handler(inputs):
    """Collect the paragraph and heading text of a Notion page.

    All pages of the block-children listing are fetched by following
    ``next_cursor`` while the API reports ``has_more``, so long pages are
    not cut off after the first batch.

    Args:
        inputs: Mapping that provides ``notion_api_key`` and
            ``notion_page_id``.

    Returns:
        ``{"job_description": "<text>"}`` where the text of each
        paragraph/heading block is joined with single spaces, or
        ``"No text found"`` when nothing but whitespace was collected.
        Failures are returned as ``{"error": "<message>"}``.
    """
    notion_api_key = inputs.get("notion_api_key")  # Get API Key from inputs
    if not notion_api_key:
        return {"error": "NOTION_API_KEY is missing. Ensure it's set in Edurata Secrets."}

    notion_page_id = inputs.get("notion_page_id")
    if not notion_page_id:
        return {"error": "notion_page_id is required but was not provided."}

    url = f"https://api.notion.com/v1/blocks/{notion_page_id}/children"
    headers = {
        "Authorization": f"Bearer {notion_api_key}",
        "Notion-Version": "2022-06-28",
    }
    text_block_types = ("paragraph", "heading_1", "heading_2", "heading_3")

    pieces = []
    params = {"page_size": 100}
    while True:
        try:
            response = requests.get(url, headers=headers, params=params, timeout=30)
        except requests.RequestException as exc:
            return {"error": f"Failed to fetch Notion page: {exc}"}

        if response.status_code != 200:
            return {"error": f"Failed to fetch Notion page: {response.text}"}

        try:
            data = response.json()
        except ValueError:
            return {"error": "Failed to fetch Notion page: response was not valid JSON."}

        # Extract text from blocks
        for block in data.get("results", []):
            block_type = block.get("type")
            if block_type not in text_block_types:
                continue
            rich_text = block.get(block_type, {}).get("rich_text", [])
            pieces.append("".join(part.get("plain_text", "") for part in rich_text))

        next_cursor = data.get("next_cursor")
        if not data.get("has_more") or not next_cursor:
            break
        params["start_cursor"] = next_cursor

    # Strip before testing emptiness so that blocks holding only empty text
    # yield the "No text found" placeholder instead of an empty string.
    content = " ".join(pieces).strip()
    return {"job_description": content or "No text found"}
1 change: 1 addition & 0 deletions etl/extract/get_job_description/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
requests
2 changes: 2 additions & 0 deletions etl/load/create_notion_card/.edufc.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
name: create-notion-card
runtime: python3_10
61 changes: 61 additions & 0 deletions etl/load/create_notion_card/index.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
import requests
import json

def _text_input(inputs, key, default):
    """Return ``inputs[key]`` as a string, or *default* when missing or null."""
    value = inputs.get(key)
    return default if value is None else str(value)


def handler(inputs):
    """Create a page (card) in a Notion database describing a job posting.

    Args:
        inputs: Mapping that provides ``notion_api_key`` and
            ``notion_database_id`` plus the optional fields
            ``company_name``, ``job_name``, ``short_job_description``,
            ``language``, ``working_model``, ``location``, ``role``,
            ``salary_expectation`` and ``iso_time``.

    Returns:
        ``{"notion_card": <response JSON>}`` on success, otherwise
        ``{"error": "<message>"}``.
    """
    notion_api_key = inputs.get("notion_api_key")
    if not notion_api_key:
        return {"error": "NOTION_API_KEY is missing."}

    database_id = inputs.get("notion_database_id")
    if not database_id:
        return {"error": "notion_database_id is missing."}

    # A key that is present but null must still fall back to the default;
    # dict.get's default only covers missing keys.
    company_name = _text_input(inputs, "company_name", "Unknown")
    job_name = _text_input(inputs, "job_name", "Unknown")
    short_description = _text_input(inputs, "short_job_description", "No description provided.")
    language = _text_input(inputs, "language", "Unknown")
    working_model = _text_input(inputs, "working_model", "Unknown")
    location = _text_input(inputs, "location", "Unknown")
    role = _text_input(inputs, "role", "Unknown")

    # Convert salary expectation to a number or null
    try:
        salary_expectation = float(inputs.get("salary_expectation", 0))
    except (TypeError, ValueError):
        # TypeError covers null / non-scalar values, ValueError non-numeric text.
        salary_expectation = None

    iso_time = inputs.get("iso_time", None)

    def rich_text(content):
        return {"rich_text": [{"text": {"content": content}}]}

    data = {
        "parent": {"database_id": database_id},
        "properties": {
            "Company Name": {"title": [{"text": {"content": company_name}}]},
            "Status": {"status": {"name": "Not applied"}},
            "Date": {"date": {"start": iso_time} if iso_time else None},
            "Job Name": rich_text(job_name),
            "Job Description": rich_text(short_description),
            "Language": rich_text(language),
            "Working Model": rich_text(working_model),
            "Location": rich_text(location),
            "Role": rich_text(role),
            "Salary Expectation": {"number": salary_expectation},
        },
    }

    headers = {
        "Authorization": f"Bearer {notion_api_key}",
        "Notion-Version": "2022-06-28",
        "Content-Type": "application/json",
    }

    print("Payload Sent to Notion API:", json.dumps(data, indent=4))

    try:
        response = requests.post(
            "https://api.notion.com/v1/pages", json=data, headers=headers, timeout=30
        )
    except requests.RequestException as exc:
        return {"error": f"Failed to create Notion card: {exc}"}

    if response.status_code != 200:
        return {"error": f"Failed to create Notion card: {response.text}"}

    notion_card = response.json()
    print(f"Successfully created Notion Card: {notion_card}")

    return {"notion_card": notion_card}  # This ensures the workflow has an output
1 change: 1 addition & 0 deletions etl/load/create_notion_card/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
requests
2 changes: 2 additions & 0 deletions etl/load/update_airtable_record/.edufc.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
name: update_airtable_record
runtime: python3_10
47 changes: 47 additions & 0 deletions etl/load/update_airtable_record/index.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
import requests

def handler(inputs):
    """Write generated content to an Airtable record and mark it processed.

    Args:
        inputs: Mapping that provides ``airtable_api_key``,
            ``airtable_base_id``, ``airtable_table_id``, ``record_id`` and
            ``generated_content``.

    Returns:
        ``{"status": "success", "updated_record": <response JSON>}`` on
        success, otherwise ``{"error": "<message>"}``. Network failures are
        reported through the ``error`` key rather than raised.
    """
    # Fetch inputs safely
    airtable_base_id = inputs.get("airtable_base_id")
    airtable_table_id = inputs.get("airtable_table_id")
    record_id = inputs.get("record_id")
    generated_content = inputs.get("generated_content")
    api_key = inputs.get("airtable_api_key")

    # Validate inputs
    if not api_key:
        return {"error": "AIRTABLE_API_KEY is missing. Ensure it's set in Edurata Secrets."}
    if not airtable_base_id:
        return {"error": "airtable_base_id is missing."}
    if not airtable_table_id:
        return {"error": "airtable_table_id is missing."}
    if not record_id:
        return {"error": "record_id is missing."}
    if not generated_content:
        return {"error": "generated_content is missing."}

    # Construct Airtable API request
    url = f"https://api.airtable.com/v0/{airtable_base_id}/{airtable_table_id}/{record_id}"
    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
    }
    data = {
        "fields": {
            "Generated Content": generated_content,
            "Status": "processed",
        }
    }

    # Send PATCH request; requests has no default timeout, so set one to
    # avoid hanging forever on a stalled connection.
    try:
        response = requests.patch(url, headers=headers, json=data, timeout=30)
    except requests.RequestException as exc:
        return {"error": f"Airtable request failed: {exc}"}

    # Error handling
    if response.status_code == 401:
        return {"error": "Authentication failed. Check if the AIRTABLE_API_KEY is correct and has write permissions."}
    if response.status_code == 403:
        return {"error": "Access denied. Ensure the API key has proper permissions to update this table."}
    if response.status_code != 200:
        return {"error": f"Airtable API error {response.status_code}: {response.text}"}

    try:
        updated_record = response.json()
    except ValueError:
        return {"error": "Airtable returned a response that is not valid JSON."}

    return {"status": "success", "updated_record": updated_record}
1 change: 1 addition & 0 deletions etl/load/update_airtable_record/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
requests
2 changes: 2 additions & 0 deletions etl/transform/extract_job_info/.edufc.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
name: extract-job-info
runtime: python3_10
46 changes: 46 additions & 0 deletions etl/transform/extract_job_info/index.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
import openai
import json

def _strip_code_fence(text):
    """Return *text* without a surrounding Markdown code fence, if present."""
    stripped = text.strip()
    if not stripped.startswith("```"):
        return stripped
    # Drop the opening fence line (which may carry a language tag like "json").
    _, newline, body = stripped.partition("\n")
    if not newline:
        body = stripped[3:]
    return body.removesuffix("```").strip()


def handler(inputs):
    """Extract structured job details from a job description via OpenAI.

    Args:
        inputs: Mapping that provides ``openai_api_key`` and
            ``job_description``.

    Returns:
        ``{"job_info": <parsed JSON>}`` on success, otherwise
        ``{"error": "<message>"}``.
    """
    openai_api_key = inputs.get("openai_api_key")
    if not openai_api_key:
        return {"error": "OPENAI_API_KEY is missing. Ensure it's set in Edurata Secrets."}

    job_description = inputs.get("job_description")
    if not job_description:
        return {"error": "Job description is missing."}

    prompt = f"""
You are an AI agent designed to extract key details from job descriptions. Based on the following job description, provide the information in JSON format with these fields:
- "company_name": string
- "job_name": string
- "language": string
- "working_model": string
- "location": string
- "role": string
- "short_job_description": string
- "salary_expectation": number - without "" (salary expectation in euros per year, if unavailable make a guess based on seniority)

Job Description:
{job_description}

Respond in JSON format only.
"""

    try:
        client = openai.OpenAI(api_key=openai_api_key)
        response = client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[{"role": "system", "content": prompt}],
            temperature=0.7,
            max_tokens=500,
        )

        # content can be None; treat that as an empty reply so the failure
        # surfaces as a JSON error rather than an AttributeError.
        completion = response.choices[0].message.content or ""

        # Models frequently wrap JSON in ```json ... ``` despite the prompt.
        job_info = json.loads(_strip_code_fence(completion))

        return {"job_info": job_info}

    except json.JSONDecodeError as e:
        return {"error": f"Error extracting job info: model did not return valid JSON: {str(e)}"}
    except Exception as e:
        # Top-level boundary: report any API/client failure as an error result.
        return {"error": f"Error extracting job info: {str(e)}"}
2 changes: 2 additions & 0 deletions etl/transform/extract_job_info/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
openai>=1.0.0
requests
2 changes: 2 additions & 0 deletions etl/transform/generate_gpt_content/.edufc.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
name: generate_gpt_content
runtime: python3_10
42 changes: 42 additions & 0 deletions etl/transform/generate_gpt_content/index.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
import openai

def handler(inputs):
    """Generate content for one record with an OpenAI chat completion.

    Args:
        inputs: Mapping that provides ``openai_api_key``, ``raw_content``,
            ``record_id`` and ``messages`` (passed to the chat completion
            call as-is), plus optional ``model`` (default ``"gpt-4"``) and
            ``temperature`` (default ``0.7``).

    Returns:
        ``{"id": record_id, "generated_content": <text>}`` on success,
        otherwise ``{"error": "<message>"}``.
    """
    # Fetch API key
    api_key = inputs.get("openai_api_key")
    if not api_key:
        return {"error": "OPENAI_API_KEY is missing. Ensure it's set in Edurata Secrets."}

    # Fetch required inputs
    raw_content = inputs.get("raw_content")
    record_id = inputs.get("record_id")
    messages = inputs.get("messages")  # Dynamic message prompt

    # dict.get defaults only apply to missing keys; an optional input that
    # is present but null must fall back to the default as well.
    model = inputs.get("model") or "gpt-4"
    temperature = inputs.get("temperature")
    if temperature is None:  # keep an explicit 0, which is a valid temperature
        temperature = 0.7

    if not raw_content:
        return {"error": "raw_content is missing."}
    if not record_id:
        return {"error": "record_id is missing."}
    if not messages:
        return {"error": "messages array is missing."}

    try:
        # Initialize OpenAI client
        client = openai.OpenAI(api_key=api_key)

        # Call OpenAI API
        response = client.chat.completions.create(
            model=model,
            messages=messages,
            temperature=temperature,
        )

        return {
            "id": record_id,  # Return ID for mapping
            "generated_content": response.choices[0].message.content,
        }

    except openai.OpenAIError as e:
        return {"error": f"OpenAI API error: {str(e)}"}
    except Exception as e:
        return {"error": f"Unexpected error: {str(e)}"}
1 change: 1 addition & 0 deletions etl/transform/generate_gpt_content/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
openai>=1.0.0