Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion services/database-api/src/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
log_level = os.environ.get("LOG_LEVEL", "DEBUG").upper()
logging.basicConfig(level=log_level, format="%(asctime)s - %(levelname)s - %(message)s")

from fastapi import FastAPI, HTTPException, status
from fastapi import FastAPI, HTTPException, Request, status
from pydantic import BaseModel
from pymongo import MongoClient

Expand Down Expand Up @@ -117,3 +117,10 @@ async def delete_report(ticker_symbol: str, report_type: str, year: int, season:
if result.deleted_count:
return {"message": f"{report_type.capitalize()} report deleted"}
raise HTTPException(status_code=404, detail=f"{report_type.capitalize()} report not found")

@app.post("/synchronize_company_table")
async def synchronize_company_table(request: Request):
    """Replace the entire 'company' collection with the JSON list in the request body.

    The body is expected to be a JSON array of company documents
    (e.g. [{"symbol": ..., "company": ...}, ...]).

    Returns a small summary dict with the number of documents inserted.
    """
    # Parse the payload BEFORE touching the collection: previously the
    # collection was dropped first, so a malformed body destroyed the
    # existing data and left the table empty.
    companies = await request.json()
    collection = db['company']
    collection.drop()
    # insert_many raises pymongo.errors.InvalidOperation on an empty list,
    # so only insert when there is something to insert.
    if companies:
        collection.insert_many(companies)
    return {"message": f"Synchronized {len(companies)} companies"}
33 changes: 33 additions & 0 deletions services/mops-crawler/src/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import pandas
import requests
from fastapi import FastAPI, HTTPException
from lxml import etree
from pydantic import BaseModel

# Set up logging
Expand Down Expand Up @@ -193,3 +194,35 @@ def get_financial_report(report_type: str, ticker_symbol: str, year: int, season

# If everything went well, return the sanitized data
return result["data"]

@app.get('/get_all_companies')
def download_company_info():
    """Scrape the TWSE ISIN listing page and return the listed companies.

    Returns a list of dicts shaped like {'symbol': ..., 'company': ...},
    one per table row. Logs and re-raises any HTTP failure.
    """
    page_url = ('https://isin.twse.com.tw/isin/class_main.jsp?owncode=&stockname='
                '&isincode=&market=1&issuetype=1&industry_code=&Page=1&chklike=Y')
    try:
        page = requests.get(page_url)
        page.raise_for_status()

    except Exception as err:
        logging.error(f"HTTP error occurred while crawling: {err}")
        raise

    root = etree.HTML(page.text)

    def _column_index(header):
        # 1-based <td> position of the column titled `header`: count the
        # sibling cells that precede the matching header cell, plus one.
        locator = "//tr//*[normalize-space()='{}']/preceding-sibling::*".format(header)
        return len(root.xpath(locator)) + 1

    symbol_idx = _column_index('有價證券代號')
    name_idx = _column_index('有價證券名稱')

    # position()>1 skips the header row of the table.
    return [
        {
            'symbol': row.xpath('.//td[{}]'.format(symbol_idx))[0].text,
            'company': row.xpath('.//td[{}]'.format(name_idx))[0].text,
        }
        for row in root.xpath('//tr[position()>1]')
    ]
35 changes: 32 additions & 3 deletions services/report-harvester/src/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,10 +96,39 @@ def store_financial_report(report_type, post_data):
logging.error(f"Request error when storing data: {e}")
return {"status_code": 500, "message": "Internal Server Error"}


def retrieve_ticker_symbols():
    """Return the list of ticker symbols for all listed companies.

    Pulls the current company table from the mops-crawler service,
    pushes it to the database API so the stored table stays in sync,
    then extracts the 'symbol' field from each entry.
    """
    company_table = get_companies_by_crawler()
    synchronize_company(company_table)
    return [entry['symbol'] for entry in company_table]

def get_companies_by_crawler() -> list:
    """Fetch the full company list from the mops-crawler service.

    Returns the decoded JSON payload (a list of company dicts).
    Any failure — HTTP status error, connection problem, or a body
    that is not valid JSON — is logged and re-raised.
    """
    endpoint = 'http://mops-crawler/get_all_companies'
    try:
        # .json() stays inside the try so decode failures are logged too.
        reply = requests.get(endpoint)
        reply.raise_for_status()
        return reply.json()
    except HTTPError as http_err:
        logging.error(f"HTTP error occurred while retrieving company table from crawler: {http_err}")
        raise
    except Exception as err:
        logging.error(f"Error occurred while retrieving company table from crawler: {err}")
        raise

def synchronize_company(companies: list):
    """Push the given company list to the database API.

    POSTs the list to the /synchronize_company_table endpoint, which
    replaces the stored company table wholesale. Logs and re-raises
    any HTTP or connection failure.
    """
    endpoint = 'http://database-api/synchronize_company_table'
    try:
        reply = requests.post(endpoint, json=companies)
        reply.raise_for_status()
    except HTTPError as http_err:
        logging.error(f"HTTP error occurred while synchronizing company table: {http_err}")
        raise
    except Exception as err:
        logging.error(f"Error occurred while synchronizing company table: {err}")
        raise

def retrieve_financial_report_version_table(ticker_symbol, report_type):
base_url = "http://database-api"
Expand Down