From 81e52c7ca4f2a15e4aaa569cf7e66f2721885215 Mon Sep 17 00:00:00 2001 From: Fritz Paz Date: Mon, 10 Feb 2025 18:48:50 -0500 Subject: [PATCH] Improving Docs Formatting --- readme.md | 102 ++++++++++++++++++++++++++++++------------------------ 1 file changed, 56 insertions(+), 46 deletions(-) diff --git a/readme.md b/readme.md index c13bfb5..9f86979 100644 --- a/readme.md +++ b/readme.md @@ -15,62 +15,72 @@ The base Azure Function image does not contain the necessary chromium packages t Run the following commands that installs chromium, chrome driver and selenium on top of the Azure Function base image: -`$acr_id = "<>.azurecr.io"` -`docker login $acr_id -u <> -p <>` -`docker build --tag $acr_id/selenium .` -`docker push $acr_id/selenium:latest` +``` bash +$acr_id = "<>.azurecr.io" +docker login $acr_id -u <> -p <> +docker build --tag $acr_id/selenium . +docker push $acr_id/selenium:latest +``` ### 3. Create Azure Function using docker image Run the following commands: -`$rg = "<>"` -`$loc = "<>"` -`$plan = "<>"` -`$stor = "<>"` -`$fun = "<>"` -`$acr_id = "<>.azurecr.io"` - -`az group create -n $rg -l $loc` -`az storage account create -n $stor -g $rg --sku Standard_LRS` -`az appservice plan create --name $plan --resource-group $rg --sku P1v2 --is-linux` -`az functionapp create --resource-group $rg --os-type Linux --plan $plan --deployment-container-image-name $acr_id/selenium:latest --name $fun --storage-account $stor --docker-registry-server-user <> --docker-registry-server-password <> --functions-version 4` +``` bash +$rg = "<>" +$loc = "<>" +$plan = "<>" +$stor = "<>" +$fun = "<>" +$acr_id = "<>.azurecr.io" +``` + +``` bash +az group create -n $rg -l $loc +az storage account create -n $stor -g $rg --sku Standard_LRS +az appservice plan create --name $plan --resource-group $rg --sku P1v2 --is-linux +az functionapp create --resource-group $rg --os-type Linux --plan $plan --deployment-container-image-name $acr_id/selenium:latest --name $fun --storage-account $stor 
--docker-registry-server-user <> --docker-registry-server-password <> --functions-version 4 +``` ### 4. Add Data Lake Account to store scraping results Run the following commands: -`$rg = "<>"` -`$fun = "<>"` -`$adls = "<>"` -`$sub_id = "<>"` -`$container_name = "scraperesults"` -`# Create adlsgen2` -`az storage account create --name $adls --resource-group $rg --location $loc --sku Standard_RAGRS --kind StorageV2 --enable-hierarchical-namespace true` -`az storage container create --account-name $adls -n $container_name# Assign identity to function and set params` -`az webapp identity assign --name $fun --resource-group $rg` -`az functionapp config appsettings set --name $fun --resource-group $rg --settings par_storage_account_name=$adls par_storage_container_name=$container_name# Give fun MI RBAC role to ADLS gen 2 account` -`$fun_object_id = az functionapp identity show --name $fun --resource-group $rg --query 'principalId' -o tsv` -`New-AzRoleAssignment -ObjectId $fun_object_id -RoleDefinitionName "Storage Blob Data Contributor" -Scope "/subscriptions/$sub_id/resourceGroups/$rg/providers/Microsoft.Storage/storageAccounts/$adls/blobServices/default"` - -### 4. 
Run Azure Function as HTTP trigger +``` +$rg = "<>" +$fun = "<>" +$adls = "<>" +$sub_id = "<>" +$container_name = "scraperesults" +# Create adlsgen2 +az storage account create --name $adls --resource-group $rg --location $loc --sku Standard_RAGRS --kind StorageV2 --enable-hierarchical-namespace true +az storage container create --account-name $adls -n $container_name # Assign identity to function and set params +az webapp identity assign --name $fun --resource-group $rg +az functionapp config appsettings set --name $fun --resource-group $rg --settings par_storage_account_name=$adls par_storage_container_name=$container_name # Give fun MI RBAC role to ADLS gen 2 account +$fun_object_id = az functionapp identity show --name $fun --resource-group $rg --query 'principalId' -o tsv +New-AzRoleAssignment -ObjectId $fun_object_id -RoleDefinitionName "Storage Blob Data Contributor" -Scope "/subscriptions/$sub_id/resourceGroups/$rg/providers/Microsoft.Storage/storageAccounts/$adls/blobServices/default" +``` + +### 5. Run Azure Function as HTTP trigger Test the Function in the portal or in your browser. 
The following code in __init__.py will return all URLs in the following webpage: -`import azure.functions as func` -`from selenium import webdriver` -`from selenium.webdriver.chrome.options import Options` -`from selenium.webdriver.chrome.service import Service` -`from selenium.webdriver.common.by import By` -`from webdriver_manager.chrome import ChromeDriverManager` - -`def main(req: func.HttpRequest) -> func.HttpResponse:` - -    `options = Options()` -    `options.add_argument('--headless')` -    `options.add_argument('--no-sandbox')` -    `options.add_argument('--disable-dev-shm-usage')` -    `driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)` - -    `driver.get('http://www.ubuntu.com/')` -    `links = driver.find_elements(By.TAG_NAME, "a")` \ No newline at end of file +``` python +import azure.functions as func +from selenium import webdriver +from selenium.webdriver.chrome.options import Options +from selenium.webdriver.chrome.service import Service +from selenium.webdriver.common.by import By +from webdriver_manager.chrome import ChromeDriverManager + +def main(req: func.HttpRequest) -> func.HttpResponse: + + options = Options() + options.add_argument('--headless') + options.add_argument('--no-sandbox') + options.add_argument('--disable-dev-shm-usage') + driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options) + + driver.get('http://www.ubuntu.com/') + links = driver.find_elements(By.TAG_NAME, "a") +``` \ No newline at end of file