diff --git a/.quarto/_freeze/chap1/execute-results/html.json b/.quarto/_freeze/chap1/execute-results/html.json index a901d30..3b76475 100644 --- a/.quarto/_freeze/chap1/execute-results/html.json +++ b/.quarto/_freeze/chap1/execute-results/html.json @@ -1,9 +1,11 @@ { - "hash": "2272ee920a759b5538d4c653fbc60815", + "hash": "69bded67ced01ce7e9420fd7fdd90036", "result": { "engine": "knitr", - "markdown": "# R code for module activity\n\n## 1- Install and load the main packages\n\n#### Install packages\n\nIf any of the below packages is not installed on your computer, please install it. Remember to delete the **#** symbol before running the code.\n\n\n::: {.cell}\n\n```{.r .cell-code}\n#install.packages(\"tidyverse\")\n#install.packages(\"ggplot2\")\n#install.packages(\"dplyr\")\n#install.packages(\"ggrepel\")\n#install.packages(\"patchwork\")\n#install.packages(\"gridExtra\")\n```\n:::\n\n\n#### Load packages\n\n\n::: {.cell}\n\n```{.r .cell-code}\nlibrary(tidyverse)\nlibrary(ggplot2)\nlibrary(dplyr)\nlibrary(ggrepel)\nlibrary(patchwork)\nlibrary(gridExtra)\nlibrary(haven)\n```\n:::\n\n\n## 2- Load the dataset and make a copy\n\n\n::: {.cell}\n\n```{.r .cell-code}\npublicmicrodatateachingsample <- read_sav(\"data/publicmicrodatateachingsample.sav\")\n\ncensus2021teaching <- publicmicrodatateachingsample\n```\n:::\n\n\nImportant: Remember to change to pathway to your dataset accordingly. 
\n\nIf the **read_sav** code does not work, you can use the **Import Dataset** button in the Environment pane to load the dataset in R.\n\n## 3- Drop unnecessary variables\n\n\n::: {.cell}\n\n```{.r .cell-code}\ncensus2021teaching <- census2021teaching[,c(\"health_in_general\",\n \"hours_per_week_worked\",\n \"resident_age_7d\",\"sex\", \n \"ethnic_group_tb_6a\",\n \"approx_social_grade\")]\n```\n:::\n\n\n## 4- Exploratory analysis\n\n### 4.1- Univariate analysis \n\n#### 4.1.1 Univariate analysis for health in general and hours worked\n\n##### Frequencies for health in general and hours worked\n\n\n::: {.cell}\n\n```{.r .cell-code}\ntable(census2021teaching$health_in_general)\n\ntable(census2021teaching$hours_per_week_worked)\n```\n:::\n\n\n##### Percentages for health in general and hours worked\n\n\n\n::: {.cell}\n\n```{.r .cell-code}\nprop.table(table(census2021teaching$health_in_general)) * 100\n\nprop.table(table(census2021teaching$hours_per_week_worked)) * 100\n```\n:::\n\n\nYou can also calculate these frequencies and percentages with appropriate labels.\n\n##### Frequencies and percentages with labels for health in general and hours worked\n\n \n\n::: {.cell}\n\n```{.r .cell-code}\ncensus2021teaching %>%\n mutate(\n health_code = as.integer(as.character(health_in_general)),\n health_label = factor(\n health_code,\n levels = c(1, 2, 3, 4, 5, -8),\n labels = c(\"Very good\", \"Good\", \"Fair\", \"Bad\", \"Very bad\", \"Does not apply\")\n )\n ) %>%\n count(health_label) %>%\n mutate(percent = round(100 * n / sum(n), 1))\n\n\ncensus2021teaching %>%\n mutate(\n hours_code = as.integer(as.character(hours_per_week_worked)),\n hours_label = factor(\n hours_code,\n levels = c(1, 2, 3, 4, -8),\n labels = c(\"[0 – 15]\", \"[16 – 30]\", \"[31 – 48]\", \"[49 and +]\", \"Does not apply\")\n )\n ) %>%\n count(hours_label) %>%\n mutate(percent = round(100 * n / sum(n), 1))\n```\n:::\n\n\nYou can also generate pie charts for better visualization\n\n### a) Pie chart for 
Health in general\n\n##### a.1) Create a labelled health variable\n \n\n::: {.cell}\n\n```{.r .cell-code}\nhealth <- census2021teaching %>%\n mutate(\n health_code = as.integer(as.character(health_in_general)),\n health_label = factor(\n health_code,\n levels = c(1,2,3,4,5,-8),\n labels = c(\"Very good\", \"Good\", \"Fair\", \"Bad\", \"Very bad\", \"Not applicable\")\n )\n ) %>%\n filter(!is.na(health_label)) %>%\n count(health_label, name = \"n\") %>%\n mutate(\n pct = round(100 * n / sum(n), 1),\n label = paste0(health_label, \" (\", pct, \"%)\")\n )\n```\n:::\n\n\n##### a.2) Plot pie chart\n\n\n\n::: {.cell}\n\n```{.r .cell-code}\np1 <- ggplot(health, aes(x = \"\", y = n, fill = health_label)) +\n geom_col(width = 1, color = \"white\") +\n coord_polar(theta = \"y\") +\n geom_label_repel(aes(label = label),\n position = position_stack(vjust = 0.5),\n show.legend = FALSE,\n size = 3) +\n labs(title = \"Distribution of general health\", fill = \"Health status\") +\n theme_void()+\n scale_fill_brewer(palette = \"Set2\") +\n theme(plot.title = element_text(size = 16, hjust = 1))\n\np1\n```\n:::\n\n\n### b) Pie chart for Hours worked\n\n##### b.1) Create a labelled hours worked variable\n\n\n\n::: {.cell}\n\n```{.r .cell-code}\nhours <- census2021teaching %>%\n mutate(\n hours_code = as.integer(as.character(hours_per_week_worked)),\n hours_label = factor(\n hours_code,\n levels = c(1, 2, 3, 4, -8),\n labels = c(\"[0 – 15]\", \"[16 – 30]\", \"[31 – 48]\", \"[49 and +]\", \"Not applicable\")\n )\n ) %>%\n filter(!is.na(hours_label)) %>%\n count(hours_label, name = \"n\") %>%\n mutate(\n pct = round(100 * n / sum(n), 1),\n label = paste0(hours_label, \" (\", pct, \"%)\")\n )\n```\n:::\n\n\n##### b.2) Plot pie chart\n\n\n\n::: {.cell}\n\n```{.r .cell-code}\np2 <- ggplot(hours, aes(x = \"\", y = n, fill = hours_label)) +\n geom_col(width = 1, color = \"white\") +\n coord_polar(theta = \"y\") +\n geom_label_repel(aes(label = label),\n position = position_stack(vjust = 
0.5),\n show.legend = FALSE,\n size = 3) +\n labs(title = \"Distribution of worked hours\", fill = \"Hours worked\") +\n theme_void()+\n scale_fill_brewer(palette = \"Dark2\") +\n theme(plot.title = element_text(size = 16, hjust = 1))\n\np2\n```\n:::\n\n\n\n### 4.1.1.1 Transformation of variable Health in general\n\nCreate a new variable called **health_binary** by regrouping health in general in two broader categories: **Good or very good health** and **Poor health**\n\n\n\n::: {.cell}\n\n```{.r .cell-code}\ncensus2021teaching$health_binary <- ifelse(\n census2021teaching$health_in_general %in% c(1, 2), \"Good or very good health\",\n ifelse(census2021teaching$health_in_general %in% c(3, 4, 5), \"Poor health\", NA)\n)\n```\n:::\n\n\n##### Drop rows where health_in_general = -8 (Does not apply)\n\n\n::: {.cell}\n\n```{.r .cell-code}\ncensus2021teaching <- subset(census2021teaching, health_in_general != -8)\n```\n:::\n\n\n##### Check the new variable's distribution\n\n\n::: {.cell}\n\n```{.r .cell-code}\ntable(census2021teaching$health_binary)\n```\n:::\n\n\n##### Create variable **Poor_health** and check its distribution\n\nCode a binary variable that takes 1 for poor health and 0 otherwise. Call this new variable **Poor_health**. 
This variable will be the explained variable of our regression model.\n\n\n::: {.cell}\n\n```{.r .cell-code}\ncensus2021teaching$Poor_health <- ifelse(\n census2021teaching$health_binary == \"Poor health\", 1,\n ifelse(census2021teaching$health_binary == \"Good or very good health\", 0, NA)\n)\n\ntable(census2021teaching$Poor_health)\n```\n:::\n\n\n### 4.1.2 Univariate analysis for age, sex, ethnicity, social category \n\n#### Frequencies and percentages for age, sex, ethnicity, social category \n\n\n::: {.cell}\n\n```{.r .cell-code}\ntable(census2021teaching$resident_age_7d)\ntable(census2021teaching$sex)\ntable(census2021teaching$ethnic_group_tb_6a)\ntable(census2021teaching$approx_social_grade)\n\nprop.table(table(census2021teaching$resident_age_7d)) * 100\nprop.table(table(census2021teaching$sex)) * 100\nprop.table(table(census2021teaching$ethnic_group_tb_6a)) * 100\nprop.table(table(census2021teaching$approx_social_grade)) * 100\n```\n:::\n\n\n#### Plot pie chart Age variable\n\n##### a) Create a labelled Age variable\n\n\n::: {.cell}\n\n```{.r .cell-code}\nAge <- census2021teaching %>%\n mutate(\n Age_code = as.integer(as.character(resident_age_7d)),\n Age_label = factor(\n Age_code,\n levels = c(1, 2, 3, 4,5,6,7, -8),\n labels = c(\"]0 – 15]\", \"[16 – 24]\", \"[25 – 34]\", \"[35 - 44]\", \"[45 - 54]\", \"[55 - 64]\", \"[65 and +]\",\"Not applicable\")\n )\n ) %>%\n filter(!is.na(Age_label)) %>%\n count(Age_label, name = \"n\") %>%\n mutate(\n pct = round(100 * n / sum(n), 1),\n label = paste0(Age_label, \" (\", pct, \"%)\")\n )\n```\n:::\n\n\n##### b) Plot pie chart\n\n\n::: {.cell}\n\n```{.r .cell-code}\nggplot(Age, aes(x = \"\", y = n, fill = Age_label)) +\n geom_col(width = 1, color = \"white\") +\n coord_polar(theta = \"y\") +\n geom_label_repel(aes(label = label),\n position = position_stack(vjust = 0.5),\n show.legend = FALSE,\n size = 3) +\n labs(title = \"Distribution of respondents by Age\", fill = \"Resident age\") +\n theme_void()+\n 
scale_fill_brewer(palette = \"Pastel1\") +\n theme(plot.title = element_text(size = 16, hjust = 1))\n```\n:::\n\n\n#### Histogram for Sex variable\n\n##### a) Recode sex variable with labels\n\n\n::: {.cell}\n\n```{.r .cell-code}\nsex <- census2021teaching %>%\n mutate(sex_label = factor(sex, levels = c(1, 2), labels = c(\"Male\", \"Female\"))) %>%\n count(sex_label, name = \"n\") %>%\n mutate(prop = n / sum(n),\n label = scales::percent(prop, accuracy = 0.1))\n```\n:::\n\n\n##### b) Plot histogram\n\n\n::: {.cell}\n\n```{.r .cell-code}\nggplot(sex, aes(x = sex_label, y = prop)) +\n geom_col(fill = \"skyblue\", color = \"black\", width = 0.3) +\n geom_text(aes(label = label), vjust = -0.5) +\n scale_y_continuous(labels = scales::percent_format()) +\n scale_x_discrete(expand = expansion(mult = c(0.5, 0.7))) +\n labs(title = \"Distribution of respondents by sex\", x = \"\", y = \"Percentage\") +\n theme_classic()+\n theme(axis.text.x = element_text(size = 12))+\n theme(plot.title = element_text(size = 16, hjust = 0.5))\n```\n:::\n\n\n#### Histogram for Ethnicity variable\n\n##### a) Recode Ethnicity variable with labels\n\n\n::: {.cell}\n\n```{.r .cell-code}\nEthnicity <- census2021teaching %>%\n mutate(Ethnicity_label = factor(ethnic_group_tb_6a, levels = c(1, 2, 3, 4, 5, -8), \n labels = c(\"Asian\", \"Black\", \"Mixed\", \"White\", \"Other\", \"Does not apply\"))) %>%\n count(Ethnicity_label, name = \"n\") %>%\n mutate(prop = n / sum(n),\n label = scales::percent(prop, accuracy = 0.1))\n```\n:::\n\n\n##### b) Plot histogram\n\n\n::: {.cell}\n\n```{.r .cell-code}\nggplot(Ethnicity, aes(x = Ethnicity_label, y = prop)) +\n geom_col(fill = \"chocolate\", color = \"black\", width = 0.3) +\n geom_text(aes(label = label), vjust = -0.5) +\n scale_y_continuous(labels = scales::percent_format()) +\n labs(title = \"Distribution of respondents by Ethnicity\", x = \"\", y = \"Percentage\") +\n theme_classic()+\n theme(axis.text.x = element_text(size = 12))+\n theme(plot.title 
= element_text(size = 16, hjust = 0.5))\n```\n:::\n\n\n#### Pie chart for variable social class\n\n##### a) Create a labelled social class variable\n\n\n::: {.cell}\n\n```{.r .cell-code}\nSocial_class <- census2021teaching %>%\n mutate(\n Social_class_code = as.integer(as.character(approx_social_grade)),\n Social_class_label = factor(\n Social_class_code,\n levels = c(1, 2, 3, 4, -8),\n labels = c(\"Higher, intermediate managers\", \"Supervisory, junior managers\", \n \"Skilled manual\", \"Semi-skilled, unskilled manual\",\"Not applicable\")\n )\n ) %>%\n filter(!is.na(Social_class_label)) %>%\n count(Social_class_label, name = \"n\") %>%\n mutate(\n pct = round(100 * n / sum(n), 1),\n label = paste0(Social_class_label, \" (\", pct, \"%)\")\n )\n```\n:::\n\n\n##### b) Plot pie chart\n\n\n::: {.cell}\n\n```{.r .cell-code}\nggplot(Social_class, aes(x = \"\", y = n, fill = Social_class_label)) +\n geom_col(width = 1, color = \"white\") +\n coord_polar(theta = \"y\") +\n geom_label_repel(aes(label = label),\n position = position_stack(vjust = 0.5),\n show.legend = FALSE,\n size = 3) +\n labs(title = \"Distribution of respondents by Social class\", fill = \"Social class\") +\n theme_void()+\n scale_fill_brewer(palette = \"Pastel2\") +\n theme(plot.title = element_text(size = 16, hjust = 1))\n```\n:::\n\n\n##### Drop under 16 from the table\n\n\n::: {.cell}\n\n```{.r .cell-code}\ncensus2021teaching <- subset(census2021teaching, resident_age_7d != 1)\n```\n:::\n\n\n### 4.2- Bivariate analysis \n\n##### Cross tabulation of Poor_health by hours worked\n\n\n::: {.cell}\n\n```{.r .cell-code}\ntabulation <- table(census2021teaching$Poor_health, census2021teaching$hours_per_week_worked)\ntabulation\n```\n:::\n\n\n##### Row percentages\n\n\n::: {.cell}\n\n```{.r .cell-code}\nprop.table(tabulation, margin = 1) * 100\n```\n:::\n\n\n##### Column percentages\n\n\n::: {.cell}\n\n```{.r .cell-code}\nprop.table(tabulation, margin = 2) * 100\n```\n:::\n\n#### Chi square test\n\n##### a) 
Creating cross tabulations to be tested\n\n\n::: {.cell}\n\n```{.r .cell-code}\ntabulation.health_hours <- table(census2021teaching$Poor_health, census2021teaching$hours_per_week_worked)\ntabulation.health_age <- table(census2021teaching$Poor_health, census2021teaching$resident_age_7d)\ntabulation.health_sex <- table(census2021teaching$Poor_health, census2021teaching$sex)\ntabulation.health_ethnicity <- table(census2021teaching$Poor_health, census2021teaching$ethnic_group_tb_6a)\ntabulation.health_classes <- table(census2021teaching$Poor_health, census2021teaching$approx_social_grade)\n```\n:::\n\n\n##### b) chi square test on health and our 5 other variables\n\n\n::: {.cell}\n\n```{.r .cell-code}\nchisq.test(tabulation.health_hours)\nchisq.test(tabulation.health_age)\nchisq.test(tabulation.health_sex)\nchisq.test(tabulation.health_ethnicity)\nchisq.test(tabulation.health_classes)\n```\n:::\n\n## 5 Regression model\n\n### 5.1 Recode variables as factors with labels\n\n\n::: {.cell}\n\n```{.r .cell-code}\ncensus2021teaching$hours_per_week_worked <- factor(\n census2021teaching$hours_per_week_worked,\n levels = c(1, 2, 3, 4, -8),\n labels = c(\"0-15\", \"16-30\", \"31-48\", \"49+\", \"Does not apply\")\n)\n\ncensus2021teaching$resident_age_7d <- factor(\n census2021teaching$resident_age_7d,\n levels = c(1, 2, 3, 4, 5, 6, 7, -8),\n labels = c(\"0-15\", \"16-24\", \"25-34\", \"35-44\", \"45-54\", \"55-64\", \"65+\", \"Not applicable\")\n)\n\ncensus2021teaching$ethnic_group_tb_6a <- factor(\n census2021teaching$ethnic_group_tb_6a,\n levels = c(1, 2, 3, 4, 5, -8),\n labels = c(\"Asian\", \"Black\", \"Mixed\", \"White\", \"Other\", \"Does not apply\")\n)\n\ncensus2021teaching$approx_social_grade <- factor(\n census2021teaching$approx_social_grade,\n levels = c(1, 2, 3, 4, -8),\n labels = c(\"Higher, intermediate managers\",\n \"Supervisory, junior managers\",\n \"Skilled manual\",\n \"Semi-skilled, unskilled manual\",\n \"Not applicable\")\n)\n\ncensus2021teaching$sex <- 
factor(\n census2021teaching$sex,\n levels = c(1, 2),\n labels = c(\"Male\", \"Female\")\n)\n```\n:::\n\n\n### 5.2 Pick references that are central or policy relevant\n\n\n::: {.cell}\n\n```{.r .cell-code}\ncensus2021teaching$hours_per_week_worked <- relevel(census2021teaching$hours_per_week_worked, ref = \"31-48\")\ncensus2021teaching$resident_age_7d <- relevel(census2021teaching$resident_age_7d, ref = \"45-54\")\ncensus2021teaching$sex <- relevel(census2021teaching$sex, ref = \"Male\")\ncensus2021teaching$ethnic_group_tb_6a <- relevel(census2021teaching$ethnic_group_tb_6a, ref = \"White\")\ncensus2021teaching$approx_social_grade <- relevel(census2021teaching$approx_social_grade, ref = \"Higher, intermediate managers\")\n```\n:::\n\n\n### 5.3 Fit logistic regression \n\n\n\n::: {.cell}\n\n```{.r .cell-code}\nmodel_health <- glm(\n Poor_health ~ hours_per_week_worked + resident_age_7d + sex + ethnic_group_tb_6a + approx_social_grade,\n data = census2021teaching,\n family = binomial(link = \"logit\")\n)\n```\n:::\n\n#### Obtain summary results of the logistic regression\n\n\n::: {.cell}\n\n```{.r .cell-code}\nsummary(model_health)\n```\n:::\n\n\n\n\n\n\n\n\n", - "supporting": [], + "markdown": "# R code for module activity\n\n## 1- Install and load the main packages\n\n\n\n::: {.cell}\n\n```{.r .cell-code}\n#### Load the packages if installed; install them if not present \n\npkg <- c(\"tidyverse\", \n \"ggplot2\", \n \"ggrepel\", \n \"patchwork\", \n \"gridExtra\", \n \"haven\", ### Import Stata and SPSS datasets\n \"viridis\" ### colourblind friendly palette\n)\n\nfor (p in pkg) {\n if (!require(p, character.only = T)) {\n install.packages(p)\n library(p, character.only = T)\n }\n}\n```\n:::\n\n\n## 2- Load the dataset and make a copy\n\n\n::: {.cell}\n\n```{.r .cell-code}\npublicmicrodatateachingsample <- read_sav(\"data/publicmicrodatateachingsample.sav\")\n\ncensus2021teaching <- publicmicrodatateachingsample\n```\n:::\n\n\nImportant: Remember to change the 
filepath to your dataset accordingly. \n\nIf the **read_sav** code does not work, you can use the **Import Dataset** button in the RStudio Environment Pane to load the dataset in R.\n\n## 3- Drop unnecessary variables\n\n\n::: {.cell}\n\n```{.r .cell-code}\ncensus2021teaching <- census2021teaching[,c(\"health_in_general\",\n \"hours_per_week_worked\",\n \"resident_age_7d\",\"sex\", \n \"ethnic_group_tb_6a\",\n \"approx_social_grade\")]\n\nhead(census2021teaching)\ndim(census2021teaching)\n```\n:::\n\n\n::: {.callout-note collapse=\"true\"}\n\n## Click to view results\n\n\n::: {.cell}\n::: {.cell-output .cell-output-stdout}\n\n```\n# A tibble: 6 × 6\n health_in_general hours_per_week_worked resident_age_7d sex \n \n1 1 [Very good health] 4 [Full-time: 49 or more hours … 4 [Aged 35 to … 2 [Mal…\n2 2 [Good health] -8 [Does not apply] 7 [Aged 65 yea… 2 [Mal…\n3 2 [Good health] -8 [Does not apply] 7 [Aged 65 yea… 2 [Mal…\n4 3 [Fair health] -8 [Does not apply] 7 [Aged 65 yea… 1 [Fem…\n5 2 [Good health] -8 [Does not apply] 2 [Aged 16 to … 2 [Mal…\n6 4 [Bad health] -8 [Does not apply] 7 [Aged 65 yea… 2 [Mal…\n# ℹ 2 more variables: ethnic_group_tb_6a ,\n# approx_social_grade \n```\n\n\n:::\n\n::: {.cell-output .cell-output-stdout}\n\n```\n[1] 604351 6\n```\n\n\n:::\n:::\n\n\n:::\n\n## 4- Exploratory analysis\n\n### 4.1- Univariate analysis \n\n#### 4.1.1 Univariate analysis for health in general and hours worked\n\n##### Frequencies for health in general and hours worked\n\n\n::: {.cell}\n\n```{.r .cell-code}\n### Raw values\ntable(census2021teaching$health_in_general)\n\n### Labelled values\ntable(as_factor(census2021teaching$hours_per_week_worked))\n```\n:::\n\n\n::: {.callout-note collapse=\"true\"}\n\n## Click to view results\n\n\n::: {.cell}\n::: {.cell-output .cell-output-stdout}\n\n```\n\n -8 1 2 3 4 5 \n 6989 289229 200500 76185 24306 7142 \n```\n\n\n:::\n\n::: {.cell-output .cell-output-stdout}\n\n```\n\n Does not apply Part-time: 15 hours or less worked \n 326132 
28680 \n Part-time: 16 to 30 hours worked Full-time: 31 to 48 hours worked \n 54414 164603 \nFull-time: 49 or more hours worked \n 30522 \n```\n\n\n:::\n:::\n\n\n:::\n\n##### Distribution of self-rated health and hours worked\n\n\n\n::: {.cell}\n\n```{.r .cell-code}\nround(\n prop.table(\n table(\n as_factor(census2021teaching$health_in_general\n )\n )\n ) * 100,\n 1) \n\nround(\n prop.table(\n table(\n as_factor(census2021teaching$hours_per_week_worked)\n )\n ) * 100,\n 1)\n```\n:::\n\n::: {.callout-note collapse=\"true\"}\n\n## Click to view results\n\n\n::: {.cell}\n::: {.cell-output .cell-output-stdout}\n\n```\n\n Does not apply Very good health Good health Fair health \n 1.2 47.9 33.2 12.6 \n Bad health Very bad health \n 4.0 1.2 \n```\n\n\n:::\n\n::: {.cell-output .cell-output-stdout}\n\n```\n\n Does not apply Part-time: 15 hours or less worked \n 54.0 4.7 \n Part-time: 16 to 30 hours worked Full-time: 31 to 48 hours worked \n 9.0 27.2 \nFull-time: 49 or more hours worked \n 5.1 \n```\n\n\n:::\n:::\n\n\n:::\n\n\n\n##### Frequencies and percentages with labels for health in general and hours worked\n\n\nYou can also generate pie charts for better visualization\n\n### a) Pie chart for self-rated general health\n\n\n\n\n::: {.cell}\n\n```{.r .cell-code}\nh1<-round(100*\n prop.table(\n table(\n as_factor(census2021teaching$health_in_general)\n )\n ),1\n )\n\nh1_df <- as.data.frame(h1)\n\nnames(h1_df) <- c(\"srh\", \"pct\")\n\npie(h1_df$pct,\n labels = paste(h1_df$srh, sep = \" \", h1_df$pct, \"%\"),\n cex = 0.7, \n radius=0.9,\n col = viridis::viridis(length(h1_df$srh)), \n main = \"Self-rated general health\")\n```\n:::\n\n::: {.callout-note collapse=\"true\"}\n\n## Click to view results\n\n::: {.cell}\n::: {.cell-output-display}\n![](chap1_files/figure-html/p1.2-1.png){width=672}\n:::\n:::\n\n\n:::\n\n### b) Pie chart for Hours worked\n\n##### b.1) Create a labelled hours worked variable\n\n\n\n::: {.cell}\n\n```{.r .cell-code code-fold=\"true\" 
code-summary=\"View output\"}\nh2<-round(100*\n prop.table(\n table(\n as_factor(census2021teaching$hours_per_week_worked))\n ),1\n )\n\nh2_df <- as.data.frame(h2)\n\nnames(h2_df) <- c(\"hpw\", \"pct\")\n\np2<-pie(h2_df$pct,\n labels = paste(h2_df$hpw, sep = \" \", h2_df$pct, \"%\"),\n cex = 0.7, \n radius=0.9,\n col = viridis::viridis(length(h2_df$hpw)), \n main = \"Hours worked per week\")\n```\n\n::: {.cell-output-display}\n![](chap1_files/figure-html/p2-1.png){width=672}\n:::\n:::\n\n\n\n### 4.1.1.1 Transformation of variable Health in general\n\nCreate a new variable called **health_binary** by regrouping health in general in two broader categories: **Good or very good health** and **Poor health**\n\n\n\n::: {.cell}\n\n```{.r .cell-code}\ncensus2021teaching$health_binary <- ifelse(\n census2021teaching$health_in_general %in% c(1, 2), \"Good or very good health\",\n ifelse(census2021teaching$health_in_general %in% c(3, 4, 5), \"Poor health\", NA)\n)\n```\n:::\n\n\n##### Drop rows where health_in_general = -8 (Does not apply)\n\n\n::: {.cell}\n\n```{.r .cell-code}\ncensus2021teaching <-census2021teaching %>%\n filter(health_in_general != -8)\n```\n:::\n\n\n##### Check the new variable's distribution\n\n\n::: {.cell}\n\n```{.r .cell-code}\ntable(census2021teaching$health_binary)\n```\n\n::: {.cell-output .cell-output-stdout}\n\n```\n\nGood or very good health Poor health \n 489729 107633 \n```\n\n\n:::\n:::\n\n\n##### Create variable **Poor_health** and check its distribution\n\nCode a binary variable that takes 1 for poor health and 0 otherwise. Call this new variable **Poor_health**. 
This variable will be the explained variable of our regression model.\n\n\n::: {.cell}\n\n```{.r .cell-code}\ncensus2021teaching$Poor_health <- ifelse(\n census2021teaching$health_binary == \"Poor health\", 1,\n ifelse(census2021teaching$health_binary == \"Good or very good health\", 0, NA)\n)\n\ntable(census2021teaching$Poor_health)\n```\n\n::: {.cell-output .cell-output-stdout}\n\n```\n\n 0 1 \n489729 107633 \n```\n\n\n:::\n:::\n\n\n### 4.1.2 Univariate analysis for age, sex, ethnicity, social category \n\n#### Frequencies and percentages for age, sex, ethnicity, social category \n\n\n::: {.cell}\n\n```{.r .cell-code}\nt_a<-table(as_factor(census2021teaching$resident_age_7d))\nt_s<-table(as_factor(census2021teaching$sex))\nt_e<-table(as_factor(census2021teaching$ethnic_group_tb_6a))\nt_g<-table(as_factor(census2021teaching$approx_social_grade))\n\ncbind(t_a,round(prop.table(t_a)*100,1))\n```\n\n::: {.cell-output .cell-output-stdout}\n\n```\n t_a \nDoes not apply 0 0.0\nAged 15 years and under 110127 18.4\nAged 16 to 24 years 64094 10.7\nAged 25 to 34 years 80612 13.5\nAged 35 to 44 years 77424 13.0\nAged 45 to 54 years 79268 13.3\nAged 55 to 64 years 74757 12.5\nAged 65 years and over 111080 18.6\n```\n\n\n:::\n\n```{.r .cell-code}\ncbind(t_s,round(prop.table(t_s)*100,1))\n```\n\n::: {.cell-output .cell-output-stdout}\n\n```\n t_s \nDoes not apply 0 0\nFemale 304863 51\nMale 292499 49\n```\n\n\n:::\n\n```{.r .cell-code}\ncbind(t_e,round(prop.table(t_e)*100,1))\n```\n\n::: {.cell-output .cell-output-stdout}\n\n```\n t_e \nDoes not apply 0 0.0\nAsian, Asian British or Asian Welsh 55705 9.3\nBlack, Black British, Black Welsh, Caribbean or African 24068 4.0\nMixed or Multiple ethnic groups 17066 2.9\nWhite 487868 81.7\nOther ethnic group 12655 2.1\n```\n\n\n:::\n\n```{.r .cell-code}\ncbind(t_g,round(prop.table(t_g)*100,1))\n```\n\n::: {.cell-output .cell-output-stdout}\n\n```\n t_g\nDoes not apply 129807\nAB Higher and intermediate 
managerial/administrative/professional occupations 109280\nC1 Supervisory, clerical and junior managerial/administrative/professional occupations 153143\nC2 Skilled manual occupations 99796\nDE Semi-skilled and unskilled manual occupations; unemployed and lowest grade occupations 105336\n \nDoes not apply 21.7\nAB Higher and intermediate managerial/administrative/professional occupations 18.3\nC1 Supervisory, clerical and junior managerial/administrative/professional occupations 25.6\nC2 Skilled manual occupations 16.7\nDE Semi-skilled and unskilled manual occupations; unemployed and lowest grade occupations 17.6\n```\n\n\n:::\n:::\n\n\n#### Plot pie chart Age variable\n\n\n::: {.cell}\n\n```{.r .cell-code}\nt_a.df <- as.data.frame(round(prop.table(t_a)*100,1))\nnames(t_a.df) <- c(\"age\", \"pct\")\n\npie(t_a.df$pct,\n labels = paste(t_a.df$age, sep = \" \", t_a.df$pct, \"%\"),\n cex = 0.7, \n radius=0.9,\n col = viridis::viridis(length(t_a.df$age)), \n main = \"Distribution of respondents by age\")\n```\n\n::: {.cell-output-display}\n![](chap1_files/figure-html/unnamed-chunk-13-1.png){width=672}\n:::\n:::\n\n\n#### Pie chart for Sex variable\n\n\n\n::: {.cell}\n\n```{.r .cell-code}\nt_s.df <- as.data.frame(round(prop.table(t_s)*100,1))\nnames(t_s.df) <- c(\"sex\", \"pct\")\n\npie(t_s.df$pct,\n labels = paste(t_s.df$sex, sep = \" \", t_s.df$pct, \"%\"),\n cex = 0.7, \n radius=0.9,\n col = viridis::viridis(length(t_a.df$age)), \n main = \"Distribution of respondents by sex\")\n```\n\n::: {.cell-output-display}\n![](chap1_files/figure-html/unnamed-chunk-14-1.png){width=672}\n:::\n:::\n\n\n#### Histogram for Ethnicity variable\n\n##### a) Recode Ethnicity variable with labels\n\n\n::: {.cell}\n\n```{.r .cell-code}\nEthnicity <- census2021teaching %>%\n mutate(Ethnicity_label = factor(ethnic_group_tb_6a, levels = c(1, 2, 3, 4, 5, -8), \n labels = c(\"Asian\", \"Black\", \"Mixed\", \"White\", \"Other\", \"Does not apply\"))) %>%\n count(Ethnicity_label, name = \"n\") 
%>%\n mutate(prop = n / sum(n),\n label = scales::percent(prop, accuracy = 0.1))\n```\n:::\n\n\n##### b) Plot histogram\n\n\n::: {.cell}\n\n```{.r .cell-code}\nggplot(Ethnicity, aes(x = Ethnicity_label, y = prop)) +\n geom_col(fill = \"chocolate\", color = \"black\", width = 0.3) +\n geom_text(aes(label = label), vjust = -0.5) +\n scale_y_continuous(labels = scales::percent_format()) +\n labs(title = \"Distribution of respondents by Ethnicity\", x = \"\", y = \"Percentage\") +\n theme_classic()+\n theme(axis.text.x = element_text(size = 12))+\n theme(plot.title = element_text(size = 16, hjust = 0.5))\n```\n\n::: {.cell-output-display}\n![](chap1_files/figure-html/unnamed-chunk-16-1.png){width=672}\n:::\n:::\n\n\n#### Pie chart for variable social class\n\n##### a) Create a labelled social class variable\n\n\n::: {.cell}\n\n```{.r .cell-code}\nSocial_class <- census2021teaching %>%\n mutate(\n Social_class_code = as.integer(as.character(approx_social_grade)),\n Social_class_label = factor(\n Social_class_code,\n levels = c(1, 2, 3, 4, -8),\n labels = c(\"Higher, intermediate managers\", \"Supervisory, junior managers\", \n \"Skilled manual\", \"Semi-skilled, unskilled manual\",\"Not applicable\")\n )\n ) %>%\n filter(!is.na(Social_class_label)) %>%\n count(Social_class_label, name = \"n\") %>%\n mutate(\n pct = round(100 * n / sum(n), 1),\n label = paste0(Social_class_label, \" (\", pct, \"%)\")\n )\n```\n:::\n\n\n##### b) Plot pie chart\n\n\n::: {.cell}\n\n```{.r .cell-code}\nggplot(Social_class, aes(x = \"\", y = n, fill = Social_class_label)) +\n geom_col(width = 1, color = \"white\") +\n coord_polar(theta = \"y\") +\n geom_label_repel(aes(label = label),\n position = position_stack(vjust = 0.5),\n show.legend = FALSE,\n size = 3) +\n labs(title = \"Distribution of respondents by Social class\", fill = \"Social class\") +\n theme_void()+\n scale_fill_brewer(palette = \"Pastel2\") +\n theme(plot.title = element_text(size = 16, hjust = 1))\n```\n\n::: 
{.cell-output-display}\n![](chap1_files/figure-html/unnamed-chunk-18-1.png){width=672}\n:::\n:::\n\n\n##### Drop under 16 from the table\n\n\n::: {.cell}\n\n```{.r .cell-code}\ncensus2021teaching <- subset(census2021teaching, resident_age_7d != 1)\n```\n:::\n\n\n### 4.2- Bivariate analysis \n\n##### Cross tabulation of Poor_health by hours worked\n\n\n::: {.cell}\n\n```{.r .cell-code}\ntabulation <- table(census2021teaching$Poor_health, census2021teaching$hours_per_week_worked)\ntabulation\n```\n\n::: {.cell-output .cell-output-stdout}\n\n```\n \n -8 1 2 3 4\n 0 133254 24373 47454 149740 27898\n 1 75762 4307 6960 14863 2624\n```\n\n\n:::\n:::\n\n\n##### Row percentages\n\n\n::: {.cell}\n\n```{.r .cell-code}\nprop.table(tabulation, margin = 1) * 100\n```\n\n::: {.cell-output .cell-output-stdout}\n\n```\n \n -8 1 2 3 4\n 0 34.817712 6.368380 12.399175 39.125311 7.289421\n 1 72.488423 4.120900 6.659267 14.220789 2.510620\n```\n\n\n:::\n:::\n\n\n##### Column percentages\n\n\n::: {.cell}\n\n```{.r .cell-code}\nprop.table(tabulation, margin = 2) * 100\n```\n\n::: {.cell-output .cell-output-stdout}\n\n```\n \n -8 1 2 3 4\n 0 63.753014 84.982566 87.209174 90.970395 91.402922\n 1 36.246986 15.017434 12.790826 9.029605 8.597078\n```\n\n\n:::\n:::\n\n#### Chi square test\n\n##### a) Creating cross tabulations to be tested\n\n\n::: {.cell}\n\n```{.r .cell-code}\ntabulation.health_hours <- table(census2021teaching$Poor_health, census2021teaching$hours_per_week_worked)\ntabulation.health_age <- table(census2021teaching$Poor_health, census2021teaching$resident_age_7d)\ntabulation.health_sex <- table(census2021teaching$Poor_health, census2021teaching$sex)\ntabulation.health_ethnicity <- table(census2021teaching$Poor_health, census2021teaching$ethnic_group_tb_6a)\ntabulation.health_classes <- table(census2021teaching$Poor_health, census2021teaching$approx_social_grade)\n```\n:::\n\n\n##### b) chi square test on health and our 5 other variables\n\n\n::: {.cell}\n\n```{.r 
.cell-code}\nchisq.test(tabulation.health_hours)\n```\n\n::: {.cell-output .cell-output-stdout}\n\n```\n\n\tPearson's Chi-squared test\n\ndata: tabulation.health_hours\nX-squared = 48349, df = 4, p-value < 2.2e-16\n```\n\n\n:::\n\n```{.r .cell-code}\nchisq.test(tabulation.health_age)\n```\n\n::: {.cell-output .cell-output-stdout}\n\n```\n\n\tPearson's Chi-squared test\n\ndata: tabulation.health_age\nX-squared = 47990, df = 5, p-value < 2.2e-16\n```\n\n\n:::\n\n```{.r .cell-code}\nchisq.test(tabulation.health_sex)\n```\n\n::: {.cell-output .cell-output-stdout}\n\n```\n\n\tPearson's Chi-squared test with Yates' continuity correction\n\ndata: tabulation.health_sex\nX-squared = 280.16, df = 1, p-value < 2.2e-16\n```\n\n\n:::\n\n```{.r .cell-code}\nchisq.test(tabulation.health_ethnicity)\n```\n\n::: {.cell-output .cell-output-stdout}\n\n```\n\n\tPearson's Chi-squared test\n\ndata: tabulation.health_ethnicity\nX-squared = 1454.4, df = 4, p-value < 2.2e-16\n```\n\n\n:::\n\n```{.r .cell-code}\nchisq.test(tabulation.health_classes)\n```\n\n::: {.cell-output .cell-output-stdout}\n\n```\n\n\tPearson's Chi-squared test\n\ndata: tabulation.health_classes\nX-squared = 42535, df = 4, p-value < 2.2e-16\n```\n\n\n:::\n:::\n\n## 5 Regression model\n\n### 5.1 Recode variables as factors with labels\n\n\n::: {.cell}\n\n```{.r .cell-code}\ncensus2021teaching$hours_per_week_worked <- factor(\n census2021teaching$hours_per_week_worked,\n levels = c(1, 2, 3, 4, -8),\n labels = c(\"0-15\", \"16-30\", \"31-48\", \"49+\", \"Does not apply\")\n)\n\ncensus2021teaching$resident_age_7d <- factor(\n census2021teaching$resident_age_7d,\n levels = c(1, 2, 3, 4, 5, 6, 7, -8),\n labels = c(\"0-15\", \"16-24\", \"25-34\", \"35-44\", \"45-54\", \"55-64\", \"65+\", \"Not applicable\")\n)\n\ncensus2021teaching$ethnic_group_tb_6a <- factor(\n census2021teaching$ethnic_group_tb_6a,\n levels = c(1, 2, 3, 4, 5, -8),\n labels = c(\"Asian\", \"Black\", \"Mixed\", \"White\", \"Other\", \"Does not 
apply\")\n)\n\ncensus2021teaching$approx_social_grade <- factor(\n census2021teaching$approx_social_grade,\n levels = c(1, 2, 3, 4, -8),\n labels = c(\"Higher, intermediate managers\",\n \"Supervisory, junior managers\",\n \"Skilled manual\",\n \"Semi-skilled, unskilled manual\",\n \"Not applicable\")\n)\n\ncensus2021teaching$sex <- factor(\n census2021teaching$sex,\n levels = c(1, 2),\n labels = c(\"Male\", \"Female\")\n)\n```\n:::\n\n\n### 5.2 Pick references that are central or policy relevant\n\n\n::: {.cell}\n\n```{.r .cell-code}\ncensus2021teaching$hours_per_week_worked <- relevel(census2021teaching$hours_per_week_worked, ref = \"31-48\")\ncensus2021teaching$resident_age_7d <- relevel(census2021teaching$resident_age_7d, ref = \"45-54\")\ncensus2021teaching$sex <- relevel(census2021teaching$sex, ref = \"Male\")\ncensus2021teaching$ethnic_group_tb_6a <- relevel(census2021teaching$ethnic_group_tb_6a, ref = \"White\")\ncensus2021teaching$approx_social_grade <- relevel(census2021teaching$approx_social_grade, ref = \"Higher, intermediate managers\")\n```\n:::\n\n\n### 5.3 Fit logistic regression \n\n\n\n::: {.cell}\n\n```{.r .cell-code}\nmodel_health <- glm(\n Poor_health ~ hours_per_week_worked + resident_age_7d + sex + ethnic_group_tb_6a + approx_social_grade,\n data = census2021teaching,\n family = binomial(link = \"logit\")\n)\n```\n:::\n\n#### Obtain summary results of the logistic regression\n\n\n::: {.cell}\n\n```{.r .cell-code}\nsummary(model_health)\n```\n\n::: {.cell-output .cell-output-stdout}\n\n```\n\nCall:\nglm(formula = Poor_health ~ hours_per_week_worked + resident_age_7d + \n sex + ethnic_group_tb_6a + approx_social_grade, family = binomial(link = \"logit\"), \n data = census2021teaching)\n\nCoefficients:\n Estimate Std. 
Error z value\n(Intercept) -2.465718 0.017174 -143.571\nhours_per_week_worked0-15 0.464247 0.019453 23.865\nhours_per_week_worked16-30 0.235470 0.016068 14.654\nhours_per_week_worked49+ -0.072616 0.022560 -3.219\nhours_per_week_workedDoes not apply 1.365718 0.011859 115.166\nresident_age_7d16-24 -1.709400 0.018745 -91.194\nresident_age_7d25-34 -0.925160 0.015855 -58.353\nresident_age_7d35-44 -0.523959 0.014894 -35.178\nresident_age_7d55-64 0.165097 0.013184 12.523\nresident_age_7d65+ 0.281656 0.017617 15.988\nsexFemale 0.057929 0.007857 7.373\nethnic_group_tb_6aAsian -0.159309 0.014871 -10.712\nethnic_group_tb_6aBlack -0.300889 0.022624 -13.299\nethnic_group_tb_6aMixed 0.050706 0.031398 1.615\nethnic_group_tb_6aOther -0.124834 0.028489 -4.382\napprox_social_gradeSupervisory, junior managers 0.440118 0.016054 27.415\napprox_social_gradeSkilled manual 0.591386 0.017021 34.745\napprox_social_gradeSemi-skilled, unskilled manual 1.262307 0.015867 79.554\napprox_social_gradeNot applicable 0.597043 0.018928 31.544\n Pr(>|z|) \n(Intercept) < 2e-16 ***\nhours_per_week_worked0-15 < 2e-16 ***\nhours_per_week_worked16-30 < 2e-16 ***\nhours_per_week_worked49+ 0.00129 ** \nhours_per_week_workedDoes not apply < 2e-16 ***\nresident_age_7d16-24 < 2e-16 ***\nresident_age_7d25-34 < 2e-16 ***\nresident_age_7d35-44 < 2e-16 ***\nresident_age_7d55-64 < 2e-16 ***\nresident_age_7d65+ < 2e-16 ***\nsexFemale 1.67e-13 ***\nethnic_group_tb_6aAsian < 2e-16 ***\nethnic_group_tb_6aBlack < 2e-16 ***\nethnic_group_tb_6aMixed 0.10632 \nethnic_group_tb_6aOther 1.18e-05 ***\napprox_social_gradeSupervisory, junior managers < 2e-16 ***\napprox_social_gradeSkilled manual < 2e-16 ***\napprox_social_gradeSemi-skilled, unskilled manual < 2e-16 ***\napprox_social_gradeNot applicable < 2e-16 ***\n---\nSignif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 
0.1 ' ' 1\n\n(Dispersion parameter for binomial family taken to be 1)\n\n Null deviance: 506597 on 487234 degrees of freedom\nResidual deviance: 424553 on 487216 degrees of freedom\nAIC: 424591\n\nNumber of Fisher Scoring iterations: 5\n```\n\n\n:::\n:::\n\n\n\n\n\n\n\n\n", + "supporting": [ + "chap1_files" + ], "filters": [ "rmarkdown/pagebreak.lua" ], diff --git a/.quarto/idx/chap1.qmd.json b/.quarto/idx/chap1.qmd.json index 00748ba..29c8fb8 100644 --- a/.quarto/idx/chap1.qmd.json +++ b/.quarto/idx/chap1.qmd.json @@ -1 +1 @@ -{"title":"R code for module activity","markdown":{"headingText":"R code for module activity","containsRefs":false,"markdown":"\n## 1- Install and load the main packages\n\n#### Install packages\n\nIf any of the below packages is not installed on your computer, please install it. Remember to delete the **#** symbol before running the code.\n\n```{r, eval=FALSE}\n\n#install.packages(\"tidyverse\")\n#install.packages(\"ggplot2\")\n#install.packages(\"dplyr\")\n#install.packages(\"ggrepel\")\n#install.packages(\"patchwork\")\n#install.packages(\"gridExtra\")\n\n```\n\n#### Load packages\n\n```{r, eval=FALSE}\n\nlibrary(tidyverse)\nlibrary(ggplot2)\nlibrary(dplyr)\nlibrary(ggrepel)\nlibrary(patchwork)\nlibrary(gridExtra)\nlibrary(haven)\n\n```\n\n## 2- Load the dataset and make a copy\n\n```{r, eval=FALSE}\n\npublicmicrodatateachingsample <- read_sav(\"data/publicmicrodatateachingsample.sav\")\n\ncensus2021teaching <- publicmicrodatateachingsample\n\n```\n\nImportant: Remember to change to pathway to your dataset accordingly. 
\n\nIf the **read_sav** code does not work, you can use the **Import Dataset** button in the Environment pane to load the dataset in R.\n\n## 3- Drop unnecessary variables\n\n```{r, eval=FALSE}\n\ncensus2021teaching <- census2021teaching[,c(\"health_in_general\",\n \"hours_per_week_worked\",\n \"resident_age_7d\",\"sex\", \n \"ethnic_group_tb_6a\",\n \"approx_social_grade\")]\n```\n\n## 4- Exploratory analysis\n\n### 4.1- Univariate analysis \n\n#### 4.1.1 Univariate analysis for health in general and hours worked\n\n##### Frequencies for health in general and hours worked\n\n```{r, eval=FALSE}\n\ntable(census2021teaching$health_in_general)\n\ntable(census2021teaching$hours_per_week_worked)\n\n```\n\n##### Percentages for health in general and hours worked\n\n\n```{r, eval=FALSE}\n\nprop.table(table(census2021teaching$health_in_general)) * 100\n\nprop.table(table(census2021teaching$hours_per_week_worked)) * 100\n\n```\n\nYou can also calculate these frequencies and percentages with appropriate labels.\n\n##### Frequencies and percentages with labels for health in general and hours worked\n\n \n```{r, eval=FALSE}\n\ncensus2021teaching %>%\n mutate(\n health_code = as.integer(as.character(health_in_general)),\n health_label = factor(\n health_code,\n levels = c(1, 2, 3, 4, 5, -8),\n labels = c(\"Very good\", \"Good\", \"Fair\", \"Bad\", \"Very bad\", \"Does not apply\")\n )\n ) %>%\n count(health_label) %>%\n mutate(percent = round(100 * n / sum(n), 1))\n\n\ncensus2021teaching %>%\n mutate(\n hours_code = as.integer(as.character(hours_per_week_worked)),\n hours_label = factor(\n hours_code,\n levels = c(1, 2, 3, 4, -8),\n labels = c(\"[0 – 15]\", \"[16 – 30]\", \"[31 – 48]\", \"[49 and +]\", \"Does not apply\")\n )\n ) %>%\n count(hours_label) %>%\n mutate(percent = round(100 * n / sum(n), 1))\n\n```\n\nYou can also generate pie charts for better visualization\n\n### a) Pie chart for Health in general\n\n##### a.1) Create a labelled health variable\n \n```{r, 
eval=FALSE}\n\nhealth <- census2021teaching %>%\n mutate(\n health_code = as.integer(as.character(health_in_general)),\n health_label = factor(\n health_code,\n levels = c(1,2,3,4,5,-8),\n labels = c(\"Very good\", \"Good\", \"Fair\", \"Bad\", \"Very bad\", \"Not applicable\")\n )\n ) %>%\n filter(!is.na(health_label)) %>%\n count(health_label, name = \"n\") %>%\n mutate(\n pct = round(100 * n / sum(n), 1),\n label = paste0(health_label, \" (\", pct, \"%)\")\n )\n\n```\n\n##### a.2) Plot pie chart\n\n\n```{r, eval=FALSE}\n\np1 <- ggplot(health, aes(x = \"\", y = n, fill = health_label)) +\n geom_col(width = 1, color = \"white\") +\n coord_polar(theta = \"y\") +\n geom_label_repel(aes(label = label),\n position = position_stack(vjust = 0.5),\n show.legend = FALSE,\n size = 3) +\n labs(title = \"Distribution of general health\", fill = \"Health status\") +\n theme_void()+\n scale_fill_brewer(palette = \"Set2\") +\n theme(plot.title = element_text(size = 16, hjust = 1))\n\np1\n\n```\n\n### b) Pie chart for Hours worked\n\n##### b.1) Create a labelled hours worked variable\n\n\n```{r, eval=FALSE}\n\nhours <- census2021teaching %>%\n mutate(\n hours_code = as.integer(as.character(hours_per_week_worked)),\n hours_label = factor(\n hours_code,\n levels = c(1, 2, 3, 4, -8),\n labels = c(\"[0 – 15]\", \"[16 – 30]\", \"[31 – 48]\", \"[49 and +]\", \"Not applicable\")\n )\n ) %>%\n filter(!is.na(hours_label)) %>%\n count(hours_label, name = \"n\") %>%\n mutate(\n pct = round(100 * n / sum(n), 1),\n label = paste0(hours_label, \" (\", pct, \"%)\")\n )\n\n```\n\n##### b.2) Plot pie chart\n\n\n```{r, eval=FALSE}\n\np2 <- ggplot(hours, aes(x = \"\", y = n, fill = hours_label)) +\n geom_col(width = 1, color = \"white\") +\n coord_polar(theta = \"y\") +\n geom_label_repel(aes(label = label),\n position = position_stack(vjust = 0.5),\n show.legend = FALSE,\n size = 3) +\n labs(title = \"Distribution of worked hours\", fill = \"Hours worked\") +\n theme_void()+\n 
scale_fill_brewer(palette = \"Dark2\") +\n theme(plot.title = element_text(size = 16, hjust = 1))\n\np2\n\n```\n\n\n### 4.1.1.1 Transformation of variable Health in general\n\nCreate a new variable called **health_binary** by regrouping health in general in two broader categories: **Good or very good health** and **Poor health**\n\n\n```{r, eval=FALSE}\n\ncensus2021teaching$health_binary <- ifelse(\n census2021teaching$health_in_general %in% c(1, 2), \"Good or very good health\",\n ifelse(census2021teaching$health_in_general %in% c(3, 4, 5), \"Poor health\", NA)\n)\n\n```\n\n##### Drop rows where health_in_general = -8 (Does not apply)\n\n```{r, eval=FALSE}\n\ncensus2021teaching <- subset(census2021teaching, health_in_general != -8)\n\n```\n\n##### Check the new variable's distribution\n\n```{r, eval=FALSE}\n\ntable(census2021teaching$health_binary)\n\n```\n\n##### Create variable **Poor_health** and check its distribution\n\nCode a binary variable that takes 1 for poor health and 0 otherwise. Call this new variable **Poor_health**. 
This variable will be the explained variable of our regression model.\n\n```{r, eval=FALSE}\n\ncensus2021teaching$Poor_health <- ifelse(\n census2021teaching$health_binary == \"Poor health\", 1,\n ifelse(census2021teaching$health_binary == \"Good or very good health\", 0, NA)\n)\n\ntable(census2021teaching$Poor_health)\n\n```\n\n### 4.1.2 Univariate analysis for age, sex, ethnicity, social category \n\n#### Frequencies and percentages for age, sex, ethnicity, social category \n\n```{r, eval=FALSE}\n\ntable(census2021teaching$resident_age_7d)\ntable(census2021teaching$sex)\ntable(census2021teaching$ethnic_group_tb_6a)\ntable(census2021teaching$approx_social_grade)\n\nprop.table(table(census2021teaching$resident_age_7d)) * 100\nprop.table(table(census2021teaching$sex)) * 100\nprop.table(table(census2021teaching$ethnic_group_tb_6a)) * 100\nprop.table(table(census2021teaching$approx_social_grade)) * 100\n\n```\n\n#### Plot pie chart Age variable\n\n##### a) Create a labelled Age variable\n\n```{r, eval=FALSE}\n\nAge <- census2021teaching %>%\n mutate(\n Age_code = as.integer(as.character(resident_age_7d)),\n Age_label = factor(\n Age_code,\n levels = c(1, 2, 3, 4,5,6,7, -8),\n labels = c(\"]0 – 15]\", \"[16 – 24]\", \"[25 – 34]\", \"[35 - 44]\", \"[45 - 54]\", \"[55 - 64]\", \"[65 and +]\",\"Not applicable\")\n )\n ) %>%\n filter(!is.na(Age_label)) %>%\n count(Age_label, name = \"n\") %>%\n mutate(\n pct = round(100 * n / sum(n), 1),\n label = paste0(Age_label, \" (\", pct, \"%)\")\n )\n\n```\n\n##### b) Plot pie chart\n\n```{r, eval=FALSE}\n\nggplot(Age, aes(x = \"\", y = n, fill = Age_label)) +\n geom_col(width = 1, color = \"white\") +\n coord_polar(theta = \"y\") +\n geom_label_repel(aes(label = label),\n position = position_stack(vjust = 0.5),\n show.legend = FALSE,\n size = 3) +\n labs(title = \"Distribution of respondents by Age\", fill = \"Resident age\") +\n theme_void()+\n scale_fill_brewer(palette = \"Pastel1\") +\n theme(plot.title = element_text(size = 16, 
hjust = 1))\n\n```\n\n#### Histogram for Sex variable\n\n##### a) Recode sex variable with labels\n\n```{r, eval=FALSE}\n\nsex <- census2021teaching %>%\n mutate(sex_label = factor(sex, levels = c(1, 2), labels = c(\"Male\", \"Female\"))) %>%\n count(sex_label, name = \"n\") %>%\n mutate(prop = n / sum(n),\n label = scales::percent(prop, accuracy = 0.1))\n\n```\n\n##### b) Plot histogram\n\n```{r, eval=FALSE}\n\nggplot(sex, aes(x = sex_label, y = prop)) +\n geom_col(fill = \"skyblue\", color = \"black\", width = 0.3) +\n geom_text(aes(label = label), vjust = -0.5) +\n scale_y_continuous(labels = scales::percent_format()) +\n scale_x_discrete(expand = expansion(mult = c(0.5, 0.7))) +\n labs(title = \"Distribution of respondents by sex\", x = \"\", y = \"Percentage\") +\n theme_classic()+\n theme(axis.text.x = element_text(size = 12))+\n theme(plot.title = element_text(size = 16, hjust = 0.5))\n\n```\n\n#### Histogram for Ethnicity variable\n\n##### a) Recode Ethnicity variable with labels\n\n```{r, eval=FALSE}\n\nEthnicity <- census2021teaching %>%\n mutate(Ethnicity_label = factor(ethnic_group_tb_6a, levels = c(1, 2, 3, 4, 5, -8), \n labels = c(\"Asian\", \"Black\", \"Mixed\", \"White\", \"Other\", \"Does not apply\"))) %>%\n count(Ethnicity_label, name = \"n\") %>%\n mutate(prop = n / sum(n),\n label = scales::percent(prop, accuracy = 0.1))\n\n```\n\n##### b) Plot histogram\n\n```{r, eval=FALSE}\n\nggplot(Ethnicity, aes(x = Ethnicity_label, y = prop)) +\n geom_col(fill = \"chocolate\", color = \"black\", width = 0.3) +\n geom_text(aes(label = label), vjust = -0.5) +\n scale_y_continuous(labels = scales::percent_format()) +\n labs(title = \"Distribution of respondents by Ethnicity\", x = \"\", y = \"Percentage\") +\n theme_classic()+\n theme(axis.text.x = element_text(size = 12))+\n theme(plot.title = element_text(size = 16, hjust = 0.5))\n\n```\n\n#### Pie chart for variable social class\n\n##### a) Create a labelled social class variable\n\n```{r, 
eval=FALSE}\n\nSocial_class <- census2021teaching %>%\n mutate(\n Social_class_code = as.integer(as.character(approx_social_grade)),\n Social_class_label = factor(\n Social_class_code,\n levels = c(1, 2, 3, 4, -8),\n labels = c(\"Higher, intermediate managers\", \"Supervisory, junior managers\", \n \"Skilled manual\", \"Semi-skilled, unskilled manual\",\"Not applicable\")\n )\n ) %>%\n filter(!is.na(Social_class_label)) %>%\n count(Social_class_label, name = \"n\") %>%\n mutate(\n pct = round(100 * n / sum(n), 1),\n label = paste0(Social_class_label, \" (\", pct, \"%)\")\n )\n\n```\n\n##### b) Plot pie chart\n\n```{r, eval=FALSE}\n\nggplot(Social_class, aes(x = \"\", y = n, fill = Social_class_label)) +\n geom_col(width = 1, color = \"white\") +\n coord_polar(theta = \"y\") +\n geom_label_repel(aes(label = label),\n position = position_stack(vjust = 0.5),\n show.legend = FALSE,\n size = 3) +\n labs(title = \"Distribution of respondents by Social class\", fill = \"Social class\") +\n theme_void()+\n scale_fill_brewer(palette = \"Pastel2\") +\n theme(plot.title = element_text(size = 16, hjust = 1))\n\n```\n\n##### Drop under 16 from the table\n\n```{r, eval=FALSE}\n\ncensus2021teaching <- subset(census2021teaching, resident_age_7d != 1)\n\n```\n\n### 4.2- Bivariate analysis \n\n##### Cross tabulation of Poor_health by hours worked\n\n```{r, eval=FALSE}\n\ntabulation <- table(census2021teaching$Poor_health, census2021teaching$hours_per_week_worked)\ntabulation\n\n```\n\n##### Row percentages\n\n```{r, eval=FALSE}\n\nprop.table(tabulation, margin = 1) * 100\n\n```\n\n##### Column percentages\n\n```{r, eval=FALSE}\n\nprop.table(tabulation, margin = 2) * 100\n\n```\n#### Chi square test\n\n##### a) Creating cross tabulations to be tested\n\n```{r, eval=FALSE}\n\ntabulation.health_hours <- table(census2021teaching$Poor_health, census2021teaching$hours_per_week_worked)\ntabulation.health_age <- table(census2021teaching$Poor_health, 
census2021teaching$resident_age_7d)\ntabulation.health_sex <- table(census2021teaching$Poor_health, census2021teaching$sex)\ntabulation.health_ethnicity <- table(census2021teaching$Poor_health, census2021teaching$ethnic_group_tb_6a)\ntabulation.health_classes <- table(census2021teaching$Poor_health, census2021teaching$approx_social_grade)\n\n```\n\n##### b) chi square test on health and our 5 other variables\n\n```{r, eval=FALSE}\n\nchisq.test(tabulation.health_hours)\nchisq.test(tabulation.health_age)\nchisq.test(tabulation.health_sex)\nchisq.test(tabulation.health_ethnicity)\nchisq.test(tabulation.health_classes)\n\n```\n## 5 Regression model\n\n### 5.1 Recode variables as factors with labels\n\n```{r, eval=FALSE}\n\ncensus2021teaching$hours_per_week_worked <- factor(\n census2021teaching$hours_per_week_worked,\n levels = c(1, 2, 3, 4, -8),\n labels = c(\"0-15\", \"16-30\", \"31-48\", \"49+\", \"Does not apply\")\n)\n\ncensus2021teaching$resident_age_7d <- factor(\n census2021teaching$resident_age_7d,\n levels = c(1, 2, 3, 4, 5, 6, 7, -8),\n labels = c(\"0-15\", \"16-24\", \"25-34\", \"35-44\", \"45-54\", \"55-64\", \"65+\", \"Not applicable\")\n)\n\ncensus2021teaching$ethnic_group_tb_6a <- factor(\n census2021teaching$ethnic_group_tb_6a,\n levels = c(1, 2, 3, 4, 5, -8),\n labels = c(\"Asian\", \"Black\", \"Mixed\", \"White\", \"Other\", \"Does not apply\")\n)\n\ncensus2021teaching$approx_social_grade <- factor(\n census2021teaching$approx_social_grade,\n levels = c(1, 2, 3, 4, -8),\n labels = c(\"Higher, intermediate managers\",\n \"Supervisory, junior managers\",\n \"Skilled manual\",\n \"Semi-skilled, unskilled manual\",\n \"Not applicable\")\n)\n\ncensus2021teaching$sex <- factor(\n census2021teaching$sex,\n levels = c(1, 2),\n labels = c(\"Male\", \"Female\")\n)\n\n```\n\n### 5.2 Pick references that are central or policy relevant\n\n```{r, eval=FALSE}\n\ncensus2021teaching$hours_per_week_worked <- relevel(census2021teaching$hours_per_week_worked, ref = 
\"31-48\")\ncensus2021teaching$resident_age_7d <- relevel(census2021teaching$resident_age_7d, ref = \"45-54\")\ncensus2021teaching$sex <- relevel(census2021teaching$sex, ref = \"Male\")\ncensus2021teaching$ethnic_group_tb_6a <- relevel(census2021teaching$ethnic_group_tb_6a, ref = \"White\")\ncensus2021teaching$approx_social_grade <- relevel(census2021teaching$approx_social_grade, ref = \"Higher, intermediate managers\")\n\n```\n\n### 5.3 Fit logistic regression \n\n\n```{r, eval=FALSE}\n\nmodel_health <- glm(\n Poor_health ~ hours_per_week_worked + resident_age_7d + sex + ethnic_group_tb_6a + approx_social_grade,\n data = census2021teaching,\n family = binomial(link = \"logit\")\n)\n\n```\n#### Obtain summary results of the logistic regression\n\n```{r, eval=FALSE}\n\nsummary(model_health)\n\n```\n\n\n\n\n\n\n\n","srcMarkdownNoYaml":""},"formats":{"html":{"identifier":{"display-name":"HTML","target-format":"html","base-format":"html"},"execute":{"fig-width":7,"fig-height":5,"fig-format":"retina","fig-dpi":96,"df-print":"default","error":false,"eval":false,"cache":null,"freeze":false,"echo":true,"output":true,"warning":false,"include":true,"keep-md":false,"keep-ipynb":false,"ipynb":null,"enabled":null,"daemon":null,"daemon-restart":false,"debug":false,"ipynb-filters":[],"ipynb-shell-interactivity":null,"plotly-connected":true,"message":false,"engine":"knitr"},"render":{"keep-tex":false,"keep-typ":false,"keep-source":false,"keep-hidden":false,"prefer-html":false,"output-divs":true,"output-ext":"html","fig-align":"default","fig-pos":null,"fig-env":null,"code-fold":false,"code-overflow":"scroll","code-link":false,"code-line-numbers":true,"code-tools":{"source":true,"toggle":false,"caption":"Quarto 
source"},"tbl-colwidths":"auto","merge-includes":true,"inline-includes":false,"preserve-yaml":false,"latex-auto-mk":true,"latex-auto-install":true,"latex-clean":true,"latex-min-runs":1,"latex-max-runs":10,"latex-makeindex":"makeindex","latex-makeindex-opts":[],"latex-tlmgr-opts":[],"latex-input-paths":[],"latex-output-dir":null,"link-external-icon":false,"link-external-newwindow":true,"self-contained-math":false,"format-resources":[],"notebook-links":true},"pandoc":{"standalone":true,"wrap":"none","default-image-extension":"png","to":"html","css":["ukds_long.css"],"toc":true,"toc-depth":2,"number-sections":false,"highlight-style":"arrow-light","output-file":"chap1.html"},"language":{"toc-title-document":"Table of contents","toc-title-website":"On this page","related-formats-title":"Other Formats","related-notebooks-title":"Notebooks","source-notebooks-prefix":"Source","other-links-title":"Other Links","code-links-title":"Code Links","launch-dev-container-title":"Launch Dev Container","launch-binder-title":"Launch Binder","article-notebook-label":"Article Notebook","notebook-preview-download":"Download Notebook","notebook-preview-download-src":"Download Source","notebook-preview-back":"Back to Article","manuscript-meca-bundle":"MECA Bundle","section-title-abstract":"Abstract","section-title-appendices":"Appendices","section-title-footnotes":"Footnotes","section-title-references":"References","section-title-reuse":"Reuse","section-title-copyright":"Copyright","section-title-citation":"Citation","appendix-attribution-cite-as":"For attribution, please cite this work as:","appendix-attribution-bibtex":"BibTeX citation:","appendix-view-license":"View 
License","title-block-author-single":"Author","title-block-author-plural":"Authors","title-block-affiliation-single":"Affiliation","title-block-affiliation-plural":"Affiliations","title-block-published":"Published","title-block-modified":"Modified","title-block-keywords":"Keywords","callout-tip-title":"Tip","callout-note-title":"Note","callout-warning-title":"Warning","callout-important-title":"Important","callout-caution-title":"Caution","code-summary":"Code","code-tools-menu-caption":"Code","code-tools-show-all-code":"Show All Code","code-tools-hide-all-code":"Hide All Code","code-tools-view-source":"View Source","code-tools-source-code":"Source Code","tools-share":"Share","tools-download":"Download","code-line":"Line","code-lines":"Lines","copy-button-tooltip":"Copy to Clipboard","copy-button-tooltip-success":"Copied!","repo-action-links-edit":"Edit this page","repo-action-links-source":"View source","repo-action-links-issue":"Report an issue","back-to-top":"Back to top","search-no-results-text":"No results","search-matching-documents-text":"matching documents","search-copy-link-title":"Copy link to search","search-hide-matches-text":"Hide additional matches","search-more-match-text":"more match in this document","search-more-matches-text":"more matches in this document","search-clear-button-title":"Clear","search-text-placeholder":"","search-detached-cancel-button-title":"Cancel","search-submit-button-title":"Submit","search-label":"Search","toggle-section":"Toggle section","toggle-sidebar":"Toggle sidebar navigation","toggle-dark-mode":"Toggle dark mode","toggle-reader-mode":"Toggle reader mode","toggle-navigation":"Toggle 
navigation","crossref-fig-title":"Figure","crossref-tbl-title":"Table","crossref-lst-title":"Listing","crossref-thm-title":"Theorem","crossref-lem-title":"Lemma","crossref-cor-title":"Corollary","crossref-prp-title":"Proposition","crossref-cnj-title":"Conjecture","crossref-def-title":"Definition","crossref-exm-title":"Example","crossref-exr-title":"Exercise","crossref-ch-prefix":"Chapter","crossref-apx-prefix":"Appendix","crossref-sec-prefix":"Section","crossref-eq-prefix":"Equation","crossref-lof-title":"List of Figures","crossref-lot-title":"List of Tables","crossref-lol-title":"List of Listings","environment-proof-title":"Proof","environment-remark-title":"Remark","environment-solution-title":"Solution","listing-page-order-by":"Order By","listing-page-order-by-default":"Default","listing-page-order-by-date-asc":"Oldest","listing-page-order-by-date-desc":"Newest","listing-page-order-by-number-desc":"High to Low","listing-page-order-by-number-asc":"Low to High","listing-page-field-date":"Date","listing-page-field-title":"Title","listing-page-field-description":"Description","listing-page-field-author":"Author","listing-page-field-filename":"File Name","listing-page-field-filemodified":"Modified","listing-page-field-subtitle":"Subtitle","listing-page-field-readingtime":"Reading Time","listing-page-field-wordcount":"Word Count","listing-page-field-categories":"Categories","listing-page-minutes-compact":"{0} min","listing-page-category-all":"All","listing-page-no-matches":"No matching items","listing-page-words":"{0} words","listing-page-filter":"Filter","draft":"Draft"},"metadata":{"lang":"en","fig-responsive":true,"quarto-version":"1.8.25","jupyter":"nbstata","toc-location":"right","fig-cap-location":"top","tbl-cap-location":"top","smooth-scroll":true,"page-layout":"full","citations-hover":true,"footnotes-hover":true},"extensions":{"book":{"multiFile":true}}}},"projectFormats":["html"]} \ No newline at end of file +{"title":"R code for module 
activity","markdown":{"headingText":"R code for module activity","containsRefs":false,"markdown":"\n## 1- Install and load the main packages\n\n\n```{r packages}\n#### Load the packages if installed; install them if not present \n\npkg <- c(\"tidyverse\", \n \"ggplot2\", \n \"ggrepel\", \n \"patchwork\", \n \"gridExtra\", \n \"haven\", ### Import Stata and SPSS datasets\n \"viridis\" ### colourblind friendly palette\n)\n\nfor (p in pkg) {\n if (!require(p, character.only = T)) {\n install.packages(p)\n library(p, character.only = T)\n }\n}\n```\n\n## 2- Load the dataset and make a copy\n\n```{r}\n\npublicmicrodatateachingsample <- read_sav(\"data/publicmicrodatateachingsample.sav\")\n\ncensus2021teaching <- publicmicrodatateachingsample\n\n```\n\nImportant: Remember to change the filepath to your dataset accordingly. \n\nIf the **read_sav** code does not work, you can use the **Import Dataset** button in the RStudio Environment Pane to load the dataset in R.\n\n## 3- Drop unnecessary variables\n\n```{r}\n#| eval: false\ncensus2021teaching <- census2021teaching[,c(\"health_in_general\",\n \"hours_per_week_worked\",\n \"resident_age_7d\",\"sex\", \n \"ethnic_group_tb_6a\",\n \"approx_social_grade\")]\n\nhead(census2021teaching)\ndim(census2021teaching)\n```\n\n::: {.callout-note collapse=\"true\"}\n\n## Click to view results\n\n```{r}\n#| echo: false\ncensus2021teaching <- census2021teaching[,c(\"health_in_general\",\n \"hours_per_week_worked\",\n \"resident_age_7d\",\"sex\", \n \"ethnic_group_tb_6a\",\n \"approx_social_grade\")]\n\nhead(census2021teaching)\ndim(census2021teaching)\n```\n\n:::\n\n## 4- Exploratory analysis\n\n### 4.1- Univariate analysis \n\n#### 4.1.1 Univariate analysis for health in general and hours worked\n\n##### Frequencies for health in general and hours worked\n\n```{r}\n#| eval: false\n\n### Raw values\ntable(census2021teaching$health_in_general)\n\n### Labelled 
values\ntable(as_factor(census2021teaching$hours_per_week_worked))\n\n```\n\n::: {.callout-note collapse=\"true\"}\n\n## Click to view results\n\n```{r}\n#| echo: false\n\n### Raw values\ntable(census2021teaching$health_in_general)\n\n### Labelled values\ntable(as_factor(census2021teaching$hours_per_week_worked))\n```\n\n:::\n\n##### Distribution of self-rated health and hours worked\n\n\n```{r}\n#| eval: false\n\nround(\n prop.table(\n table(\n as_factor(census2021teaching$health_in_general\n )\n )\n ) * 100,\n 1) \n\nround(\n prop.table(\n table(\n as_factor(census2021teaching$hours_per_week_worked)\n )\n ) * 100,\n 1)\n\n```\n::: {.callout-note collapse=\"true\"}\n\n## Click to view results\n\n```{r}\n#| echo: false\n\nround(\n prop.table(\n table(\n as_factor(census2021teaching$health_in_general\n )\n )\n ) * 100,\n 1) \n\nround(\n prop.table(\n table(\n as_factor(census2021teaching$hours_per_week_worked)\n )\n ) * 100,\n 1)\n\n```\n\n:::\n\n\n\n##### Frequencies and percentages with labels for health in general and hours worked\n\n\nYou can also generate pie charts for better visualization\n\n### a) Pie chart for self-rated general health\n\n\n\n```{r p1}\n#| eval: false\n\nh1<-round(100*\n prop.table(\n table(\n as_factor(census2021teaching$health_in_general)\n )\n ),1\n )\n\nh1_df <- as.data.frame(h1)\n\nnames(h1_df) <- c(\"srh\", \"pct\")\n\npie(h1_df$pct,\n labels = paste(h1_df$srh, sep = \" \", h1_df$pct, \"%\"),\n cex = 0.7, \n radius=0.9,\n col = viridis::viridis(length(h1_df$srh)), \n main = \"Self-rated general health\")\n\n```\n::: {.callout-note collapse=\"true\"}\n\n## Click to view results\n```{r p1.2}\n#| echo: false\n\nh1<-round(100*\n prop.table(\n table(\n as_factor(census2021teaching$health_in_general)\n )\n ),1\n )\n\nh1_df <- as.data.frame(h1)\n\nnames(h1_df) <- c(\"srh\", \"pct\")\n\npie(h1_df$pct,\n labels = paste(h1_df$srh, sep = \" \", h1_df$pct, \"%\"),\n cex = 0.7, \n radius=0.9,\n col = viridis::viridis(length(h1_df$srh)), \n main = 
\"Self-rated general health\")\n\n```\n\n:::\n\n### b) Pie chart for Hours worked\n\n##### b.1) Create a labelled hours worked variable\n\n\n```{r p2}\n#| code-fold: true\n#| code-summary: \"View output\"\n\nh2<-round(100*\n prop.table(\n table(\n as_factor(census2021teaching$hours_per_week_worked))\n ),1\n )\n\nh2_df <- as.data.frame(h2)\n\nnames(h2_df) <- c(\"hpw\", \"pct\")\n\np2<-pie(h2_df$pct,\n labels = paste(h2_df$hpw, sep = \" \", h2_df$pct, \"%\"),\n cex = 0.7, \n radius=0.9,\n col = viridis::viridis(length(h2_df$hpw)), \n main = \"Hours worked per week\")\n\n```\n\n\n### 4.1.1.1 Transformation of variable Health in general\n\nCreate a new variable called **health_binary** by regrouping health in general in two broader categories: **Good or very good health** and **Poor health**\n\n\n```{r}\n\ncensus2021teaching$health_binary <- ifelse(\n census2021teaching$health_in_general %in% c(1, 2), \"Good or very good health\",\n ifelse(census2021teaching$health_in_general %in% c(3, 4, 5), \"Poor health\", NA)\n)\n\n```\n\n##### Drop rows where health_in_general = -8 (Does not apply)\n\n```{r}\n\ncensus2021teaching <-census2021teaching %>%\n filter(health_in_general != -8)\n\n```\n\n##### Check the new variable's distribution\n\n```{r}\n\ntable(census2021teaching$health_binary)\n\n```\n\n##### Create variable **Poor_health** and check its distribution\n\nCode a binary variable that takes 1 for poor health and 0 otherwise. Call this new variable **Poor_health**. 
This variable will be the explained variable of our regression model.\n\n```{r}\n\ncensus2021teaching$Poor_health <- ifelse(\n census2021teaching$health_binary == \"Poor health\", 1,\n ifelse(census2021teaching$health_binary == \"Good or very good health\", 0, NA)\n)\n\ntable(census2021teaching$Poor_health)\n\n```\n\n### 4.1.2 Univariate analysis for age, sex, ethnicity, social category \n\n#### Frequencies and percentages for age, sex, ethnicity, social category \n\n```{r}\n\nt_a<-table(as_factor(census2021teaching$resident_age_7d))\nt_s<-table(as_factor(census2021teaching$sex))\nt_e<-table(as_factor(census2021teaching$ethnic_group_tb_6a))\nt_g<-table(as_factor(census2021teaching$approx_social_grade))\n\ncbind(t_a,round(prop.table(t_a)*100,1))\ncbind(t_s,round(prop.table(t_s)*100,1))\ncbind(t_e,round(prop.table(t_e)*100,1))\ncbind(t_g,round(prop.table(t_g)*100,1))\n\n```\n\n#### Plot pie chart Age variable\n\n```{r}\n\n\nt_a.df <- as.data.frame(round(prop.table(t_a)*100,1))\nnames(t_a.df) <- c(\"age\", \"pct\")\n\npie(t_a.df$pct,\n labels = paste(t_a.df$age, sep = \" \", t_a.df$pct, \"%\"),\n cex = 0.7, \n radius=0.9,\n col = viridis::viridis(length(t_a.df$age)), \n main = \"Distribution of respondents by age\")\n\n\n```\n\n#### Pie chart for Sex variable\n\n\n```{r}\n\nt_s.df <- as.data.frame(round(prop.table(t_s)*100,1))\nnames(t_s.df) <- c(\"sex\", \"pct\")\n\npie(t_s.df$pct,\n labels = paste(t_s.df$sex, sep = \" \", t_s.df$pct, \"%\"),\n cex = 0.7, \n radius=0.9,\n col = viridis::viridis(length(t_a.df$age)), \n main = \"Distribution of respondents by sex\")\n\n\n```\n\n#### Histogram for Ethnicity variable\n\n##### a) Recode Ethnicity variable with labels\n\n```{r}\n\nEthnicity <- census2021teaching %>%\n mutate(Ethnicity_label = factor(ethnic_group_tb_6a, levels = c(1, 2, 3, 4, 5, -8), \n labels = c(\"Asian\", \"Black\", \"Mixed\", \"White\", \"Other\", \"Does not apply\"))) %>%\n count(Ethnicity_label, name = \"n\") %>%\n mutate(prop = n / sum(n),\n label = 
scales::percent(prop, accuracy = 0.1))\n\n```\n\n##### b) Plot histogram\n\n```{r}\n\nggplot(Ethnicity, aes(x = Ethnicity_label, y = prop)) +\n geom_col(fill = \"chocolate\", color = \"black\", width = 0.3) +\n geom_text(aes(label = label), vjust = -0.5) +\n scale_y_continuous(labels = scales::percent_format()) +\n labs(title = \"Distribution of respondents by Ethnicity\", x = \"\", y = \"Percentage\") +\n theme_classic()+\n theme(axis.text.x = element_text(size = 12))+\n theme(plot.title = element_text(size = 16, hjust = 0.5))\n\n```\n\n#### Pie chart for variable social class\n\n##### a) Create a labelled social class variable\n\n```{r}\n\nSocial_class <- census2021teaching %>%\n mutate(\n Social_class_code = as.integer(as.character(approx_social_grade)),\n Social_class_label = factor(\n Social_class_code,\n levels = c(1, 2, 3, 4, -8),\n labels = c(\"Higher, intermediate managers\", \"Supervisory, junior managers\", \n \"Skilled manual\", \"Semi-skilled, unskilled manual\",\"Not applicable\")\n )\n ) %>%\n filter(!is.na(Social_class_label)) %>%\n count(Social_class_label, name = \"n\") %>%\n mutate(\n pct = round(100 * n / sum(n), 1),\n label = paste0(Social_class_label, \" (\", pct, \"%)\")\n )\n\n```\n\n##### b) Plot pie chart\n\n```{r}\n\nggplot(Social_class, aes(x = \"\", y = n, fill = Social_class_label)) +\n geom_col(width = 1, color = \"white\") +\n coord_polar(theta = \"y\") +\n geom_label_repel(aes(label = label),\n position = position_stack(vjust = 0.5),\n show.legend = FALSE,\n size = 3) +\n labs(title = \"Distribution of respondents by Social class\", fill = \"Social class\") +\n theme_void()+\n scale_fill_brewer(palette = \"Pastel2\") +\n theme(plot.title = element_text(size = 16, hjust = 1))\n\n```\n\n##### Drop under 16 from the table\n\n```{r}\n\ncensus2021teaching <- subset(census2021teaching, resident_age_7d != 1)\n\n```\n\n### 4.2- Bivariate analysis \n\n##### Cross tabulation of Poor_health by hours worked\n\n```{r}\n\ntabulation <- 
table(census2021teaching$Poor_health, census2021teaching$hours_per_week_worked)\ntabulation\n\n```\n\n##### Row percentages\n\n```{r}\n\nprop.table(tabulation, margin = 1) * 100\n\n```\n\n##### Column percentages\n\n```{r}\n\nprop.table(tabulation, margin = 2) * 100\n\n```\n#### Chi square test\n\n##### a) Creating cross tabulations to be tested\n\n```{r}\n\ntabulation.health_hours <- table(census2021teaching$Poor_health, census2021teaching$hours_per_week_worked)\ntabulation.health_age <- table(census2021teaching$Poor_health, census2021teaching$resident_age_7d)\ntabulation.health_sex <- table(census2021teaching$Poor_health, census2021teaching$sex)\ntabulation.health_ethnicity <- table(census2021teaching$Poor_health, census2021teaching$ethnic_group_tb_6a)\ntabulation.health_classes <- table(census2021teaching$Poor_health, census2021teaching$approx_social_grade)\n\n```\n\n##### b) chi square test on health and our 5 other variables\n\n```{r}\n\nchisq.test(tabulation.health_hours)\nchisq.test(tabulation.health_age)\nchisq.test(tabulation.health_sex)\nchisq.test(tabulation.health_ethnicity)\nchisq.test(tabulation.health_classes)\n\n```\n## 5 Regression model\n\n### 5.1 Recode variables as factors with labels\n\n```{r}\n\ncensus2021teaching$hours_per_week_worked <- factor(\n census2021teaching$hours_per_week_worked,\n levels = c(1, 2, 3, 4, -8),\n labels = c(\"0-15\", \"16-30\", \"31-48\", \"49+\", \"Does not apply\")\n)\n\ncensus2021teaching$resident_age_7d <- factor(\n census2021teaching$resident_age_7d,\n levels = c(1, 2, 3, 4, 5, 6, 7, -8),\n labels = c(\"0-15\", \"16-24\", \"25-34\", \"35-44\", \"45-54\", \"55-64\", \"65+\", \"Not applicable\")\n)\n\ncensus2021teaching$ethnic_group_tb_6a <- factor(\n census2021teaching$ethnic_group_tb_6a,\n levels = c(1, 2, 3, 4, 5, -8),\n labels = c(\"Asian\", \"Black\", \"Mixed\", \"White\", \"Other\", \"Does not apply\")\n)\n\ncensus2021teaching$approx_social_grade <- factor(\n census2021teaching$approx_social_grade,\n levels = 
c(1, 2, 3, 4, -8),\n labels = c(\"Higher, intermediate managers\",\n \"Supervisory, junior managers\",\n \"Skilled manual\",\n \"Semi-skilled, unskilled manual\",\n \"Not applicable\")\n)\n\ncensus2021teaching$sex <- factor(\n census2021teaching$sex,\n levels = c(1, 2),\n labels = c(\"Male\", \"Female\")\n)\n\n```\n\n### 5.2 Pick references that are central or policy relevant\n\n```{r}\n\ncensus2021teaching$hours_per_week_worked <- relevel(census2021teaching$hours_per_week_worked, ref = \"31-48\")\ncensus2021teaching$resident_age_7d <- relevel(census2021teaching$resident_age_7d, ref = \"45-54\")\ncensus2021teaching$sex <- relevel(census2021teaching$sex, ref = \"Male\")\ncensus2021teaching$ethnic_group_tb_6a <- relevel(census2021teaching$ethnic_group_tb_6a, ref = \"White\")\ncensus2021teaching$approx_social_grade <- relevel(census2021teaching$approx_social_grade, ref = \"Higher, intermediate managers\")\n\n```\n\n### 5.3 Fit logistic regression \n\n\n```{r}\n\nmodel_health <- glm(\n Poor_health ~ hours_per_week_worked + resident_age_7d + sex + ethnic_group_tb_6a + approx_social_grade,\n data = census2021teaching,\n family = binomial(link = \"logit\")\n)\n\n```\n#### Obtain summary results of the logistic 
regression\n\n```{r}\n\nsummary(model_health)\n\n```\n\n\n\n\n\n\n\n","srcMarkdownNoYaml":""},"formats":{"html":{"identifier":{"display-name":"HTML","target-format":"html","base-format":"html"},"execute":{"fig-width":7,"fig-height":5,"fig-format":"retina","fig-dpi":96,"df-print":"default","error":false,"eval":true,"cache":null,"freeze":false,"echo":true,"output":true,"warning":false,"include":true,"keep-md":false,"keep-ipynb":false,"ipynb":null,"enabled":null,"daemon":null,"daemon-restart":false,"debug":false,"ipynb-filters":[],"ipynb-shell-interactivity":null,"plotly-connected":true,"message":false,"engine":"knitr"},"render":{"keep-tex":false,"keep-typ":false,"keep-source":false,"keep-hidden":false,"prefer-html":false,"output-divs":true,"output-ext":"html","fig-align":"default","fig-pos":null,"fig-env":null,"code-fold":false,"code-overflow":"scroll","code-link":false,"code-line-numbers":true,"code-tools":{"source":true,"toggle":false,"caption":"Quarto source"},"tbl-colwidths":"auto","merge-includes":true,"inline-includes":false,"preserve-yaml":false,"latex-auto-mk":true,"latex-auto-install":true,"latex-clean":true,"latex-min-runs":1,"latex-max-runs":10,"latex-makeindex":"makeindex","latex-makeindex-opts":[],"latex-tlmgr-opts":[],"latex-input-paths":[],"latex-output-dir":null,"link-external-icon":false,"link-external-newwindow":true,"self-contained-math":false,"format-resources":[],"notebook-links":true},"pandoc":{"standalone":true,"wrap":"none","default-image-extension":"png","to":"html","css":["ukds_long.css"],"toc":true,"toc-depth":2,"number-sections":false,"highlight-style":"arrow-light","output-file":"chap1.html"},"language":{"toc-title-document":"Table of contents","toc-title-website":"On this page","related-formats-title":"Other Formats","related-notebooks-title":"Notebooks","source-notebooks-prefix":"Source","other-links-title":"Other Links","code-links-title":"Code Links","launch-dev-container-title":"Launch Dev Container","launch-binder-title":"Launch 
Binder","article-notebook-label":"Article Notebook","notebook-preview-download":"Download Notebook","notebook-preview-download-src":"Download Source","notebook-preview-back":"Back to Article","manuscript-meca-bundle":"MECA Bundle","section-title-abstract":"Abstract","section-title-appendices":"Appendices","section-title-footnotes":"Footnotes","section-title-references":"References","section-title-reuse":"Reuse","section-title-copyright":"Copyright","section-title-citation":"Citation","appendix-attribution-cite-as":"For attribution, please cite this work as:","appendix-attribution-bibtex":"BibTeX citation:","appendix-view-license":"View License","title-block-author-single":"Author","title-block-author-plural":"Authors","title-block-affiliation-single":"Affiliation","title-block-affiliation-plural":"Affiliations","title-block-published":"Published","title-block-modified":"Modified","title-block-keywords":"Keywords","callout-tip-title":"Tip","callout-note-title":"Note","callout-warning-title":"Warning","callout-important-title":"Important","callout-caution-title":"Caution","code-summary":"Code","code-tools-menu-caption":"Code","code-tools-show-all-code":"Show All Code","code-tools-hide-all-code":"Hide All Code","code-tools-view-source":"View Source","code-tools-source-code":"Source Code","tools-share":"Share","tools-download":"Download","code-line":"Line","code-lines":"Lines","copy-button-tooltip":"Copy to Clipboard","copy-button-tooltip-success":"Copied!","repo-action-links-edit":"Edit this page","repo-action-links-source":"View source","repo-action-links-issue":"Report an issue","back-to-top":"Back to top","search-no-results-text":"No results","search-matching-documents-text":"matching documents","search-copy-link-title":"Copy link to search","search-hide-matches-text":"Hide additional matches","search-more-match-text":"more match in this document","search-more-matches-text":"more matches in this 
document","search-clear-button-title":"Clear","search-text-placeholder":"","search-detached-cancel-button-title":"Cancel","search-submit-button-title":"Submit","search-label":"Search","toggle-section":"Toggle section","toggle-sidebar":"Toggle sidebar navigation","toggle-dark-mode":"Toggle dark mode","toggle-reader-mode":"Toggle reader mode","toggle-navigation":"Toggle navigation","crossref-fig-title":"Figure","crossref-tbl-title":"Table","crossref-lst-title":"Listing","crossref-thm-title":"Theorem","crossref-lem-title":"Lemma","crossref-cor-title":"Corollary","crossref-prp-title":"Proposition","crossref-cnj-title":"Conjecture","crossref-def-title":"Definition","crossref-exm-title":"Example","crossref-exr-title":"Exercise","crossref-ch-prefix":"Chapter","crossref-apx-prefix":"Appendix","crossref-sec-prefix":"Section","crossref-eq-prefix":"Equation","crossref-lof-title":"List of Figures","crossref-lot-title":"List of Tables","crossref-lol-title":"List of Listings","environment-proof-title":"Proof","environment-remark-title":"Remark","environment-solution-title":"Solution","listing-page-order-by":"Order By","listing-page-order-by-default":"Default","listing-page-order-by-date-asc":"Oldest","listing-page-order-by-date-desc":"Newest","listing-page-order-by-number-desc":"High to Low","listing-page-order-by-number-asc":"Low to High","listing-page-field-date":"Date","listing-page-field-title":"Title","listing-page-field-description":"Description","listing-page-field-author":"Author","listing-page-field-filename":"File Name","listing-page-field-filemodified":"Modified","listing-page-field-subtitle":"Subtitle","listing-page-field-readingtime":"Reading Time","listing-page-field-wordcount":"Word Count","listing-page-field-categories":"Categories","listing-page-minutes-compact":"{0} min","listing-page-category-all":"All","listing-page-no-matches":"No matching items","listing-page-words":"{0} 
words","listing-page-filter":"Filter","draft":"Draft"},"metadata":{"lang":"en","fig-responsive":true,"quarto-version":"1.8.27","jupyter":"nbstata","toc-location":"right","fig-cap-location":"top","tbl-cap-location":"top","smooth-scroll":true,"page-layout":"full","citations-hover":true,"footnotes-hover":true},"extensions":{"book":{"multiFile":true}}}},"projectFormats":["html"]} \ No newline at end of file diff --git a/.quarto/idx/index.qmd.json b/.quarto/idx/index.qmd.json index a2ed3c3..a0a4074 100644 --- a/.quarto/idx/index.qmd.json +++ b/.quarto/idx/index.qmd.json @@ -1 +1 @@ -{"title":"Introduction","markdown":{"headingText":"Introduction","containsRefs":false,"markdown":"\nWelcome to the census microdata activity - R code. This page provides the R code used in the module activity.\n\n## What you will find in this page\n\n- An introduction to the census microdata activity\n- A chapter containing the full R code used for analysis\n\n## Requirements\n\nTo use the code in this page, you will need:\n\n- R and RStudio installed on your computer\n- The required R packages installed\n- Download the teaching dataset: [2021 census microdata open access, England and Wales](https://datacatalogue.ukdataservice.ac.uk/studies/study/9202#!/access-data)\n\n## Data access and file location\n\nIn this code, the dataset is referenced using the following relative path: **data/publicmicrodatateachingsample.sav**\n\nPlease, update this file path in your code accordingly so that R can locate the dataset file on your computer.\n\n## How to use this page\n\nNavigate to Chapter 1 to view the complete R script. 
At this stage, the code is displayed for reference and can be copied but does not run within the page.\n\n\n","srcMarkdownNoYaml":""},"formats":{"html":{"identifier":{"display-name":"HTML","target-format":"html","base-format":"html"},"execute":{"fig-width":7,"fig-height":5,"fig-format":"retina","fig-dpi":96,"df-print":"default","error":false,"eval":false,"cache":null,"freeze":false,"echo":true,"output":true,"warning":false,"include":true,"keep-md":false,"keep-ipynb":false,"ipynb":null,"enabled":null,"daemon":null,"daemon-restart":false,"debug":false,"ipynb-filters":[],"ipynb-shell-interactivity":null,"plotly-connected":true,"message":false,"engine":"markdown"},"render":{"keep-tex":false,"keep-typ":false,"keep-source":false,"keep-hidden":false,"prefer-html":false,"output-divs":true,"output-ext":"html","fig-align":"default","fig-pos":null,"fig-env":null,"code-fold":false,"code-overflow":"scroll","code-link":false,"code-line-numbers":true,"code-tools":{"source":true,"toggle":false,"caption":"Quarto source"},"tbl-colwidths":"auto","merge-includes":true,"inline-includes":false,"preserve-yaml":false,"latex-auto-mk":true,"latex-auto-install":true,"latex-clean":true,"latex-min-runs":1,"latex-max-runs":10,"latex-makeindex":"makeindex","latex-makeindex-opts":[],"latex-tlmgr-opts":[],"latex-input-paths":[],"latex-output-dir":null,"link-external-icon":false,"link-external-newwindow":true,"self-contained-math":false,"format-resources":[],"notebook-links":true},"pandoc":{"standalone":true,"wrap":"none","default-image-extension":"png","to":"html","css":["ukds_long.css"],"toc":true,"toc-depth":2,"number-sections":false,"highlight-style":"arrow-light","output-file":"index.html"},"language":{"toc-title-document":"Table of contents","toc-title-website":"On this page","related-formats-title":"Other Formats","related-notebooks-title":"Notebooks","source-notebooks-prefix":"Source","other-links-title":"Other Links","code-links-title":"Code Links","launch-dev-container-title":"Launch Dev 
Container","launch-binder-title":"Launch Binder","article-notebook-label":"Article Notebook","notebook-preview-download":"Download Notebook","notebook-preview-download-src":"Download Source","notebook-preview-back":"Back to Article","manuscript-meca-bundle":"MECA Bundle","section-title-abstract":"Abstract","section-title-appendices":"Appendices","section-title-footnotes":"Footnotes","section-title-references":"References","section-title-reuse":"Reuse","section-title-copyright":"Copyright","section-title-citation":"Citation","appendix-attribution-cite-as":"For attribution, please cite this work as:","appendix-attribution-bibtex":"BibTeX citation:","appendix-view-license":"View License","title-block-author-single":"Author","title-block-author-plural":"Authors","title-block-affiliation-single":"Affiliation","title-block-affiliation-plural":"Affiliations","title-block-published":"Published","title-block-modified":"Modified","title-block-keywords":"Keywords","callout-tip-title":"Tip","callout-note-title":"Note","callout-warning-title":"Warning","callout-important-title":"Important","callout-caution-title":"Caution","code-summary":"Code","code-tools-menu-caption":"Code","code-tools-show-all-code":"Show All Code","code-tools-hide-all-code":"Hide All Code","code-tools-view-source":"View Source","code-tools-source-code":"Source Code","tools-share":"Share","tools-download":"Download","code-line":"Line","code-lines":"Lines","copy-button-tooltip":"Copy to Clipboard","copy-button-tooltip-success":"Copied!","repo-action-links-edit":"Edit this page","repo-action-links-source":"View source","repo-action-links-issue":"Report an issue","back-to-top":"Back to top","search-no-results-text":"No results","search-matching-documents-text":"matching documents","search-copy-link-title":"Copy link to search","search-hide-matches-text":"Hide additional matches","search-more-match-text":"more match in this document","search-more-matches-text":"more matches in this 
document","search-clear-button-title":"Clear","search-text-placeholder":"","search-detached-cancel-button-title":"Cancel","search-submit-button-title":"Submit","search-label":"Search","toggle-section":"Toggle section","toggle-sidebar":"Toggle sidebar navigation","toggle-dark-mode":"Toggle dark mode","toggle-reader-mode":"Toggle reader mode","toggle-navigation":"Toggle navigation","crossref-fig-title":"Figure","crossref-tbl-title":"Table","crossref-lst-title":"Listing","crossref-thm-title":"Theorem","crossref-lem-title":"Lemma","crossref-cor-title":"Corollary","crossref-prp-title":"Proposition","crossref-cnj-title":"Conjecture","crossref-def-title":"Definition","crossref-exm-title":"Example","crossref-exr-title":"Exercise","crossref-ch-prefix":"Chapter","crossref-apx-prefix":"Appendix","crossref-sec-prefix":"Section","crossref-eq-prefix":"Equation","crossref-lof-title":"List of Figures","crossref-lot-title":"List of Tables","crossref-lol-title":"List of Listings","environment-proof-title":"Proof","environment-remark-title":"Remark","environment-solution-title":"Solution","listing-page-order-by":"Order By","listing-page-order-by-default":"Default","listing-page-order-by-date-asc":"Oldest","listing-page-order-by-date-desc":"Newest","listing-page-order-by-number-desc":"High to Low","listing-page-order-by-number-asc":"Low to High","listing-page-field-date":"Date","listing-page-field-title":"Title","listing-page-field-description":"Description","listing-page-field-author":"Author","listing-page-field-filename":"File Name","listing-page-field-filemodified":"Modified","listing-page-field-subtitle":"Subtitle","listing-page-field-readingtime":"Reading Time","listing-page-field-wordcount":"Word Count","listing-page-field-categories":"Categories","listing-page-minutes-compact":"{0} min","listing-page-category-all":"All","listing-page-no-matches":"No matching items","listing-page-words":"{0} 
words","listing-page-filter":"Filter","draft":"Draft"},"metadata":{"lang":"en","fig-responsive":true,"quarto-version":"1.8.25","jupyter":"nbstata","toc-location":"right","fig-cap-location":"top","tbl-cap-location":"top","smooth-scroll":true,"page-layout":"full","citations-hover":true,"footnotes-hover":true},"extensions":{"book":{"multiFile":true}}}},"projectFormats":["html"]} \ No newline at end of file +{"title":"Introduction","markdown":{"headingText":"Introduction","containsRefs":false,"markdown":"\nWelcome to the census microdata activity - R code. This page provides the R code used in the module activity.\n\n## What you will find in this page\n\n- An introduction to the census microdata activity\n- A chapter containing the full R code used for analysis\n\n## Requirements\n\nTo use the code in this page, you will need:\n\n- R and RStudio installed on your computer\n- The required R packages installed\n- Download the teaching dataset: [2021 census microdata open access, England and Wales](https://datacatalogue.ukdataservice.ac.uk/studies/study/9202#!/access-data)\n\n## Data access and file location\n\nIn this code, the dataset is referenced using the following relative path: **data/publicmicrodatateachingsample.sav**\n\nPlease, update this file path in your code accordingly so that R can locate the dataset file on your computer.\n\n## How to use this page\n\nNavigate to Chapter 1 to view the complete R script. 
At this stage, the code is displayed for reference and can be copied but does not run within the page.\n\n\n","srcMarkdownNoYaml":""},"formats":{"html":{"identifier":{"display-name":"HTML","target-format":"html","base-format":"html"},"execute":{"fig-width":7,"fig-height":5,"fig-format":"retina","fig-dpi":96,"df-print":"default","error":false,"eval":true,"cache":null,"freeze":false,"echo":true,"output":true,"warning":false,"include":true,"keep-md":false,"keep-ipynb":false,"ipynb":null,"enabled":null,"daemon":null,"daemon-restart":false,"debug":false,"ipynb-filters":[],"ipynb-shell-interactivity":null,"plotly-connected":true,"message":false,"engine":"markdown"},"render":{"keep-tex":false,"keep-typ":false,"keep-source":false,"keep-hidden":false,"prefer-html":false,"output-divs":true,"output-ext":"html","fig-align":"default","fig-pos":null,"fig-env":null,"code-fold":false,"code-overflow":"scroll","code-link":false,"code-line-numbers":true,"code-tools":{"source":true,"toggle":false,"caption":"Quarto source"},"tbl-colwidths":"auto","merge-includes":true,"inline-includes":false,"preserve-yaml":false,"latex-auto-mk":true,"latex-auto-install":true,"latex-clean":true,"latex-min-runs":1,"latex-max-runs":10,"latex-makeindex":"makeindex","latex-makeindex-opts":[],"latex-tlmgr-opts":[],"latex-input-paths":[],"latex-output-dir":null,"link-external-icon":false,"link-external-newwindow":true,"self-contained-math":false,"format-resources":[],"notebook-links":true},"pandoc":{"standalone":true,"wrap":"none","default-image-extension":"png","to":"html","css":["ukds_long.css"],"toc":true,"toc-depth":2,"number-sections":false,"highlight-style":"arrow-light","output-file":"index.html"},"language":{"toc-title-document":"Table of contents","toc-title-website":"On this page","related-formats-title":"Other Formats","related-notebooks-title":"Notebooks","source-notebooks-prefix":"Source","other-links-title":"Other Links","code-links-title":"Code Links","launch-dev-container-title":"Launch Dev 
Container","launch-binder-title":"Launch Binder","article-notebook-label":"Article Notebook","notebook-preview-download":"Download Notebook","notebook-preview-download-src":"Download Source","notebook-preview-back":"Back to Article","manuscript-meca-bundle":"MECA Bundle","section-title-abstract":"Abstract","section-title-appendices":"Appendices","section-title-footnotes":"Footnotes","section-title-references":"References","section-title-reuse":"Reuse","section-title-copyright":"Copyright","section-title-citation":"Citation","appendix-attribution-cite-as":"For attribution, please cite this work as:","appendix-attribution-bibtex":"BibTeX citation:","appendix-view-license":"View License","title-block-author-single":"Author","title-block-author-plural":"Authors","title-block-affiliation-single":"Affiliation","title-block-affiliation-plural":"Affiliations","title-block-published":"Published","title-block-modified":"Modified","title-block-keywords":"Keywords","callout-tip-title":"Tip","callout-note-title":"Note","callout-warning-title":"Warning","callout-important-title":"Important","callout-caution-title":"Caution","code-summary":"Code","code-tools-menu-caption":"Code","code-tools-show-all-code":"Show All Code","code-tools-hide-all-code":"Hide All Code","code-tools-view-source":"View Source","code-tools-source-code":"Source Code","tools-share":"Share","tools-download":"Download","code-line":"Line","code-lines":"Lines","copy-button-tooltip":"Copy to Clipboard","copy-button-tooltip-success":"Copied!","repo-action-links-edit":"Edit this page","repo-action-links-source":"View source","repo-action-links-issue":"Report an issue","back-to-top":"Back to top","search-no-results-text":"No results","search-matching-documents-text":"matching documents","search-copy-link-title":"Copy link to search","search-hide-matches-text":"Hide additional matches","search-more-match-text":"more match in this document","search-more-matches-text":"more matches in this 
document","search-clear-button-title":"Clear","search-text-placeholder":"","search-detached-cancel-button-title":"Cancel","search-submit-button-title":"Submit","search-label":"Search","toggle-section":"Toggle section","toggle-sidebar":"Toggle sidebar navigation","toggle-dark-mode":"Toggle dark mode","toggle-reader-mode":"Toggle reader mode","toggle-navigation":"Toggle navigation","crossref-fig-title":"Figure","crossref-tbl-title":"Table","crossref-lst-title":"Listing","crossref-thm-title":"Theorem","crossref-lem-title":"Lemma","crossref-cor-title":"Corollary","crossref-prp-title":"Proposition","crossref-cnj-title":"Conjecture","crossref-def-title":"Definition","crossref-exm-title":"Example","crossref-exr-title":"Exercise","crossref-ch-prefix":"Chapter","crossref-apx-prefix":"Appendix","crossref-sec-prefix":"Section","crossref-eq-prefix":"Equation","crossref-lof-title":"List of Figures","crossref-lot-title":"List of Tables","crossref-lol-title":"List of Listings","environment-proof-title":"Proof","environment-remark-title":"Remark","environment-solution-title":"Solution","listing-page-order-by":"Order By","listing-page-order-by-default":"Default","listing-page-order-by-date-asc":"Oldest","listing-page-order-by-date-desc":"Newest","listing-page-order-by-number-desc":"High to Low","listing-page-order-by-number-asc":"Low to High","listing-page-field-date":"Date","listing-page-field-title":"Title","listing-page-field-description":"Description","listing-page-field-author":"Author","listing-page-field-filename":"File Name","listing-page-field-filemodified":"Modified","listing-page-field-subtitle":"Subtitle","listing-page-field-readingtime":"Reading Time","listing-page-field-wordcount":"Word Count","listing-page-field-categories":"Categories","listing-page-minutes-compact":"{0} min","listing-page-category-all":"All","listing-page-no-matches":"No matching items","listing-page-words":"{0} 
words","listing-page-filter":"Filter","draft":"Draft"},"metadata":{"lang":"en","fig-responsive":true,"quarto-version":"1.8.27","jupyter":"nbstata","toc-location":"right","fig-cap-location":"top","tbl-cap-location":"top","smooth-scroll":true,"page-layout":"full","citations-hover":true,"footnotes-hover":true},"extensions":{"book":{"multiFile":true}}}},"projectFormats":["html"]} \ No newline at end of file diff --git a/.quarto/project-cache/deno-kv-file b/.quarto/project-cache/deno-kv-file index 338d1b8..3c6f3d4 100644 Binary files a/.quarto/project-cache/deno-kv-file and b/.quarto/project-cache/deno-kv-file differ diff --git a/.quarto/project-cache/deno-kv-file-shm b/.quarto/project-cache/deno-kv-file-shm deleted file mode 100644 index fe9ac28..0000000 Binary files a/.quarto/project-cache/deno-kv-file-shm and /dev/null differ diff --git a/.quarto/project-cache/deno-kv-file-wal b/.quarto/project-cache/deno-kv-file-wal deleted file mode 100644 index e69de29..0000000 diff --git a/.quarto/xref/24ad0713 b/.quarto/xref/24ad0713 index 011b0e4..fecc2a2 100644 --- a/.quarto/xref/24ad0713 +++ b/.quarto/xref/24ad0713 @@ -1 +1 @@ 
-{"entries":[],"headings":["install-and-load-the-main-packages","install-packages","load-packages","load-the-dataset-and-make-a-copy","drop-unnecessary-variables","exploratory-analysis","univariate-analysis","univariate-analysis-for-health-in-general-and-hours-worked","frequencies-for-health-in-general-and-hours-worked","percentages-for-health-in-general-and-hours-worked","frequencies-and-percentages-with-labels-for-health-in-general-and-hours-worked","a-pie-chart-for-health-in-general","a.1-create-a-labelled-health-variable","a.2-plot-pie-chart","b-pie-chart-for-hours-worked","b.1-create-a-labelled-hours-worked-variable","b.2-plot-pie-chart","transformation-of-variable-health-in-general","drop-rows-where-health_in_general--8-does-not-apply","check-the-new-variables-distribution","create-variable-poor_health-and-check-its-distribution","univariate-analysis-for-age-sex-ethnicity-social-category","frequencies-and-percentages-for-age-sex-ethnicity-social-category","plot-pie-chart-age-variable","a-create-a-labelled-age-variable","b-plot-pie-chart","histogram-for-sex-variable","a-recode-sex-variable-with-labels","b-plot-histogram","histogram-for-ethnicity-variable","a-recode-ethnicity-variable-with-labels","b-plot-histogram-1","pie-chart-for-variable-social-class","a-create-a-labelled-social-class-variable","b-plot-pie-chart-1","drop-under-16-from-the-table","bivariate-analysis","cross-tabulation-of-poor_health-by-hours-worked","row-percentages","column-percentages","chi-square-test","a-creating-cross-tabulations-to-be-tested","b-chi-square-test-on-health-and-our-5-other-variables","regression-model","recode-variables-as-factors-with-labels","pick-references-that-are-central-or-policy-relevant","fit-logistic-regression","obtain-summary-results-of-the-logistic-regression"],"options":{"chapters":true}} \ No newline at end of file 
+{"entries":[],"headings":["install-and-load-the-main-packages","load-the-dataset-and-make-a-copy","drop-unnecessary-variables","exploratory-analysis","univariate-analysis","univariate-analysis-for-health-in-general-and-hours-worked","frequencies-for-health-in-general-and-hours-worked","distribution-of-self-rated-health-and-hours-worked","frequencies-and-percentages-with-labels-for-health-in-general-and-hours-worked","a-pie-chart-for-self-rated-general-health","b-pie-chart-for-hours-worked","b.1-create-a-labelled-hours-worked-variable","transformation-of-variable-health-in-general","drop-rows-where-health_in_general--8-does-not-apply","check-the-new-variables-distribution","create-variable-poor_health-and-check-its-distribution","univariate-analysis-for-age-sex-ethnicity-social-category","frequencies-and-percentages-for-age-sex-ethnicity-social-category","plot-pie-chart-age-variable","pie-chart-for-sex-variable","histogram-for-ethnicity-variable","a-recode-ethnicity-variable-with-labels","b-plot-histogram","pie-chart-for-variable-social-class","a-create-a-labelled-social-class-variable","b-plot-pie-chart","drop-under-16-from-the-table","bivariate-analysis","cross-tabulation-of-poor_health-by-hours-worked","row-percentages","column-percentages","chi-square-test","a-creating-cross-tabulations-to-be-tested","b-chi-square-test-on-health-and-our-5-other-variables","regression-model","recode-variables-as-factors-with-labels","pick-references-that-are-central-or-policy-relevant","fit-logistic-regression","obtain-summary-results-of-the-logistic-regression"],"options":{"chapters":true}} \ No newline at end of file diff --git a/_quarto.yml b/_quarto.yml index 7cdfd5b..087e0ef 100644 --- a/_quarto.yml +++ b/_quarto.yml @@ -4,7 +4,7 @@ project: ### Code execution options execute: ### How R code is going to be interpreted - eval: false + eval: true echo: true ### These can be changed for individual code blocks warning: false message: false diff --git a/chap1.qmd b/chap1.qmd 
index f1325f9..717ebab 100644 --- a/chap1.qmd +++ b/chap1.qmd @@ -2,38 +2,30 @@ ## 1- Install and load the main packages -#### Install packages -If any of the below packages is not installed on your computer, please install it. Remember to delete the **#** symbol before running the code. +```{r packages} +#### Load the packages if installed; install them if not present -```{r, eval=FALSE} - -#install.packages("tidyverse") -#install.packages("ggplot2") -#install.packages("dplyr") -#install.packages("ggrepel") -#install.packages("patchwork") -#install.packages("gridExtra") - -``` - -#### Load packages - -```{r, eval=FALSE} - -library(tidyverse) -library(ggplot2) -library(dplyr) -library(ggrepel) -library(patchwork) -library(gridExtra) -library(haven) +pkg <- c("tidyverse", + "ggplot2", + "ggrepel", + "patchwork", + "gridExtra", + "haven", ### Import Stata and SPSS datasets + "viridis" ### colourblind friendly palette +) +for (p in pkg) { + if (!require(p, character.only = T)) { + install.packages(p) + library(p, character.only = T) + } +} ``` ## 2- Load the dataset and make a copy -```{r, eval=FALSE} +```{r} publicmicrodatateachingsample <- read_sav("data/publicmicrodatateachingsample.sav") @@ -41,21 +33,42 @@ census2021teaching <- publicmicrodatateachingsample ``` -Important: Remember to change to pathway to your dataset accordingly. +Important: Remember to change the filepath to your dataset accordingly. -If the **read_sav** code does not work, you can use the **Import Dataset** button in the Environment pane to load the dataset in R. +If the **read_sav** code does not work, you can use the **Import Dataset** button in the RStudio Environment Pane to load the dataset in R. 
## 3- Drop unnecessary variables -```{r, eval=FALSE} +```{r} +#| eval: false +census2021teaching <- census2021teaching[,c("health_in_general", + "hours_per_week_worked", + "resident_age_7d","sex", + "ethnic_group_tb_6a", + "approx_social_grade")] + +head(census2021teaching) +dim(census2021teaching) +``` +::: {.callout-note collapse="true"} + +## Click to view results + +```{r} +#| echo: false census2021teaching <- census2021teaching[,c("health_in_general", "hours_per_week_worked", "resident_age_7d","sex", "ethnic_group_tb_6a", "approx_social_grade")] + +head(census2021teaching) +dim(census2021teaching) ``` +::: + ## 4- Exploratory analysis ### 4.1- Univariate analysis @@ -64,149 +77,174 @@ census2021teaching <- census2021teaching[,c("health_in_general", ##### Frequencies for health in general and hours worked -```{r, eval=FALSE} +```{r} +#| eval: false +### Raw values table(census2021teaching$health_in_general) -table(census2021teaching$hours_per_week_worked) +### Labelled values +table(as_factor(census2021teaching$hours_per_week_worked)) ``` -##### Percentages for health in general and hours worked +::: {.callout-note collapse="true"} +## Click to view results -```{r, eval=FALSE} +```{r} +#| echo: false -prop.table(table(census2021teaching$health_in_general)) * 100 - -prop.table(table(census2021teaching$hours_per_week_worked)) * 100 +### Raw values +table(census2021teaching$health_in_general) +### Labelled values +table(as_factor(census2021teaching$hours_per_week_worked)) ``` -You can also calculate these frequencies and percentages with appropriate labels. 
+::: -##### Frequencies and percentages with labels for health in general and hours worked +##### Distribution of self-rated health and hours worked - -```{r, eval=FALSE} -census2021teaching %>% - mutate( - health_code = as.integer(as.character(health_in_general)), - health_label = factor( - health_code, - levels = c(1, 2, 3, 4, 5, -8), - labels = c("Very good", "Good", "Fair", "Bad", "Very bad", "Does not apply") - ) - ) %>% - count(health_label) %>% - mutate(percent = round(100 * n / sum(n), 1)) +```{r} +#| eval: false +round( + prop.table( + table( + as_factor(census2021teaching$health_in_general + ) + ) + ) * 100, + 1) -census2021teaching %>% - mutate( - hours_code = as.integer(as.character(hours_per_week_worked)), - hours_label = factor( - hours_code, - levels = c(1, 2, 3, 4, -8), - labels = c("[0 – 15]", "[16 – 30]", "[31 – 48]", "[49 and +]", "Does not apply") - ) - ) %>% - count(hours_label) %>% - mutate(percent = round(100 * n / sum(n), 1)) +round( + prop.table( + table( + as_factor(census2021teaching$hours_per_week_worked) + ) + ) * 100, + 1) ``` +::: {.callout-note collapse="true"} -You can also generate pie charts for better visualization +## Click to view results -### a) Pie chart for Health in general +```{r} +#| echo: false -##### a.1) Create a labelled health variable - -```{r, eval=FALSE} +round( + prop.table( + table( + as_factor(census2021teaching$health_in_general + ) + ) + ) * 100, + 1) -health <- census2021teaching %>% - mutate( - health_code = as.integer(as.character(health_in_general)), - health_label = factor( - health_code, - levels = c(1,2,3,4,5,-8), - labels = c("Very good", "Good", "Fair", "Bad", "Very bad", "Not applicable") - ) - ) %>% - filter(!is.na(health_label)) %>% - count(health_label, name = "n") %>% - mutate( - pct = round(100 * n / sum(n), 1), - label = paste0(health_label, " (", pct, "%)") - ) +round( + prop.table( + table( + as_factor(census2021teaching$hours_per_week_worked) + ) + ) * 100, + 1) ``` -##### a.2) Plot pie 
chart +::: + -```{r, eval=FALSE} +##### Frequencies and percentages with labels for health in general and hours worked -p1 <- ggplot(health, aes(x = "", y = n, fill = health_label)) + - geom_col(width = 1, color = "white") + - coord_polar(theta = "y") + - geom_label_repel(aes(label = label), - position = position_stack(vjust = 0.5), - show.legend = FALSE, - size = 3) + - labs(title = "Distribution of general health", fill = "Health status") + - theme_void()+ - scale_fill_brewer(palette = "Set2") + - theme(plot.title = element_text(size = 16, hjust = 1)) -p1 +You can also generate pie charts for better visualization + +### a) Pie chart for self-rated general health + + + +```{r p1} +#| eval: false + +h1<-round(100* + prop.table( + table( + as_factor(census2021teaching$health_in_general) + ) + ),1 + ) + +h1_df <- as.data.frame(h1) + +names(h1_df) <- c("srh", "pct") + +pie(h1_df$pct, + labels = paste(h1_df$srh, sep = " ", h1_df$pct, "%"), + cex = 0.7, + radius=0.9, + col = viridis::viridis(length(h1_df$srh)), + main = "Self-rated general health") ``` +::: {.callout-note collapse="true"} -### b) Pie chart for Hours worked +## Click to view results +```{r p1.2} +#| echo: false -##### b.1) Create a labelled hours worked variable +h1<-round(100* + prop.table( + table( + as_factor(census2021teaching$health_in_general) + ) + ),1 + ) +h1_df <- as.data.frame(h1) -```{r, eval=FALSE} +names(h1_df) <- c("srh", "pct") -hours <- census2021teaching %>% - mutate( - hours_code = as.integer(as.character(hours_per_week_worked)), - hours_label = factor( - hours_code, - levels = c(1, 2, 3, 4, -8), - labels = c("[0 – 15]", "[16 – 30]", "[31 – 48]", "[49 and +]", "Not applicable") - ) - ) %>% - filter(!is.na(hours_label)) %>% - count(hours_label, name = "n") %>% - mutate( - pct = round(100 * n / sum(n), 1), - label = paste0(hours_label, " (", pct, "%)") - ) +pie(h1_df$pct, + labels = paste(h1_df$srh, sep = " ", h1_df$pct, "%"), + cex = 0.7, + radius=0.9, + col = 
viridis::viridis(length(h1_df$srh)), + main = "Self-rated general health") ``` -##### b.2) Plot pie chart +::: +### b) Pie chart for Hours worked -```{r, eval=FALSE} +##### b.1) Create a labelled hours worked variable -p2 <- ggplot(hours, aes(x = "", y = n, fill = hours_label)) + - geom_col(width = 1, color = "white") + - coord_polar(theta = "y") + - geom_label_repel(aes(label = label), - position = position_stack(vjust = 0.5), - show.legend = FALSE, - size = 3) + - labs(title = "Distribution of worked hours", fill = "Hours worked") + - theme_void()+ - scale_fill_brewer(palette = "Dark2") + - theme(plot.title = element_text(size = 16, hjust = 1)) -p2 +```{r p2} +#| code-fold: true +#| code-summary: "View output" + +h2<-round(100* + prop.table( + table( + as_factor(census2021teaching$hours_per_week_worked)) + ),1 + ) + +h2_df <- as.data.frame(h2) + +names(h2_df) <- c("hpw", "pct") + +p2<-pie(h2_df$pct, + labels = paste(h2_df$hpw, sep = " ", h2_df$pct, "%"), + cex = 0.7, + radius=0.9, + col = viridis::viridis(length(h2_df$hpw)), + main = "Hours worked per week") ``` @@ -216,7 +254,7 @@ p2 Create a new variable called **health_binary** by regrouping health in general in two broader categories: **Good or very good health** and **Poor health** -```{r, eval=FALSE} +```{r} census2021teaching$health_binary <- ifelse( census2021teaching$health_in_general %in% c(1, 2), "Good or very good health", @@ -227,15 +265,16 @@ census2021teaching$health_binary <- ifelse( ##### Drop rows where health_in_general = -8 (Does not apply) -```{r, eval=FALSE} +```{r} -census2021teaching <- subset(census2021teaching, health_in_general != -8) +census2021teaching <-census2021teaching %>% + filter(health_in_general != -8) ``` ##### Check the new variable's distribution -```{r, eval=FALSE} +```{r} table(census2021teaching$health_binary) @@ -245,7 +284,7 @@ table(census2021teaching$health_binary) Code a binary variable that takes 1 for poor health and 0 otherwise. 
Call this new variable **Poor_health**. This variable will be the explained variable of our regression model. -```{r, eval=FALSE} +```{r} census2021teaching$Poor_health <- ifelse( census2021teaching$health_binary == "Poor health", 1, @@ -260,89 +299,53 @@ table(census2021teaching$Poor_health) #### Frequencies and percentages for age, sex, ethnicity, social category -```{r, eval=FALSE} +```{r} -table(census2021teaching$resident_age_7d) -table(census2021teaching$sex) -table(census2021teaching$ethnic_group_tb_6a) -table(census2021teaching$approx_social_grade) +t_a<-table(as_factor(census2021teaching$resident_age_7d)) +t_s<-table(as_factor(census2021teaching$sex)) +t_e<-table(as_factor(census2021teaching$ethnic_group_tb_6a)) +t_g<-table(as_factor(census2021teaching$approx_social_grade)) -prop.table(table(census2021teaching$resident_age_7d)) * 100 -prop.table(table(census2021teaching$sex)) * 100 -prop.table(table(census2021teaching$ethnic_group_tb_6a)) * 100 -prop.table(table(census2021teaching$approx_social_grade)) * 100 +cbind(t_a,round(prop.table(t_a)*100,1)) +cbind(t_s,round(prop.table(t_s)*100,1)) +cbind(t_e,round(prop.table(t_e)*100,1)) +cbind(t_g,round(prop.table(t_g)*100,1)) ``` #### Plot pie chart Age variable -##### a) Create a labelled Age variable - -```{r, eval=FALSE} - -Age <- census2021teaching %>% - mutate( - Age_code = as.integer(as.character(resident_age_7d)), - Age_label = factor( - Age_code, - levels = c(1, 2, 3, 4,5,6,7, -8), - labels = c("]0 – 15]", "[16 – 24]", "[25 – 34]", "[35 - 44]", "[45 - 54]", "[55 - 64]", "[65 and +]","Not applicable") - ) - ) %>% - filter(!is.na(Age_label)) %>% - count(Age_label, name = "n") %>% - mutate( - pct = round(100 * n / sum(n), 1), - label = paste0(Age_label, " (", pct, "%)") - ) +```{r} -``` -##### b) Plot pie chart +t_a.df <- as.data.frame(round(prop.table(t_a)*100,1)) +names(t_a.df) <- c("age", "pct") -```{r, eval=FALSE} +pie(t_a.df$pct, + labels = paste(t_a.df$age, sep = " ", t_a.df$pct, "%"), + cex = 0.7, + 
radius=0.9, + col = viridis::viridis(length(t_a.df$age)), + main = "Distribution of respondents by age") -ggplot(Age, aes(x = "", y = n, fill = Age_label)) + - geom_col(width = 1, color = "white") + - coord_polar(theta = "y") + - geom_label_repel(aes(label = label), - position = position_stack(vjust = 0.5), - show.legend = FALSE, - size = 3) + - labs(title = "Distribution of respondents by Age", fill = "Resident age") + - theme_void()+ - scale_fill_brewer(palette = "Pastel1") + - theme(plot.title = element_text(size = 16, hjust = 1)) ``` -#### Histogram for Sex variable +#### Pie chart for Sex variable -##### a) Recode sex variable with labels -```{r, eval=FALSE} +```{r} -sex <- census2021teaching %>% - mutate(sex_label = factor(sex, levels = c(1, 2), labels = c("Male", "Female"))) %>% - count(sex_label, name = "n") %>% - mutate(prop = n / sum(n), - label = scales::percent(prop, accuracy = 0.1)) +t_s.df <- as.data.frame(round(prop.table(t_s)*100,1)) +names(t_s.df) <- c("sex", "pct") -``` - -##### b) Plot histogram +pie(t_s.df$pct, + labels = paste(t_s.df$sex, sep = " ", t_s.df$pct, "%"), + cex = 0.7, + radius=0.9, + col = viridis::viridis(length(t_s.df$sex)), ### one colour per sex category + main = "Distribution of respondents by sex") -```{r, eval=FALSE} - -ggplot(sex, aes(x = sex_label, y = prop)) + - geom_col(fill = "skyblue", color = "black", width = 0.3) + - geom_text(aes(label = label), vjust = -0.5) + - scale_y_continuous(labels = scales::percent_format()) + - scale_x_discrete(expand = expansion(mult = c(0.5, 0.7))) + - labs(title = "Distribution of respondents by sex", x = "", y = "Percentage") + - theme_classic()+ - theme(axis.text.x = element_text(size = 12))+ - theme(plot.title = element_text(size = 16, hjust = 0.5)) ``` @@ -350,7 +353,7 @@ ggplot(sex, aes(x = sex_label, y = prop)) + ##### a) Recode Ethnicity variable with labels -```{r, eval=FALSE} +```{r} Ethnicity <- census2021teaching %>% mutate(Ethnicity_label = factor(ethnic_group_tb_6a, levels = c(1, 2, 3, 4, 5, -8), @@ -363,7 
+366,7 @@ Ethnicity <- census2021teaching %>% ##### b) Plot histogram -```{r, eval=FALSE} +```{r} ggplot(Ethnicity, aes(x = Ethnicity_label, y = prop)) + geom_col(fill = "chocolate", color = "black", width = 0.3) + @@ -380,7 +383,7 @@ ggplot(Ethnicity, aes(x = Ethnicity_label, y = prop)) + ##### a) Create a labelled social class variable -```{r, eval=FALSE} +```{r} Social_class <- census2021teaching %>% mutate( @@ -403,7 +406,7 @@ Social_class <- census2021teaching %>% ##### b) Plot pie chart -```{r, eval=FALSE} +```{r} ggplot(Social_class, aes(x = "", y = n, fill = Social_class_label)) + geom_col(width = 1, color = "white") + @@ -421,7 +424,7 @@ ggplot(Social_class, aes(x = "", y = n, fill = Social_class_label)) + ##### Drop under 16 from the table -```{r, eval=FALSE} +```{r} census2021teaching <- subset(census2021teaching, resident_age_7d != 1) @@ -431,7 +434,7 @@ census2021teaching <- subset(census2021teaching, resident_age_7d != 1) ##### Cross tabulation of Poor_health by hours worked -```{r, eval=FALSE} +```{r} tabulation <- table(census2021teaching$Poor_health, census2021teaching$hours_per_week_worked) tabulation @@ -440,7 +443,7 @@ tabulation ##### Row percentages -```{r, eval=FALSE} +```{r} prop.table(tabulation, margin = 1) * 100 @@ -448,7 +451,7 @@ prop.table(tabulation, margin = 1) * 100 ##### Column percentages -```{r, eval=FALSE} +```{r} prop.table(tabulation, margin = 2) * 100 @@ -457,7 +460,7 @@ prop.table(tabulation, margin = 2) * 100 ##### a) Creating cross tabulations to be tested -```{r, eval=FALSE} +```{r} tabulation.health_hours <- table(census2021teaching$Poor_health, census2021teaching$hours_per_week_worked) tabulation.health_age <- table(census2021teaching$Poor_health, census2021teaching$resident_age_7d) @@ -469,7 +472,7 @@ tabulation.health_classes <- table(census2021teaching$Poor_health, census2021tea ##### b) chi square test on health and our 5 other variables -```{r, eval=FALSE} +```{r} chisq.test(tabulation.health_hours) 
chisq.test(tabulation.health_age) @@ -482,7 +485,7 @@ chisq.test(tabulation.health_classes) ### 5.1 Recode variables as factors with labels -```{r, eval=FALSE} +```{r} census2021teaching$hours_per_week_worked <- factor( census2021teaching$hours_per_week_worked, @@ -522,7 +525,7 @@ census2021teaching$sex <- factor( ### 5.2 Pick references that are central or policy relevant -```{r, eval=FALSE} +```{r} census2021teaching$hours_per_week_worked <- relevel(census2021teaching$hours_per_week_worked, ref = "31-48") census2021teaching$resident_age_7d <- relevel(census2021teaching$resident_age_7d, ref = "45-54") @@ -535,7 +538,7 @@ census2021teaching$approx_social_grade <- relevel(census2021teaching$app ### 5.3 Fit logistic regression -```{r, eval=FALSE} +```{r} model_health <- glm( Poor_health ~ hours_per_week_worked + resident_age_7d + sex + ethnic_group_tb_6a + approx_social_grade, @@ -546,7 +549,7 @@ model_health <- glm( ``` #### Obtain summary results of the logistic regression -```{r, eval=FALSE} +```{r} summary(model_health) diff --git a/docs/chap1.html b/docs/chap1.html index 2abd63d..b81a94a 100644 --- a/docs/chap1.html +++ b/docs/chap1.html @@ -2,7 +2,7 @@ - + @@ -73,10 +73,10 @@ - + - + - + - +