This is an R Markdown
Notebook for hypothesis testing the means and variances of two
populations.
We begin by loading the necessary libraries.
# Load necessary libraries
library(stats) # Statistical functions
library(ggplot2) # Plots
library(ggpubr) # Publication quality plots
library(car) # Statistical tests
library(dplyr) # Data manipulation
library(boot) # Bootstrap confidence intervals
library(qqplotr) # Normal probability plots
Load Data
Next we read the .csv file and randomly select a sample from both the
male and female populations.
# Read the CSV file
data <- read.csv("heights-by-gender.csv")
# Set Gender as a categorical factor
data$Sex <- as.factor(data$Sex)
# Set sample size
sample_size <- 1000
# Randomly select 30 males and 30 females
set.seed(321) # Setting a seed for reproducibility
male_heights <- data %>%
filter(Sex == 'Male') %>%
sample_n(sample_size)
set.seed(654) # Setting a different seed for the female sample
female_heights <- data %>%
filter(Sex == 'Female') %>%
sample_n(sample_size)
Normal Probability Plots
Normal probability plots are useful to visually assess if the data is
approximately normally distributed. In a perfectly normal distribution,
the data points would form a straight line. Significant deviations from
this line suggest departures from normality.
# Create and display the QQ plots
ggplot(data = rbind(male_heights,female_heights), aes(sample = Height)) +
stat_qq_band(alpha=0.2, conf=0.95, qtype=1, bandType = "boot") +
stat_qq_line(identity=TRUE,alpha=0.6) +
stat_qq_point(col="blue", size=0.5) +
facet_wrap(~Sex) +
ggtitle(paste("Normal Probability Plot for Sample")) +
labs(x = "Theoretical Values", y = "Sample Values") +
theme_minimal() +
theme(plot.title = element_text(hjust = 0.5))
Sample Means and Sample Variances
Now we calculate the sample means and sample variances.
# Calculate Means and Variances
male_mean <- mean(male_heights$Height)
male_variance <- var(male_heights$Height)
female_mean <- mean(female_heights$Height)
female_variance <- var(female_heights$Height)
# Display the calculated values
cat(" Male Mean:\t\t\t\t\t", male_mean, "\n",
"Male Variance:\t\t\t\t", male_variance, "\n",
"Male Standard Deviation:\t", sqrt(male_variance), "\n\n",
"Female Mean:\t\t\t\t", female_mean, "\n",
"Female Variance:\t\t\t", female_variance, "\n",
"Female Standard Deviation:\t", sqrt(female_variance), "\n")
Male Mean: 171.1412
Male Variance: 102.7636
Male Standard Deviation: 10.13724
Female Mean: 149.8925
Female Variance: 122.6704
Female Standard Deviation: 11.07567
Equality Test of Population Variances
Now we apply a robust test for approximately normal data to test the
null hypothesis that the male and female populations have the same
variance.
# Levene test is robust for approximately normal data
levene_test_result <- leveneTest(Height ~ Sex,
data = rbind(male_heights, female_heights))
# F_test_result <- var.test(Height ~ Sex,
# data = rbind(male_heights, female_heights),
# alternative="two.sided")
print(levene_test_result)
Levene's Test for Homogeneity of Variance (center = median)
Df F value Pr(>F)
group 1 6.1769 0.01302 *
1998
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Equality Test of Population Means
Finally we apply a Welch’s t-test for equality of means, assuming
unequal variance.
# Perform Welch's t-test for Equality of Means (for unequal variances)
t_test_result <- t.test(Height ~ Sex,
data = rbind(male_heights, female_heights),
var.equal = FALSE)
print(t_test_result)
Welch Two Sample t-test
data: Height by Sex
t = -44.753, df = 1982.5, p-value < 2.2e-16
alternative hypothesis: true difference in means between group Female and group Male is not equal to 0
95 percent confidence interval:
-22.17989 -20.31757
sample estimates:
mean in group Female mean in group Male
149.8925 171.1412
LS0tDQp0aXRsZTogIlN0YXRpc3RpY2FsIEFuYWx5c2lzIG9mIEhlaWdodCBEaWZmZXJlbmNlcyBieSBHZW5kZXIiDQpvdXRwdXQ6DQogIGh0bWxfbm90ZWJvb2s6IGRlZmF1bHQNCiAgaHRtbF9kb2N1bWVudDogZGVmYXVsdA0KICBwZGZfZG9jdW1lbnQ6IGRlZmF1bHQNCmVkaXRvcl9vcHRpb25zOg0KICBjaHVua19vdXRwdXRfdHlwZTogaW5saW5lDQotLS0NCg0KVGhpcyBpcyBhbiBbUiBNYXJrZG93bl0oaHR0cDovL3JtYXJrZG93bi5yc3R1ZGlvLmNvbSkgTm90ZWJvb2sgZm9yIGh5cG90aGVzaXMgdGVzdGluZyB0aGUgbWVhbnMgYW5kIHZhcmlhbmNlcyBvZiB0d28gcG9wdWxhdGlvbnMuDQoNCldlIGJlZ2luIGJ5IGxvYWRpbmcgdGhlIG5lY2Vzc2FyeSBsaWJyYXJpZXMuDQoNCmBgYHtyIGVjaG89VFJVRSwgZXJyb3I9RkFMU0UsIG1lc3NhZ2U9RkFMU0UsIHdhcm5pbmc9RkFMU0UsIHJlc3VsdHM9J2hpZGUnfQ0KIyBMb2FkIG5lY2Vzc2FyeSBsaWJyYXJpZXMNCmxpYnJhcnkoc3RhdHMpICAgICMgU3RhdGlzdGljYWwgZnVuY3Rpb25zDQpsaWJyYXJ5KGdncGxvdDIpICAjIFBsb3RzDQpsaWJyYXJ5KGdncHVicikgICAjIFB1YmxpY2F0aW9uIHF1YWxpdHkgcGxvdHMNCmxpYnJhcnkoY2FyKSAgICAgICMgU3RhdGlzdGljYWwgdGVzdHMNCmxpYnJhcnkoZHBseXIpICAgICMgRGF0YSBtYW5pcHVsYXRpb24NCmxpYnJhcnkoYm9vdCkgICAgICMgQm9vdHN0cmFwIGNvbmZpZGVuY2UgaW50ZXJ2YWxzDQpsaWJyYXJ5KHFxcGxvdHIpICAjIE5vcm1hbCBwcm9iYWJpbGl0eSBwbG90cw0KYGBgDQoNCmBgYHtyIHdyYXAtaG9vaywgbWVzc2FnZT1GQUxTRSwgd2FybmluZz1GQUxTRSwgaW5jbHVkZT1GQUxTRX0NCmxpYnJhcnkoa25pdHIpDQojIFRoaXMgZnVuY3Rpb24gYWxsb3dzIHRleHQgd3JhcHBpbmcgd2hlbiBrbml0dGluZyB0byBQREYNCmhvb2tfb3V0cHV0ID0ga25pdF9ob29rcyRnZXQoJ291dHB1dCcpDQprbml0X2hvb2tzJHNldChvdXRwdXQgPSBmdW5jdGlvbih4LCBvcHRpb25zKSB7DQogICMgdGhpcyBob29rIGlzIHVzZWQgb25seSB3aGVuIHRoZSBsaW5ld2lkdGggb3B0aW9uIGlzIG5vdCBOVUxMDQogIGlmICghaXMubnVsbChuIDwtIG9wdGlvbnMkbGluZXdpZHRoKSkgew0KICAgIHggPSB4ZnVuOjpzcGxpdF9saW5lcyh4KQ0KICAgICMgYW55IGxpbmVzIHdpZGVyIHRoYW4gbiBzaG91bGQgYmUgd3JhcHBlZA0KICAgIGlmIChhbnkobmNoYXIoeCkgPiBuKSkgeCA9IHN0cndyYXAoeCwgd2lkdGggPSBuKQ0KICAgIHggPSBwYXN0ZSh4LCBjb2xsYXBzZSA9ICdcbicpDQogIH0NCiAgaG9va19vdXRwdXQoeCwgb3B0aW9ucykNCn0pDQpgYGANCg0KIyMgTG9hZCBEYXRhDQoNCk5leHQgd2UgcmVhZCB0aGUgLmNzdiBmaWxlIGFuZCByYW5kb21seSBzZWxlY3QgYSBzYW1wbGUgZnJvbSBib3RoIHRoZSBtYWxlIGFuZCBmZW1hbGUgcG9wdWxhdGlvbnMuDQoNCmBgYHtyfQ0KIyBSZWFkIHRoZSBDU1YgZmlsZQ0KZGF0YSA8LSByZWFkLmNzdigiaGVpZ2h0cy1ieS1nZW5kZXIuY3N2IikNCg0KIyBTZXQgR2VuZGVyIGFzIGEgY2F0ZWdvcmljYWwgZmFjdG9yDQpkYXRhJFNleCA8LSBhcy5mYWN0b3IoZGF0YSRTZXgpDQoNCiMgU2V0IHNhbXBsZSBzaXplDQpzYW1wbGVfc2l6ZSA8LSAxMDAwDQoNCiMgUmFuZG9tbHkgc2VsZWN0IDMwIG1hbGVzIGFuZCAzMCBmZW1hbGVzDQpzZXQuc2VlZCgzMjEpICAjIFNldHRpbmcgYSBzZWVkIGZvciByZXByb2R1Y2liaWxpdHkNCm1hbGVfaGVpZ2h0cyA8LSBkYXRhICU+JSANCiAgICAgICAgICAgICAgIGZpbHRlcihTZXggPT0gJ01hbGUnKSAlPiUgDQogICAgICAgICAgICAgICBzYW1wbGVfbihzYW1wbGVfc2l6ZSkNCg0Kc2V0LnNlZWQoNjU0KSAgIyBTZXR0aW5nIGEgZGlmZmVyZW50IHNlZWQgZm9yIHRoZSBmZW1hbGUgc2FtcGxlDQpmZW1hbGVfaGVpZ2h0cyA8LSBkYXRhICU+JSANCiAgICAgICAgICAgICAgICAgZmlsdGVyKFNleCA9PSAnRmVtYWxlJykgJT4lIA0KICAgICAgICAgICAgICAgICBzYW1wbGVfbihzYW1wbGVfc2l6ZSkNCg0KYGBgDQoNCiMjIE5vcm1hbCBQcm9iYWJpbGl0eSBQbG90cw0KDQpOb3JtYWwgcHJvYmFiaWxpdHkgcGxvdHMgYXJlIHVzZWZ1bCB0byB2aXN1YWxseSBhc3Nlc3MgaWYgdGhlIGRhdGEgaXMgYXBwcm94aW1hdGVseSBub3JtYWxseSBkaXN0cmlidXRlZC4gSW4gYSBwZXJmZWN0bHkgbm9ybWFsIGRpc3RyaWJ1dGlvbiwgdGhlIGRhdGEgcG9pbnRzIHdvdWxkIGZvcm0gYSBzdHJhaWdodCBsaW5lLiBTaWduaWZpY2FudCBkZXZpYXRpb25zIGZyb20gdGhpcyBsaW5lIHN1Z2dlc3QgZGVwYXJ0dXJlcyBmcm9tIG5vcm1hbGl0eS4NCg0KYGBge3IsIGZpZy53aWR0aCA9IDEwfQ0KIyBDcmVhdGUgYW5kIGRpc3BsYXkgdGhlIFFRIHBsb3RzDQpnZ3Bsb3QoZGF0YSA9IHJiaW5kKG1hbGVfaGVpZ2h0cyxmZW1hbGVfaGVpZ2h0cyksIGFlcyhzYW1wbGUgPSBIZWlnaHQpKSArDQogICAgICAgIHN0YXRfcXFfYmFuZChhbHBoYT0wLjIsIGNvbmY9MC45NSwgcXR5cGU9MSwgYmFuZFR5cGUgPSAiYm9vdCIpICsNCiAgICAgICAgc3RhdF9xcV9saW5lKGlkZW50aXR5PVRSVUUsYWxwaGE9MC42KSArDQogICAgICAgIHN0YXRfcXFfcG9pbnQoY29sPSJibHVlIiwgc2l6ZT0wLjUpICsNCiAgICAgICAgZmFjZXRfd3JhcCh+U2V4KSArDQogICAgICAgIGdndGl0bGUocGFzdGUoIk5vcm1hbCBQcm9iYWJpbGl0eSBQbG90IGZvciBTYW1wbGUiKSkgKw0KICAgICAgICBsYWJzKHggPSAiVGhlb3JldGljYWwgVmFsdWVzIiwgeSA9ICJTYW1wbGUgVmFsdWVzIikgKw0KICAgICAgICB0aGVtZV9taW5pbWFsKCkgKw0KICAgICAgICB0aGVtZShwbG90LnRpdGxlID0gZWxlbWVudF90ZXh0KGhqdXN0ID0gMC41KSkNCg0KYGBgDQoNCiMjIFNhbXBsZSBNZWFucyBhbmQgU2FtcGxlIFZhcmlhbmNlcw0KDQpOb3cgd2UgY2FsY3VsYXRlIHRoZSBzYW1wbGUgbWVhbnMgYW5kIHNhbXBsZSB2YXJpYW5jZXMuDQoNCmBgYHtyfQ0KIyBDYWxjdWxhdGUgTWVhbnMgYW5kIFZhcmlhbmNlcw0KbWFsZV9tZWFuIDwtIG1lYW4obWFsZV9oZWlnaHRzJEhlaWdodCkNCm1hbGVfdmFyaWFuY2UgPC0gdmFyKG1hbGVfaGVpZ2h0cyRIZWlnaHQpDQoNCmZlbWFsZV9tZWFuIDwtIG1lYW4oZmVtYWxlX2hlaWdodHMkSGVpZ2h0KQ0KZmVtYWxlX3ZhcmlhbmNlIDwtIHZhcihmZW1hbGVfaGVpZ2h0cyRIZWlnaHQpDQoNCiMgRGlzcGxheSB0aGUgY2FsY3VsYXRlZCB2YWx1ZXMNCmNhdCgiIE1hbGUgTWVhbjpcdFx0XHRcdFx0IiwgbWFsZV9tZWFuLCAiXG4iLCANCiAgICAiTWFsZSBWYXJpYW5jZTpcdFx0XHRcdCIsIG1hbGVfdmFyaWFuY2UsICJcbiIsDQogICAgIk1hbGUgU3RhbmRhcmQgRGV2aWF0aW9uOlx0Iiwgc3FydChtYWxlX3ZhcmlhbmNlKSwgIlxuXG4iLA0KICAgICJGZW1hbGUgTWVhbjpcdFx0XHRcdCIsIGZlbWFsZV9tZWFuLCAiXG4iLA0KICAgICJGZW1hbGUgVmFyaWFuY2U6XHRcdFx0IiwgZmVtYWxlX3ZhcmlhbmNlLCAiXG4iLA0KICAgICJGZW1hbGUgU3RhbmRhcmQgRGV2aWF0aW9uOlx0Iiwgc3FydChmZW1hbGVfdmFyaWFuY2UpLCAiXG4iKQ0KYGBgDQoNCiMjIEVxdWFsaXR5IFRlc3Qgb2YgUG9wdWxhdGlvbiBWYXJpYW5jZXMNCg0KTm93IHdlIGFwcGx5IGEgcm9idXN0IHRlc3QgZm9yIGFwcHJveGltYXRlbHkgbm9ybWFsIGRhdGEgdG8gdGVzdCB0aGUgbnVsbCBoeXBvdGhlc2lzIHRoYXQgdGhlIG1hbGUgYW5kIGZlbWFsZSBwb3B1bGF0aW9ucyBoYXZlIHRoZSBzYW1lIHZhcmlhbmNlLg0KDQpgYGB7cn0NCiMgTGV2ZW5lIHRlc3QgaXMgcm9idXN0IGZvciBhcHByb3hpbWF0ZWx5IG5vcm1hbCBkYXRhDQpsZXZlbmVfdGVzdF9yZXN1bHQgPC0gbGV2ZW5lVGVzdChIZWlnaHQgfiBTZXgsDQogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBkYXRhID0gcmJpbmQobWFsZV9oZWlnaHRzLCBmZW1hbGVfaGVpZ2h0cykpDQoNCiMgRl90ZXN0X3Jlc3VsdCA8LSB2YXIudGVzdChIZWlnaHQgfiBTZXgsDQojICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIGRhdGEgPSByYmluZChtYWxlX2hlaWdodHMsIGZlbWFsZV9oZWlnaHRzKSwNCiMgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgYWx0ZXJuYXRpdmU9InR3by5zaWRlZCIpDQoNCnByaW50KGxldmVuZV90ZXN0X3Jlc3VsdCkNCmBgYA0KDQojIyBFcXVhbGl0eSBUZXN0IG9mIFBvcHVsYXRpb24gTWVhbnMNCg0KRmluYWxseSB3ZSBhcHBseSBhIFdlbGNoJ3MgdC10ZXN0IGZvciBlcXVhbGl0eSBvZiBtZWFucywgYXNzdW1pbmcgdW5lcXVhbCB2YXJpYW5jZS4NCg0KYGBge3IsIGxpbmV3aWR0aD04MH0NCiMgUGVyZm9ybSBXZWxjaCdzIHQtdGVzdCBmb3IgRXF1YWxpdHkgb2YgTWVhbnMgKGZvciB1bmVxdWFsIHZhcmlhbmNlcykNCnRfdGVzdF9yZXN1bHQgPC0gdC50ZXN0KEhlaWdodCB+IFNleCwNCiAgICAgICAgICAgICAgICAgICAgICAgIGRhdGEgPSByYmluZChtYWxlX2hlaWdodHMsIGZlbWFsZV9oZWlnaHRzKSwgDQogICAgICAgICAgICAgICAgICAgICAgICB2YXIuZXF1YWwgPSBGQUxTRSkNCnByaW50KHRfdGVzdF9yZXN1bHQpDQpgYGANCg==