This is an R Markdown Notebook for hypothesis testing the means and variances of two populations.

We begin by loading the necessary libraries.

# Load necessary libraries
library(stats)    # Statistical functions
library(ggplot2)  # Plots
library(ggpubr)   # Publication quality plots
library(car)      # Statistical tests
library(dplyr)    # Data manipulation
library(boot)     # Bootstrap confidence intervals
library(qqplotr)  # Normal probability plots

Load Data

Next we read the .csv file and randomly select a sample from both the male and female populations.

# Read the CSV file
data <- read.csv("heights-by-gender.csv")

# Set Gender as a categorical factor
data$Sex <- as.factor(data$Sex)

# Set sample size
sample_size <- 1000

# Randomly select 30 males and 30 females
set.seed(321)  # Setting a seed for reproducibility
male_heights <- data %>% 
               filter(Sex == 'Male') %>% 
               sample_n(sample_size)

set.seed(654)  # Setting a different seed for the female sample
female_heights <- data %>% 
                 filter(Sex == 'Female') %>% 
                 sample_n(sample_size)

Normal Probability Plots

Normal probability plots are useful to visually assess if the data is approximately normally distributed. In a perfectly normal distribution, the data points would form a straight line. Significant deviations from this line suggest departures from normality.

# Create and display the QQ plots
ggplot(data = rbind(male_heights,female_heights), aes(sample = Height)) +
        stat_qq_band(alpha=0.2, conf=0.95, qtype=1, bandType = "boot") +
        stat_qq_line(identity=TRUE,alpha=0.6) +
        stat_qq_point(col="blue", size=0.5) +
        facet_wrap(~Sex) +
        ggtitle(paste("Normal Probability Plot for Sample")) +
        labs(x = "Theoretical Values", y = "Sample Values") +
        theme_minimal() +
        theme(plot.title = element_text(hjust = 0.5))

Sample Means and Sample Variances

Now we calculate the sample means and sample variances.

# Calculate Means and Variances
male_mean <- mean(male_heights$Height)
male_variance <- var(male_heights$Height)

female_mean <- mean(female_heights$Height)
female_variance <- var(female_heights$Height)

# Display the calculated values
cat(" Male Mean:\t\t\t\t\t", male_mean, "\n", 
    "Male Variance:\t\t\t\t", male_variance, "\n",
    "Male Standard Deviation:\t", sqrt(male_variance), "\n\n",
    "Female Mean:\t\t\t\t", female_mean, "\n",
    "Female Variance:\t\t\t", female_variance, "\n",
    "Female Standard Deviation:\t", sqrt(female_variance), "\n")
 Male Mean:                  171.1412 
 Male Variance:              102.7636 
 Male Standard Deviation:    10.13724 

 Female Mean:                149.8925 
 Female Variance:            122.6704 
 Female Standard Deviation:  11.07567 

Equality Test of Population Variances

Now we apply a robust test for approximately normal data to test the null hypothesis that the male and female populations have the same variance.

# Levene test is robust for approximately normal data
levene_test_result <- leveneTest(Height ~ Sex,
                                 data = rbind(male_heights, female_heights))

# F_test_result <- var.test(Height ~ Sex,
#                               data = rbind(male_heights, female_heights),
#                               alternative="two.sided")

print(levene_test_result)
Levene's Test for Homogeneity of Variance (center = median)
        Df F value  Pr(>F)  
group    1  6.1769 0.01302 *
      1998                  
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Equality Test of Population Means

Finally we apply a Welch’s t-test for equality of means, assuming unequal variance.

# Perform Welch's t-test for Equality of Means (for unequal variances)
t_test_result <- t.test(Height ~ Sex,
                        data = rbind(male_heights, female_heights), 
                        var.equal = FALSE)
print(t_test_result)

    Welch Two Sample t-test

data:  Height by Sex
t = -44.753, df = 1982.5, p-value < 2.2e-16
alternative hypothesis: true difference in means between group Female and group Male is not equal to 0
95 percent confidence interval:
 -22.17989 -20.31757
sample estimates:
mean in group Female   mean in group Male 
            149.8925             171.1412 
LS0tDQp0aXRsZTogIlN0YXRpc3RpY2FsIEFuYWx5c2lzIG9mIEhlaWdodCBEaWZmZXJlbmNlcyBieSBHZW5kZXIiDQpvdXRwdXQ6DQogIGh0bWxfbm90ZWJvb2s6IGRlZmF1bHQNCiAgaHRtbF9kb2N1bWVudDogZGVmYXVsdA0KICBwZGZfZG9jdW1lbnQ6IGRlZmF1bHQNCmVkaXRvcl9vcHRpb25zOg0KICBjaHVua19vdXRwdXRfdHlwZTogaW5saW5lDQotLS0NCg0KVGhpcyBpcyBhbiBbUiBNYXJrZG93bl0oaHR0cDovL3JtYXJrZG93bi5yc3R1ZGlvLmNvbSkgTm90ZWJvb2sgZm9yIGh5cG90aGVzaXMgdGVzdGluZyB0aGUgbWVhbnMgYW5kIHZhcmlhbmNlcyBvZiB0d28gcG9wdWxhdGlvbnMuDQoNCldlIGJlZ2luIGJ5IGxvYWRpbmcgdGhlIG5lY2Vzc2FyeSBsaWJyYXJpZXMuDQoNCmBgYHtyIGVjaG89VFJVRSwgZXJyb3I9RkFMU0UsIG1lc3NhZ2U9RkFMU0UsIHdhcm5pbmc9RkFMU0UsIHJlc3VsdHM9J2hpZGUnfQ0KIyBMb2FkIG5lY2Vzc2FyeSBsaWJyYXJpZXMNCmxpYnJhcnkoc3RhdHMpICAgICMgU3RhdGlzdGljYWwgZnVuY3Rpb25zDQpsaWJyYXJ5KGdncGxvdDIpICAjIFBsb3RzDQpsaWJyYXJ5KGdncHVicikgICAjIFB1YmxpY2F0aW9uIHF1YWxpdHkgcGxvdHMNCmxpYnJhcnkoY2FyKSAgICAgICMgU3RhdGlzdGljYWwgdGVzdHMNCmxpYnJhcnkoZHBseXIpICAgICMgRGF0YSBtYW5pcHVsYXRpb24NCmxpYnJhcnkoYm9vdCkgICAgICMgQm9vdHN0cmFwIGNvbmZpZGVuY2UgaW50ZXJ2YWxzDQpsaWJyYXJ5KHFxcGxvdHIpICAjIE5vcm1hbCBwcm9iYWJpbGl0eSBwbG90cw0KYGBgDQoNCmBgYHtyIHdyYXAtaG9vaywgbWVzc2FnZT1GQUxTRSwgd2FybmluZz1GQUxTRSwgaW5jbHVkZT1GQUxTRX0NCmxpYnJhcnkoa25pdHIpDQojIFRoaXMgZnVuY3Rpb24gYWxsb3dzIHRleHQgd3JhcHBpbmcgd2hlbiBrbml0dGluZyB0byBQREYNCmhvb2tfb3V0cHV0ID0ga25pdF9ob29rcyRnZXQoJ291dHB1dCcpDQprbml0X2hvb2tzJHNldChvdXRwdXQgPSBmdW5jdGlvbih4LCBvcHRpb25zKSB7DQogICMgdGhpcyBob29rIGlzIHVzZWQgb25seSB3aGVuIHRoZSBsaW5ld2lkdGggb3B0aW9uIGlzIG5vdCBOVUxMDQogIGlmICghaXMubnVsbChuIDwtIG9wdGlvbnMkbGluZXdpZHRoKSkgew0KICAgIHggPSB4ZnVuOjpzcGxpdF9saW5lcyh4KQ0KICAgICMgYW55IGxpbmVzIHdpZGVyIHRoYW4gbiBzaG91bGQgYmUgd3JhcHBlZA0KICAgIGlmIChhbnkobmNoYXIoeCkgPiBuKSkgeCA9IHN0cndyYXAoeCwgd2lkdGggPSBuKQ0KICAgIHggPSBwYXN0ZSh4LCBjb2xsYXBzZSA9ICdcbicpDQogIH0NCiAgaG9va19vdXRwdXQoeCwgb3B0aW9ucykNCn0pDQpgYGANCg0KIyMgTG9hZCBEYXRhDQoNCk5leHQgd2UgcmVhZCB0aGUgLmNzdiBmaWxlIGFuZCByYW5kb21seSBzZWxlY3QgYSBzYW1wbGUgZnJvbSBib3RoIHRoZSBtYWxlIGFuZCBmZW1hbGUgcG9wdWxhdGlvbnMuDQoNCmBgYHtyfQ0KIyBSZWFkIHRoZSBDU1YgZmlsZQ0KZGF0YSA8LSByZWFkLmNzdigiaGVpZ2h0cy1ieS1nZW5kZXIuY3N2IikNCg0KIyBTZXQgR2VuZGVyIGFzIGEgY2F0ZWdvcmljYWwgZmFjdG9yDQpkYXRhJFNleCA8LSBhcy5mYWN0b3IoZGF0YSRTZXgpDQoNCiMgU2V0IHNhbXBsZSBzaXplDQpzYW1wbGVfc2l6ZSA8LSAxMDAwDQoNCiMgUmFuZG9tbHkgc2VsZWN0IDMwIG1hbGVzIGFuZCAzMCBmZW1hbGVzDQpzZXQuc2VlZCgzMjEpICAjIFNldHRpbmcgYSBzZWVkIGZvciByZXByb2R1Y2liaWxpdHkNCm1hbGVfaGVpZ2h0cyA8LSBkYXRhICU+JSANCiAgICAgICAgICAgICAgIGZpbHRlcihTZXggPT0gJ01hbGUnKSAlPiUgDQogICAgICAgICAgICAgICBzYW1wbGVfbihzYW1wbGVfc2l6ZSkNCg0Kc2V0LnNlZWQoNjU0KSAgIyBTZXR0aW5nIGEgZGlmZmVyZW50IHNlZWQgZm9yIHRoZSBmZW1hbGUgc2FtcGxlDQpmZW1hbGVfaGVpZ2h0cyA8LSBkYXRhICU+JSANCiAgICAgICAgICAgICAgICAgZmlsdGVyKFNleCA9PSAnRmVtYWxlJykgJT4lIA0KICAgICAgICAgICAgICAgICBzYW1wbGVfbihzYW1wbGVfc2l6ZSkNCg0KYGBgDQoNCiMjIE5vcm1hbCBQcm9iYWJpbGl0eSBQbG90cw0KDQpOb3JtYWwgcHJvYmFiaWxpdHkgcGxvdHMgYXJlIHVzZWZ1bCB0byB2aXN1YWxseSBhc3Nlc3MgaWYgdGhlIGRhdGEgaXMgYXBwcm94aW1hdGVseSBub3JtYWxseSBkaXN0cmlidXRlZC4gSW4gYSBwZXJmZWN0bHkgbm9ybWFsIGRpc3RyaWJ1dGlvbiwgdGhlIGRhdGEgcG9pbnRzIHdvdWxkIGZvcm0gYSBzdHJhaWdodCBsaW5lLiBTaWduaWZpY2FudCBkZXZpYXRpb25zIGZyb20gdGhpcyBsaW5lIHN1Z2dlc3QgZGVwYXJ0dXJlcyBmcm9tIG5vcm1hbGl0eS4NCg0KYGBge3IsIGZpZy53aWR0aCA9IDEwfQ0KIyBDcmVhdGUgYW5kIGRpc3BsYXkgdGhlIFFRIHBsb3RzDQpnZ3Bsb3QoZGF0YSA9IHJiaW5kKG1hbGVfaGVpZ2h0cyxmZW1hbGVfaGVpZ2h0cyksIGFlcyhzYW1wbGUgPSBIZWlnaHQpKSArDQogICAgICAgIHN0YXRfcXFfYmFuZChhbHBoYT0wLjIsIGNvbmY9MC45NSwgcXR5cGU9MSwgYmFuZFR5cGUgPSAiYm9vdCIpICsNCiAgICAgICAgc3RhdF9xcV9saW5lKGlkZW50aXR5PVRSVUUsYWxwaGE9MC42KSArDQogICAgICAgIHN0YXRfcXFfcG9pbnQoY29sPSJibHVlIiwgc2l6ZT0wLjUpICsNCiAgICAgICAgZmFjZXRfd3JhcCh+U2V4KSArDQogICAgICAgIGdndGl0bGUocGFzdGUoIk5vcm1hbCBQcm9iYWJpbGl0eSBQbG90IGZvciBTYW1wbGUiKSkgKw0KICAgICAgICBsYWJzKHggPSAiVGhlb3JldGljYWwgVmFsdWVzIiwgeSA9ICJTYW1wbGUgVmFsdWVzIikgKw0KICAgICAgICB0aGVtZV9taW5pbWFsKCkgKw0KICAgICAgICB0aGVtZShwbG90LnRpdGxlID0gZWxlbWVudF90ZXh0KGhqdXN0ID0gMC41KSkNCg0KYGBgDQoNCiMjIFNhbXBsZSBNZWFucyBhbmQgU2FtcGxlIFZhcmlhbmNlcw0KDQpOb3cgd2UgY2FsY3VsYXRlIHRoZSBzYW1wbGUgbWVhbnMgYW5kIHNhbXBsZSB2YXJpYW5jZXMuDQoNCmBgYHtyfQ0KIyBDYWxjdWxhdGUgTWVhbnMgYW5kIFZhcmlhbmNlcw0KbWFsZV9tZWFuIDwtIG1lYW4obWFsZV9oZWlnaHRzJEhlaWdodCkNCm1hbGVfdmFyaWFuY2UgPC0gdmFyKG1hbGVfaGVpZ2h0cyRIZWlnaHQpDQoNCmZlbWFsZV9tZWFuIDwtIG1lYW4oZmVtYWxlX2hlaWdodHMkSGVpZ2h0KQ0KZmVtYWxlX3ZhcmlhbmNlIDwtIHZhcihmZW1hbGVfaGVpZ2h0cyRIZWlnaHQpDQoNCiMgRGlzcGxheSB0aGUgY2FsY3VsYXRlZCB2YWx1ZXMNCmNhdCgiIE1hbGUgTWVhbjpcdFx0XHRcdFx0IiwgbWFsZV9tZWFuLCAiXG4iLCANCiAgICAiTWFsZSBWYXJpYW5jZTpcdFx0XHRcdCIsIG1hbGVfdmFyaWFuY2UsICJcbiIsDQogICAgIk1hbGUgU3RhbmRhcmQgRGV2aWF0aW9uOlx0Iiwgc3FydChtYWxlX3ZhcmlhbmNlKSwgIlxuXG4iLA0KICAgICJGZW1hbGUgTWVhbjpcdFx0XHRcdCIsIGZlbWFsZV9tZWFuLCAiXG4iLA0KICAgICJGZW1hbGUgVmFyaWFuY2U6XHRcdFx0IiwgZmVtYWxlX3ZhcmlhbmNlLCAiXG4iLA0KICAgICJGZW1hbGUgU3RhbmRhcmQgRGV2aWF0aW9uOlx0Iiwgc3FydChmZW1hbGVfdmFyaWFuY2UpLCAiXG4iKQ0KYGBgDQoNCiMjIEVxdWFsaXR5IFRlc3Qgb2YgUG9wdWxhdGlvbiBWYXJpYW5jZXMNCg0KTm93IHdlIGFwcGx5IGEgcm9idXN0IHRlc3QgZm9yIGFwcHJveGltYXRlbHkgbm9ybWFsIGRhdGEgdG8gdGVzdCB0aGUgbnVsbCBoeXBvdGhlc2lzIHRoYXQgdGhlIG1hbGUgYW5kIGZlbWFsZSBwb3B1bGF0aW9ucyBoYXZlIHRoZSBzYW1lIHZhcmlhbmNlLg0KDQpgYGB7cn0NCiMgTGV2ZW5lIHRlc3QgaXMgcm9idXN0IGZvciBhcHByb3hpbWF0ZWx5IG5vcm1hbCBkYXRhDQpsZXZlbmVfdGVzdF9yZXN1bHQgPC0gbGV2ZW5lVGVzdChIZWlnaHQgfiBTZXgsDQogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBkYXRhID0gcmJpbmQobWFsZV9oZWlnaHRzLCBmZW1hbGVfaGVpZ2h0cykpDQoNCiMgRl90ZXN0X3Jlc3VsdCA8LSB2YXIudGVzdChIZWlnaHQgfiBTZXgsDQojICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIGRhdGEgPSByYmluZChtYWxlX2hlaWdodHMsIGZlbWFsZV9oZWlnaHRzKSwNCiMgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgYWx0ZXJuYXRpdmU9InR3by5zaWRlZCIpDQoNCnByaW50KGxldmVuZV90ZXN0X3Jlc3VsdCkNCmBgYA0KDQojIyBFcXVhbGl0eSBUZXN0IG9mIFBvcHVsYXRpb24gTWVhbnMNCg0KRmluYWxseSB3ZSBhcHBseSBhIFdlbGNoJ3MgdC10ZXN0IGZvciBlcXVhbGl0eSBvZiBtZWFucywgYXNzdW1pbmcgdW5lcXVhbCB2YXJpYW5jZS4NCg0KYGBge3IsIGxpbmV3aWR0aD04MH0NCiMgUGVyZm9ybSBXZWxjaCdzIHQtdGVzdCBmb3IgRXF1YWxpdHkgb2YgTWVhbnMgKGZvciB1bmVxdWFsIHZhcmlhbmNlcykNCnRfdGVzdF9yZXN1bHQgPC0gdC50ZXN0KEhlaWdodCB+IFNleCwNCiAgICAgICAgICAgICAgICAgICAgICAgIGRhdGEgPSByYmluZChtYWxlX2hlaWdodHMsIGZlbWFsZV9oZWlnaHRzKSwgDQogICAgICAgICAgICAgICAgICAgICAgICB2YXIuZXF1YWwgPSBGQUxTRSkNCnByaW50KHRfdGVzdF9yZXN1bHQpDQpgYGANCg==