I am currently teaching a new course on mathematical data science. It is done in the style of Inquiry Based Learning. Note that the scripts are still in a state of flux. I am including them in text form in an attempt to get Microsoft CoPilot to create a Socratic tutor for the course, since it can't read PDFs as an agent.
SCRIPT 1:
\documentclass[12pt]{article}
\usepackage[utf8]{inputenc}
\usepackage{amsmath, amssymb, amsthm}
\usepackage{geometry}
\usepackage{enumitem}
\usepackage{hyperref}
\geometry{margin=1in}
\newcommand{\Borel}{\mathcal{B}}
\newcommand{\M}{\mathcal{M}}
\newcommand{\R}{\mathbb{R}}
\newcommand{\E}{\mathbb{E}}
\renewcommand{\P}{\mathbb{P}}
% Theorem environments
\newtheorem{theorem}{Theorem}
\newtheorem{lemma}[theorem]{Lemma}
\newtheorem{proposition}[theorem]{Proposition}
\newtheorem{example}[theorem]{Example}
\theoremstyle{definition}
\newtheorem{definition}[theorem]{Definition}
\theoremstyle{remark}
\newtheorem{remark}[theorem]{Remark}
\title{Script 1: Random Variables and Cumulative Distribution Functions}
\author{Add your name here}
\date{}
\begin{document}
\maketitle
\section*{Useful facts about series (assumed background knowledge)}
Assume $b_n\geq 0$ for all $n$.
\begin{enumerate}
\item $\sum_{n=0}^K b_n<M$ for all $K$ (uniformly bounded) implies $\sum_{n=0}^\infty b_n$ converges with limit bounded by $M$
\item If $\sum_{n=0}^\infty b_n$ converges, then for all $\epsilon>0$ there exists $K$ such that $\sum_{n=L}^\infty b_n<\epsilon$ for all $L\geq K$
\end{enumerate}
\section*{Script Proper}
\begin{definition}
For us ``nice subsets'' of $\R$ are anything we can construct from intervals via countable unions, intersections or complements. We denote this set of nice subsets by $\Borel(\R)$ where $\Borel$ stands for \textbf{Borel}. We will assume every set in this course is ``nice.''
\end{definition}
\begin{definition}
A \textbf{probability distribution} is a function $\nu: \Borel (\R)\to [0,1]$ such that
\begin{itemize}
\item $\nu(\R)=1 $
\item For $A_1, A_2, \ldots$ countably many disjoint sets we have $\nu(\cup A_i)=\sum_i \nu(A_i)$
\end{itemize}
\end{definition}
\begin{lemma}
Let $\nu$ be a probability distribution. For all $A\in \Borel(\R)$ we have
$\nu(A^c)=1-\nu (A)$.
In particular, $\nu(\emptyset)=0$.
(Here $A^c=\{x\in \mathbb{R}\mid x\notin A\}$ is the complement of $A$)
\end{lemma}
%\begin{proof}
%
%\end{proof}
\begin{definition}
A \textbf{random variable} $X$ with distribution $\nu$ (written as $X\sim \nu$) is a random real number such that $\P(X\in A)=\nu(A)$ for all $A\in \Borel(\R)$.
\end{definition}
We often will use the shortcut notation $\P(X\leq a)$ for $\P(X\in (-\infty, a])$, $\P(a\leq X< b)$ for $\P(X\in [a, b))$, and $\P(X=x)$ for $\P(X\in \{x\})$ etc.
%\begin{definition}%[Cumulative Distribution Function]
%Given a random variable $X\sim \mu$ its \textbf{cumulative distribution function} (CDF) is the function
%$F_X: \mathbb{R} \to [0,1]$
%with $F_X(x)=\P(X\leq x)$
%\end{definition}
\begin{lemma}
If $X$ is a random variable then $A_n:=\{x\in \R\mid \P(X=x)\in (\frac{1}{n+1}, \frac{1}{n}]\}$ has at most $n$ elements.
\end{lemma}
%\begin{proof}
%
%\end{proof}
\begin{theorem}
If $X$ is a random variable then $A:=\{x\in \R\mid \P(X=x)>0\}$ is countable.
\end{theorem}
%\begin{proof}
%
%\end{proof}
\begin{lemma}
If $\nu$ is a probability distribution and $A_1, A_2 \in \Borel(\R)$ with $A_1\subset A_2$ then $\nu(A_1)\leq \nu(A_2)$.
\end{lemma}
%\begin{proof}
%
%\end{proof}
\begin{definition}%[Cumulative Distribution Function]
Given a random variable $X\sim \nu$, its \textbf{cumulative distribution function} (CDF) is the function
$F_X: \mathbb{R} \to [0,1]$
with $F_X(x)=\P(X\leq x)$.
\end{definition}
\begin{example}\label{example:unfair_coin}
Let $X$ represent the outcome of an unfair coin flip with $X=-1$ (representing tails) with probability $0.3$ and $X=1$ (representing heads) with probability $0.7$.
\begin{enumerate}[label=(\alph*)]
\item Using cases depending on whether or not each of $-1$ or $1$ are elements of $A$, write a formula for $\P(X\in A)$ for $A\subset \R$.
\item Find the CDF $F_X$.
\end{enumerate}
\end{example}
%\begin{proof}
%
%\end{proof}
\begin{example}\label{example:interval}
Let $X$ be a random variable whose CDF is $F_X: \R\to [0,1]$ with $F_X(x) = 0$ for $x < 0$, $F_X(x) = x$ for $0 \leq x \leq 1$, $F_X(x) = 1$ for $x > 1$
\begin{enumerate}[label=(\alph*)]
\item Compute $\P(X \leq 0.3)$, $\P(X > 0.7)$, $\P(0.2 < X \leq 0.8)$.
\item Find $\P(X \in(0.5-\epsilon, 0.5])$ (for $\epsilon>0$ small) and $\P(X = 0.5)$.
\item Describe what the random variable $X$ models.
\end{enumerate}
\end{example}
%\begin{proof}
%
%\end{proof}
\begin{definition}
A function $g:\R\to \R$ is \textbf{non-decreasing} if $x<y$ implies $g(x)\leq g(y)$.
\end{definition}
\begin{lemma}
If $F$ is the CDF of a random variable then $F$ is non-decreasing.
\end{lemma}
%\begin{proof}
%
%\end{proof}
\begin{lemma}
Let $\nu$ be a probability distribution. Then
$$\nu((-\infty, 0]) + \sum_{n=1}^\infty \nu((n-1, n])=1.$$
\end{lemma}
%\begin{proof}
%
%\end{proof}
\begin{lemma}
Let $\nu$ be a probability distribution. For all $\epsilon>0$ there exists $K$ such that $\nu((x, \infty))<\epsilon$ for all $x>K$.
\end{lemma}
%\begin{proof}
%
%\end{proof}
\begin{theorem}
If $F$ is the CDF of a random variable then $$\lim_{x \to -\infty} F(x) = 0 \quad \text{and} \quad \lim_{x \to \infty} F(x) = 1.$$
\end{theorem}
%\begin{proof}
%
%\end{proof}
\begin{lemma}
Fix CDF $F$ of distribution $\nu$. For all $a\in \R$, and positive integers $K$ we have $$\sum_{n=K}^\infty \nu\left(\left(a+\frac{1}{n+1}, a+\frac{1}{n}\right]\right)=\nu\left(\left(a, a+\frac{1}{K}\right]\right).$$
\end{lemma}
%\begin{proof}
%
%\end{proof}
\begin{lemma}
Fix CDF $F$ of distribution $\nu$. For all $a\in \R$ and $\epsilon>0$ there exists $N$ such that $$\sum_{n=N}^\infty \nu\left(\left(a+\frac{1}{n+1}, a+\frac{1}{n}\right]\right)<\epsilon.$$
\end{lemma}
%\begin{proof}
%
%\end{proof}
\begin{definition}
A function $g:\R\to \R$ is \textbf{right-continuous} if $\lim_{h\to 0^+}g(x+h)=g(x)$ for all $x\in \R$ (for all $\epsilon>0$ there exists $\delta>0$ such that $0<h<\delta$ implies $|g(x+h)-g(x)|<\epsilon$.)
\end{definition}
\begin{theorem}
If $F$ is the CDF of a random variable then $F$ is right-continuous.
\end{theorem}
%\begin{proof}
%
%\end{proof}
\begin{lemma}
If $F_X$ is the CDF of random variable $X$ then $\P(X=x)=F_X(x)-\lim_{y \to x^-} F_X(y)$.
\end{lemma}
%\begin{proof}
%
%\end{proof}
\begin{lemma}
$F_X$ is continuous at $x$ if and only if $\P(X=x)=0$.
\end{lemma}
%\begin{proof}
%
%\end{proof}
\begin{example}
Given CDF $F_X$ of random variable $X$, and $a<b$ real numbers, find a formula in terms of $F_X$ for
\begin{itemize}
\item $\P(a<X\leq b)$
\item $\P(a\leq X\leq b)$
\item $\P(a<X<b)$
\item $\P(a\leq X<b)$
\end{itemize}
\end{example}
%\begin{proof}
%
%\end{proof}
\begin{example}\label{example:dice}
Let $X$ be a random variable whose CDF $F_X$ has jumps of size $1/6$ at each of $x = 1, 2, 3, 4, 5, 6$:
\begin{enumerate}[label=(\alph*)]
\item Show $\P(X\notin\{1,2,3,4,5,6\})=0$
\item Explain why $F_X$ must be locally flat (except at $x = 1, 2, 3, 4, 5, 6$).
\item Compute $\P(X\leq 4.5)$, $\P(X=5)$ and $\P(X\in \{1,3, 5, 7\})$
\item Describe what the random variable $X$ models.
\end{enumerate}
\end{example}
%\begin{proof}
%
%\end{proof}
\begin{definition}
A function $g:\R\to \R$ is a \textbf{step function} if it is constant except at countably many jump discontinuities.
\end{definition}
\begin{definition}%[Discrete CDF]
A CDF $F_X$ is \textbf{discrete} if it is a step function. If $F_X$ is a discrete CDF with jumps at points $x_1, x_2, x_3, \ldots$, the \textbf{probability mass function} (PMF) is:
$$p(x) =\P(X=x)= F_X(x) - \lim_{h\to 0^-}F_X(x+h)$$
\end{definition}
\begin{lemma}
If $F_X$ is discrete and $A:=\{x\in \R\mid \P(X=x)>0\}$, then $\P(X \in A)=1$.
\end{lemma}
%\begin{proof}
%
%\end{proof}
\begin{example}
Explain why the CDF in Example \ref{example:dice} is discrete. Compute its PMF.
\end{example}
%\begin{proof}
%
%\end{proof}
\begin{definition}%[Absolutely Continuous CDF]
A CDF $F_X$ is called \textbf{absolutely continuous} if there exists a non-negative function $f_X: \mathbb{R} \to [0,\infty)$ such that $F_X(x) = \int_{-\infty}^x f_X(t) \, dt$ for all $x \in \mathbb{R}$.
The function $f_X$ is called a \textbf{probability density function} (PDF) of $F_X$.
\end{definition}
\begin{example}
Explain why the CDF in Example \ref{example:interval} is absolutely continuous. Compute its PDF.
\end{example}
%\begin{proof}
%
%\end{proof}
\begin{lemma}
If $F_X$ is absolutely continuous with PDF $f_X$, then $\P(a<X\leq b)=\int_a^b f_X(x) \, dx$.
\end{lemma}
%\begin{proof}
%
%\end{proof}
\begin{lemma}
Let $F$ be an absolutely continuous CDF with PDF $f$ such that there exists $M$ with $0 \leq f(x)\leq M$ for all $x\in \R$. Then $F$ is continuous.
\end{lemma}
%\begin{proof}
%
%\end{proof}
Note that more generally any absolutely continuous CDF is continuous---we are not going to prove the general case.
\begin{proposition}
If the CDF $F$ is differentiable, then $F(x) = \int_{-\infty}^x F'(t) \, dt.$
Moreover, $f(x) = F'(x)$ is a PDF of $F$.
\end{proposition}
%\begin{proof}
%
%\end{proof}
\begin{theorem}[Advanced problem - proof not required]
A function $F$ is the CDF of some random variable if and only if it
\begin{itemize}
\item is non-decreasing,
\item is right-continuous, and
\item satisfies the limits $\lim_{x \to -\infty} F(x) = 0$ and $\lim_{x \to \infty} F(x) = 1$.
\end{itemize}
Furthermore, the distribution of this random variable is uniquely determined by this CDF.
\end{theorem}
%No proof expected here!
\begin{example}
Consider $F(x) = 1 - e^{-\lambda x}$ for $x \geq 0$ (and $F(x) = 0$ for $x < 0$), where $\lambda > 0$.
\begin{enumerate}[label=(\alph*)]
\item Verify this is a CDF of some random variable $X$.
\item Find the PDF $f(x) = F'(x)$ for $x\neq 0$.
\item Compute $\P(1 < X \leq 3)$ using both $F$ and $f$
\end{enumerate}
\end{example}
%\begin{proof}
%
%\end{proof}
\end{document}
SCRIPT 2:
\documentclass[11pt]{article}
\usepackage[margin=1in]{geometry}
\usepackage{amsmath,amssymb,amsthm}
\usepackage{mathtools}
\theoremstyle{theorem}
\newtheorem{lemma}{Lemma}
\newtheorem{proposition}[lemma]{Proposition}
\newtheorem{theorem}[lemma]{Theorem}
\newtheorem{example}[lemma]{Example}
\theoremstyle{definition}
\newtheorem{definition}[lemma]{Definition}
\title{Joint Distributions and Independence\\
\large Mathematical Data Science -- Script 2}
\author{}
\date{}
\usepackage[most]{tcolorbox}
\newtcolorbox{proofbox}{
breakable,
enhanced,
colback=white,
colframe=black,
boxrule=0.8pt,
arc=2pt,
left=6pt,
right=6pt,
top=6pt,
bottom=6pt,
before skip=10pt,
after skip=10pt
}
\begin{document}
\maketitle
\begin{definition}
A \textbf{joint probability distribution} of two random variables $X$ and $Y$ is a function $\nu: \mathcal{B}(\mathbb{R}^2) \to [0,1]$ such that
\begin{itemize}
\item $\nu(A) + \nu(A^c) = 1$ for all $A \in \mathcal{B}(\mathbb{R}^2)$
\item For $A_1, A_2, \ldots$ countably many disjoint sets we have $\nu(\cup A_i) = \sum_i \nu(A_i)$
\end{itemize}
We write $(X,Y) \sim \nu$ and $P((X,Y) \in A) = \nu(A)$ for $A \in \mathcal{B}(\mathbb{R}^2)$.
\end{definition}
\begin{lemma}
Let $A_1, A_2 \in \mathcal{B}(\mathbb{R}^2)$ with $A_1\subset A_2$. If $\nu(A_2)=0$ then $\nu(A_1)=0$.
\end{lemma}
\begin{proofbox}
\begin{proof}
\end{proof}
\end{proofbox}
\begin{definition}
The \textbf{joint cumulative distribution function (joint CDF)} of random variables $X$ and $Y$ is the function $F_{X,Y}: \mathbb{R}^2 \to [0,1]$ defined by
\[
F_{X,Y}(x,y) = \mathbb{P}(X \leq x \text{ and } Y \leq y).
\]
\end{definition}
\begin{lemma}
For a joint CDF $F_{X,Y}$ and real numbers $a < b$ and $c < d$,
\[
\mathbb{P}(a < X \leq b \text{ and } c < Y \leq d) = F_{X,Y}(b,d) - F_{X,Y}(a,d) - F_{X,Y}(b,c) + F_{X,Y}(a,c).
\]
\end{lemma}
\begin{proofbox}
\begin{proof}
\end{proof}
\end{proofbox}
\begin{lemma}
If $F_{X,Y}$ is a joint CDF then $F_{X,Y}$ is non-decreasing in each argument.
% \item[(c)] $\lim_{x \to -\infty} F_{X,Y}(x,y) = 0$ and $\lim_{y \to -\infty} F_{X,Y}(x,y) = 0$
% \item[(d)] $\lim_{x \to \infty, y \to \infty} F_{X,Y}(x,y) = 1$
\end{lemma}
\begin{proofbox}
\begin{proof}
\end{proof}
\end{proofbox}
\begin{definition}
We say that a function $h:\mathbb{R}^2 \to \mathbb{R}$ has double limit $\lim_{(x,y)\to (\infty, \infty)} h(x,y)=K$ if for all $\epsilon>0$ there exists $M$ such that $|h(x,y)-K|<\epsilon$ whenever $x>M$ and $y>M$.
\end{definition}
\begin{lemma}
For all $x,y$ we have $\mathbb{P}(X\leq x \text{ and } Y\leq y)\geq 1-\mathbb{P}(X>x) -\mathbb{P}(Y>y)$.
\end{lemma}
\begin{proofbox}
\begin{proof}
\end{proof}
\end{proofbox}
\begin{lemma}
If $F_{X,Y}$ is a joint CDF then $\lim_{(x,y)\to (\infty, \infty)} F_{X,Y}(x,y)=1$.
\end{lemma}
\begin{proofbox}
\begin{proof}
\end{proof}
\end{proofbox}
\begin{definition}
Given a joint distribution of $(X,Y)$, the \textbf{marginal distribution} of $X$ is the probability distribution $\mu_X$ defined by $\mu_X(A) = P(X \in A)$ for $A \in \mathcal{B}(\mathbb{R})$. The \textbf{marginal CDF} of $X$ is
\[
F_X(x) = P(X \leq x)
\]
The marginal distribution and CDF of $Y$ are defined analogously.
\end{definition}
%Union of sets here to have up to fixed $x$
\begin{lemma}
Let $F_{X,Y}$ be the joint CDF with marginal CDF $F_X$.
Show $F_X(x)= \lim_{y \to \infty} F_{X,Y}(x,y)$.
\end{lemma}
\begin{proofbox}
\begin{proof}
\end{proof}
\end{proofbox}
\begin{definition}
A joint CDF $F_{X,Y}$ is \textbf{discrete} if there exist countably many points $(x_i, y_j) \in \mathbb{R}^2$ such that $P((X,Y) = (x_i, y_j)) > 0$ and
$\sum_{i,j} P((X,Y) = (x_i, y_j)) = 1.$
The \textbf{joint probability mass function (joint PMF)} is $p(x,y) = P(X = x \text{ and } Y = y)$.
\end{definition}
\begin{lemma}
Suppose $(X,Y)$ have a discrete joint CDF with joint PMF $p(x,y)$. Fix $x_0\in \mathbb{R}$ and set $A_{x_0}=\{y\mid p(x_0,y)>0\}$.
Then
\begin{enumerate}
\item $A_{x_0}$ is countable.
\item $\mathbb{P}((X,Y)\in \{x_0\}\times A_{x_0}^c)=0$.
\item $\mathbb{P}(X=x_0)=\sum_{y\in A_{x_0}}p(x_0,y)$
\end{enumerate}
\end{lemma}
\begin{proofbox}
\begin{proof}
\end{proof}
\end{proofbox}
\begin{lemma}
Suppose $(X,Y)$ have a discrete joint CDF with joint PMF $p(x,y)$. Let $K=\{x\in \mathbb{R}\mid \mathbb{P}(X=x)=0\}$. Then $K\times \mathbb{R}\subset \{(x,y)\mid p(x,y)=0\}$ and $\mathbb{P}(X\in K)=0$.
\end{lemma}
\begin{proofbox}
\begin{proof}
\end{proof}
\end{proofbox}
\begin{lemma}
If $F_{X,Y}$ is a discrete joint CDF then the marginal CDF $F_X$ is also discrete. Furthermore, the marginal PMF of $X$ is
$p_X(x) = \sum_{\{y\mid p(x,y)>0\}} p(x,y).$
\end{lemma}
\begin{proofbox}
\begin{proof}
\end{proof}
\end{proofbox}
\begin{example}
Let $X$ and $Y$ each take values in $\{1, 2, 3\}$ with joint PMF given by the table:
\[
\begin{array}{c|ccc}
p(x,y) & Y=1 & Y=2 & Y=3 \\
\hline
X=1 & 1/9 & 1/9 & 1/9 \\
X=2 & 1/9 & 1/9 & 1/9 \\
X=3 & 1/9 & 1/9 & 1/9 \\
\end{array}
\]
\begin{enumerate}
\item[(a)] Verify that $\sum_{x,y} p(x,y) = 1$.
\item[(b)] Find the marginal PMFs $p_X$ and $p_Y$.
\item[(c)] Compute $P(X = Y)$ and $P(X < Y)$.
\end{enumerate}
\end{example}
\begin{proofbox}
\end{proofbox}
\begin{example}
Roll a fair die twice. Let $X$ be the result of the first roll and $Y$ be the maximum of the two rolls.
\begin{enumerate}
\item[(a)] Find the joint PMF $p(x,y)$.
\item[(b)] Find the marginal PMF of $Y$.
\item[(c)] Compute $P(X = Y)$.
\end{enumerate}
\end{example}
\begin{proofbox}
\end{proofbox}
\begin{definition}
A joint CDF $F_{X,Y}$ is \textbf{absolutely continuous} if there exists a non-negative function $f_{X,Y}: \mathbb{R}^2 \to [0,\infty)$ such that
$F_{X,Y}(x,y) = \int_{-\infty}^{x} \int_{-\infty}^{y} f_{X,Y}(s,t) \, dt \, ds$
for all $(x,y) \in \mathbb{R}^2$. The function $f_{X,Y}$ is called a \textbf{joint probability density function (joint PDF)}.
\end{definition}
\begin{example}
Let $(X,Y)$ be a random point chosen uniformly from the unit square $[0,1]^2$.
\begin{enumerate}
\item[(a)] Find the joint CDF $F_{X,Y}(x,y)$ for all $(x,y) \in \mathbb{R}^2$.
\item[(b)] Compute $P(X \leq 0.5 \text{ and } Y \leq 0.5)$.
\item[(c)] Find the marginal CDFs $F_X$ and $F_Y$.
\end{enumerate}
\end{example}
\begin{proofbox}
\end{proofbox}
\begin{lemma}
If $(X,Y)$ has joint PDF $f_{X,Y}$, the marginal PDF of $X$ is
\[
f_X(x) = \int_{-\infty}^{\infty} f_{X,Y}(x,y) \, dy.
\]
\end{lemma}
\begin{proofbox}
\begin{proof}
\end{proof}
\end{proofbox}
\begin{example}
Let $(X,Y)$ have joint PDF $f_{X,Y}(x,y) = 2$ for $0 \leq x \leq y \leq 1$ (and $f_{X,Y}(x,y) = 0$ otherwise).
\begin{enumerate}
\item[(a)] Sketch the region where $f_{X,Y}(x,y) > 0$.
\item[(b)] Verify that $\iint f_{X,Y}(x,y) \, dx \, dy = 1$.
\item[(c)] Find the marginal PDFs $f_X$ and $f_Y$.
\item[(d)] Compute $P(X + Y \leq 1)$.
\end{enumerate}
\end{example}
\begin{proofbox}
\end{proofbox}
\begin{example}
Let $(X,Y)$ have joint PDF $f_{X,Y}(x,y) = e^{-x-y}$ for $x, y \geq 0$ and $0$ otherwise.
\begin{enumerate}
\item[(a)] Find the marginal PDFs $f_X$ and $f_Y$.
\item[(b)] Compute $P(X > Y)$.
\item[(c)] Compute $P(X + Y \leq 1)$.
\end{enumerate}
\end{example}
\begin{proofbox}
\end{proofbox}
\begin{definition}
Two random variables $X$ and $Y$ are \textbf{independent} if for all $A, B \in \mathcal{B}(\mathbb{R})$,
\[
P(X \in A \text{ and } Y \in B) = P(X \in A) \cdot P(Y \in B).
\]
We write $X \perp Y$ to denote that $X$ and $Y$ are independent.
\end{definition}
\begin{definition}
The \textbf{product distribution} of two probability distributions $\mu$ and $\nu$ on $\mathbb{R}$ is the distribution $\mu \times \nu$ on $\mathbb{R}^2$ such that if $X \sim \mu$ and $Y \sim \nu$ are independent, then $(X,Y) \sim \mu \times \nu$.
\end{definition}
\begin{lemma}
Random variables $X$ and $Y$ are independent if and only if
\[
F_{X,Y}(x,y) = F_X(x) \cdot F_Y(y) \quad \text{for all } x, y \in \mathbb{R}.
\]
\end{lemma}
\begin{proofbox}
\begin{proof}
\end{proof}
\end{proofbox}
\begin{lemma}
If $(X,Y)$ is discrete with joint PMF $p(x,y)$ and marginal PMFs $p_X$ and $p_Y$, then $X \perp Y$ if and only if
\[
p(x,y) = p_X(x) \cdot p_Y(y) \quad \text{for all } x, y.
\]
\end{lemma}
\begin{proofbox}
\begin{proof}
\end{proof}
\end{proofbox}
\begin{lemma}
If $(X,Y)$ is absolutely continuous with joint PDF $f_{X,Y}$ and marginal PDFs $f_X$ and $f_Y$, then $X \perp Y$ if and only if
\[
f_{X,Y}(x,y) = f_X(x) \cdot f_Y(y) \quad \text{for all } x, y.
\]
\end{lemma}
\begin{proofbox}
\begin{proof}
\end{proof}
\end{proofbox}
\begin{example}
Determine whether $X$ and $Y$ are independent in the earlier examples.
\end{example}
\begin{proofbox}
\end{proofbox}
\begin{lemma}
For $X$ a random variable with distribution $\nu$ and
$g: \mathbb{R} \to \mathbb{R}$ a ``nice'' function. Then we define the random variable $g(X)$ as the random variable with distribution $\nu_g(A)=\nu(g^{-1}(A))$.
\end{lemma}
\begin{proofbox}
\begin{proof}
\end{proof}
\end{proofbox}
\begin{lemma}
If $X \perp Y$ and $g, h: \mathbb{R} \to \mathbb{R}$ are nice functions, then $g(X) \perp h(Y)$.
\end{lemma}
\begin{proofbox}
\begin{proof}
\end{proof}
\end{proofbox}
\begin{example}
If $X \perp Y$, show that $X^2 \perp e^Y$.
\end{example}
\begin{proofbox}
\end{proofbox}
\begin{definition}
Random variables $X_1, X_2, \ldots, X_n$ are \textbf{mutually independent} if for all $A_1, A_2, \ldots, A_n \in \mathcal{B}(\mathbb{R})$,
\[
P(X_1 \in A_1, X_2 \in A_2, \ldots, X_n \in A_n) = \prod_{i=1}^{n} P(X_i \in A_i).
\]
\end{definition}
\begin{definition}
Random variables $X_1, X_2, \ldots, X_n$ are \textbf{pairwise independent} if $X_i \perp X_j$ for all $i \neq j$.
\end{definition}
\begin{lemma}
If $X_1, X_2, \ldots, X_n$ are mutually independent, then they are pairwise independent.
\end{lemma}
\begin{proofbox}
\begin{proof}
\end{proof}
\end{proofbox}
\begin{example}
Let $X_1$ and $X_2$ be independent fair coin flips taking values in $\{0, 1\}$. Define $X_3 = X_1 \oplus X_2$ (where $\oplus$ denotes addition modulo 2).
\begin{enumerate}
\item[(a)] Show that $X_1, X_2, X_3$ are pairwise independent.
\item[(b)] Show that $X_1, X_2, X_3$ are not mutually independent.
\end{enumerate}
\end{example}
\begin{proofbox}
\end{proofbox}
\end{document}