aboutsummaryrefslogtreecommitdiff
path: root/3.4.2.lyx
diff options
context:
space:
mode:
authorJuan Marín Noguera <juan@mnpi.eu>2025-05-16 22:18:44 +0200
committerJuan Marín Noguera <juan@mnpi.eu>2025-05-16 22:18:44 +0200
commit4f670b750af5c11e1eac16d9cd8556455f89f46a (patch)
treee0f8d7b33df2727d89150f799ee8628821fda80a /3.4.2.lyx
parent16ccda6c459c0fd7ca2081e9d541124c28b0c556 (diff)
Changed layout for more manageable volumes
Diffstat (limited to '3.4.2.lyx')
-rw-r--r--3.4.2.lyx537
1 files changed, 0 insertions, 537 deletions
diff --git a/3.4.2.lyx b/3.4.2.lyx
deleted file mode 100644
index 8d59cc5..0000000
--- a/3.4.2.lyx
+++ /dev/null
@@ -1,537 +0,0 @@
-#LyX 2.4 created this file. For more info see https://www.lyx.org/
-\lyxformat 620
-\begin_document
-\begin_header
-\save_transient_properties true
-\origin unavailable
-\textclass book
-\begin_preamble
-\input defs
-\end_preamble
-\use_default_options true
-\maintain_unincluded_children no
-\language english
-\language_package default
-\inputencoding utf8
-\fontencoding auto
-\font_roman "default" "default"
-\font_sans "default" "default"
-\font_typewriter "default" "default"
-\font_math "auto" "auto"
-\font_default_family default
-\use_non_tex_fonts false
-\font_sc false
-\font_roman_osf false
-\font_sans_osf false
-\font_typewriter_osf false
-\font_sf_scale 100 100
-\font_tt_scale 100 100
-\use_microtype false
-\use_dash_ligatures true
-\graphics default
-\default_output_format default
-\output_sync 0
-\bibtex_command default
-\index_command default
-\float_placement class
-\float_alignment class
-\paperfontsize default
-\spacing single
-\use_hyperref false
-\papersize default
-\use_geometry false
-\use_package amsmath 1
-\use_package amssymb 1
-\use_package cancel 1
-\use_package esint 1
-\use_package mathdots 1
-\use_package mathtools 1
-\use_package mhchem 1
-\use_package stackrel 1
-\use_package stmaryrd 1
-\use_package undertilde 1
-\cite_engine basic
-\cite_engine_type default
-\biblio_style plain
-\use_bibtopic false
-\use_indices false
-\paperorientation portrait
-\suppress_date false
-\justification true
-\use_refstyle 1
-\use_formatted_ref 0
-\use_minted 0
-\use_lineno 0
-\index Index
-\shortcut idx
-\color #008000
-\end_index
-\secnumdepth 3
-\tocdepth 3
-\paragraph_separation indent
-\paragraph_indentation default
-\is_math_indent 0
-\math_numbering_side default
-\quotes_style english
-\dynamic_quotes 0
-\papercolumns 1
-\papersides 1
-\paperpagestyle default
-\tablestyle default
-\tracking_changes false
-\output_changes false
-\change_bars false
-\postpone_fragile_content false
-\html_math_output 0
-\html_css_as_file 0
-\html_be_strict false
-\docbook_table_output 0
-\docbook_mathml_prefix 1
-\end_header
-
-\begin_body
-
-\begin_layout Standard
-\begin_inset ERT
-status open
-
-\begin_layout Plain Layout
-
-
-\backslash
-rexerc3[22]
-\end_layout
-
-\end_inset
-
-The
-\begin_inset Formula $(t+1)$
-\end_inset
-
-st item in Algorithm S is selected with probability
-\begin_inset Formula $(n-m)/(N-t)$
-\end_inset
-
-,
- not
-\begin_inset Formula $n/N$
-\end_inset
-
-,
- yet the text claims that the sample is unbiased;
- thus each item should be selected with the
-\emph on
-same
-\emph default
- probability.
- How can both of these statements be true?
-\end_layout
-
-\begin_layout Standard
-\begin_inset ERT
-status open
-
-\begin_layout Plain Layout
-
-
-\backslash
-answer
-\end_layout
-
-\end_inset
-
-This is because
-\begin_inset Formula $\frac{n-m}{N-t}$
-\end_inset
-
- appears as a
-\emph on
-conditioned
-\emph default
- probability,
- where the condition depends on
-\begin_inset Formula $t$
-\end_inset
-
-.
- The actual probability of choosing the
-\begin_inset Formula $(t+1)$
-\end_inset
-
-st element is
-\begin_inset Formula
-\[
-\sum_{m=0}^{t}P(\{m\text{ elements have been chosen between }1\text{ and }t\})\frac{n-m}{N-t}.
-\]
-
-\end_inset
-
-Let
-\begin_inset Formula $P_{mt}$
-\end_inset
-
- be the probability described with words inside this formula.
- For
-\begin_inset Formula $m>t$
-\end_inset
-
- or
-\begin_inset Formula $m<0$
-\end_inset
-
-,
- this probability is obviously 0,
- and for
-\begin_inset Formula $t=0$
-\end_inset
-
-,
-
-\begin_inset Formula $P_{00}=1$
-\end_inset
-
-.
- For
-\begin_inset Formula $t\geq1$
-\end_inset
-
- and
-\begin_inset Formula $0\leq m\leq t$
-\end_inset
-
-,
-
-\begin_inset Formula
-\[
-P_{mt}=P_{m(t-1)}\left(1-\frac{n-m}{N-t+1}\right)+P_{(m-1)(t-1)}\frac{n-m+1}{N-t+1}.
-\]
-
-\end_inset
-
-Note that,
- by this formula,
-
-\begin_inset Formula $P_{(n-1)t}=0$
-\end_inset
-
- always,
- since the second term is 0 and the first term is a multiple of
-\begin_inset Formula $P_{(n-1)(t-1)}$
-\end_inset
-
-,
- which is 0 by induction since
-\begin_inset Formula $P_{(n-1)(-1)}=0$
-\end_inset
-
-.
- Then,
- by induction in
-\begin_inset Formula $m$
-\end_inset
-
-,
-
-\begin_inset Formula $P_{mt}=0$
-\end_inset
-
- for any
-\begin_inset Formula $m>n$
-\end_inset
-
-.
-\end_layout
-
-\begin_layout Standard
-Since the algorithm is said to be unbiased,
- we would expect
-\begin_inset Formula
-\[
-P_{mt}=\frac{\binom{t}{m}\binom{N-t}{n-m}}{\binom{N}{n}},
-\]
-
-\end_inset
-
-that is,
- the number of ways to choose
-\begin_inset Formula $m$
-\end_inset
-
- elements among the first
-\begin_inset Formula $t$
-\end_inset
-
- ones and
-\begin_inset Formula $n-m$
-\end_inset
-
- elements among the rest,
- divided by the total number of ways of choosing
-\begin_inset Formula $n$
-\end_inset
-
- elements among
-\begin_inset Formula $N$
-\end_inset
-
-.
-\end_layout
-
-\begin_layout Standard
-We prove this by induction.
- For
-\begin_inset Formula $t=0$
-\end_inset
-
- we have seen it already.
- For
-\begin_inset Formula $t>0$
-\end_inset
-
-,
- if
-\begin_inset Formula $m=0$
-\end_inset
-
-,
-\begin_inset Formula
-\[
-P_{0t}=P_{0(t-1)}\left(1-\frac{n}{N-t+1}\right)=\frac{\binom{N-t+1}{n}}{\binom{N}{n}}\frac{N-t-n+1}{N-t+1}=\frac{\binom{N-t}{n}}{\binom{N}{n}},
-\]
-
-\end_inset
-
-and if
-\begin_inset Formula $m>0$
-\end_inset
-
-,
-\begin_inset Formula
-\begin{align*}
-P_{mt} & =\frac{\binom{t-1}{m}\binom{N-t+1}{n-m}}{\binom{N}{n}}\frac{N-t-n+m+1}{N-t+1}+\frac{\binom{t-1}{m-1}\binom{N-t+1}{n-m+1}}{\binom{N}{n}}\frac{n-m+1}{N-t+1}=\\
- & =\frac{\binom{t-1}{m}\binom{N-t}{n-m}}{\binom{N}{n}}+\frac{\binom{t-1}{m-1}\binom{N-t}{n-m}}{\binom{N}{n}}=\frac{\binom{t}{m}\binom{N-t}{n-m}}{\binom{N}{n}},
-\end{align*}
-
-\end_inset
-
-where we make extensive use of Eq.
- 1.2.6–(8).
- Finally,
- by Eq.
- 1.2.6–(21),
- the probability of choosing any given element is
-\begin_inset Formula
-\[
-\sum_{m=0}^{t}P_{mt}\frac{n-m}{N-t}=\frac{1}{\binom{N}{n}}\sum_{m=0}^{t}\binom{t}{m}\binom{N-1-t}{n-1-m}=\frac{\binom{N-1}{n-1}}{\binom{N}{n}}=\frac{\binom{N-1}{N-n}}{\binom{N}{N-n}}=\frac{n}{N}.
-\]
-
-\end_inset
-
-
-\end_layout
-
-\begin_layout Standard
-\begin_inset ERT
-status open
-
-\begin_layout Plain Layout
-
-
-\backslash
-rexerc11[M25]
-\end_layout
-
-\end_inset
-
-Let
-\begin_inset Formula $p_{m}$
-\end_inset
-
- be the probability that exactly
-\begin_inset Formula $m$
-\end_inset
-
- elements are put into the reservoir during the first pass of Algorithm R.
- Determine the generating function
-\begin_inset Formula $G(z)=\sum_{m}p_{m}z^{m}$
-\end_inset
-
-,
- and find the mean and standard deviation.
-\end_layout
-
-\begin_layout Standard
-\begin_inset ERT
-status open
-
-\begin_layout Plain Layout
-
-
-\backslash
-answer
-\end_layout
-
-\end_inset
-
-Let
-\begin_inset Formula $s_{k}\coloneqq P(\text{the }k\text{th element is copied to the reservoir})$
-\end_inset
-
-,
- then
-\begin_inset Formula $p_{m}$
-\end_inset
-
- is the sum across all subsets
-\begin_inset Formula $\{x_{1},\dots,x_{m}\}\in\{1,\dots,N\}$
-\end_inset
-
- of
-\begin_inset Formula $m$
-\end_inset
-
- elements,
-
-\begin_inset Formula $x_{1}<\dots<x_{m}$
-\end_inset
-
-,
- of the product of the
-\begin_inset Formula $s_{x_{i}}$
-\end_inset
-
- times the
-\begin_inset Formula $(1-s_{j})$
-\end_inset
-
- for
-\begin_inset Formula $j\in\{1,\dots N\}\setminus\{x_{1},\dots,x_{m}\}$
-\end_inset
-
-.
- Rearranging terms we can see that
-\begin_inset Formula
-\[
-G(z)=\prod_{k=1}^{N}(s_{k}z+1-s_{k})=z^{n}\prod_{k=n+1}^{N}\left(\frac{nz}{k}+1-\frac{n}{k}\right)=z^{n}\prod_{k=n+1}^{N}\frac{n(z-1)+k}{k}.
-\]
-
-\end_inset
-
-Let
-\begin_inset Formula $F(z)\coloneqq\prod_{k=n+1}^{N}\frac{n(z-1)+k}{k}$
-\end_inset
-
-,
- we have
-\begin_inset Formula $F(1)=1$
-\end_inset
-
- and
-\begin_inset Formula
-\begin{align*}
-\dot{F}(z) & =F(z)\sum_{k=n+1}^{N}\frac{n}{n(z-1)+k}, & \dot{F}(1) & =\sum_{k=n+1}^{N}\frac{n}{k}=n(H_{N}-H_{n});\\
-\ddot{F}(z) & =\frac{\dot{F}(z)^{2}}{F(z)}-F(z)\sum_{k=n+1}^{N}\frac{n^{2}}{(n(z-1)+k)^{2}}, & \ddot{F}(1) & =n^{2}\left((H_{N}-H_{n})^{2}-(H_{N}^{(2)}-H_{n}^{(2)})\right).
-\end{align*}
-
-\end_inset
-
-Using this,
-\begin_inset Formula
-\begin{align*}
-\dot{G}(z) & =nz^{n-1}F(z)+z^{n}\dot{F}(z),\\
-\ddot{G}(z) & =n(n-1)z^{n-2}F(z)+2nz^{n-1}\dot{F}(z)+z^{n}\ddot{F}(z),
-\end{align*}
-
-\end_inset
-
-so
-\begin_inset Formula
-\begin{align*}
-\text{mean}(G) & =\dot{G}(1)=n(1+H_{N}-H_{n});\\
-\text{var}(G) & =\ddot{G}(1)+\dot{G}(1)-\dot{G}(1)^{2}\\
- & =\cancel{n(n-1)}\cancel{+2n\dot{F}(1)}+\ddot{F}(1)\cancel{+n}+\dot{F}(1)\cancel{-n^{2}}-\dot{F}(1)^{2}\cancel{-2n\dot{F}(1)}\\
- & =n(H_{N}-H_{n})-n^{2}(H_{N}^{(2)}-H_{n}^{(2)}),\\
-\sigma(G) & =\sqrt{\text{var}(G)}=\sqrt{n(H_{N}-H_{n})-n^{2}(H_{N}^{(2)}-H_{n}^{(2)})}.
-\end{align*}
-
-\end_inset
-
-
-\end_layout
-
-\begin_layout Standard
-\begin_inset ERT
-status open
-
-\begin_layout Plain Layout
-
-
-\backslash
-rexerc16[M25]
-\end_layout
-
-\end_inset
-
-Devise a way to compute a random sample of
-\begin_inset Formula $n$
-\end_inset
-
- records from
-\begin_inset Formula $N$
-\end_inset
-
-,
- given
-\begin_inset Formula $N$
-\end_inset
-
- and
-\begin_inset Formula $n$
-\end_inset
-
-,
- based on the idea of hashing (Section 6.4).
- Your method should use
-\begin_inset Formula $O(n)$
-\end_inset
-
- storage locations and an average of
-\begin_inset Formula $O(n)$
-\end_inset
-
- units of time,
- and it should present the sample as a sorted set of integers
-\begin_inset Formula $1\leq X_{1}<X_{2}<\dots<X_{n}\leq N$
-\end_inset
-
-.
-\end_layout
-
-\begin_layout Standard
-\begin_inset ERT
-status open
-
-\begin_layout Plain Layout
-
-
-\backslash
-answer
-\end_layout
-
-\end_inset
-
-
-\begin_inset Note Note
-status open
-
-\begin_layout Plain Layout
-TODO Depends on Section 6.4.
-\end_layout
-
-\end_inset
-
-
-\end_layout
-
-\end_body
-\end_document