aboutsummaryrefslogtreecommitdiff
path: root/vol2/3.3.1.lyx
diff options
context:
space:
mode:
authorJuan Marín Noguera <juan@mnpi.eu>2025-05-16 22:18:44 +0200
committerJuan Marín Noguera <juan@mnpi.eu>2025-05-16 22:18:44 +0200
commit4f670b750af5c11e1eac16d9cd8556455f89f46a (patch)
treee0f8d7b33df2727d89150f799ee8628821fda80a /vol2/3.3.1.lyx
parent16ccda6c459c0fd7ca2081e9d541124c28b0c556 (diff)
Changed layout for more manageable volumes
Diffstat (limited to 'vol2/3.3.1.lyx')
-rw-r--r--vol2/3.3.1.lyx607
1 files changed, 607 insertions, 0 deletions
diff --git a/vol2/3.3.1.lyx b/vol2/3.3.1.lyx
new file mode 100644
index 0000000..f2bfc5c
--- /dev/null
+++ b/vol2/3.3.1.lyx
@@ -0,0 +1,607 @@
+#LyX 2.4 created this file. For more info see https://www.lyx.org/
+\lyxformat 620
+\begin_document
+\begin_header
+\save_transient_properties true
+\origin unavailable
+\textclass book
+\begin_preamble
+\input defs
+\end_preamble
+\use_default_options true
+\maintain_unincluded_children no
+\language english
+\language_package default
+\inputencoding utf8
+\fontencoding auto
+\font_roman "default" "default"
+\font_sans "default" "default"
+\font_typewriter "default" "default"
+\font_math "auto" "auto"
+\font_default_family default
+\use_non_tex_fonts false
+\font_sc false
+\font_roman_osf false
+\font_sans_osf false
+\font_typewriter_osf false
+\font_sf_scale 100 100
+\font_tt_scale 100 100
+\use_microtype false
+\use_dash_ligatures true
+\graphics default
+\default_output_format default
+\output_sync 0
+\bibtex_command default
+\index_command default
+\float_placement class
+\float_alignment class
+\paperfontsize default
+\spacing single
+\use_hyperref false
+\papersize default
+\use_geometry false
+\use_package amsmath 1
+\use_package amssymb 1
+\use_package cancel 1
+\use_package esint 1
+\use_package mathdots 1
+\use_package mathtools 1
+\use_package mhchem 1
+\use_package stackrel 1
+\use_package stmaryrd 1
+\use_package undertilde 1
+\cite_engine basic
+\cite_engine_type default
+\biblio_style plain
+\use_bibtopic false
+\use_indices false
+\paperorientation portrait
+\suppress_date false
+\justification true
+\use_refstyle 1
+\use_formatted_ref 0
+\use_minted 0
+\use_lineno 0
+\index Index
+\shortcut idx
+\color #008000
+\end_index
+\secnumdepth 3
+\tocdepth 3
+\paragraph_separation indent
+\paragraph_indentation default
+\is_math_indent 0
+\math_numbering_side default
+\quotes_style english
+\dynamic_quotes 0
+\papercolumns 1
+\papersides 1
+\paperpagestyle default
+\tablestyle default
+\tracking_changes false
+\output_changes false
+\change_bars false
+\postpone_fragile_content false
+\html_math_output 0
+\html_css_as_file 0
+\html_be_strict false
+\docbook_table_output 0
+\docbook_mathml_prefix 1
+\end_header
+
+\begin_body
+
+\begin_layout Standard
+\begin_inset FormulaMacro
+\newcommand{\stirla}[2]{\genfrac[]{0pt}{}{#1}{#2}}
+{\begin{bmatrix}{\textstyle #1}\\
+{\textstyle #2}
+\end{bmatrix}}
+\end_inset
+
+
+\begin_inset FormulaMacro
+\newcommand{\stirlb}[2]{\genfrac\{\}{0pt}{}{#1}{#2}}
+{\begin{Bmatrix}{\textstyle #1}\\
+{\textstyle #2}
+\end{Bmatrix}}
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+\begin_inset ERT
+status open
+
+\begin_layout Plain Layout
+
+
+\backslash
+exerc1[00]
+\end_layout
+
+\end_inset
+
+What line of the chi-square table should be used to check whether or not the value
+\begin_inset Formula $V=7\frac{7}{48}$
+\end_inset
+
+ of Eq.
+ (5) is improbably high?
+\end_layout
+
+\begin_layout Standard
+\begin_inset ERT
+status open
+
+\begin_layout Plain Layout
+
+
+\backslash
+answer
+\end_layout
+
+\end_inset
+
+Row
+\begin_inset Formula $\nu=k-1=11-1=10$
+\end_inset
+
+.
+\end_layout
+
+\begin_layout Standard
+\begin_inset ERT
+status open
+
+\begin_layout Plain Layout
+
+
+\backslash
+rexerc3[23]
+\end_layout
+
+\end_inset
+
+Some dice that were loaded as described in the previous exercise were rolled 144 times,
+ and the following values were observed:
+\begin_inset Formula
+\[
+\begin{array}{rrrrrrrrrrrr}
+\text{value of }s= & 2 & 3 & 4 & 5 & 6 & 7 & 8 & 9 & 10 & 11 & 12\\
+\text{observed number, }Y_{s}= & 2 & 6 & 10 & 16 & 18 & 32 & 20 & 13 & 16 & 9 & 2
+\end{array}
+\]
+
+\end_inset
+
+Apply the chi-square test to
+\emph on
+these
+\emph default
+ values,
+ using the probabilities in (1),
+ pretending that the dice are not in fact known to be faulty.
+ Does the chi-square test detect the bad dice?
+ If not,
+ explain why not.
+\end_layout
+
+\begin_layout Standard
+\begin_inset ERT
+status open
+
+\begin_layout Plain Layout
+
+
+\backslash
+answer
+\end_layout
+
+\end_inset
+
+We take the values
+\begin_inset Formula $np_{s}$
+\end_inset
+
+ from (2),
+ to get:
+\begin_inset Formula
+\begin{align*}
+V & =\sum_{s=2}^{12}\frac{(Y_{s}-np_{s})^{2}}{np_{s}}=\frac{4}{4}+\frac{4}{8}+\frac{4}{12}+\frac{0}{16}+\frac{4}{20}+\frac{64}{24}+\frac{0}{20}+\frac{9}{16}+\frac{16}{12}+\frac{1}{8}+\frac{4}{4}\\
+ & =1+\frac{1}{2}+\frac{1}{3}+\frac{1}{5}+\frac{8}{3}+\frac{9}{16}+\frac{4}{3}+\frac{1}{8}+1=2+\frac{13}{3}+\frac{19}{16}+\frac{1}{5}\\
+ & =7+\frac{80+45+48}{240}=7+\frac{173}{240}.
+\end{align*}
+
+\end_inset
+
+Using
+\begin_inset Formula $\nu=10$
+\end_inset
+
+ we get a probability between
+\begin_inset Formula $.25$
+\end_inset
+
+ and
+\begin_inset Formula $.5$
+\end_inset
+
+,
+ which is not suspect.
+ This seems to be because the bias of one die compensates for that of the other,
+ smoothing out the probability differences.
+ The difference could be discovered with a large enough value of
+\begin_inset Formula $n$
+\end_inset
+
+.
+\end_layout
+
+\begin_layout Standard
+\begin_inset ERT
+status open
+
+\begin_layout Plain Layout
+
+
+\backslash
+rexerc4[23]
+\end_layout
+
+\end_inset
+
+The author actually obtained the data in experiment 1 of (9) by simulating dice in which one was normal,
+ the other was loaded so that it always turned up 1 or 6.
+ (The latter two possibilities were equally probable.) Compute the probabilities that replace (1) in this case,
+ and by using a chi-square test decide if the results of that experiment are consistent with the dice being loaded in this way.
+\end_layout
+
+\begin_layout Standard
+\begin_inset ERT
+status open
+
+\begin_layout Plain Layout
+
+
+\backslash
+answer
+\end_layout
+
+\end_inset
+
+We compute the table with the sum of the two dice:
+\begin_inset Formula
+\[
+\begin{array}{r|rrrrrr}
+ & 1 & 2 & 3 & 4 & 5 & 6\\
+\hline 1 & 2 & 3 & 4 & 5 & 6 & 7\\
+6 & 7 & 8 & 9 & 10 & 11 & 12
+\end{array}
+\]
+
+\end_inset
+
+This gives us the following table of probabilities:
+\begin_inset Formula
+\[
+\begin{array}{rrrrrrrrrrrr}
+s= & 2 & 3 & 4 & 5 & 6 & 7 & 8 & 9 & 10 & 11 & 12\\
+144p_{s}= & 12 & 12 & 12 & 12 & 12 & 24 & 12 & 12 & 12 & 12 & 12
+\end{array}
+\]
+
+\end_inset
+
+Thus,
+\begin_inset Formula
+\begin{align*}
+V & =\frac{1}{12}\left(8^{2}+2^{2}+2^{2}+1^{2}+8^{2}+\frac{6^{2}}{2}+6^{2}+1^{2}+1^{2}+2^{2}+1^{2}\right)\\
+ & =\frac{1}{12}(64+4+4+1+64+18+36+1+1+4+1)=\frac{198}{12}=16+\frac{1}{2}.
+\end{align*}
+
+\end_inset
+
+With
+\begin_inset Formula $\nu=10$
+\end_inset
+
+,
+ this is somewhat in the middle of
+\begin_inset Formula $p=.75$
+\end_inset
+
+ and
+\begin_inset Formula $p=.95$
+\end_inset
+
+,
+ which is consistent.
+\end_layout
+
+\begin_layout Standard
+\begin_inset ERT
+status open
+
+\begin_layout Plain Layout
+
+
+\backslash
+exerc8[00]
+\end_layout
+
+\end_inset
+
+The text describes an experiment in which 20 values of the statistic
+\begin_inset Formula $K_{10}^{+}$
+\end_inset
+
+ were obtained in the study of a random sequence.
+ These values were plotted,
+ to obtain Fig.
+ 4,
+ and a KS statistic was computed from the resulting graph.
+ Why were the table entries for
+\begin_inset Formula $n=20$
+\end_inset
+
+ used to study the resulting statistic,
+ instead of the table entries for
+\begin_inset Formula $n=10$
+\end_inset
+
+?
+\end_layout
+
+\begin_layout Standard
+\begin_inset ERT
+status open
+
+\begin_layout Plain Layout
+
+
+\backslash
+answer
+\end_layout
+
+\end_inset
+
+Because the value of
+\begin_inset Formula $n$
+\end_inset
+
+ to use is not about the underlying probability distribution (which can be an arbitrary real-valued one,
+ not just
+\begin_inset Formula $K_{n}^{+}$
+\end_inset
+
+ or
+\begin_inset Formula $K_{n}^{-}$
+\end_inset
+
+),
+ but rather it is the number of observations we make for this distribution,
+ which is 20.
+\end_layout
+
+\begin_layout Standard
+\begin_inset ERT
+status open
+
+\begin_layout Plain Layout
+
+
+\backslash
+rexerc9[20]
+\end_layout
+
+\end_inset
+
+The experiment described in the text consisted of plotting 20 values of
+\begin_inset Formula $K_{10}^{+}$
+\end_inset
+
+,
+ computed from the maximum-of-5 test applied to different parts of a random sequence.
+ We could have computed also the corresponding 20 values of
+\begin_inset Formula $K_{10}^{-}$
+\end_inset
+
+;
+ since
+\begin_inset Formula $K_{10}^{-}$
+\end_inset
+
+ has the same distribution as
+\begin_inset Formula $K_{10}^{+}$
+\end_inset
+
+,
+ we could lump together the 40 values thus obtained (that is,
+ 20 of the
+\begin_inset Formula $K_{10}^{+}$
+\end_inset
+
+'s and 20 of the
+\begin_inset Formula $K_{10}^{-}$
+\end_inset
+
+'s),
+ and a KS test could be applied so that we would get new values
+\begin_inset Formula $K_{40}^{+}$
+\end_inset
+
+,
+
+\begin_inset Formula $K_{40}^{-}$
+\end_inset
+
+.
+ Discuss the merits of this idea.
+\end_layout
+
+\begin_layout Standard
+\begin_inset ERT
+status open
+
+\begin_layout Plain Layout
+
+
+\backslash
+answer
+\end_layout
+
+\end_inset
+
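+The issue here is that the 40 points would not be independent;
+ each pair of values is computed from the same 10 maximum-of-5 observations,
+ so when the empirical distribution lies well to one side of the theoretical one and one of the two statistics is high,
+ the other one tends to be low.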
+\end_layout
+
+\begin_layout Standard
+\begin_inset ERT
+status open
+
+\begin_layout Plain Layout
+
+
+\backslash
+rexerc10[20]
+\end_layout
+
+\end_inset
+
+Suppose a chi-square test is done by making
+\begin_inset Formula $n$
+\end_inset
+
+ observations,
+ and the value
+\begin_inset Formula $V$
+\end_inset
+
+ is obtained.
+ Now we repeat the test on these same
+\begin_inset Formula $n$
+\end_inset
+
+ observations over again (getting,
+ of course,
+ the same results),
+ and we put together the data from both tests,
+ regarding it as a single chi-square test with
+\begin_inset Formula $2n$
+\end_inset
+
+ observations.
+ (This procedure violates the text's stipulation that all of the observations must be independent of one another.) How is the second value of
+\begin_inset Formula $V$
+\end_inset
+
+ related to the first one?
+\end_layout
+
+\begin_layout Standard
+\begin_inset ERT
+status open
+
+\begin_layout Plain Layout
+
+
+\backslash
+answer
+\end_layout
+
+\end_inset
+
+Let
+\begin_inset Formula $Y'_{s}=2Y_{s}$
+\end_inset
+
+ be the number of observations of category
+\begin_inset Formula $s$
+\end_inset
+
+ in the second test;
+ then the second value of
+\begin_inset Formula $V$
+\end_inset
+
+ is
+\begin_inset Formula
+\[
+V'=\sum_{s=1}^{k}\frac{(Y'_{s}-2np_{s})^{2}}{2np_{s}}=\sum_{s=1}^{k}\frac{(2Y_{s}-2np_{s})^{2}}{2np_{s}}=2V.
+\]
+
+\end_inset
+
+
+\end_layout
+
+\begin_layout Standard
+\begin_inset ERT
+status open
+
+\begin_layout Plain Layout
+
+
+\backslash
+exerc11[10]
+\end_layout
+
+\end_inset
+
+Solve exercise 10 substituting the KS test for the chi-square test.
+\end_layout
+
+\begin_layout Standard
+\begin_inset ERT
+status open
+
+\begin_layout Plain Layout
+
+
+\backslash
+answer
+\end_layout
+
+\end_inset
+
+This time,
+ after sorting the
+\begin_inset Formula $2n$
+\end_inset
+
+ observations
+\begin_inset Formula $X'_{1},\dots,X'_{2n}$
+\end_inset
+
+,
+ we have
+\begin_inset Formula $X_{j}=X'_{2j-1}=X'_{2j}$
+\end_inset
+
+,
+ so
+\begin_inset Formula
+\[
+K_{2n}^{+}=\sqrt{2n}\max_{1\leq j\leq2n}\left(\frac{j}{2n}-F(X'_{j})\right)=\sqrt{2n}\max_{1\leq j\leq n}\left(\frac{2j}{2n}-F(X_{j})\right)=\sqrt{2}K_{n}^{+},
+\]
+
+\end_inset
+
+and similarly,
+\begin_inset Formula
+\[
+K_{2n}^{-}=\sqrt{2n}\max_{1\leq j\leq2n}\left(F(X'_{j})-\frac{j-1}{2n}\right)=\sqrt{2n}\max_{1\leq j\leq n}\left(F(X_{j})-\frac{2j-2}{2n}\right)=\sqrt{2}K_{n}^{-}.
+\]
+
+\end_inset
+
+
+\end_layout
+
+\end_body
+\end_document