diff options
Diffstat (limited to 'vol2/4.2.2.lyx')
| -rw-r--r-- | vol2/4.2.2.lyx | 1072 | 
1 files changed, 1072 insertions, 0 deletions
| diff --git a/vol2/4.2.2.lyx b/vol2/4.2.2.lyx new file mode 100644 index 0000000..abcc87a --- /dev/null +++ b/vol2/4.2.2.lyx @@ -0,0 +1,1072 @@ +#LyX 2.4 created this file. For more info see https://www.lyx.org/ +\lyxformat 620 +\begin_document +\begin_header +\save_transient_properties true +\origin unavailable +\textclass book +\begin_preamble +\input defs +\end_preamble +\use_default_options true +\maintain_unincluded_children no +\language english +\language_package default +\inputencoding utf8 +\fontencoding auto +\font_roman "default" "default" +\font_sans "default" "default" +\font_typewriter "default" "default" +\font_math "auto" "auto" +\font_default_family default +\use_non_tex_fonts false +\font_sc false +\font_roman_osf false +\font_sans_osf false +\font_typewriter_osf false +\font_sf_scale 100 100 +\font_tt_scale 100 100 +\use_microtype false +\use_dash_ligatures true +\graphics default +\default_output_format default +\output_sync 0 +\bibtex_command default +\index_command default +\float_placement class +\float_alignment class +\paperfontsize default +\spacing single +\use_hyperref false +\papersize default +\use_geometry false +\use_package amsmath 1 +\use_package amssymb 1 +\use_package cancel 1 +\use_package esint 1 +\use_package mathdots 1 +\use_package mathtools 1 +\use_package mhchem 1 +\use_package stackrel 1 +\use_package stmaryrd 1 +\use_package undertilde 1 +\cite_engine basic +\cite_engine_type default +\biblio_style plain +\use_bibtopic false +\use_indices false +\paperorientation portrait +\suppress_date false +\justification true +\use_refstyle 1 +\use_formatted_ref 0 +\use_minted 0 +\use_lineno 0 +\index Index +\shortcut idx +\color #008000 +\end_index +\secnumdepth 3 +\tocdepth 3 +\paragraph_separation indent +\paragraph_indentation default +\is_math_indent 0 +\math_numbering_side default +\quotes_style english +\dynamic_quotes 0 +\papercolumns 1 +\papersides 1 +\paperpagestyle default +\tablestyle default +\tracking_changes false 
+\output_changes false +\change_bars false +\postpone_fragile_content false +\html_math_output 0 +\html_css_as_file 0 +\html_be_strict false +\docbook_table_output 0 +\docbook_mathml_prefix 1 +\end_header + +\begin_body + +\begin_layout Standard + +\emph on +Note: + +\emph default + Normalized floating point arithmetic is assumed unless the contrary is specified. +\end_layout + +\begin_layout Standard +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +exerc4[10] +\end_layout + +\end_inset + +Is it possible to have floating point numbers  +\begin_inset Formula $u$ +\end_inset + +, +  +\begin_inset Formula $v$ +\end_inset + +, + and  +\begin_inset Formula $w$ +\end_inset + + for which exponent overflow occurs during the calculation of  +\begin_inset Formula $u\otimes(v\otimes w)$ +\end_inset + + but not during the calculation of  +\begin_inset Formula $(u\otimes v)\otimes w$ +\end_inset + +? +\end_layout + +\begin_layout Standard +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +answer  +\end_layout + +\end_inset + +Yes. + If, + say, +  +\begin_inset Formula $b=10$ +\end_inset + +, +  +\begin_inset Formula $q=8$ +\end_inset + +, + and overflow occurs when the exponent reaches 16, + let  +\begin_inset Formula $u=(15,.10000001)$ +\end_inset + +, +  +\begin_inset Formula $v=(9,.33333330)$ +\end_inset + +, + and  +\begin_inset Formula $w=(9,.30000000)$ +\end_inset + +. + Then  +\begin_inset Formula $v\otimes w=(9,.99999990)$ +\end_inset + + and  +\begin_inset Formula $u\otimes(v\otimes w)=(16,.10000000)$ +\end_inset + +, + which raises an overflow, + but  +\begin_inset Formula $u\otimes v=(15,.33333333)$ +\end_inset + + and  +\begin_inset Formula $(u\otimes v)\otimes w=(15,.99999999)$ +\end_inset + +, + which does not raise an overflow. 
+\end_layout + +\begin_layout Standard +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +rexerc8[20] +\end_layout + +\end_inset + +Let  +\begin_inset Formula $\epsilon=0.0001$ +\end_inset + +; + which of the relations +\begin_inset Formula  +\begin{align*} +u & \prec v\quad(\epsilon), & u & \sim v\quad(\epsilon), & u & \succ v\quad(\epsilon), & u & \approx v\quad(\epsilon) +\end{align*} + +\end_inset + +hold for the following pairs of base 10, + excess 0, + eight-digit floating point numbers? +\end_layout + +\begin_layout Enumerate +\begin_inset Formula $u=(1,+.31415927)$ +\end_inset + +, +  +\begin_inset Formula $v=(1,+.31416000)$ +\end_inset + +; +\end_layout + +\begin_layout Enumerate +\begin_inset Formula $u=(0,+.99997000)$ +\end_inset + +, +  +\begin_inset Formula $v=(1,+.10000039)$ +\end_inset + +; +\end_layout + +\begin_layout Enumerate +\begin_inset Formula $u=(24,+.60221400)$ +\end_inset + +, +  +\begin_inset Formula $v=(27,+.00060221)$ +\end_inset + +; +\end_layout + +\begin_layout Enumerate +\begin_inset Formula $u=(24,+.60221400)$ +\end_inset + +, +  +\begin_inset Formula $v=(31,+.00000006)$ +\end_inset + +; +\end_layout + +\begin_layout Enumerate +\begin_inset Formula $u=(24,+.60221400)$ +\end_inset + +, +  +\begin_inset Formula $v=(28,+.00000000)$ +\end_inset + +. +\end_layout + +\begin_layout Standard +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +answer +\end_layout + +\end_inset + + +\end_layout + +\begin_layout Enumerate +\begin_inset Formula $\sim$ +\end_inset + +, +  +\begin_inset Formula $\approx$ +\end_inset + +. +\end_layout + +\begin_layout Enumerate +\begin_inset Formula $\sim$ +\end_inset + +, +  +\begin_inset Formula $\approx$ +\end_inset + +. +\end_layout + +\begin_layout Enumerate +\begin_inset Formula $\sim$ +\end_inset + +, +  +\begin_inset Formula $\approx$ +\end_inset + +. +\end_layout + +\begin_layout Enumerate +\begin_inset Formula $\sim$ +\end_inset + +. 
+\end_layout + +\begin_layout Enumerate +\begin_inset Formula $\sim$ +\end_inset + +. +\end_layout + +\begin_layout Standard +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +rexerc10[M25] +\end_layout + +\end_inset + +(W. + M. + Kahan.) A certain computer performs floating point arithmetic without proper rounding, + and, + in fact, + its floating point multiplication routine ignores all but the first  +\begin_inset Formula $p$ +\end_inset + + most significant digits of the  +\begin_inset Formula $2p$ +\end_inset + +-digit product  +\begin_inset Formula $f_{u}f_{v}$ +\end_inset + +. + (Thus when  +\begin_inset Formula $f_{u}f_{v}<1/b$ +\end_inset + +, + the least-significant digit of  +\begin_inset Formula $u\otimes v$ +\end_inset + + always comes out to be zero, + due to subsequent normalization.) Show that this causes the monotonicity of multiplication to fail; + in other words, + exhibit positive normalized floating point numbers  +\begin_inset Formula $u$ +\end_inset + +, +  +\begin_inset Formula $v$ +\end_inset + +, + and  +\begin_inset Formula $w$ +\end_inset + + such that  +\begin_inset Formula $u<v$ +\end_inset + + but  +\begin_inset Formula $u\otimes w>v\otimes w$ +\end_inset + + on this machine. +\end_layout + +\begin_layout Standard +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +answer  +\end_layout + +\end_inset + +Assume  +\begin_inset Formula $p=4$ +\end_inset + +, + and let  +\begin_inset Formula $u=.9999<1.000=v$ +\end_inset + + and let  +\begin_inset Formula $w=.2222$ +\end_inset + +. + Then  +\begin_inset Formula $u\otimes w=.2221>.2220=v\otimes w$ +\end_inset + +. +\end_layout + +\begin_layout Standard +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +rexerc13[M25] +\end_layout + +\end_inset + +Some programming languages (and even some computers) make use of floating point arithmetic only, + with no provision for exact calculations with integers. 
+ If operations on integers are desired, + we can, + of course, + represent an integer as a floating point number; + and when the floating point operations satisfy the basic definitions in (9), + we know that all floating point operations will be exact, + provided that the operands and the answer can each be represented exactly with  +\begin_inset Formula $p$ +\end_inset + + significant digits. + Therefore— +so long as we know that the numbers aren't too large— +we can add, + subtract, + or multiply integers with no inaccuracy due to rounding errors. +\end_layout + +\begin_layout Standard +But suppose that a programmer wants to determine if  +\begin_inset Formula $m$ +\end_inset + + is an exact multiple of  +\begin_inset Formula $n$ +\end_inset + +, + when  +\begin_inset Formula $m$ +\end_inset + + and  +\begin_inset Formula $n\neq0$ +\end_inset + + are integers. + Suppose further that a subroutine is available to calculate the quantity  +\begin_inset Formula $\text{round}(u\bmod1)=u\mathring{\bmod}1$ +\end_inset + + for any given floating point number  +\begin_inset Formula $u$ +\end_inset + +, + as in exercise 4.2.1–15. + One good way to determine whether or not  +\begin_inset Formula $m$ +\end_inset + + is a multiple of  +\begin_inset Formula $n$ +\end_inset + + might be to test whether or not  +\begin_inset Formula $(m\oslash n)\mathring{\bmod}1=0$ +\end_inset + +, + using the assumed subroutine; + but perhaps rounding errors in the floating point calculations will invalidate this test in certain cases. +\end_layout + +\begin_layout Standard +Find suitable conditions on the range of integer values  +\begin_inset Formula $n\neq0$ +\end_inset + + and  +\begin_inset Formula $m$ +\end_inset + +, + such that  +\begin_inset Formula $m$ +\end_inset + + is a multiple of  +\begin_inset Formula $n$ +\end_inset + + if and only if  +\begin_inset Formula $(m\oslash n)\mathring{\bmod}1=0$ +\end_inset + +. 
+ In other words, + show that if  +\begin_inset Formula $m$ +\end_inset + + and  +\begin_inset Formula $n$ +\end_inset + + are not too large, + this test is valid. +\end_layout + +\begin_layout Standard +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +answer  +\end_layout + +\end_inset + +A suitable condition would be  +\begin_inset Formula $|m|<2b^{p-1}$ +\end_inset + +; + for the proof we may assume  +\begin_inset Formula $m,n\geq0$ +\end_inset + + as the signs of the operands do not affect the check. + In every case, + if  +\begin_inset Formula $n\mid m$ +\end_inset + +, + then  +\begin_inset Formula $m\oslash n=\text{round}(\frac{m}{n})$ +\end_inset + + will necessarily be an integer and  +\begin_inset Formula $(m\oslash n)\mathring{\bmod}1=0$ +\end_inset + +. + For the converse, + if  +\begin_inset Formula $n\nmid m$ +\end_inset + +, + then  +\begin_inset Formula $m\oslash n=\frac{m}{n}+\delta$ +\end_inset + +, + where  +\begin_inset Formula $|\delta|\leq\frac{1}{2}b^{e_{m\oslash n}-p-q}$ +\end_inset + +. + Note that the exponent is not increased after rounding in the division; + if it were, + that would mean that  +\begin_inset Formula $b^{e}(1-\frac{1}{2}b^{-p})\leq\frac{m}{n}<b^{e}$ +\end_inset + + for some integer  +\begin_inset Formula $e$ +\end_inset + +, + but then  +\begin_inset Formula $nb^{e}(1-\frac{1}{2}b^{-p})\leq m<nb^{e}$ +\end_inset + + and, + because both  +\begin_inset Formula $m$ +\end_inset + + and  +\begin_inset Formula $nb^{e}$ +\end_inset + + are integers, +  +\begin_inset Formula $\frac{1}{2}nb^{e-p}\geq1$ +\end_inset + +, + so  +\begin_inset Formula $nb^{e}\geq2b^{p}$ +\end_inset + + and  +\begin_inset Formula $m\geq2b^{p}-1>2b^{p-1}\#$ +\end_inset + +. + This means that  +\begin_inset Formula $e_{m\oslash n}=\lfloor\log_{b}\frac{m}{n}\rfloor+1+q$ +\end_inset + +. 
+ Now, + since  +\begin_inset Formula $n\nmid m$ +\end_inset + +, +  +\begin_inset Formula $\log_{b}\frac{m}{n}\notin\mathbb{Z}$ +\end_inset + +, + so  +\begin_inset Formula $\lfloor\log_{b}\frac{m}{n}\rfloor+1=\lceil\log_{b}\frac{m}{n}\rceil=\lceil\log_{b}\frac{m}{2}-\log_{b}\frac{n}{2}\rceil\leq\lceil\log_{b}\frac{m}{2}\rceil-\lfloor\log_{b}\frac{n}{2}\rfloor$ +\end_inset + +. + With this, +\begin_inset Formula  +\[ +|\delta|\leq\frac{1}{2}b^{\lceil\log_{b}\frac{m}{2}\rceil-\lfloor\log_{b}\frac{n}{2}\rfloor-p}\leq\frac{1}{2}b^{-1}b^{-\lfloor\log_{b}\frac{n}{2}\rfloor}<\frac{1}{2}b^{\cancel{-1}}b^{-(\log_{b}\frac{n}{2}\cancel{-1})}=\frac{1}{n}, +\] + +\end_inset + +but  +\begin_inset Formula $\frac{m}{n}$ +\end_inset + + differs from the nearest integer by  +\begin_inset Formula $\frac{1}{n}$ +\end_inset + + at least, + so  +\begin_inset Formula $m\oslash n\notin\mathbb{Z}$ +\end_inset + + and  +\begin_inset Formula $(m\oslash n)\mathring{\bmod}1\neq0$ +\end_inset + +. + This is assuming that there's no exponent underflow, + which would be rare because it would mean that  +\begin_inset Formula $q<p-1$ +\end_inset + +. +\end_layout + +\begin_layout Standard +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +rexerc15[M24] +\end_layout + +\end_inset + +(H. + Björk.) Does the computed midpoint of an interval always lie between the endpoints? + (In other words, + does  +\begin_inset Formula $u\leq v$ +\end_inset + + imply that  +\begin_inset Formula $u\leq(u\oplus v)\oslash2\leq v$ +\end_inset + +?) +\end_layout + +\begin_layout Standard +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +answer  +\end_layout + +\end_inset + +No. 
+ For example, + if  +\begin_inset Formula $b=10$ +\end_inset + +, +  +\begin_inset Formula $p=5$ +\end_inset + +, +  +\begin_inset Formula $u=5.9998$ +\end_inset + +, + and  +\begin_inset Formula $v=5.9999$ +\end_inset + +, + then  +\begin_inset Formula $u\oplus v=12.000$ +\end_inset + + and  +\begin_inset Formula $(u\oplus v)\oslash2=6.0000>v$ +\end_inset + +. +\end_layout + +\begin_layout Standard +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +rexerc25[15] +\end_layout + +\end_inset + +When people speak about inaccuracy in floating point arithmetic they often ascribe errors to  +\begin_inset Quotes eld +\end_inset + +cancellation +\begin_inset Quotes erd +\end_inset + + that occurs during the subtraction of nearly equal quantities. + But when  +\begin_inset Formula $u$ +\end_inset + + and  +\begin_inset Formula $v$ +\end_inset + + are approximately equal, + the difference  +\begin_inset Formula $u\ominus v$ +\end_inset + + is obtained exactly, + with no error. + What do these people really mean? +\end_layout + +\begin_layout Standard +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +answer  +\end_layout + +\end_inset + +It really means that, + if the inputs carry a relative error due to rounding, + the relative error of the output is potentially much bigger. + Let  +\begin_inset Formula $u_{0}$ +\end_inset + + be the  +\begin_inset Quotes eld +\end_inset + +correct +\begin_inset Quotes erd +\end_inset + + value of  +\begin_inset Formula $u$ +\end_inset + +, + that is, + the value it would have if the operations so far had been carried out with infinite precision, + and let  +\begin_inset Formula $v_{0}$ +\end_inset + + be the  +\begin_inset Quotes eld +\end_inset + +correct +\begin_inset Quotes erd +\end_inset + + value of  +\begin_inset Formula $v$ +\end_inset + +, + similarly defined. 
+ Let  +\begin_inset Formula $u\eqqcolon u_{0}(1+\delta)$ +\end_inset + + and  +\begin_inset Formula $v\eqqcolon v_{0}(1+\delta')$ +\end_inset + +. + Then, + if  +\begin_inset Formula $u$ +\end_inset + + and  +\begin_inset Formula $v$ +\end_inset + + are nearly equal and  +\begin_inset Formula $u\ominus v$ +\end_inset + + is obtained exactly, + we have  +\begin_inset Formula  +\[ +\frac{u\ominus v}{u_{0}-v_{0}}=\frac{u_{0}(1+\delta)-v_{0}(1+\delta')}{u_{0}-v_{0}}=1+\frac{u_{0}\delta-v_{0}\delta'}{u_{0}-v_{0}}. +\] + +\end_inset + +Even if  +\begin_inset Formula $\delta$ +\end_inset + + and  +\begin_inset Formula $\delta'$ +\end_inset + + are small, + the new relative error  +\begin_inset Formula $\left|\frac{u_{0}\delta-v_{0}\delta'}{u_{0}-v_{0}}\right|$ +\end_inset + + can be quite big, + as there's no reason for  +\begin_inset Formula $\delta$ +\end_inset + + and  +\begin_inset Formula $\delta'$ +\end_inset + + to be similar. + In the worst case where  +\begin_inset Formula $\delta=-\delta'$ +\end_inset + +, + the relative error of the inputs is multiplied by  +\begin_inset Formula $\left|\frac{u_{0}+v_{0}}{u_{0}-v_{0}}\right|$ +\end_inset + +. +\end_layout + +\begin_layout Standard +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +rexerc29[M25] +\end_layout + +\end_inset + +Give an example to show that the condition  +\begin_inset Formula $b^{p}\geq3$ +\end_inset + + is necessary in the previous exercise. +\end_layout + +\begin_layout Standard +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +answer  +\end_layout + +\end_inset + + +\begin_inset Note Greyedout +status open + +\begin_layout Plain Layout +(I had to look up the solution.)  +\end_layout + +\end_inset + +Here  +\begin_inset Formula $\text{round}(x)=2^{e}$ +\end_inset + + for the integer  +\begin_inset Formula $e$ +\end_inset + + such that  +\begin_inset Formula $|x-2^{e}|$ +\end_inset + + is smallest. 
+ If  +\begin_inset Formula $f(x)\coloneqq x^{99/100}$ +\end_inset + +, + then  +\begin_inset Formula $g(y)\coloneqq y^{100/99}$ +\end_inset + +. + Now, + for integer  +\begin_inset Formula $e$ +\end_inset + +, +\begin_inset Formula  +\begin{multline*} +\text{round}(f(2^{e}))=\text{round}(2^{e\cdot99/100})<2^{e}\iff(2^{99/100})^{e}<\frac{3}{4}2^{e}\iff\\ +\iff(2^{-1/100})^{e}<\frac{3}{4}\iff e>41. +\end{multline*} + +\end_inset + +Conversely, +\begin_inset Formula  +\begin{multline*} +\text{round}(g(2^{e}))=\text{round}(2^{e\cdot100/99})\leq2^{e}\iff(2^{100/99})^{e}<\frac{3}{2}2^{e}\iff\\ +\iff(2^{1/99})^{e}<\frac{3}{2}\iff e<58. +\end{multline*} + +\end_inset + +Thus, + if  +\begin_inset Formula $e\in\{42,\dots,58\}$ +\end_inset + +, +  +\begin_inset Formula $\hat{h}(2^{e})<2^{e}$ +\end_inset + +, + and it's easy to see that in fact  +\begin_inset Formula $\hat{h}(2^{e})=2^{e-1}$ +\end_inset + +. + Thus  +\begin_inset Formula $\hat{h}^{2}(2^{53})=2^{51}\neq2^{50}=\hat{h}^{3}(2^{53})$ +\end_inset + +. +\end_layout + +\end_body +\end_document | 
