diff options
| author | Juan Marín Noguera <juan@mnpi.eu> | 2025-05-16 22:19:22 +0200 |
|---|---|---|
| committer | Juan Marín Noguera <juan@mnpi.eu> | 2025-05-16 22:19:22 +0200 |
| commit | eaaeab40bcfd97cc6f01971843022c1401ca1852 (patch) | |
| tree | 7d2fb71859266ecd795c2103f1991432892d73bc | |
| parent | 4f670b750af5c11e1eac16d9cd8556455f89f46a (diff) | |
4.2.1 Floating Point Arithmetic: Single-Precision Calculations
| -rw-r--r-- | vol2/4.2.1.lyx | 589 | ||||
| -rw-r--r-- | vol2/index.lyx | 11 |
2 files changed, 600 insertions, 0 deletions
diff --git a/vol2/4.2.1.lyx b/vol2/4.2.1.lyx new file mode 100644 index 0000000..91b9f24 --- /dev/null +++ b/vol2/4.2.1.lyx @@ -0,0 +1,589 @@ +#LyX 2.4 created this file. For more info see https://www.lyx.org/ +\lyxformat 620 +\begin_document +\begin_header +\save_transient_properties true +\origin unavailable +\textclass book +\begin_preamble +\input defs +\end_preamble +\use_default_options true +\maintain_unincluded_children no +\language english +\language_package default +\inputencoding utf8 +\fontencoding auto +\font_roman "default" "default" +\font_sans "default" "default" +\font_typewriter "default" "default" +\font_math "auto" "auto" +\font_default_family default +\use_non_tex_fonts false +\font_sc false +\font_roman_osf false +\font_sans_osf false +\font_typewriter_osf false +\font_sf_scale 100 100 +\font_tt_scale 100 100 +\use_microtype false +\use_dash_ligatures true +\graphics default +\default_output_format default +\output_sync 0 +\bibtex_command default +\index_command default +\float_placement class +\float_alignment class +\paperfontsize default +\spacing single +\use_hyperref false +\papersize default +\use_geometry false +\use_package amsmath 1 +\use_package amssymb 1 +\use_package cancel 1 +\use_package esint 1 +\use_package mathdots 1 +\use_package mathtools 1 +\use_package mhchem 1 +\use_package stackrel 1 +\use_package stmaryrd 1 +\use_package undertilde 1 +\cite_engine basic +\cite_engine_type default +\biblio_style plain +\use_bibtopic false +\use_indices false +\paperorientation portrait +\suppress_date false +\justification true +\use_refstyle 1 +\use_formatted_ref 0 +\use_minted 0 +\use_lineno 0 +\index Index +\shortcut idx +\color #008000 +\end_index +\secnumdepth 3 +\tocdepth 3 +\paragraph_separation indent +\paragraph_indentation default +\is_math_indent 0 +\math_numbering_side default +\quotes_style english +\dynamic_quotes 0 +\papercolumns 1 +\papersides 1 +\paperpagestyle default +\tablestyle default +\tracking_changes false 
+\output_changes false +\change_bars false +\postpone_fragile_content false +\html_math_output 0 +\html_css_as_file 0 +\html_be_strict false +\docbook_table_output 0 +\docbook_mathml_prefix 1 +\end_header + +\begin_body + +\begin_layout Standard +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +exerc1[10] +\end_layout + +\end_inset + +How would Avogadro's number and Planck's constant (3) be represented in base 100, + excess 50, + four-digit floating point notation? + (This would be the representation used by +\family typewriter +MIX +\family default +, + as in (4), + when the byte size is 100.) +\end_layout + +\begin_layout Standard +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +answer +\end_layout + +\end_inset + + +\end_layout + +\begin_layout Standard +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +hfil +\end_layout + +\end_inset + +Avogadro's number +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +quad +\backslash +hfil +\end_layout + +\end_inset + +Planck's constant +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +quad +\backslash +hfil +\end_layout + +\end_inset + + +\end_layout + +\begin_layout Standard +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +hfil +\backslash +mixbox{ +\backslash +byte{+} +\backslash +byte{62} +\backslash +byte{60} +\backslash +byte{22} +\backslash +byte{14} +\backslash +byte{0}} +\end_layout + +\begin_layout Plain Layout + + +\backslash +hfil +\backslash +mixbox{ +\backslash +byte{+} +\backslash +byte{37} +\backslash +byte{66} +\backslash +byte{26} +\backslash +byte{10} +\backslash +byte{0}} +\end_layout + +\begin_layout Plain Layout + + +\backslash +hfil +\end_layout + +\end_inset + + +\end_layout + +\begin_layout Standard +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +rexerc4[16] +\end_layout + +\end_inset + +Assume that 
+\begin_inset Formula $b=10$ +\end_inset + +, + +\begin_inset Formula $p=8$ +\end_inset + +. + What result does Algorithm A give for +\begin_inset Formula $(50,+.98765432)\oplus(49,+.33333333)$ +\end_inset + +? + For +\begin_inset Formula $(53,-.99987654)\oplus(54,+.10000000)$ +\end_inset + +? + For +\begin_inset Formula $(45,-.50000001)\oplus(54,+.10000000)$ +\end_inset + +? +\end_layout + +\begin_layout Standard +\begin_inset Formula +\begin{align*} +(50,+.98765432)\oplus(49,+.33333333) & \to(50,+1.020987653)\to(51,+.10209877)\\ +(53,-.99987654)\oplus(54,+.10000000) & \to(54,+.000012346)\to(50,+.12346000)\\ +(45,-.50000001)\oplus(54,+.10000000) & \to(54,+.09999999949999999)\to(53,+.99999999) +\end{align*} + +\end_inset + + +\end_layout + +\begin_layout Standard +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +rexerc5[24] +\end_layout + +\end_inset + +Let us say that +\begin_inset Formula $x\sim y$ +\end_inset + + (with respect to a given radix +\begin_inset Formula $b$ +\end_inset + +) if +\begin_inset Formula $x$ +\end_inset + + and +\begin_inset Formula $y$ +\end_inset + + are real numbers satisfying the following conditions: +\begin_inset Formula +\begin{gather*} +\lfloor x/b\rfloor=\lfloor y/b\rfloor;\\ +x\bmod b=0\iff y\bmod b=0;\\ +0<x\bmod b<\tfrac{1}{2}b\iff0<y\bmod b<\tfrac{1}{2}b;\\ +x\bmod b=\tfrac{1}{2}b\iff y\bmod b=\tfrac{1}{2}b;\\ +\tfrac{1}{2}b<x\bmod b<b\iff\tfrac{1}{2}b<y\bmod b<b. +\end{gather*} + +\end_inset + +Prove that if +\begin_inset Formula $f_{v}$ +\end_inset + + is replaced by +\begin_inset Formula $b^{-p-2}F_{v}$ +\end_inset + + between steps A5 and A6 of Algorithm A, + where +\begin_inset Formula $F_{v}\sim b^{p+2}f_{v}$ +\end_inset + +, + the result of that algorithm will be unchanged. 
+ (If +\begin_inset Formula $F_{v}$ +\end_inset + + is an integer and +\begin_inset Formula $b$ +\end_inset + + is even, + this operation essentially truncates +\begin_inset Formula $f_{v}$ +\end_inset + + to +\begin_inset Formula $p+2$ +\end_inset + + places while remembering whether any nonzero digits have been dropped, + thereby minimizing the length of register that is needed for the addition in step A6.) +\end_layout + +\begin_layout Standard +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +answer +\end_layout + +\end_inset + +Note that the conditions are equivalent to saying that +\begin_inset Formula $\lfloor2x/b\rfloor=\lfloor2y/b\rfloor$ +\end_inset + + and that +\begin_inset Formula $2x\bmod b=0\iff2y\bmod b=0$ +\end_inset + +. + This would mean that +\begin_inset Formula $\lfloor2F_{v}/b\rfloor=\lfloor2b^{p+1}(b^{-p-2}F_{v})\rfloor=\lfloor2b^{p+1}f_{v}\rfloor$ +\end_inset + +, + and that +\begin_inset Formula $2b^{p+1}f_{v}\in\mathbb{Z}\iff2b^{p+1}(b^{-p-2}F_{v})\in\mathbb{Z}$ +\end_inset + +. + If +\begin_inset Formula $2b^{p+1}f_{v}\in\mathbb{Z}$ +\end_inset + +, + then +\begin_inset Formula $b^{-p-2}F_{v}=f_{v}$ +\end_inset + + and nothing changes. +\end_layout + +\begin_layout Standard +Now let's assume that +\begin_inset Formula $2b^{p+1}f_{v}\notin\mathbb{Z}$ +\end_inset + +, + and let's denote with +\begin_inset Formula $^{\prime}$ +\end_inset + + the values obtained by Algorithm A when substituting +\begin_inset Formula $f_{v}$ +\end_inset + + by +\begin_inset Formula $b^{-p-2}F_{v}$ +\end_inset + +. + Because +\begin_inset Formula $b^{p}f_{u}\in\mathbb{Z}$ +\end_inset + +, + we have +\begin_inset Formula $b^{p+2}f_{w}\sim b^{p+2}f'_{w}$ +\end_inset + + and +\begin_inset Formula $2b^{p+1}f_{w},2b^{p+1}f'_{w}\notin\mathbb{Z}$ +\end_inset + +. 
+ This means that, + in step N1, + +\begin_inset Formula $|f|\geq1\iff|f'|\geq1$ +\end_inset + + and +\begin_inset Formula $f\neq0$ +\end_inset + +, + and N4, + if run, + preserves these conditions. + It also means that step N3 runs at most once, + as the opposite would imply that +\begin_inset Formula $|bf'_{w}|<1/b$ +\end_inset + +, + which happens if and only if +\begin_inset Formula $|bf_{w}|<1/b$ +\end_inset + +, + if and only if +\begin_inset Formula $|f_{w}|<1/b^{2}$ +\end_inset + +, + but +\begin_inset Formula $f_{u}\geq.1$ +\end_inset + + because inputs are normalized, + so this would imply +\begin_inset Formula $f_{v}>.09$ +\end_inset + + and +\begin_inset Formula $e_{u}-e_{v}\leq1$ +\end_inset + +, + so +\begin_inset Formula $f_{w}$ +\end_inset + + would have at most +\begin_inset Formula $p+1$ +\end_inset + + digits and +\begin_inset Formula $2b^{p+1}f_{w}\in\mathbb{Z}\#$ +\end_inset + +. + Therefore N2 produces the same results for +\begin_inset Formula $f$ +\end_inset + + and +\begin_inset Formula $f'$ +\end_inset + + every time it runs and, + when we reach step N5, + +\begin_inset Formula $b^{p+1}f\sim b^{p+1}f'$ +\end_inset + + and +\begin_inset Formula $2b^{p}f,2b^{p}f'\notin\mathbb{Z}$ +\end_inset + +, + so step N5 produces the same result in both cases and the rest of the algorithm runs over the same state and produces the same result. +\end_layout + +\begin_layout Standard +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +rexerc11[M20] +\end_layout + +\end_inset + +Give an example of normalized, + excess 50, + eight-digit floating decimal numbers +\begin_inset Formula $u$ +\end_inset + + and +\begin_inset Formula $v$ +\end_inset + + for which rounding overflow occurs in multiplication. 
+\end_layout + +\begin_layout Standard +\begin_inset ERT +status open + +\begin_layout Plain Layout + + +\backslash +answer +\end_layout + +\end_inset + +An illustrative example would be +\begin_inset Formula +\[ +(50,.99999990)\otimes(50,.10000001). +\] + +\end_inset + +The result would be +\begin_inset Formula $(50,.0999999999999990)$ +\end_inset + +, + which becomes +\begin_inset Formula $(50,.10000000)$ +\end_inset + + after shifting left in N3 and then right in N4 after N5 rounds up to +\begin_inset Formula $(49,1.)$ +\end_inset + +. +\end_layout + +\end_body +\end_document diff --git a/vol2/index.lyx b/vol2/index.lyx index 816a76b..fba036c 100644 --- a/vol2/index.lyx +++ b/vol2/index.lyx @@ -997,6 +997,17 @@ Floating Point Arithmetic Single-Precision Calculations \end_layout +\begin_layout Standard +\begin_inset CommandInset include +LatexCommand input +filename "4.2.1.lyx" +literal "false" + +\end_inset + + +\end_layout + \begin_layout Subsection Accuracy of Floating Point Arithmetic \end_layout |
