Browse Source

Asymptotic notation and other fixes

master
Daniel Hillerström 5 years ago
parent
commit
28fd0505ce
  1. 2
      macros.tex
  2. 21
      thesis.bib
  3. 239
      thesis.tex

2
macros.tex

@ -4,12 +4,14 @@
\newcommand{\defas}[0]{\mathrel{\overset{\makebox[0pt]{\mbox{\normalfont\tiny\text{def}}}}{=}}}
\newcommand{\defnas}[0]{\mathrel{:=}}
\newcommand{\simdefas}[0]{\mathrel{\overset{\makebox[0pt]{\mbox{\normalfont\tiny\text{def}}}}{\simeq}}}
\newcommand{\adef}[0]{\mathrel{\overset{\makebox[0pt]{\mbox{\normalfont\tiny{\text{$\alpha$-def}}}}}{\simeq}}}
%%
%% Some useful maths abbreviations
%%
\newcommand{\C}{\ensuremath{\mathbb{C}}}
\newcommand{\N}{\ensuremath{\mathbb{N}}}
\newcommand{\R}{\ensuremath{\mathbb{R}}}
\newcommand{\Z}{\ensuremath{\mathbb{Z}}}
\newcommand{\B}{\ensuremath{\mathbb{B}}}
\newcommand{\BB}[1]{\ensuremath{\mathbf{#1}}}

21
thesis.bib

@ -3507,6 +3507,18 @@
year = {1992}
}
@inproceedings{TofteT94,
author = {Mads Tofte and
Jean{-}Pierre Talpin},
title = {Implementation of the Typed Call-by-Value lambda-Calculus using a
Stack of Regions},
booktitle = {{POPL}},
pages = {188--201},
publisher = {{ACM} Press},
year = {1994}
}
@article{TofteT97,
author = {Mads Tofte and
Jean{-}Pierre Talpin},
@ -3706,3 +3718,12 @@
publisher = {Springer},
year = {2014}
}
# Asymptotic notation
@book{Bachmann94,
author = {Paul Bachmann},
title = {Die Analytische Zahlentheorie},
publisher = {Teubner},
pages = {1--494},
year = 1894
}

239
thesis.tex

@ -2282,26 +2282,54 @@ whenever the function $f' : \dom(f) \to \dec{cod}(f)$, obtained by
restricting $f$ to its domain, is injective, surjective, and bijective
respectively.
\section{Universal algebra}
\label{sec:universal-algebra}
\section{Asymptotic notation}
\label{sec:asymp-not}
Universal algebra studies \emph{algebraic theories}.
\begin{definition}[Operations]
Asymptotic notation is a compact notational framework for comparing
the order of growth of functions, which abstracts away any constants
involved~\cite{Bachmann94}. We will use the asymptotic notation for
both runtime and space analyses of functions.
\begin{definition}[Upper bound]
We say that a function $f : \N \to \R$ is of order \emph{at most}
$g : \N \to \R$, and write $f = \BigO(g)$, if there is a positive
constant $c \in \R$ and a positive natural number $n_0 \in \N$ such that
%
\[
f(n) \leq c \cdot g(n),\quad \text{for all}~ n \geq n_0.
\]
%
\end{definition}
\begin{definition}[Algebraic theory]\label{def:algebra}
%
We will extend the notation to permit $f(n) = \BigO(g(n))$.
%
We will often abuse notation and use the body of an anonymous function
in place of $g$, e.g. $\BigO(\log~n)$ means $\BigO(n \mapsto \log~n)$
and stands for a function whose values are bounded above by a
logarithmic factor (in this dissertation logarithms are always in base
$2$). If $f = \BigO(\log~n)$ then we say that the order of growth of
$f$ is logarithmic; if $f = \BigO(n)$ we say that its order of growth
is linear; if $f = \BigO(n^k)$ for some $k \in \N$ we say that the
order of growth is polynomial; and if $f = \BigO(2^n)$ then the order
of growth of $f$ is exponential.
%
It is important to note, though, that we write $f = \BigO(1)$ to mean
that the values of $f$ are bounded above by some constant; here $1$
does not denote the natural number $1 \in \N$, but rather a family of
constant functions of type $\N \to \R$. So if $f = \BigO(1)$ then we
say that the order of growth of $f$ is constant.
\begin{definition}[Lower bound]
We say that a function $f : \N \to \R$ is of order \emph{at least}
$g : \N \to \R$, and write $f = \Omega(g)$, if there is a positive
constant $c \in \R$ and a positive natural number $n_0 \in \N$ such
that
%
\[
f(n) \geq c \cdot g(n),\quad \text{for all}~n \geq n_0.
\]
\end{definition}
\section{Algebraic effects and their handlers}
\label{sec:algebraic-effects}
See \citeauthor{Bauer18}'s tutorial~\cite{Bauer18} for an excellent
thorough account of the mathematics underpinning algebraic effects and
handlers.
\section{Typed programming languages}
\label{sec:pls}
%
@ -4698,7 +4726,13 @@ of kinds and types in
Section~\ref{sec:base-language-types}. Subsequently in
Section~\ref{sec:base-language-terms} we present the term syntax,
before presenting the formation rules for types in
Section~\ref{sec:base-language-type-rules}.
Section~\ref{sec:base-language-type-rules}. As a convention, we always
work up to $\alpha$-conversion~\cite{Church32} of types and
terms. Following \citet{Pierce02} we omit cases in definitions that
deal only with the bureaucracy of renaming. For any transformation
$\sembr{-}$ on a term (or type) $M$, we write $\sembr{M} \adef M'$ to
mean that $M'$ is the result of transforming $M$, where implicit
renaming may have occurred.
% Typically the presentation of a programming language begins with its
% syntax. If the language is typed there are two possible starting
@ -4847,7 +4881,7 @@ type, i.e. $\UnitType \defas \Record{\cdot}$.
% $\Read:\Pre{\Int},\Write:\Abs,\cdot \equiv_{\mathrm{row}}
% \Read:\Pre{\Int},\cdot$.
For brevity, we shall often write $\ell : A$
to mean $\ell : \Pre{A}$ and omit $\cdot$ for closed rows.
to mean $\ell : \Pre{A}$. % and omit $\cdot$ for closed rows.
%
\begin{figure}
@ -4984,8 +5018,7 @@ $\TyVarCat$, to be generated by:
\paragraph{Free type variables} Sometimes we need to compute the free
type variables ($\FTV$) of a given type. To this end we define a
metafunction $\FTV$ by induction on the type structure, $T$, and
point-wise on type environments, $\Gamma$. Note that we always work up
to $\alpha$-conversion~\cite{Church32} of types.
point-wise on type environments, $\Gamma$.
%
\[
\ba[t]{@{~}l@{~~~~~~}c@{~}l}
@ -5065,7 +5098,7 @@ structure as follows.
\begin{eqs}
(A \eff E)\sigma &\defas& A\sigma \eff E\sigma\\
(A \to C)\sigma &\defas& A\sigma \to C\sigma\\
(\forall \alpha^K.C)\sigma &\simdefas& \forall \alpha^K.C\sigma\\
(\forall \alpha^K.C)\sigma &\adef& \forall \alpha^K.C\sigma\\
\alpha\sigma &\defas& \begin{cases}
A & \text{if } (\alpha,A) \in \sigma\\
\alpha & \text{otherwise}
@ -5120,7 +5153,7 @@ singleton open effect row $\{\varepsilon\}$, meaning it may be used in
an effectful context. By contrast, the former type may only be used in
a strictly pure context, i.e. the effect-free context.
%
\dhil{Maybe say something about reasoning effect types}
%\dhil{Maybe say something about reasoning effect types}
%
We can use the effect system to give precise types to effectful
@ -5128,7 +5161,7 @@ computations. For example, we can give the signature of some
polymorphic computation that may only be run in a read-only context
%
\[
\forall \alpha^\Type, \varepsilon^{\Row_{\{\Read,\Write\}}}. \alpha \eff \{\Read:\Int,\Write:\Abs,\varepsilon\}.
\forall \alpha^\Type, \varepsilon^{\Row_{\{\Read,\Write\}}}. \alpha \eff \{\Read:\Int;\Write:\Abs;\varepsilon\}.
\]
%
The effect row comprises a nullary $\Read$ operation returning some
@ -5144,10 +5177,10 @@ operations, because the row ends in an effect variable.
The type and effect system is also precise about how a higher-order
function may use its function arguments. For example consider the
signature of a map-operation over some datatype such as
$\dec{Option}~\alpha^\Type \defas [\dec{None};\dec{Some}:\alpha]$
$\Option~\alpha^\Type \defas [\None;\Some:\alpha;\cdot]$
%
\[
\forall \alpha^\Type,\beta^\Type,\varepsilon^{\Row_\emptyset}. \Record{\alpha \to \beta \eff \{\varepsilon\}, \dec{Option}~\alpha} \to \dec{Option}~\beta \eff \{\varepsilon\}.
\forall \alpha^\Type,\beta^\Type,\varepsilon^{\Row_\emptyset}. \Record{\alpha \to \beta \eff \{\varepsilon\}; \Option~\alpha;\cdot} \to \Option~\beta \eff \{\varepsilon\}.
\]
%
% The $\dec{map}$ function for
@ -5172,7 +5205,7 @@ e.g. modify their effect rows. The following is the signature of a
higher-order function which restricts its argument's effect context
%
\[
\forall \alpha^\Type, \varepsilon^{\Row_{\{\Read\}}},\varepsilon'^{\Row_\emptyset}. (\UnitType \to \alpha \eff \{\Read:\Int,\varepsilon\}) \to (\UnitType \to \alpha \eff \{\Read:\Abs,\varepsilon\}) \eff \{\varepsilon'\}.
\forall \alpha^\Type, \varepsilon^{\Row_{\{\Read\}}},\varepsilon'^{\Row_\emptyset}. (\UnitType \to \alpha \eff \{\Read:\Int;\varepsilon\}) \to (\UnitType \to \alpha \eff \{\Read:\Abs;\varepsilon\}) \eff \{\varepsilon'\}.
\]
%
The function argument is allowed to perform a $\Read$ operation,
@ -5183,7 +5216,27 @@ the (right-most) effect row is a singleton row containing a distinct
effect variable $\varepsilon'$.
\paragraph{Syntactic sugar}
Detail the syntactic sugar\dots
Explicitly writing down all of the kinding and type annotations is a
bit on the heavy side. In order to simplify the notation of our future
examples we are going to adopt a few conventions. First, we shall not
write kind annotations, when the kinds can unambiguously be inferred
from context. Second, we do not write quantifiers in prenex
position. Type variables that appear unbound in a signature are
implicitly understood to be bound at the outermost level of the type
(this convention is commonly used by practical programming languages,
e.g.\ SML~\cite{MilnerTHM97} and
Haskell~\cite{JonesABBBFHHHHJJLMPRRW99}). Third, we shall adopt the
convention that the row types for closed records and variants are
implicitly understood to end in a $\cdot$, whereas for effect rows we
shall adopt the opposite convention that an effect row is implicitly
understood to be open and ending in a fresh $\varepsilon$ unless it
ends in an explicit $\cdot$. In Section~\ref{sec:effect-sugar} we will
elaborate more on the syntactic sugar for effects. The rationale for
these conventions is that they align with an ML programmer's intuition
for monomorphic record and variant types, and in this dissertation
records and variants will often be monomorphic. Conversely, effect
rows will most often be open.
\subsection{Terms}
\label{sec:base-language-terms}
@ -5573,7 +5626,7 @@ that the binder $x : A$.
\slab{Evaluation contexts} & \mathcal{E} \in \EvalCat &::=& [~] \mid \Let \; x \revto \mathcal{E} \; \In \; N
\end{syntax}
%
\dhil{Describe evaluation contexts as functions: decompose and plug.}
%\dhil{Describe evaluation contexts as functions: decompose and plug.}
%
%%\[
% Evaluation context lift
@ -5631,7 +5684,7 @@ follows.
V & \text{if } (x, V) \in \sigma\\
x & \text{otherwise}
\end{cases}\\
(\lambda x^A.M)\sigma &\simdefas& \lambda x^A.M\sigma\\
(\lambda x^A.M)\sigma &\adef& \lambda x^A.M\sigma\\
(\Lambda \alpha^K.M)\sigma &\defas& \Lambda \alpha^K.M\sigma\\
(V~T)\sigma &\defas& V\sigma~T
\end{eqs}
@ -5644,15 +5697,15 @@ follows.
\end{eqs}\bigskip\\
\multicolumn{3}{c}{
\begin{eqs}
(\Let\;\Record{\ell = x; y} = V\;\In\;N)\sigma &\simdefas& \Let\;\Record{\ell = x; y} = V\sigma\;\In\;N\sigma\\
(\Let\;\Record{\ell = x; y} = V\;\In\;N)\sigma &\adef& \Let\;\Record{\ell = x; y} = V\sigma\;\In\;N\sigma\\
(\Case\;(\ell~V)^R\{
\ell~x \mapsto M
; y \mapsto N \})\sigma
&\simdefas&
&\adef&
\Case\;(\ell~V\sigma)^R\{
\ell~x \mapsto M\sigma
; y \mapsto N\sigma \}\\
(\Let\;x \revto M \;\In\;N)\sigma &\simdefas& \Let\;x \revto M[V/y] \;\In\;N\sigma
(\Let\;x \revto M \;\In\;N)\sigma &\adef& \Let\;x \revto M[V/y] \;\In\;N\sigma
\end{eqs}}
\ea
\]
@ -5676,11 +5729,11 @@ require a change of name of the bound type variable
(\ell~V)^R\sigma \defas (\ell~V\sigma)^{R\sigma.2}\medskip\\
\begin{eqs}
(\Lambda \alpha^K.M)\sigma &\simdefas& \Lambda \alpha^K.M\sigma\\
(\Lambda \alpha^K.M)\sigma &\adef& \Lambda \alpha^K.M\sigma\\
(\Case\;(\ell~V)^R\{
\ell~x \mapsto M
; y \mapsto N \})\sigma
&\simdefas&
&\adef&
\Case\;(\ell~V\sigma)^{R\sigma.2}\{
\ell~x \mapsto M\sigma
; y \mapsto N\sigma \}.
@ -5777,8 +5830,8 @@ We begin by showing that substitutions preserve typability.
By induction on the typing derivations.
\end{proof}
%
\dhil{It is clear to me at this point, that I want to coalesce the
substitution functions. Possibly define them as maps rather than ordinary functions.}
% \dhil{It is clear to me at this point, that I want to coalesce the
% substitution functions. Possibly define them as maps rather than ordinary functions.}
The reduction semantics satisfy a \emph{unique decomposition}
property, which guarantees the existence and uniqueness of complete
@ -5858,7 +5911,7 @@ meaning that every closed computation term reduces to a trivial
computation term. In other words, any realisable function in $\BCalc$
is effect-free and total.
%
\begin{claim}[Type soundness]
\begin{claim}[Type soundness]\label{clm:soundness}
If $\typ{}{M : C}$, then there exists $\typ{}{N : C}$ such that
$M \reducesto^\ast N \not\reducesto$ and $N$ is normal.
\end{claim}
@ -5908,18 +5961,9 @@ The reduction semantics are also standard.
Every occurrence of $f$ in $M$ is replaced by the recursive abstractor
term, while every $x$ in $M$ is replaced by the value argument $V$.
The introduction of recursion means we obtain a slightly weaker
progress theorem as some programs may now diverge.
%
\begin{theorem}[Progress]
\label{thm:base-rec-language-progress}
Suppose $\typ{}{M : C}$, then $M$ is normal or there exists
$\typ{}{N : C}$ such that $M \reducesto^\ast N$.
\end{theorem}
%
\begin{proof}
Similar to the proof of Theorem~\ref{thm:base-language-progress}.
\end{proof}
The introduction of recursion means that the claimed type soundness
theorem (Claim~\ref{clm:soundness}) no longer holds as some programs
may now diverge.
\subsection{Tracking divergence via the effect system}
\label{sec:tracking-div}
@ -5951,10 +5995,10 @@ effect signature does not alert us about the potential divergence. In
fact, in this particular instance the effect row on the computation
type is empty, which might deceive the doctrinaire to think that this
function is `pure'. Whether this function is pure or impure depends on
the precise notion of purity -- which we have yet to choose. In
Section~\ref{sec:notions-of-purity} we shall make clear the notion of
purity that we have mind, however, first let us briefly illustrate how
we might utilise the effect system to track divergence.
the precise notion of purity -- which we have yet to choose. Shortly
we shall make clear the notion of purity that we have in mind; however,
first let us briefly illustrate how we might utilise the effect system
to track divergence.
The key to tracking divergence is to modify the \tylab{Rec} to inject
some primitive operation into the effect row.
@ -5998,12 +6042,12 @@ suspended computation.
\begin{mathpar}
\inferrule*[Right={\tylab{Lam}}]
{\inferrule*[Right={\tylab{App}}]
{\typ{\emptyset;\Gamma,\Unit:\Record{}}{\dec{fac} : \Int \to \Int \eff \{\dec{Div}:\ZeroType\}}\\
\typ{\emptyset;\Gamma,\Unit:\Record{}}{3 : \Int}
{\typ{\emptyset;\Gamma,\Unit:\UnitType}{\dec{fac} : \Int \to \Int \eff \{\dec{Div}:\ZeroType\}}\\
\typ{\emptyset;\Gamma,\Unit:\UnitType}{3 : \Int}
}
{\typc{\emptyset;\Gamma,\Unit:\Record{}}{\dec{fac}~3 : \Int}{\{\dec{Div}:\ZeroType\}}}
{\typc{\emptyset;\Gamma,\Unit:\UnitType}{\dec{fac}~3 : \Int}{\{\dec{Div}:\ZeroType\}}}
}
{\typ{\emptyset;\Gamma}{\lambda\Unit.\dec{fac}~3} : \Unit \to \Int \eff \{\dec{Div}:\ZeroType\}}
{\typ{\emptyset;\Gamma}{\lambda\Unit.\dec{fac}~3} : \UnitType \to \Int \eff \{\dec{Div}:\ZeroType\}}
\end{mathpar}
%
The information that the computation applies a possibly divergent
@ -6024,6 +6068,30 @@ non-divergent such as the $\Rec$-variation of the identity function:
$\Rec\;f\,x.x$. A practical implementation could utilise a static
termination checker~\cite{Walther94} to obtain more fine-grained
tracking of divergence.
The question remains as to whether we should track divergence. In this
dissertation I choose not to track divergence in the effect
system. This choice is a slight departure from the real implementation
of Links~\cite{LindleyC12}. However, the focus of this part of the
dissertation is on programming with \emph{user-definable computational
effects}. Recursion is not a user-definable effect in our setting,
and therefore, we may regard divergence information as adding noise to
effect signatures. This choice ought not to alienate programmers as it
aligns with, say, the notion of purity employed by
Haskell~\cite{JonesABBBFHHHHJJLMPRRW99,Sabry98}.
% A
% solemnly sworn pure functional programmer might respond with a
% resounding ``\emph{yes!}'', whilst a pragmatic functional programmer
% might respond ``\emph{it depends}''. I side with the latter. My take
% is that, on one hand if we find ourselves developing safety-critical
% systems, a static approximation of the dynamic behaviour of functions
% can be useful, and thus, we ought to track divergence (and other
% behaviours). On the other hand, if we find ourselves doing general
% purpose programming, then it may be situational useful to know whether
% some function may exhibit divergent behaviour
% By fairly lightweight means we can obtain a finer analysis of
% $\Rec$-definitions by simply having an additional typing rule for
% the application of $\Rec$.
@ -6039,18 +6107,35 @@ tracking of divergence.
% \end{mathpar}
% %
\subsection{Notions of purity}
\label{sec:notions-of-purity}
The term `pure' is heavily overloaded in the programming literature.
%
\dhil{In this thesis we use the Haskell notion of purity.}
% \subsection{Notions of purity}
% \label{sec:notions-of-purity}
% The term `pure' is heavily overloaded in the programming literature.
% %
% \dhil{In this thesis we use the Haskell notion of purity.}
\section{Related work}
\paragraph{Row polymorphism} Row polymorphism was originally
introduced by \citet{Wand87} as a typing discipline for extensible
records. The style of row polymorphism used in this chapter was
originally developed by \citet{Remy94}.
records. The style of row polymorphism used in this chapter is due to
\citet{Remy94}. It was designed to work well with type inference as
typically featured in the ML-family of programming
languages. \citeauthor{Remy94} also describes a slight variation of
this system, where the presence polymorphism annotations may depend on
a concrete type, e.g. $\ell : \theta.\Int;R$ means that the label is
polymorphic in its presence; however, if it is present, then it has
presence type $\Int$.
Both of \citeauthor{Remy94}'s row systems have set semantics, i.e.\ a
row cannot contain duplicated labels. An alternative semantics based
on dictionaries is used by \citet{Leijen05}. In
\citeauthor{Leijen05}'s system labels may be duplicated, which
introduces a form of scoping for labels, making it possible, for
example, to shadow fields in a record. There is no notion of presence
information in \citeauthor{Leijen05}'s system, and as a result
\citeauthor{Leijen05}-style rows simplify the overall type structure.
\citet{Leijen14} has used this system as the basis for the effect
system of Koka.
\citet{MorrisM19} have developed a unifying theory of rows, which
collects the aforementioned row systems under one umbrella. Their
@ -6058,13 +6143,23 @@ system provides a general account of record extension and projection,
and dually, variant injection and branching.
\paragraph{Effect tracking} As mentioned in
Section~\ref{sec:back-to-directstyle} the original effect system
was developed by \citet{LucassenG88} to provide a lightweight facility
for static concurrency analysis. \citet{NielsonN99} \citet{TofteT97}
\citet{BentonK99} \citet{BentonB07}
%
\citet{LindleyC12} used the effect system presented in this chapter to
support abstraction for database programming in Links.
Section~\ref{sec:back-to-directstyle} the original effect system was
developed by \citet{LucassenG88} to provide a lightweight facility for
static concurrency analysis. Since then effect systems have been
employed to perform a variety of static analyses,
e.g. \citet{TofteT94,TofteT97} describe a region-based memory
management system that makes use of a type and effect system to infer
and track lifetimes of regions; \citet{BentonK99} use a monadic effect
system to identify opportunities for optimisations in the intermediate
language of their ML to Java compiler;
%
and \citet{LindleyC12} use a variation of the row system presented in
this chapter to support abstraction and predictable code generation for
database programming in Links. Row types are used to give structural
types to SQL rows in queries, whilst their effect system is used to
differentiate between \emph{tame} and \emph{wild} functions, where a
tame function is one whose body can be translated and run directly on
the database, whereas a wild function cannot.
\chapter{Effect handler oriented programming}
@ -10189,7 +10284,7 @@ programming~\cite{DolanWM14,DolanWSYM15}. The current incarnation
features untracked nominal effects and deep handlers with single-use
resumptions.
\dhil{Possibly move to the introduction or background}
%\dhil{Possibly move to the introduction or background}
\paragraph{Effect-driven concurrency}
In their tutorial of the Eff programming language \citet{BauerP15}
@ -11951,7 +12046,7 @@ two reduction rules.
& f\,W\,(\dRecord{fs, h} \dcons \dhk)
\end{reductions}
%
\dhil{Say something about skip frames?}
%\dhil{Say something about skip frames?}
%
The first rule describes what happens when the pure continuation is
exhausted and the return clause of the enclosing handler is

Loading…
Cancel
Save