\include{parameters}
\usetheme{AFNIC}
\usepackage[english]{babel}
\usepackage[latin1]{inputenc}
\usepackage{bortzmeyer-utils}

\title{Monitoring DNSSEC, not everything is perfect, yet}
\author{Stéphane Bortzmeyer\\AFNIC\\\texttt{bortzmeyer@nic.fr}}
\date{SATIN, 4 April 2011}

%\setlength{\parskip}{1ex plus 0.5ex minus 0.2ex} 
% \setlength{\parskip}{15pt} 
\setlength{\parskip}{15pt plus 10pt minus 10pt} 

\begin{document}

\maketitle

\begin{frame}
  \titlepage
\end{frame}

\begin{frame}
\frametitle{DNSSEC shakes monitoring}
\begin{enumerate}
\item<1-> We all know that a serious DNS zone must be monitored
continuously and automatically...
\item<2-> Many tests were not done before the introduction of DNSSEC,
for instance a clean path for all sizes of packets (my talk at the
OARC workshop in Denver),
\item<3-> DNSSEC-specific tests are typically far from complete,
leading to embarrassing publications of failures on public mailing
lists,
\item<4-> Some tests detect failures only when too late (signature expiration).
\end{enumerate}
\end{frame}

\begin{frame}
\frametitle{Example in .FR}
\begin{enumerate}
\item<2-> November 2010: key deletion issue, zone no longer signed,
monitoring did not detect it,
\item<3-> 12 February 2011: ``TYPE65534'' bug. Invalid signature on an
NSEC3 record. The monitoring was only done on the apex, which was
correct. But requests for unsigned sub-domains failed.
\item<4-> 13 March 2011: ``Missing signature'' bug. The SOA record was
no longer signed. This time, the monitor detected it (good reason to
monitor several types).
\end{enumerate}
\end{frame}

\begin{frame}
\frametitle{The specific case of key rollovers}
\begin{block}<1->{Taboo}{Do we really need to do these complicated
rollovers? We break many things to solve a security problem which is
quite far away.}
\end{block}
\begin{block}<2->{Anyway,}
{Without caching, key rollovers would be very simple. But without
caching, would the DNS still work?}
\end{block}
\end{frame}

\begin{frame}
\frametitle{Rollovers need to be aware of caching}
\includegraphics[scale=0.55]{timing.png}
\end{frame}

\begin{frame}
\frametitle{Caching is per set, not per record}
\includegraphics[scale=0.55]{timing-keyset.png}
\end{frame}

\begin{frame}
\frametitle{Time-aware monitoring}
\begin{block}{Because of caching, monitoring has to take time into
account.}
{The monitor needs a memory, to remember what was done and when.}
\end{block}
\end{frame}

\begin{frame}
\frametitle{What do we store}
Everything is obtained from authoritative name servers, for freshness.
\begin{itemize}
\item Signatures of SOA, NS and DNSKEY (discussion welcome), with their TTL, 
\item Keys,
\item Keysets, with their TTL.
\end{itemize}
\end{frame}

\begin{frame}
\frametitle{What do we compute}
This tool focuses on one thing: timing in key rollovers. Not a substitute
for comprehensive monitoring. We check:
\begin{enumerate}
\item That every ``potentially in caches'' signature has a published
key,
\item That every published signature has a key which is in the
keyset(s) that is(are) in all the caches.
\end{enumerate}
\end{frame}

\begin{frame}[fragile]
\frametitle{Example of signatures}
\begin{info}
sqlite> SELECT first_seen,last_seen,ttl FROM Signatures 
          WHERE type=6 AND name='192.in-addr.arpa.' 
                AND key_tag=20918 ORDER BY last_seen DESC;
2011-03-28 17:29:30|2011-03-28 20:17:31|86400
2011-03-28 13:22:23|2011-03-28 16:25:05|86400
2011-03-28 09:19:59|2011-03-28 12:28:09|86400
\end{info}
\end{frame}

\begin{frame}[fragile]
\frametitle{Example of keysets}
\begin{info}
sqlite> SELECT first_seen,last_seen,ttl,id FROM Keysets 
          WHERE  name='192.in-addr.arpa.' ORDER BY last_seen DESC;
2011-03-29 09:38:45|2011-03-31 08:30:30|14400|J/dCsFib6kxRer/O/eh1ZbI/Un8=
2011-03-21 21:39:09|2011-03-29 08:38:16|14400|NgM4JKT7QacTgX+ZF7bNo2owKjQ=
\end{info}
\end{frame}

\begin{frame}[fragile]
\frametitle{Example of keys}
\begin{info}
sqlite> SELECT first_seen,last_seen,key_tag FROM Keys 
          WHERE  name='192.in-addr.arpa.' ORDER BY last_seen DESC;
2011-03-01 15:34:17|2011-03-31 08:30:30|39318
2011-03-21 21:39:09|2011-03-31 08:30:30|60494
2011-03-01 15:34:17|2011-03-29 08:38:16|20918
\end{info}
\end{frame}

\begin{frame}
\frametitle{The observed domains and the results}
\begin{itemize}
\item 54 domains monitored, mostly serious domains (TLDs, important
          sub-domains like \computer{isoc.org}),
\item In two months, seven problems detected, including two TLDs,
\item Six of the problems were a key retired too soon. (Only one was
a new key used too early.) 
\end{itemize}
\end{frame}

\begin{frame}[fragile]
\frametitle{An example: 192.in-addr.arpa}
\begin{info}
% ./examine-history.py 192.in-addr.arpa             
ERROR: signature of zone 192.in-addr.arpa. 
        last seen at 2011-03-28 20:17:31 (with a TTL of 86400) 
        while the key 20918 was retired at 2011-03-29 09:23:54
\end{info}
The key was withdrawn 11 hours before it was safe to do so. 
\end{frame}

\begin{frame}
\frametitle{An example: isoc.org}
\includegraphics[scale=0.55]{isoc-org.png}
\end{frame}

\begin{frame}
\frametitle{All the glitches}
\begin{tabular}{|l|c|c|c|}
\hline
\textbf{Zone} & \textbf{Date} & \textbf{Glitch} & \textbf{Window}\\
\hline
\cline{1-4}
isoc.org&2011-03-29&retired too early&11h \\
\hline
\cline{1-4}
192.in-addr.arpa&2011-03-28&retired too early&14h \\
\hline
\cline{1-4}
my&2011-03-26&retired too early&24h \\
\hline
\cline{1-4}
bg&2011-03-19&retired too early&72h \\
\hline
\cline{1-4}
isoc.org&2011-03-01&retired too early&11h \\
\hline
\cline{1-4}
noaa.gov&2011-02-18&used too early&24h \\
\hline
\cline{1-4}
noaa.gov&2011-02-18&retired too early&24h \\
\hline
\end{tabular}
\end{frame}

\begin{frame}
\frametitle{Conclusions}
\begin{itemize}
\item The tools for key rollovers are not stable yet,
\item More monitoring would be a good idea,
\item DNSSEC is a sensitive thing: handle with care. Do not put into
the hands of children.
\end{itemize}
\end{frame}

\end{document}

