% -*- mode: Noweb; noweb-code-mode: icon-mode -*-
\title{htmltoc -- A Table of Contents Generater for HTML}
\author{stephen parker\thanks{Revisions by Norman Ramsey}\\
\texttt{stephen@stephen.uk.eu.org}}
\tableofcontents
\section{Introduction}
\texttt{htmltoc} reads an HTML document on standard input and writes the
same document, but optionally including a table of contents on output.
The motivation for this program was Norman Ramsey's \texttt{htmltoc}
program included with the \texttt{noweb} distribution which this program
has no advantages over other than not being written in perl.
% noweave mangles the references in rawhtml blocks if you're not
% careful. don't ask me why.
\begin{rawhtml}
This html document is created from a noweb
source
document which also produces the notangled
icon source.
The mkfile to control
this is also available; if you haven't got mk then it shouldn't be too
hard to turn it into a Makefile.
\end{rawhtml}
\section{Implementation}
The Icon source follows the usual pattern.
<<*>>=
<>
<>
<>
<>
@ Any text between the marks [[""]] and
[[""]] is replaced on output with a table
of contents for the document.
<>=
$define sentinel ""
$define sentinel_end ""
@ The TOC is created from the header items in the text. These
are stored in an [[ioc]] record. The headers are sequentially
numbered and the level of the heading and the heading text are
stored in a list of [[ioc]]s named [[contents]].
<>=
record ioc(number, level, text)
<>=
global contents
@ HTMLtoc must make two passes through the file in order to
generate a TOC before the actual material. Since we'd like
HTMLtoc to run in a pipeline we actually store the entire
input in a list named [[file]].
<>=
procedure readfile(f)
local old_contents
intoc := &null
contents := []
old_contents := []
no := 0
file := list()
while l := read(f) do l ? { <> }
if \intoc then {
push(old_contents, sentinel_end)
every l := !copy(old_contents) do l ? { <> }
}
return file
end
@ The variable [[intoc]] is [[1]] if we are between the TOC markers
on input. [[no]] is incremented for each header read.
<>=
if =sentinel then {
intoc := "in table of contents"
put(file, l)
} else if =sentinel_end then {
intoc := &null
put(file, l)
} else if \intoc then {
put(old_contents, l)
} else if /intoc then {
if <<[[l]] includes a header>> then {
<>
} else
put(file, l)
}
@ We look for a string of the form \verb|<[Hh][0-9]>| which
signifies the beginning of a header. This isn't nice Icon.
<<[[l]] includes a header>>=
(pre := tab(upto('<')), ="<", skip(), tab(any('hH')), n := tab(many(&digits)),
skip(), =">")
@ [[skip]] skips white space, if present.
<>=
procedure skip()
suspend tab(many(' \t')) | ""
end
@ The scanning environment contains a header, the
\verb|<[Hh][0-9]>| has already been consumed, so
the text of the header can by found by scanning for the close of header.
The header number,
[[no]] is incremented and then [[addcontents()]] is
called to add the item to the [[contents]] list.
<>=
if (c := tab(upto('<')), ="", skip(), tab(any('hH')), =n, skip(), =">") then {
no +:= 1
c ? c := strip_toc_anchors() || tab(0)
addcontents(no, n, c)
<>
} else
write(&errout, "Unterminated ", tab(0), "...")
@ We also need to label the heading in the text, so
we can reference it from the TOC.
<>=
put(file, pre || "" || c ||
"" || tab(0))
@ When stripping these, we rely on their having been put there by this
program.
<>=
procedure strip_toc_anchors()
local s, p
if (="", s := strip_toc_anchors(), ="") then
return s
else {
s := to_next_anchor_or_end_anchor() | runerr(1, "never found an anchor")
if pos(0) | at_end_anchor() then
return s
else
return s || tab_past_anchor_start() || strip_toc_anchors() ||
tab_past_anchor_end()
}
end
@
<>=
procedure at_end_anchor()
&subject[&pos:0] ?
return tab_past_anchor_end()
end
procedure at_start_anchor()
&subject[&pos:0] ?
return tab_past_anchor_start()
end
procedure tab_past_anchor_end()
suspend ="<" || optwhite() || ="/" || optwhite() || =("a"|"A") || optwhite() || =">"
end
procedure tab_past_anchor_start()
suspend ="<" || optwhite() || =("a"|"A") || white() || tab(upto('>')) || =">"
end
procedure to_next_anchor_or_end_anchor()
return (1(tab(upto('<')), at_start_anchor() | at_end_anchor()) | tab(0)) \ 1
end
procedure white()
suspend tab(many(' \t'))
end
procedure optwhite()
suspend white() | ""
end
@ [[addcontents()]] should simply append the [[ioc]]
record for the header to the [[contents]] list. But
one problem is that the text in the header may not
be simple -- it might contain markup. One case where
we don't want to copy this markup is if it is
a \verb|')), =">")
return r || tab(0)
}
end
@
The procedure [[writetoc()]] writes the TOC on
output using the [[contents]] list. We keep a current
header level in the integer variable [[level]]. As
we iterate through the [[contents]] list we insert \verb||s as required for formatting.
<>=
global minlevel
procedure tocindent(level)
return repl(" ", level-minlevel)
end
procedure writetoc()
local level
minlevel := 99
every minlevel >:= (!contents).level
level := minlevel - 1 # triggers opening list
every l := !contents do {
<>
write(tocindent(level),
"", l.text, "")
}
<>
end
@ We write out a \verb|| for each level we are beyond the
current item, and a \verb|| for each level we are below
the current. The call to [[repl()]] merely writes the correct
number of tabs so as to make the contents look ok.
<>=
while level > l.level do {
write(tocindent(level), "
")
level -:= 1
}
while level < l.level do {
level +:= 1
write(tocindent(level), "")
}
@ At the end of the contents we need to write as many
\verb|
|s as are required.
<>=
while level >= minlevel do {
write(tocindent(level), "")
level -:= 1
}
@ The [[add_contents()]] procedure simply reads the input using
[[readfile()]] then writes it out again. If the
sentinel line is found then the TOC is written at
that point.
<>=
procedure add_contents(f)
file := readfile(f)
every l := !file do {
write(l)
if match(sentinel, l) then writetoc()
}
end
@
[[main]] makes the program read its arguments or act as a filter.
<>=
global toclevels
procedure main(args)
if args[1] ? (="-", toclevels := tab(many(&digits)), pos(0)) then get(args)
else toclevels := "2345"
if *args = 0 then add_contents(&input)
else every add_contents(openfile(!args))
return
end
procedure openfile(f)
return open(f) | stop("Cannot open file ", f)
end
<<*>>=
procedure rcsinfo ()
return "$Id: htmltoc.nw,v 1.20 2008/10/06 01:03:05 nr Exp nr $" ||
"$Name: v2_12 $"
end
@