% \iffalse meta-comment
%
% Copyright (C) 1993-2025
% The LaTeX Project and any individual authors listed elsewhere
% in this file.
%
% This file is part of the LaTeX base system.
% -------------------------------------------
%
% It may be distributed and/or modified under the
% conditions of the LaTeX Project Public License, either version 1.3c
% of this license or (at your option) any later version.
% The latest version of this license is in
%    http://www.latex-project.org/lppl.txt
% and version 1.3c or later is part of all distributions of LaTeX
% version 2008 or later.
%
% This file has the LPPL maintenance status "maintained".
%
% The list of all files belonging to the LaTeX base distribution is
% given in the file `manifest.txt'. See also `legal.txt' for additional
% information.
%
% The list of derived (unpacked) files belonging to the distribution
% and covered by LPPL is defined by the unpacking scripts (with
% extension .ins) which are part of the distribution.
%
% \fi
%


\NeedsTeXFormat{LaTeX2e}[1995/12/01]

\documentclass{ltxguide}[1994/11/20]

\usepackage[T1]{fontenc}
\IfFileExists{lmodern.sty}{\usepackage{lmodern}}{}
\usepackage{textcomp}
\usepackage{url}
\usepackage{mflogo}

% Make the text block six lines taller and move its top edge up by two
% lines.
\addtolength\textheight{6\baselineskip}
\addtolength\topmargin{-2\baselineskip}


% \ttverb{<token>}: typeset the argument in typewriter type, with
% \string applied to its first token
\newcommand{\ttverb}[1]{%
  \texttt{\string #1}}


% \Enc{<name>}: markup for encoding names (typewriter type);
% only defined if the class has not already provided it
\providecommand\Enc[1]{%
  \texttt{#1}}

% \Pkg{<name>}: markup for package names (sans serif type)
\providecommand\Pkg[1]{\textsf{#1}}

% \File{<name>}: markup for file names (typewriter type)
\providecommand\File[1]{\texttt{#1}}

% \meta{<text>}: a meta value, i.e., <text> emphasized and enclosed in
% angle brackets
\providecommand\meta[1]{%
  \ensuremath{\langle}%
  \emph{#1}%
  \ensuremath{\rangle}}

\usepackage{tabularx}

% An environment for presenting the details of one encoding
%
% Arguments:
%  #1: name in LaTeX (e.g., OT1)
%  #2: name of the encoding (e.g., TeX text)
%  #3: name of the author (e.g., Don Knuth)
%  #4: range of the glyph slots used
%  #5: variable slots
%  #6: example font (may be empty, in which case `---' is printed)
%  #7: reference
%
% The header data is set as a two-column table; the environment body
% follows as a quote.
%
% XXX add code to handle more than a single font example (e.g., larm1000,
% lbrm1000, and lcrm1000).
%
% The definition uses the internal names \@tempa and \@empty, so `@' has
% to be a letter while it is read.  Otherwise \@tempa is tokenized as \@
% followed by `tempa', the test for an empty #6 can never succeed, and the
% `---' placeholder is not printed.
%
\makeatletter
\newenvironment{encodinginfo}[7]%
  {\noindent
   \begin{tabularx}{\linewidth}{@{}l>{\raggedright\let\\\tabularnewline}X}%
     \LaTeX{} name:          & \texttt{#1}\\%
     Public name:          & #2\\%
     Author:                   & #3\\%
     Glyph slots used: & #4\\%
     Variable slots:     & #5\\%
     Font example:     & \def\@tempa{#6}\ifx\@tempa\@empty---%
                            \else\texttt{#6}\referenceftable{#6}\fi\\%
     Further reference:                & #7%
   \end{tabularx}%
   \par\nobreak
   \vspace*{3pt}%
   \quote
  }%
  {\endquote
   \vspace{6pt}}
\makeatother

\makeatletter
% \referenceftable{<font>}: if the label fonttable:<font> is known
% (i.e., \r@fonttable:<font> is defined), add a reference to the page
% holding the font table; otherwise add nothing.
% The line end after the opening brace is commented out: the macro is
% used directly after the font name, and a space token at the start of
% the replacement text would be typeset in front of the semicolon.
\def\referenceftable#1{%
  \@ifundefined{r@fonttable:#1}%
    \relax
    {;\space encoding table on page~\pageref{fonttable:#1}}%
}

% font table macros mainly lifted from manmac.tex
%
% \oct{<digits>}: a boxed label consisting of a roman \'{} mark followed
% by <digits> in italic (used below for the octal row and column labels)
\def\oct#1{\hbox{\rm\'{}\kern-.2em\it#1\/\kern.05em}}
% \hex{<digits>}: a boxed label consisting of a roman \H{} mark followed
% by <digits> in typewriter type (used below for the hexadecimal labels)
\def\hex#1{\hbox{\rm\H{}\tt#1}}

% \oddline{<digit>}: end the current row, then add a row made of a rule
% spanning 19 columns followed by the label \hex{<digit>x}; the label is
% lowered by 2.3pt and smashed so that it takes up no vertical space.
% Interline glue is suppressed before and after that row.
\def\oddline#1{\cr\noalign{\nointerlineskip}
  \multispan{19}\hrulefill&
  \setbox0=\hbox{\lower 2.3pt\hbox{\hex{#1x}}}\smash{\box0}\cr
  \noalign{\nointerlineskip}}
% \evenline: end the current row and draw a horizontal rule below it
\def\evenline{\cr\noalign{\hrule}}
% \chartstrut: an empty box, 14pt high and lowered by 4.5pt; \beginchart
% places it in the first column of the alignment template
\def\chartstrut{\lower4.5pt\vbox to14pt{}}
% \beginchart{<setup>}{<heading>}: open a display, globally reset \count@
% to 0, execute <setup>, and start an \halign as wide as \hsize.  The
% first row holds <heading> followed by the column labels \oct0 to \oct7.
\def\beginchart#1#2{$$\global\count@=0 #1
  \halign to\hsize\bgroup
    \chartstrut##\tabskip0pt plus10pt&
    &\hfil##\hfil&\vrule##\cr
    \lower6.5pt\null
  &#2&&\oct0&&\oct1&&\oct2&&\oct3&&\oct4&&\oct5&&\oct6&&\oct7&\evenline}
% \endchart: add the bottom row with the column labels \hex 8 to \hex F,
% then close the alignment and the display opened by \beginchart
\def\endchart{\raise11.5pt\null&&&\hex 8&&\hex 9&&\hex A&&\hex B&
  &\hex C&&\hex D&&\hex E&&\hex F&\cr\egroup$$}
% \: (redefined here): put the glyph in slot \count@ of the current font
% into box 0, typeset that box, and globally advance \count@ by one.
% A glyph higher than 7.5pt or deeper than 2.5pt is passed through
% \reposition first.
\def\:{\setbox0=\hbox{\noboundary\char\count@\noboundary}%
  \ifdim\ht0>7.5pt\reposition
  \else\ifdim\dp0>2.5pt\reposition\fi\fi
  \box0\global\advance\count@ by1 }
% \reposition: re-box the contents of box 0 inside a \vcenter, with 2pt
% kerns above and below
\def\reposition{\setbox0=\hbox{$\vcenter{\kern2pt\box0\kern2pt}$}}
% \normalchart: the table rows for the slots labelled '00x to '17x.
% Each of the sixteen rows starts with its octal label and holds eight
% \: cells; rows end alternately with \oddline (which adds the hex label)
% and \evenline.  The final \top is set by \ftable to either \tophalf or
% \notophalf.
\def\normalchart{%
  &\oct{00x}&&\:&&\:&&\:&&\:&&\:&&\:&&\:&&\:&&\oddline0
  &\oct{01x}&&\:&&\:&&\:&&\:&&\:&&\:&&\:&&\:&\evenline
  &\oct{02x}&&\:&&\:&&\:&&\:&&\:&&\:&&\:&&\:&&\oddline1
  &\oct{03x}&&\:&&\:&&\:&&\:&&\:&&\:&&\:&&\:&\evenline
  &\oct{04x}&&\:&&\:&&\:&&\:&&\:&&\:&&\:&&\:&&\oddline2
  &\oct{05x}&&\:&&\:&&\:&&\:&&\:&&\:&&\:&&\:&\evenline
  &\oct{06x}&&\:&&\:&&\:&&\:&&\:&&\:&&\:&&\:&&\oddline3
  &\oct{07x}&&\:&&\:&&\:&&\:&&\:&&\:&&\:&&\:&\evenline
  &\oct{10x}&&\:&&\:&&\:&&\:&&\:&&\:&&\:&&\:&&\oddline4
  &\oct{11x}&&\:&&\:&&\:&&\:&&\:&&\:&&\:&&\:&\evenline
  &\oct{12x}&&\:&&\:&&\:&&\:&&\:&&\:&&\:&&\:&&\oddline5
  &\oct{13x}&&\:&&\:&&\:&&\:&&\:&&\:&&\:&&\:&\evenline
  &\oct{14x}&&\:&&\:&&\:&&\:&&\:&&\:&&\:&&\:&&\oddline6
  &\oct{15x}&&\:&&\:&&\:&&\:&&\:&&\:&&\:&&\:&\evenline
  &\oct{16x}&&\:&&\:&&\:&&\:&&\:&&\:&&\:&&\:&&\oddline7
  &\oct{17x}&&\:&&\:&&\:&&\:&&\:&&\:&&\:&&\:&\evenline
  \top}

% \notophalf: empty continuation of \normalchart, selected by \ftable
% when the box holding slots "80 to "FF has no width
\def\notophalf{}
% \tophalf: continuation of \normalchart with the sixteen rows labelled
% '20x to '37x, built in the same way as the rows of \normalchart
\def\tophalf{%
%\noalign{\vskip 5pt\hrule}
  &\oct{20x}&&\:&&\:&&\:&&\:&&\:&&\:&&\:&&\:&&\oddline8
  &\oct{21x}&&\:&&\:&&\:&&\:&&\:&&\:&&\:&&\:&\evenline
  &\oct{22x}&&\:&&\:&&\:&&\:&&\:&&\:&&\:&&\:&&\oddline9
  &\oct{23x}&&\:&&\:&&\:&&\:&&\:&&\:&&\:&&\:&\evenline
  &\oct{24x}&&\:&&\:&&\:&&\:&&\:&&\:&&\:&&\:&&\oddline A
  &\oct{25x}&&\:&&\:&&\:&&\:&&\:&&\:&&\:&&\:&\evenline
  &\oct{26x}&&\:&&\:&&\:&&\:&&\:&&\:&&\:&&\:&&\oddline B
  &\oct{27x}&&\:&&\:&&\:&&\:&&\:&&\:&&\:&&\:&\evenline
  &\oct{30x}&&\:&&\:&&\:&&\:&&\:&&\:&&\:&&\:&&\oddline C
  &\oct{31x}&&\:&&\:&&\:&&\:&&\:&&\:&&\:&&\:&\evenline
  &\oct{32x}&&\:&&\:&&\:&&\:&&\:&&\:&&\:&&\:&&\oddline D
  &\oct{33x}&&\:&&\:&&\:&&\:&&\:&&\:&&\:&&\:&\evenline
  &\oct{34x}&&\:&&\:&&\:&&\:&&\:&&\:&&\:&&\:&&\oddline E
  &\oct{35x}&&\:&&\:&&\:&&\:&&\:&&\:&&\:&&\:&\evenline
  &\oct{36x}&&\:&&\:&&\:&&\:&&\:&&\:&&\:&&\:&&\oddline F
  &\oct{37x}&&\:&&\:&&\:&&\:&&\:&&\:&&\:&&\:&\evenline}

% \ftable{<font>}{<text>}: print a glyph table for one font.
%  #1: external font name; it is loaded as \X and also used in the label
%      fonttable:<font> attached to the table heading
%  #2: text printed in \large after the font name in the heading
% The font is loaded under \batchmode (presumably so that a missing font
% does not stop the run); if \X is \nullfont afterwards, a warning is
% issued and no table is produced.  Otherwise slots "80 to "FF are set in
% a box: if that box has positive width, the table includes \tophalf,
% else only the rows of \normalchart are printed.
\def\ftable#1#2{%
     \batchmode
     \font\X=#1%
     \errorstopmode
     \ifx\X\nullfont
       \@warning{Font #1 not found, table omitted}
     \else
       \count@="80
       \setbox0=\hbox{\X
        \loop\char\count@\advance\count@ by1 \ifnum\count@<"100
        \repeat}%
  \ifdim\wd0>0pt \let\top\tophalf\else\let\top\notophalf\fi
     \beginchart\X{\hfill\llap{\textbf{#1, \large#2}\label{fonttable:#1}}}\normalchart
     \endchart\par\vfill
    \fi}
\makeatother


% include sectioning levels up to 3 in the table of contents
\setcounter{tocdepth}{3}

\title{\LaTeX{} font encodings}

\author{Frank Mittelbach \and Robin
   Fairbairns \and Werner Lemberg \and \LaTeX\ Project Team.}

\date{\copyright~Copyright 1995--2016 \\[5pt] 18 February 2016}

\begin{document}

\maketitle

\tableofcontents

\section{Introduction}

This document explains the ideas that underpin \LaTeX{} font
encodings and the constraints that apply when defining a new encoding; it
also lists the encodings that have already been defined.

\subsection{Encodings in \TeX{}}

\TeX{} (the program) implicitly recognises three sorts of encoding,
and all are (in a sense) discussed in the \TeX{}book~\cite{A-W:DKn86}:
\begin{itemize}
\item[1.] The input encoding, which specifies the meanings of characters
  in files presented to \TeX{} for processing.  The \TeX{}book
  suggests that `your version of \TeX{} will recognise the characters
  you type on your keyboard' (\TeX{} the program has provision for
  static translations of input characters).
\end{itemize}
Such direct use of \TeX{}'s facilities is not the way modern
\LaTeX{} (or indeed any other \TeX{} macro package) is likely to deal
with input encodings.   This document does not address the topic of
input encodings; the interested reader should examine the \LaTeX{}
base package \Pkg{inputenc} \cite[sec.~7.5.2, p.~357]{A-W:MG2004}.
\begin{itemize}
\item[2.] The token stream that \TeX{} processes internally.  This stream
  of \TeX{}'s consciousness is discussed in great detail in the
  \TeX{}book.
\end{itemize}
Again, this document does not address the topic.  \LaTeX's internal
character representation (\textsc{licr}) is well discussed in
\cite[sec.~7.11.2, p.~442]{A-W:MG2004}.
\begin{itemize}
\item[3.] The font encoding---i.e., the mapping of character codes to
  glyphs in the fonts that are used to typeset \TeX{}'s output.
  Again, a set of font encodings is enumerated in the \TeX{}book, but
  that set has proved inadequate to the needs of modern multilingual
  use of \LaTeX.
\end{itemize}
This document explains \emph{why} Knuth's original set of encodings is
inadequate to modern conditions, and discusses the issues that
surround the design and definition of new font encodings.

Font encodings are important for more than their r\^{o}le in mapping the
glyphs of the fonts to be used for typesetting: their glyph tables are
also the context in which \TeX{}'s hyphenation algorithm operates.
There are constraints imposed by \TeX{} that affect the way in which
new font encodings, for use in a multi-lingual environment, may be
structured (see section~\ref{sec:restrictions} for details).

\subsection{The history of \TeX{} font encodings}

Little attention was paid to font encodings prior to the arrival of
\TeX{}\,3.  Up to that time, one used Donald Knuth's fonts (the
Computer Modern family, using the encodings we now refer to as \Enc{OT1} and
the \Enc{OM} series), or one was on one's own.

The Computer Modern text encoding raises problems in unmodified
\TeX{}, because hyphenation cannot break words containing
\verb"\accent" commands.  Even in those Western European languages for
which the \Enc{OT1} encoding has symbols for the necessary
\verb"\accent"-based diacritics, this shortcoming ruins typesetting of
running text.

With the advent of \TeX{}\,3, with its ability to switch between
hyphenation pattern sets, it was clear that the situation could not
continue.  Thus a group at the TUG Annual General Meeting in Cork,
Ireland, specified a uniform encoding for 256-glyph fonts, that
contains accented letters and non-\textsc{ascii} letters necessary to
express most Western European languages (and some Eastern European ones)
without recourse to the \verb"\accent" command.

This ``Cork'' encoding has since been realised in a series of fonts
designed with Metafont, in at least one font series that is available
both in Adobe Type 1 format and in OpenType format, % viz., Latin Modern
and in a number of virtual-font mappings of other font series.

Since the time of the Cork meeting, much effort has been devoted to
the design of encodings for text fonts to use with \TeX{}, and the
Cork encoding influenced the design of many such encodings.

Encodings for mathematical fonts have, in contrast, changed little
since Knuth's contributions.  A TUG Technical Working Group was
established at the Cork meeting, whose aim was to define a set of
256-glyph encodings to regularise and extend Knuth's originals, using
ideas from several other fonts that had appeared since, and from the
known needs of researchers in mathematics and the mathematical sciences.

Independently, a first proposal (the so-called \emph{Aston proposal}) was worked
out by Justin Ziegler together with Frank Mittelbach and other members of the
\LaTeX\ Project team~\cite{ziegler}. A first implementation of
this proposal was realized by Matthias Clasen and Ulrik
Vieth~\cite{clasen,clasen-vieth}.

However, the slow progress of these Mathematical encodings has been
overtaken by the addition (in the last decade or so) of a large number
of mathematical symbols to Unicode~\cite{beeton}; one can expect
further changes so that new public mathematical font encodings will
most likely be delayed still further.



\subsection{Further information}

For a general introduction to \LaTeX, including the new features of
\LaTeXe, you should read \emph{\LaTeXbook},
Leslie Lamport, Addison Wesley, 2nd~ed, 1994.

A more detailed description of the new features of \LaTeX, including an
overview of more than 200 packages and nearly 1000 ready to run examples, is
to be found in \emph{\LaTeXcomp{} second edition} by Frank Mittelbach and
Michel Goossens~\cite{A-W:MG2004}.

The \LaTeX{} project sponsored a report on Mathematical % spelt out in full
font encodings, which
is worth reading for its insight into the problems of defining the way
in which math is used: see~\cite{ziegler,clasen,clasen-vieth}.

The \LaTeX{} font selection scheme is based on \TeX, which is described
by its developer in \emph{The \TeX book}, Donald E.~Knuth, Addison
Wesley, 1986, revised in 1991 to include the features of \TeX~3.

For more information about \TeX{} and \LaTeX, please contact your local
\TeX{} Users Group, or the international \TeX{} Users Group
(\url{http://www.tug.org}).



\section{Existing font encodings}

This section lists the encodings currently assigned; for each
encoding, we list the registered (\LaTeX{}) name, the assigned purpose
of the encoding, and the author.  Further details may list the code
positions used in the encoding, the \emph{variable slots} (see below),
an example font (for which a listing will be provided later in the
document if the relevant fonts are present), and a source for further
reference.

While the characteristic feature of an encoding is that each font
encoded according to the encoding should have the same glyph set,
there are some encodings (notably \Enc{OT1} and its descendants) in
which a few glyph code slots differ in their contents in different
fonts.

\subsection{Naming conventions}

Names for encoding schemes are strings of up to three letters (all
upper case) plus digits.

The \LaTeX\ Project reserves the use of encoding names starting with the
following letters: |T| (standard 256-long text encodings), |TS|
(symbols that are designed to extend a corresponding |T| encoding),
|X| (text encodings that do not conform to the strict requirements for
|T| encodings), |M| (standard 256-long mathematical encodings), |S| (other
symbol encodings), |A| (other special applications), |OT| (standard
128-long text encodings), and |OM| (standard 128-long mathematical encodings).

Please do not use the above starting letters for non-portable
encodings.  If new standard encodings emerge then we shall add them in
a later release of \LaTeX.

Encoding schemes which are local to a site or a system should start
with |L|, experimental encodings intended for wide distribution will
start with |E|, whilst |U| is for Unknown or Unclassified encodings.

\begin{quote}
  \itshape We recommend that new encoding names should not be
  introduced unless careful consideration and discussion in the user
  community has confirmed the need for the encoding. If encodings have to
  change from font to font, a number of problems arise, so it is best to
  develop encodings that can be used with a large number of fonts in parallel.
  This allows documents to be typeset using different fonts without problems.

  The \Enc{TS1} encoding is a good example of a \emph{bad} encoding (even
  though it was developed with the best intentions) as a huge number of fonts
  can only implement parts of it. Similarly, the fact that the few sets of
  available mathematical fonts (beside Computer Modern Math) nearly
  all implement slightly different encodings is a huge source of
  problems. Don't add to this if possible!
\end{quote}


\subsection{128$^+$ glyph encodings (text)}

The `OT' series of font encodings starts with Donald Knuth's original
text encoding, that used for the text fonts in the earliest releases
of \TeX{} itself.  The `O' of the encoding designator may be taken as
signifying `original', or just `old'.

\begin{encodinginfo}{OT1}
        {\TeX{} text}
        {Donald Ervin Knuth}
        {0x00--0x7F}
        {0x0B--0x0F, 0x24, 0x3C, 0x3E, 0x5C, 0x7B--0x7D}
% {0X--'177}
% {'13--'17, '44, '74, '76, '134, '173--'175}
        {cmr10}
        {\cite[p.427]{A-W:DKn86}}

  Donald Knuth designed his font encoding (and hence his fonts) in a
  very different environment from that which now pervades the \TeX{}
  world: his (mainframe) computer had very little memory, there was
  little experience in (or demand for) multilingual technical
  typesetting, and as a result it was appropriate to sacrifice
  uniformity for efficiency.

  Thus Knuth's original fonts differ slightly in some encoded slots:
  for example, the glyphs \texttt{\string<}, \texttt{\string>},
  \verb=\=, \verb={=, and \verb=}= are only available in the
  typewriter fonts and the \textdollar{} and \textsterling{} signs
  share the same position (in different font shapes).

  This means that direct selection of these slots can produce
  unpredictable results, e.g., typing \texttt{\string<} or
  \verb=\symbol{'74}= in a document can yield `\textexclamdown'.
\end{encodinginfo}


\begin{encodinginfo}{OT2}
        {UW cyrillic encoding}
        {University of Washington}
        {0x00--0x7F}
        {---}
        {wnr10}
        {\cite{Beeton:TB6-3-124}}
  Support for this encoding is available in the Cyrillic bundle although for
  all practical purposes it is better to use one of the \Enc{T2} encodings.
\end{encodinginfo}


\begin{encodinginfo}{OT3}
        {UW IPA encoding}
        {University of Washington}
        {0x00--0x7F}
        {---}
        {wsuipa10}
        {\cite[p.149]{CorkGW:91}}
  The \Enc{OT3} encoding was never really used with \LaTeXe{}
  following the introduction of the TIPA system which offers much
  better support for IPA. In particular, no \File{ot3enc.def}
  file was ever produced.
\end{encodinginfo}


\begin{encodinginfo}{OT4}
        {Polish text encoding}
        {B.~Jackowski and M.~Ry\'cko} %% ?  Marcin Woli\'nski
  {0x00--0x7F, 0x81, 0x82, 0x86, 0x8A, 0x8B, 0x91, 0x99, 0x9B, 0xA1,
   0xA2, 0xA6, 0xAA, 0xAB, 0xAE, 0xAF, 0xB1, 0xB9, 0xBB, 0xD3, 0xF3,
   0xFF}
  {0x0B--0x0F, 0x24, 0x3C, 0x3E, 0x5C, 0x7B--0x7D}
        {plr10}
        {---}

   While Knuth included the means of typesetting the `lost L' (\L) in
  his \Enc{OT1} encoding, he omitted the ogonek (\,\,\k{}), a diacritic
  mark that is also needed in Polish text; hence the appearance, well
  before the \Enc{T1} encoding, of fonts using this encoding.
\end{encodinginfo}

\begin{encodinginfo}{OT5}
        {Not currently allocated}
        {---}
        {---}
        {---}
        {}
        {---}

\end{encodinginfo}



\begin{encodinginfo}{OT6}
        {Armenian text encoding}
        {Serguei Dachian}
        {0x03--0x0F, 0x13--0x7F}
        {---}
        {artmr10}
        {---}

  This encoding was allocated to permit use of Dachian's
  Armenian fonts in a standard \LaTeX{} environment.

  Because of license issues the \texttt{artmr} fonts are not necessarily
  included in distributed \TeX{} installations (and for this reason the
  corresponding encoding table is not shown below). However, the fonts
  and the support macros can be found on the CTAN archives (look for
  \texttt{armtex}).

\end{encodinginfo}



\subsection{256 glyph encodings (text)}

\begin{encodinginfo}{T1}
        {Cork encoding}
        {Euro \TeX{} conference at Cork}
        {0x00--0xFF}
        {---}
        {ecrm1000}
        {\cite[p.514]{tub:MFe90}, \cite[p.99]{Knappen:TB17-2-96}}

  The Cork encoding was developed so that advantage could be taken of
  the (then) new facilities of \TeX{}\,3, allowing hyphenation of
  most Western European (and some Eastern European) languages in an
  unmodified version of \TeX{}.

  The encoding was developed in the absence of any extant effort at
  font design, but instances written in Metafont (the `EC' fonts), and
  more recently Adobe Type 1 instances of the same fonts have become
  available.

  Substantial (but incomplete) instances have also been developed,
  which use virtual fonts.  These latter instances map either Knuth's
  original (OT1-encoded) fonts, or commercial fonts that contain the
  Adobe `standard' set of 224 glyphs.
\end{encodinginfo}

\begin{encodinginfo}
  {T2A, T2B, T2C}
  {Cyrillic encodings}
  {The CyrTUG font team}
  {0x00--0xFF}
  {--- (within each encoding)}
  {larm1000}
  {\cite{Berdnikov:eurotex-98}}

  There are too many glyphs in the full Cyrillic complement of
  languages for all of them to be covered by a single
  \LaTeX{}-compliant encoding (the lower half of each
  \Enc{T2}~encoding is identical to that of \Enc{T1}, in order that
  each should be a conforming \LaTeX{} encoding~--- see
  section~\ref{sec:restrictions}).  The approach taken is
  therefore to develop a single encoding, \Enc{X2} (see
  section~\ref{sec:extendedenc})
  which contains all the glyphs needed for the full set of
  languages, and then to derive the three \LaTeX{}-compliant
  \Enc{T2}-family encodings using the \Enc{X2} set together with that of
  \Enc{T1}.

\end{encodinginfo}



\begin{encodinginfo}{T3}
        {IPA encoding}
        {FUKUI Rei, University of Tokyo}
        {0x00--0xFF}
        {---}
        {tipa10}
        {\cite[p.102]{Rei:TB17-2-102}}


    The \Enc{T3} encoding (and associated macros) provides the glyphs required
  in phonetic description according to current International Phonetic
  Association recommendations \cite{ipa}.

  The \Enc{T3} encoding does \emph{not fulfil} the requirements for \Enc{T}
  encodings---the name is a historical accident. The correct name would be
  \Enc{X3}, but due to the fact that this font family has been used under its
  current encoding name for a long time, the name will not change for
  compatibility reasons.

\end{encodinginfo}



\begin{encodinginfo}{T4}
        {African Latin (fc)}              % public name
        {J\"org Knappen}              % author name
        {0x00--0xFF}              % range(s) of slots used for glyphs
        {0x24}         % range(s) of slots with variable glyphs if any
        {fcr10}              % name of an example font
        {\cite{tub:JKn93}}

The African Latin fonts contain in their lower half (0--127) the same
characters as the European Latin (T1-encoded) fonts, while in their
upper half (128--255) they
contain letters and symbols for African languages that use extended
Latin alphabets.
Due to lack of space, J\"org had to play the unfortunate trick of
assigning \verb=\textdollar= and \verb=\textsterling=
the same position; users should take these characters
from the text companion font, if they are needed.  Instead of defining
a lot of new control sequences for the single letters, there are three
accent-like control sequences with general purpose:
\verb=\m= (Modified-1),
\verb=\M= (Modified-2) and
\verb=\B= (Barred).
Most standard \LaTeX{} encoding-dependent commands
work.  However, the Icelandic special letters are not available and `best
replacements' for \verb=\Th=, \verb=\th=, and \verb=\dh=
are used (barred T and d resp.).
\end{encodinginfo}


\begin{encodinginfo}{T5}
        {Vietnamese encoding}
        {Werner Lemberg and
         Vladimir Volovich}
        {0x00--0xFF}
        {---}
        {vnr10}
        {\cite{vnr}}

  The \Enc{T5} encoding was developed for Vietnamese. Again, this encoding
  \emph{does not} conform to the requirements for a \Enc{T}-encoding
  because its large number of accented letters prevents the \verb=\lccode= and
  \verb=\uccode= mapping requirements for \Enc{T} encodings from being
  fulfilled.  However, since the Vietnamese language does not
  use word division in typesetting, this requirement is
  actually not important for this particular language.
  Since every glyph used in Vietnamese text is internally
  represented as \textsc{licr} macros, the commands  \verb=\MakeUppercase= and
  \verb=\MakeLowercase= still work as expected (as they change the case of the
  \textsc{ascii} characters in \textsc{licr} definitions).

\end{encodinginfo}

\begin{encodinginfo}
  {T6}
  {Armenian}
  {---}
  {---}
  {---}
  {}
  {---}

    This encoding is reserved to permit future expansion of Armenian
  \TeX{} to use 256-character (hyphenatable) fonts.
\end{encodinginfo}

\begin{encodinginfo}{T7}
        {Greek encoding}
   {---}
   {---}
   {---}
   {}
   {---}

The name is already reserved for a 256 glyph Greek encoding. The encoding
itself hasn't been defined so far.

\end{encodinginfo}



\subsection{256$^-$ glyph encodings (text symbols)}

\begin{encodinginfo}{TS1}
        {Text Companion encoding (Cork)}
        {J\"org Knappen}
  {0x00--0x0D, 0x12, 0x15, 0x16, 0x18--0x1D, 0x20, 0x24, 0x27, 0x2A,
   0x2C--0x3A, 0x3C--0x3E, 0x4D, 0x4F, 0x57, 0x5B, 0x5D--0x60,
   0x62--0x64, 0x6C--0x6E, 0x7E--0xBF, 0xD6, 0xF6}
  {---}
        {tcrm1000}
        {\cite{Knappen:TB17-2-96}}

   The text symbol encoding offers access to symbolic glyphs that are
  commonly used in text (for a variety of reasons), and whose style
  should vary with the text that surrounds them.

  Unfortunately, the \Enc{TS1} encoding was developed without
  reference to the glyphs available in existing commercial fonts.
  As a result, only font families
  explicitly developed for \TeX{} (i.e., typically originating with
  \MF{}) actually contain all glyphs required by the \Enc{TS1}
  encoding.  Most other font families (whether free or commercial)
  often only provide half of the set%
%%
%% don't show the comment if the tables are not generated
%%
\expandafter\ifx\csname r@fonttable:tcrm1000\endcsname\relax
\else
  \expandafter\ifx\csname r@fonttable:ptmr8c\endcsname\relax
  \else
    \space (compare the two tables for \Enc{TS1} on
     pages~\pageref{fonttable:tcrm1000}
     and~\pageref{fonttable:ptmr8c})%
  \fi
\fi.
  To improve this situation somewhat, NFSS provides a way to define encoding
  subsets on a per family basis in the \Pkg{textcomp} package (which
  package offers support for the \Enc{TS1} encoding).
\end{encodinginfo}


\begin{encodinginfo}{TS3}
        {IPA symbol encoding}
        {FUKUI Rei, University of Tokyo}
        {0x00--0x0A, 0x20--0x49, 0x50--0x56, 0x70--0x7B}
        {---}
        {tipx10}
        {\cite{Rei:TB17-2-102}}

  The \Enc{TS3} encoding (together with the \Enc{T3} encoding) provides the
  glyphs for typesetting phonetic transcriptions following the
  guidelines of the International Phonetic Association \cite{ipa}.  Support
  is offered through the \Pkg{tipa} package.
\end{encodinginfo}




\subsection{256 glyph encodings (text extended)}
\label{sec:extendedenc}

\begin{encodinginfo}
  {X2}
  {Cyrillic glyph container}
  {The CyrTUG font team}
  {0x00--0xFF}
  {---}
  {rxrm1000}
  {\cite{Berdnikov:eurotex-98}}

  This encoding specifies the glyph container for Cyrillic characters,
  which is used in specifying the \Enc{T2A}, \Enc{T2B} and \Enc{T2C} encodings.
\end{encodinginfo}




\subsection{128$^+$ glyph encodings (mathematics)}


\begin{encodinginfo}{OML}
        {\TeX{} math italic}
        {Donald Ervin Knuth}
        {0x00--0x7F}
        {---}
        {cmmi10}
        {\cite[p.430]{A-W:DKn86}}

  The \Enc{OML} encoding contains italic Latin and Greek letters for
  use in mathematical formulas (typically used for variables) together
  with some symbols.

\end{encodinginfo}

\begin{encodinginfo}{OMS}
        {\TeX{} math symbol}
        {Donald Ervin Knuth}
        {0x00--0x7F}
        {---}
        {cmsy10}
        {\cite[p.431]{A-W:DKn86}}

  The  \Enc{OMS} encoding contains basic mathematical symbols,
  together with an uppercase ``calligraphic'' Latin alphabet.
\end{encodinginfo}


\begin{encodinginfo}{OMX}
        {\TeX{} math extension}
        {Donald Ervin Knuth}
        {0x00--0x7F}
        {---}
        {cmex10}
        {\cite[p.432]{A-W:DKn86}}

  \Enc{OMX} encodes mathematical symbols with variable sizes, such as
  the $\sum$ sign, which changes its size if used in displayed
  formulas, and the construction parts for
  brackets, braces and radicals, etc., which can stretch to accommodate
  the thing they're enclosing.

\end{encodinginfo}




\subsection{256 glyph encodings (mathematics)}

So far there are no 256 glyph mathematical encodings. A proposal is
given in \cite{ziegler}.


\subsection{Other encodings}

\begin{encodinginfo}
  {C..}
  {CJK encodings}
  {Werner Lemberg}
  {0x00--0xFF}
  {---}
  {} % no font, of course
  {\cite{CJK}}

  The \Pkg{CJK} package defines a number of encodings which access Chinese,
  Japanese and Korean fonts.

\end{encodinginfo}

\begin{encodinginfo}
  {E..}
  {Experimental encodings}
  {---}
  {0x00--0xFF}
  {all}
  {}
  {\cite[p.416]{A-W:MG2004}}

  As the name indicates, encodings starting with the letter \Enc{E} are
  intended for experimental encodings, that are still likely to change.
\end{encodinginfo}

\begin{encodinginfo}{L..}
        {Local encoding (site dependent)}
        {---}
        {0x00--0xFF}
        {all}
        {}
        {\cite[p.416]{A-W:MG2004}}

        `Local' encodings provide the means to develop representation
        techniques that are suited to a particular \TeX{} environment.  While
        the developer has freedom to specify their encoding as he or she
        pleases, there is a strong incentive to obey the \LaTeX{} rules for
        encodings, since it will otherwise be difficult to compose text using
        the encoding.

        At least it was the intention that \Enc{L..} encodings are local and
        site dependent. However, a number of such encodings became generally
        used without ever getting a different name allocated.

\end{encodinginfo}



\begin{encodinginfo}{LY1}
        {Y\&Y 256 glyph encoding}
        {Berthold Horn}
        {0x00--0x08, 0x0C, 0x10, 0x12--0xFF}
        {\emph{believed none}}
        {ptmr8y}
        {\cite[p.416]{A-W:MG2004}}

        This is an alternative to the \Enc{T1} encoding developed by Y\&Y and
        used in their commercial \TeX{} implementation.

\end{encodinginfo}


\begin{encodinginfo}{LV1}
        {MicroPress encoding}
        {Michael Vulis}
        {\emph{unknown}}
        {\emph{unknown}}
        {}
        {\cite[p.416]{A-W:MG2004}}

        This is an encoding developed by MicroPress and used for some of their
        fonts.

\end{encodinginfo}


\begin{encodinginfo}{LGR}
        {Greek 256 glyph encoding}
        {\emph{unknown}}
        {0x00--0xFF}
        {\emph{believed none}}
        {grmn1000}
        {\cite[p.575]{A-W:MG2004}}

        Currently the main encoding in use for the Greek language.

        This encoding doesn't conform to the restrictions for
        \Enc{T}-encodings described in section~\ref{sec:restrictions} on
        page~\pageref{sec:restrictions} as it doesn't have \textsc{ascii}
        glyphs at all.

\end{encodinginfo}


\begin{encodinginfo}
  {PD1}
  {PDF DocEncoding}
  {Adobe}
  {0x08--0x0A, 0x0C, 0x0D, 0x18--0x7E, 0x80--0x9E, 0xA0--0xAE, 0xB0--0xFF}
  {---}
  {}
  {\cite{Adobe:PDF-1.6}, \cite{hyperref}}

  The \Enc{PD1} encoding is a virtual encoding with 256 glyphs needed to
  produce bookmarks and similar text in PDF documents generated with pdf\LaTeX.
  The encoding is ``virtual'' because by design there are no \TeX{}
  fonts that cover \Enc{PD1}. Details can be found in appendix D.1
  of~\cite{Adobe:PDF-1.6}.
\end{encodinginfo}

\begin{encodinginfo}
  {PU}
  {PDF Unicode Encoding}
  {Adobe}
  {---}
  {---}
  {}
  {\cite{Adobe:PDF-1.6}, \cite{hyperref}}

  Another virtual encoding (with more than 600 characters) for
  Unicode-encoded bookmarks in PDF documents.
\end{encodinginfo}

\begin{encodinginfo}{U}
        {Unknown encoding}
        {---}
        {potentially 0x00--0xFF}
        {all}
        {wasy10}
        {\cite[p.416]{A-W:MG2004}}

  This encoding should be used for fonts that resist classification,
  e.g., when it is clear that there will never be more than one font
  using the same encoding.

\end{encodinginfo}



\section{Restrictions}
\label{sec:restrictions}


\subsection{Required glyphs for general text encodings}

Encodings that are supposed to be used with \LaTeX{} for `general
purpose text fonts' need to have certain fixed glyphs in certain
encoding slots.  A `general purpose text font' is one intended for
arbitrary natural language text and not just within special
environments (such as the phonetic alphabet) or just for typesetting
individual symbols (e.g., the text companion font with encoding
\Enc{TS1}).

This is the case for the following glyphs that have to be in their
\textsc{ascii} positions for general purpose text encodings:
\begin{center}
\begin{tabular}[t]{cc}
  Glyph & Position \\ \hline
  !     & \number`\!    \\
  '     & \number`\'    \\
  (     & \number`\(    \\
  )     & \number`\)    \\
  \relax*       & \number`\*    \\
  +     & \number`\+    \\
  ,     & \number`\,    \\
  -     & \number`\-    \\
  .     & \number`\.    \\
  /     & \number`\/    \\
  0 \ldots\ 9   & \number`\0\ to \number`\9     \\
  \end{tabular}
  \quad
  \begin{tabular}[t]{cc}
  Glyph & Position \\ \hline
  :     & \number`\:    \\
  ;     & \number`\;    \\
  =     & \number`\=    \\
  ?     & \number`\?    \\
  @     & \number`\@    \\
  A \ldots\ Z   & \number`\A\ to \number`\Z     \\
  \relax[       & \number`\[    \\
  ]     & \number`\]    \\
  `     & \number`\`    \\
  a \ldots\ z   & \number`\a\ to \number`\z     \\
\end{tabular}
\quad
\begin{tabular}[t]{cc}
Glyph\footnotemark      & Position \\ \hline
<       & \number`\<    \\
>       & \number`\>    \\
\string|        & \number`\|    \\
\end{tabular}\footnotetext{The requirement for these three glyphs is
  violated in the Latin alphabet \Enc{OT} encodings.}
\end{center}
In addition the following glyphs have to be present
somewhere\footnote{The position in this case is not important as they
are generated from ligature programs.} in the encoding together with
corresponding ligature programs to generate them:
\begin{center}
\begin{tabular}[t]{cc}
Glyph   & Ligature program \\ \hline
 ``     & \texttt{`\/`} \\
 ''     & \texttt{'\/'} \\
 --     & \texttt{-\/-} \\
 ---    & \texttt{-\/-\/-} \\
\end{tabular}
\end{center}

This is $35 + 2 \times 26 = 87$ positions ``required'', which leaves 169
positions free.

If there are free slots available then adding all or some of the
diacritics would be the best way to fill them.

If there are insufficient slots for the characters needed, a possible
technique is to create a subsidiary encoding, and to move non-letter
characters to it.  Since only ``letters'' take part in the hyphenation
algorithm, this technique doesn't affect the appearance of the typeset
result.

\subsection{The constraints on upper/lower case tables}

Due to some technical restrictions of \TeX{} related to hyphenation it
is not possible in \LaTeX{} to use more than one \verb=\lccode= or
\verb=\uccode= table. Therefore all encodings need to share these two
tables which are defined to be those of the \Enc{T1} encoding.

The \Enc{T1} encoding has some nasty peculiarities which make certain slot
positions more or less unusable for other encodings if this
restriction is to be obeyed. This is unfortunate but since \Enc{T1} is well
established and the basis for a large number of languages it seemed
better to live with this situation instead of trying to replace \Enc{T1} with a
slightly better standard (with the result that for a long time
different \LaTeX{} installations would not be able to communicate with
each other because of incompatible font sets).

The positions that are problematic are as follows.
\begin{center}
\begin{tabular}{lp{.8\linewidth}}
25 (\char 25) & uppercase maps strangely (same as for 105, \char 105)\\
26 (\char 26) & uppercase maps strangely (same as for 106, \char 106)\\
27 (\char 27) & lowercase maps to itself which makes this slot subject
                to hyphenation (used to support \Enc{OT1} encoding) \\
157 (\char 157) & lowercase maps strangely (same as for 73, \char 73) \\
158 (\char 158) & uppercase maps strangely (same as for 240, \char 240) \\
\end{tabular}
\end{center}
One way to use such slots is to fill them with ligature glyphs as
\TeX{} will not consult these tables for glyphs constructed through
ligature programs but instead uses the entries for the individual
glyphs used to produce the ligature.

A complete listing of the uppercase/lowercase mapping tables is to be
found in section~\ref{sec:uclc-tab} (page \pageref{sec:uclc-tab}).

% Count registers used by the lc/uc table macros below:
%   \temp  - the code position currently being printed
%   \tempL - the \lccode value found at position \temp
%   \tempU - the \uccode value found at position \temp
\newcount\temp \newcount\tempL \newcount\tempU

% \nextstep
%   Emits one row of the lc/uc table for the position held in \temp.
%   It globally stores \lccode\temp in \tempL and \uccode\temp in \tempU,
%   then typesets four cells: the position (preceded by
%   \lctablenumbersize), the lc value, the uc value, and the glyph
%   sample produced by \printlowerupper.  After ending the row it
%   globally advances \temp by one and hands control back to \stepprint,
%   which decides whether another row follows.
%   The assignments are \global because each cell of the tabular is
%   its own group.
\def\nextstep{\global\tempL=\lccode\temp
              \global\tempU=\uccode\temp
              \lctablenumbersize\the\temp &
              \the\tempL&
              \the\tempU&\printlowerupper{\the\temp}{\the\tempL}{\the\tempU}\\
               \global\advance\temp by 1
               \stepprint}

% \printlowerupper{<pos>}{<lc>}{<uc>}
%   Typesets the glyph at slot <pos> of the current font, followed by
%   ``(<lowercase glyph>/<uppercase glyph>)''.
%   #1: slot number of the glyph itself
%   #2: slot number of its lowercase mapping; if 0, ``--'' is printed instead
%   #3: slot number of its uppercase mapping; if 0, ``--'' is printed instead
\def\printlowerupper#1#2#3{\char#1\relax
   (\ifnum#2=0\relax--\else\char#2\fi
   /\ifnum#3=0\relax--\else\char#3\fi)}

% \stepprint
%   Loop control for the lc/uc table: while \temp is less than \endval
%   (defined by \dolctable) it continues with \nextstep to print another
%   row; otherwise \next becomes \relax and the loop ends.
%   The choice is stored in \next and executed after the \fi so that
%   \nextstep runs outside the conditional.
\def\stepprint{\relax\ifnum\temp<\endval
                    \let\next=\nextstep
               \else
                     \let\next=\relax
               \fi
               \next}

% \dolctable{<first>}{<end>}
%   Typesets a top-aligned four-column tabular (pos, lc, uc, glyphs)
%   with one row for each position from <first> up to, but not
%   including, <end>.
%   #1: first position to list (assigned to \temp)
%   #2: position at which to stop (stored in \endval; the loop in
%       \stepprint runs while \temp < \endval)
%   Everything happens inside a group, so the settings of \temp,
%   \endval and \tabcolsep made here are local to this table
%   (\nextstep, however, advances \temp globally).
%   The caller is expected to have defined \lctablenumbersize, which
%   \nextstep uses in the position column.
\def\dolctable#1#2{{\temp=#1\relax
\def\endval{#2}%
\setlength\tabcolsep{1.5pt}%
\begin{tabular}[t]{@{}cccc@{}}
pos&lc&uc&glyphs\\\hline
\stepprint
\end{tabular}}}

\iffalse
\begin{center}
\tiny\let\lctablenumbersize\tiny
\mbox{\dolctable{0}{52}\vrule
\dolctable{52}{104}\vrule
\dolctable{104}{156}\vrule
\dolctable{156}{208}\vrule
\dolctable{208}{256}}
\end{center}
\fi

\iffalse
\begin{center}\tiny
\mbox{\dolctable{0}{65}\vrule
\dolctable{65}{128}\vrule
\dolctable{128}{193}\vrule
\dolctable{193}{256}}
\end{center}
\fi



\section{Encoding specific commands}

An encoding specific command is one that generates a glyph (or
glyphs), to produce a graphic effect that may be implemented
differently in different encodings.  The encoding specific command
automatically changes its implementation when the encoding changes in
the course of the document.  Encoding specific commands figure in
\LaTeX's internal character representation (\textsc{licr}) and are also
discussed in \cite[sec.~7.11.2, p.~442]{A-W:MG2004}.

The following table only covers the encoding specific commands from
the \Enc{OT1} and \Enc{T1} encodings. Other encodings may specify
additional encoding specific commands.  In the table, the first 16
commands are `accent-like' and need as an argument the character to be
accented.  For example, |\v{c}| is the \textsc{licr} for `\v{c}'.

\begin{tabbing}
\ttverb\textvisiblespace\quad\=bbbbbbbbbbbbbb\=b'b'\=ccccccccccc\kill
\ttverb\`{}               \>OT1,T1\>   \a`{}\> (grave)      \\
\ttverb\'{}               \>OT1,T1\>   \a'{}\> (acute)      \\
\ttverb\^{}               \>OT1,T1\>   \^{}\>  (circumflex) \\
\ttverb\~{}               \>OT1,T1\>   \~{}\>  (tilde)      \\
\ttverb\"{}               \>OT1,T1\>   \"{}\>  (umlaut)     \\
\ttverb\H{}               \>OT1,T1\>   \H{}\>  (Hungarian umlaut) \\
\ttverb\r{}               \>OT1,T1\>   \r{}\>  (ring)       \\
\ttverb\v{}               \>OT1,T1\>   \v{}\>  (ha\v{c}ek)  \\
\ttverb\u{}               \>OT1,T1\>   \u{}\>  (breve)      \\
\ttverb\t{}               \>OT1,T1\>   \t{}\>  (tie)        \\
\ttverb\={}               \>OT1,T1\>   \a={}\> (macron)     \\
\ttverb\.{}               \>OT1,T1\>   \.{}\>  (dot)        \\
\ttverb\b{}               \>OT1,T1\>   \b{}\>  (underbar)   \\
\ttverb\c{}               \>OT1,T1\>   \c{}\>  (cedilla)    \\
\ttverb\d{}               \>OT1,T1\>   \d{}\>  (dot under)  \\
\ttverb\k{}               \>T1    \>   \k{}\>  (ogonek)     \\
% \ttverb\AA              \>OT1,T1\>   \AA \>               \\ % no longer
\ttverb\AE                \>OT1,T1\>   \AE \>               \\
\ttverb\DH                \>T1    \>   \DH \>               \\
\ttverb\DJ                \>T1    \>   \DJ \>               \\
\ttverb\L                 \>OT1,T1\>   \L  \>               \\
\ttverb\NG                \>T1    \>   \NG \>               \\
\ttverb\OE                \>OT1,T1\>   \OE \>               \\
\ttverb\O                 \>OT1,T1\>   \O  \>               \\
\ttverb\SS                \>OT1,T1\>   \SS \>               \\
\ttverb\TH                \>T1    \>   \TH \>               \\
% \ttverb\aa              \>OT1,T1\>   \aa \>               \\ no-longer
\ttverb\ae                \>OT1,T1\>   \ae \>               \\
\ttverb\dh                \>T1    \>   \dh \>               \\
\ttverb\dj                \>T1    \>   \dj \>               \\
\ttverb\guillemotleft     \>T1    \>   \guillemotleft  \> (guillemet) \\
\ttverb\guillemotright    \>T1    \>   \guillemotright \> (guillemet) \\
\ttverb\guilsinglleft     \>T1    \>   \guilsinglleft  \> (guillemet) \\
\ttverb\guilsinglright    \>T1    \>   \guilsinglright \> (guillemet) \\
\ttverb\i                 \>OT1,T1\>   \i  \>               \\
\ttverb\j                 \>OT1,T1\>   \j  \>               \\
\ttverb\l                 \>OT1,T1\>   \l  \>               \\
\ttverb\ng                \>T1    \>   \ng \>               \\
\ttverb\oe                \>OT1,T1\>   \oe \>               \\
\ttverb\o                 \>OT1,T1\>   \o  \>               \\
\ttverb\quotedblbase      \>T1    \>   \quotedblbase   \>   \\
\ttverb\quotesinglbase    \>T1    \>   \quotesinglbase \>   \\
\ttverb\ss                \>OT1,T1\>   \ss \>               \\
\ttverb\textasciicircum   \>OT1,T1\>   \textasciicircum \>  \\
\ttverb\textasciitilde    \>OT1,T1\>   \textasciitilde  \>  \\
\ttverb\textbackslash     \>OT1,T1\>   \textbackslash   \>  \\
\ttverb\textbar           \>OT1,T1\>   \textbar         \>  \\
\ttverb\textbraceleft     \>OT1,T1\>   \textbraceleft   \>  \\
\ttverb\textbraceright    \>OT1,T1\>   \textbraceright  \>  \\
\ttverb\textcompwordmark  \>OT1,T1\>   \textcompwordmark\> (invisible) \\
\ttverb\textdollar        \>OT1,T1\>   \textdollar      \>  \\
\ttverb\textemdash        \>OT1,T1\>   \textemdash      \>  \\
\ttverb\textendash        \>OT1,T1\>   \textendash      \>  \\
\ttverb\textexclamdown    \>OT1,T1\>   \textexclamdown  \>  \\
\ttverb\textgreater       \>OT1,T1\>   \textgreater     \>  \\
\ttverb\textless          \>OT1,T1\>   \textless        \>  \\
\ttverb\textquestiondown  \>OT1,T1\>   \textquestiondown\>  \\
\ttverb\textquotedbl      \>T1    \>   \textquotedbl    \>  \\
\ttverb\textquotedblleft  \>OT1,T1\>   \textquotedblleft\>  \\
\ttverb\textquotedblright \>OT1,T1\>   \textquotedblright\> \\
\ttverb\textquoteleft     \>OT1,T1\>   \textquoteleft   \>  \\
\ttverb\textquoteright    \>OT1,T1\>   \textquoteright  \>  \\
\ttverb\textregistered    \>OT1,T1\>   \textregistered  \>  \\
\ttverb\textsection       \>OT1,T1\>   \textsection     \>  \\
\ttverb\textsterling      \>OT1,T1\>   \textsterling    \>  \\
\ttverb\texttrademark     \>OT1,T1\>   \texttrademark   \>  \\
\ttverb\textunderscore    \>OT1,T1\>   \textunderscore  \>  \\
\ttverb\textvisiblespace  \>OT1,T1\>   \textvisiblespace\>  \\
\ttverb\th                \>T1    \>   \th              \>
\end{tabbing}

\section{Encodings for Unicode based \TeX\ systems}
\label{sec:unicode}

The preceding text has assumed a classic \TeX{} system that is
restricted to the use of fonts with at most 256 characters. In order
to accommodate all the characters needed for different languages and
mathematics it is necessary to have multiple encodings as described
above, and \LaTeX\ needs to be aware of the encoding used for each
font.

Unicode aims to provide a single encoding that removes most of the
need to switch encodings, apart from very specialist use for
non-standard characters. Rather than assign codes in the range 0--255 (hex
FF) Unicode codes are in the range 0--1,114,111 (hex 10FFFF), although
not all slots are available for distinct characters for technical
reasons. Unicode offers the possibility to use a single input encoding
(usually UTF-8) for all documents and to use essentially the same
Unicode encoding for all fonts, so removing the need to switch
encodings in different contexts.

Omega was perhaps the first widely used \TeX\ extension that
supported Unicode. Currently the two actively supported systems that are
present in most modern \TeX\ distributions are Xe\TeX\ and Lua\TeX.

When used with these extended \TeX\ engines, \LaTeX's font system can
refer to Unicode fonts (typically OpenType fonts installed system-wide
on your operating system rather than fonts specifically encoded/installed for
\TeX). Currently the usual method of accessing these fonts is through
the contributed \Pkg{fontspec} package. This uses as encoding \Enc{TU}:
``\TeX{} Unicode'' (historically two experimental encodings \Enc{EU1}
and \Enc{EU2}
were used, depending on the engine, but these are deprecated).
The exact rules for \LaTeX\ encodings
for Unicode engines have not yet been finalised in terms of the (usual)
requirement that each slot should be defined. (This is not realistic for
a Unicode font, as almost all fonts address subsets of the full range.)
It is rare to need to specify the \Enc{TU} encoding in a document as the
\Pkg{fontspec} package sets up the correct encoding when loaded.

The restrictions described in section~\ref{sec:restrictions} do not
apply, or need to be modified in a Unicode based engine. Clearly the
lowercase table (and hyphenation patterns) cannot be restricted to
the values used for \Enc{T1}, which only refer to the first 256
characters.

When the \LaTeX\ format is made, \LaTeX\ sets up the lowercase table
and classifies characters as letter or non-letter based on \Enc{T1} if
a classic \TeX\ or pdf\TeX\ is being used. If a Unicode based \TeX\ is
detected, the values are instead based on the classification and
lower-case mappings provided by the Unicode Character Database
\cite{ucd}. The \LaTeX{} team have written a generic loader bundle,
\Pkg{unicode-data}, which provides the mechanism to load this information
directly from the Unicode Character Database data files and which is read
when a Unicode-compliant engine is detected during format-building.

Similarly in the default configuration files used by modern \TeX\
distributions, the hyphenation files for each supported language are
written in UTF-8 encoding, using Unicode code points for all letters,
then if a classic \TeX\ system  is detected, some additional macros are
loaded to convert these files to 256-character encodings where
possible, and assuming the \Enc{T1} lowercase table. For Unicode engines
no conversion takes place. (The hyphenation patterns for a small number of
languages require that some punctuation characters have non-zero
\verb=\lccode= values. These are set during pattern reading, and may at some
stage in the future use the e-\TeX{} \verb=\savinghyphcodes= mechanism to
avoid any need to manipulate \verb=\lccode= in the document.)




\begin{thebibliography}{99}
\addcontentsline{toc}{section}{\numberline{\relax}\refname}


\bibitem{Adobe:PDF-1.6} \emph{\textsc{PDF} reference}:
    Adobe portable document format version~1.6.  Adobe Systems
    Incorporated, 2005. % why \textsuperscript{3}?
  \url{http://partners.adobe.com/public/developer/en/pdf/PDFReference16.pdf}.

\bibitem{Beeton:TB6-3-124} Barbara Beeton:
  \emph{Mathematical symbols and cyrillic fonts ready for
      distribution}.  In: TUGBoat, 6\#3, 1985.
  \url{http://tug.org/TUGboat/Articles/tb06-3/tb13beetcyr.pdf}.

\bibitem{beeton} Barbara Beeton: \emph{Unicode
      and math, a combination whose time has come -- Finally!}.  In:
  TUGBoat, 21\#3, 2000.
  \url{http://www.tug.org/TUGboat/Articles/tb21-3/tb68beet.pdf}.


\bibitem{Berdnikov:eurotex-98} A.\@ Berdnikov, O.\@
  Lapko, M.\@ Kolodin, A.\@ Janishevsky and
  A.\@ Burykin: \emph{The Encoding Paradigm in
      \LaTeXe{} and the Projected X2 Encoding for Cyrillic Texts}.
  Euro\TeX~98.
  \url{http://www.gutenberg.eu.org/pub/GUTenberg/publicationsPDF/28-29-berdnikova.pdf}.

\bibitem{CJK} \emph{The \Pkg{CJK} package}:
  \url{http://cjk.ffii.org}.

\bibitem{clasen} Matthias Clasen: \emph{A new
      implementation of \LaTeX{} math}, 1997-98.
  \url{http://www.tug.org/twg/mfg/papers/current/newmath.ps.gz}.

\bibitem{clasen-vieth} Matthias Clasen and Ulrik
  Vieth: \emph{Towards a new Math Font Encoding
      for (La)\TeX}.  March 1998,
  \url{http://www.tug.org/twg/mfg/papers/current/mfg-euro-all.ps.gz}.

\bibitem{CorkGW:91}
Dean Guenther and Janene Winter.
\newblock An international phonetic alphabet.
\newblock In Guenther \cite{proc:MGu91}, pages 149--156.
\newblock Published as {TUG}boat 12\#1.

\bibitem{proc:MGu91}
Mary Guenther, editor.
\newblock {\em {\TeX} 90 Conference Proceedings}, March 1991.
\newblock Published as {TUG}boat 12\#1.

\bibitem{tub:MFe90}
Michael~J. Ferguson.
\newblock Report on multilingual activities.
\newblock {\em {TUG}boat}, 11(4):514--516, 1990.

\bibitem{fontinst} \emph{The \Pkg{fontinst} package}:
  \textlangle CTAN\textrangle\url{/fonts/utilities/fontinst}.

\bibitem{Rei:TB17-2-102} Fukui Rei:
  \emph{\textsl{TIPA}: A system for processing phonetic
      symbols in \LaTeX}.  In: TUGBoat, 17\#2, 1996.
  \url{http://www.tug.org/TUGboat/Articles/tb17-2/tb51rei.pdf}.

\bibitem{hyperref} \emph{The \Pkg{hyperref} package}:
  \url{http://www.tug.org/applications/hyperref}.

\bibitem{tub:JKn93}
J\"org Knappen.
\newblock Fonts for Africa: The fc Fonts.
\newblock {\em {TUG}boat}, 14(2):104, 1993.

\bibitem{Knappen:TB17-2-96} J\"org Knappen:
  \emph{The \Pkg{dc} fonts~1.3: Move towards stability
      and completeness}.  In: TUGBoat 17\#2, 1996.
  \url{http://www.tug.org/TUGboat/Articles/tb17-2/tb51knap.pdf}.

\bibitem{A-W:DKn86}
Donald~E. Knuth.
\newblock {\em The {\TeX}book}.
\newblock Volume~A of {\em Computers \& {T}ypesetting\/},
  May 1989.
\newblock Eighth printing.

\bibitem{vnr} \emph{The \Pkg{vnr} font family}, developed by
   the author of pdf\TeX, {H\`an Th\^e\protect\llap{\raise 0.5ex\hbox{\'{\relax}}} Th\`anh}.
   \url{http://vntex.org/download/vntex}.

 \bibitem{ipa} Home page of the International Phonetic Association.
   \url{http://www.arts.gla.ac.uk/IPA/ipa.html}

\bibitem{A-W:LLa94}
Leslie Lamport.
\newblock {\em {\LaTeX:} A Document Preparation System}.
\newblock Addison-Wesley, Reading, Massachusetts, second edition, 1994.

\bibitem{LH-Fonts} \emph{The \Pkg{lh}-Fonts for Cyrillic}:
  \textlangle CTAN\textrangle\url{/fonts/cyrillic/lh}.

\bibitem{A-W:MG2004}
Frank Mittelbach and Michel Goossens.
\newblock {\em The {\LaTeX} Companion second edition}.
\newblock With Johannes Braams, David Carlisle, and Chris Rowley.
\newblock Addison-Wesley, Reading, Massachusetts, 2004.

\bibitem{Unicode} \emph{The Unicode Standard}.
  \url{http://unicode.org}.

\bibitem{ucd} \emph{The Unicode Character Database}.
  \url{http://unicode.org/ucd}.

\bibitem{ziegler} Justin Ziegler, \emph{Technical
    Report on Math Font Encodings}, June 1994,
  \url{http://www.tug.org/twg/mfg/papers/ltx3pub/l3d007.ps.gz}.

\end{thebibliography}

\clearpage\appendix
\begin{center}
  \Large\bfseries Appendices
\end{center}

\section{Example code tables}

This appendix contains a table of each font mentioned as an ``example''
font above, providing that the font was available when the document
was processed with \LaTeX{}.  (\LaTeX{} generates a warning message
for each font it fails to find.)

\subsection{Text encodings}

\ftable{cmr10}{OT1}

\ftable{wnr10}{OT2}

\ftable{wsuipa10}{OT3}

\ftable{plr10}{OT4}

%\ftable{artmr10}{OT6}

\ftable{ecrm1000}{T1}

\ftable{larm1000}{T2A}

\ftable{lbrm1000}{T2B}

\ftable{lcrm1000}{T2C}

\ftable{tipa10}{T3}

\ftable{fcr10}{T4}

\ftable{vnr10}{T5}


\subsection{Text symbol encodings}

The full table for \Enc{TS1} as provided by the European Computer Modern family:
\ftable{tcrm1000}{TS1}

\pagebreak

In contrast typical PostScript fonts usually have incomplete implementations
of \Enc{TS1} sometimes missing more than half of the glyphs:

\ftable{ptmr8c}{TS1}

\ftable{tipx10}{TS3}



\subsection{Extended text encodings}

\ftable{rxrm1000}{X2}


\subsection{Mathematical encodings}

\ftable{cmmi10}{OML}

\ftable{cmsy10}{OMS}

\ftable{cmex10}{OMX}


\subsection{Other encodings}

\ftable{ptmr8y}{LY1}

%%\ftable{????}{LV1}

\ftable{grmn1000}{LGR}

\ftable{wasy10}{U}
\ftable{logo10}{U}

\clearpage
\section{Uppercase and lowercase tables}
\label{sec:uclc-tab}

The following two sets of tables list the \verb"\uppercase" and
\verb"\lowercase" values for each position in the \LaTeX{} standard
256-character tables.

Each row of each table lists:
\begin{quote}
  \begin{tabular}{lp{0.7\textwidth}}
    pos & The position in the table (0--255) \\
    lc  & The value in the \verb"\lowercase" table at the position \\
        & (note that value 0 here means that \verb"\lowercase" is
          ineffective for this character, and hyphenation does not apply
          to it) \\
    uc  & The value in the \verb"\uppercase" table at the position \\
        & (note that value 0 here means that \verb"\uppercase" is
          ineffective for this character) \\
    glyphs & The glyphs specified for the T1 encoding for this
             position, laid out as \meta{glyph}\textbf{(}\meta{lowercase
             glyph}\textbf{/}\meta{uppercase glyph}\textbf{)}
  \end{tabular}
\end{quote}

\begin{center}
  \let\lctablenumbersize\footnotesize
  \makebox[\textwidth]{\hss
    \dolctable{0}{32}\quad\dolctable{32}{64}\quad
    \dolctable{64}{96}\quad\dolctable{96}{128}%
  \hss}

  \makebox[\textwidth]{\hss
    \dolctable{128}{160}\quad\dolctable{160}{192}\quad
    \dolctable{192}{224}\quad\dolctable{224}{256}%
  \hss}
\end{center}
\end{document}


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%