% \LaTeX-Main\
% !TeX encoding=UTF-8
% !TeX spellcheck=en_US
%%
%% The LaTeX package sanitize-umlaut - version 1.3.0 (2023/05/15)
%% sanitize-umlaut.tex: Manual
%%
%% -------------------------------------------------------------------------------------------
%% Copyright (c) 2016-2023 by Prof. Dr. Dr. Thomas F. Sturm <thomas dot sturm at unibw dot de>
%% -------------------------------------------------------------------------------------------
%%
%% This work may be distributed and/or modified under the
%% conditions of the LaTeX Project Public License, either version 1.3
%% of this license or (at your option) any later version.
%% The latest version of this license is in
%%   https://www.latex-project.org/lppl.txt
%% and version 1.3c or later is part of all distributions of LaTeX
%% version 2005/12/01 or later.
%%
%% This work has the LPPL maintenance status `author-maintained'.
%%
%% This work consists of all files listed in README
%%
\documentclass[a4paper,11pt]{ltxdoc}

\usepackage{sanitize-umlaut.doc}
\usepackage{%
  sanitize-umlaut}

\def\version{1.3.0}%
\def\datum{2023/05/15}%

\hypersetup{
  pdftitle={Manual for the sanitize-umlaut package},
  pdfauthor={Thomas F. Sturm},
  pdfsubject={sanitize umlauts for makeindex},
  pdfkeywords={index, umlauts, makeindex, pdflatex}
}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{document}

\begin{center}
\begin{tcolorbox}[enhanced,hbox,tikznode,left=8mm,right=8mm,boxrule=0.4pt,
  colback=white,colframe=black!50!yellow,
  drop lifted shadow=black!50!yellow,arc is angular,
  before=\par\vspace*{5mm},after=\par\bigskip]
{\bfseries\LARGE The \texttt{sanitize-umlaut} package}\\[3mm]
{\large Manual for version \version\ (\datum)}
\end{tcolorbox}
{\large Thomas F.~Sturm%
  \footnote{Prof.~Dr.~Dr.~Thomas F.~Sturm, Institut f\"{u}r Mathematik und Informatik,
    University of the Bundeswehr Munich, D-85577 Neubiberg, Germany;
    email: \href{mailto:thomas.sturm@unibw.de}{thomas.sturm@unibw.de}}\par\medskip
\normalsize\url{https://www.ctan.org/pkg/sanitize-umlaut}\par
\url{https://github.com/T-F-S/sanitize-umlaut}
}
\end{center}
\bigskip
\begin{absquote}
  \begin{center}\bfseries Abstract\end{center}
  The package sanitizes umlauts to be used directly in index entries for
  MakeIndex and friends with |pdflatex|. This means that inside |\index| an umlaut can
  be used as \texttt{"U} or \texttt{Ü}. In both cases, the letter is written
  as \texttt{"U} into the raw index file for correct processing with MakeIndex
  and |pdflatex|. |lualatex| and |xelatex| are also supported with a different approach.
\end{absquote}

\begin{tcolorbox}[breakable,enhanced jigsaw,title={Contents},fonttitle=\bfseries\Large,
  colback=yellow!10!white,colframe=red!50!black,before=\par\bigskip\noindent,
  colbacktitle=red!50!yellow!75!black,
  enlargepage flexible=\baselineskip,pad at break*=3mm,
  attach boxed title to top center={yshift=-0.25mm-\tcboxedtitleheight/2,yshifttext=2mm-\tcboxedtitleheight/2},
  boxed title style={enhanced,boxrule=0.5mm,
    frame code={ \path[tcb fill frame] ([xshift=-4mm]frame.west) -- (frame.north west)
    -- (frame.north east) -- ([xshift=4mm]frame.east)
    -- (frame.south east) -- (frame.south west) -- cycle; },
    interior code={ \path[tcb fill interior] ([xshift=-2mm]interior.west)
    -- (interior.north west) -- (interior.north east)
    -- ([xshift=2mm]interior.east) -- (interior.south east) -- (interior.south west)
    -- cycle;}  },
  drop fuzzy shadow]
\makeatletter
\@starttoc{toc}
\makeatother
\end{tcolorbox}

\clearpage
\section{Purpose of the Package}%
The package sanitizes umlauts to be used directly in index entries for
|makeindex| and friends with |pdflatex|. This means that inside |\index| an umlaut can
be used as \myverb{"U} or \myverb{Ü}. In both cases, the letter is written
as \myverb{"U} into the raw index file for correct processing with |makeindex|
and |pdflatex|.

The package is intended
\begin{itemize}
\item for documents in German language using the babel package with a setting
  identical or similar to \myverb{\usepackage[ngerman]{babel}}.
\item for documents which are processed by |latex| or |pdflatex| (also for |lualatex| or |xelatex|,
    but with more compilation overhead).
\item for documents with an index which is processed using the
  MakeIndex program.
\item for authors who like to use \myverb{\index{Übermaß}} instead of
  \myverb{\index{"Uberma"s}}.
\end{itemize}

All these conditions are satisfiable by simply including the |sanitize-umlaut|
package.

An alternative would be to filter the resulting raw \myverb{.idx} index
\emph{before} |makeindex| is applied to create the final \myverb{.ind} index.
Another alternative is to replace MakeIndex by Xindy or another index
processor.

\clearpage
\section{Important Compatibility Information}%

\subsection{Past}
Until 2018, the default encoding for \LaTeX\ files was 7-bit ASCII.
For other encodings, packages like |inputenc| had to be loaded.
Also, |inputenc| used to expand characters like umlauts during |\index| output.
The package |sanitize-umlaut| version 1.00 replaced this expansion code
for |\index| output to get \myverb{"U} instead of \myverb{Ü}, etc.

\subsection{Present}
Since April 2018, the default encoding for \LaTeX\ files has been changed to UTF-8.
This is done by preloading the UTF-8 settings of the package |inputenc|
by default \LaTeX, i.e., if you want to use UTF-8 (recommended!), you no
longer need to load |inputenc| inside your preamble.
However, the implementation of |inputenc| for UTF-8 also changed (October 2019?).
Nowadays, characters like umlauts are no longer expanded during |\index| output,
but are preserved as is.
Therefore, |sanitize-umlaut| version 1.00 \textbf{is not compatible} with
|inputenc| with UTF-8 dating from 2019 or newer.

|sanitize-umlaut| version 1.10 (or newer) patches some UTF-8 code of
\LaTeX/|inputenc| to return and replace character expansion during |\index| output.
This patch \textbf{is not compatible} with older versions of \LaTeX/|inputenc| (before October 2019).
Therefore, if your \LaTeX\ distribution is not reasonably up to date, you
should stay at version 1.00 of |sanitize-umlaut|.

With the 2022 June release of \LaTeX, characters defined via |utf8.def| are now
defined as |\protected| macros. Therefore, |sanitize-umlaut| version 1.2.0 (or newer)
patches some relevant parts of two-octet characters during |\index| back to
pre-June-2022 behaviour. Obviously, you lose |\protected| here, if you
load |sanitize-umlaut|.

|sanitize-umlaut| version 1.3.0 (or newer) also supports |lualatex| and |xelatex|
with a different approach. Here, |\index| is patched such that its argument
is processed to replace umlauts.

\subsection{Future}
As always, the future is dark and difficult to see. Further changes of
|inputenc| implementation may force further changes of |sanitize-umlaut|.
Hopefully, this will not happen too soon or too often.
Also, if some miracle happens, MakeIndex may be updated one day to
recognize UTF-8 properly to make |sanitize-umlaut| superfluous.


\clearpage
\section{Package Usage}%

\subsection{Prerequisites}
The source document may need some encoding by |inputenc| since |pdflatex|
is assumed as engine. For example:

\begin{dispListing}
\usepackage[latin1]{inputenc}
\end{dispListing}

For |utf8| (UTF-8), modern \LaTeX\ does not need this package inclusion any more!
Also, for |lualatex| and |xelatex| this has to be omitted.

Only a few encodings are supported by |sanitize-umlaut|.
These are the most important for German language texts:

\tcbox[center,size=minimal,enhanced,frame hidden,colback=red!10]{\begin{tabular}{>{\ttfamily}l>{\ttfamily}l}\toprule
\textnormal{\bfseries encoding} & \textnormal{\bfseries recognized as}\\\midrule
utf8 & utf8\\
utf8-2018 & utf8-2018\\
latin1, ansinew, cp1252 & latin1\\
applemac & applemac\\\bottomrule
\end{tabular}}

Further, the |babel| package with German settings is needed:

\begin{dispListing}
\usepackage[ngerman]{babel}
\end{dispListing}

\subsection{Package Application}
Now, the package application is simple. You just put

\begin{dispListing}
\usepackage{sanitize-umlaut}
\end{dispListing}

into your document preamble \emph{after} |inputenc| and, maybe, after |babel|.
That is all.


\subsection{Sanitized Characters}

The umlauts and the sharp s are replaced by their |babel| shorthand codes
which are written to the \myverb{.idx} file.

\tcbox[center,size=minimal,enhanced,frame hidden,colback=red!10]{\begin{tabular}{>{\ttfamily}l>{\ttfamily}l}\toprule
\textnormal{\bfseries character} & \textnormal{\bfseries replacement}\\\midrule
ä & "a\\
ö & "o\\
ü & "u\\
Ä & "A\\
Ö & "O\\
Ü & "U\\
ß & "s\\\bottomrule
\end{tabular}}


\clearpage
\subsection{Technical Information}

The package uses \myverb{\inputencodingname} (set by \LaTeX\ and the |inputenc| package)
to determine the current encoding.

The package redefines the \myverb{\@sanitize} macro at the beginning of the
document. It adds some encoding redefinitions to this macro.
\myverb{\@sanitize} is used inside \myverb{\index} in a local group.
If another package (besides |babel|) also changes this macro or uses it
outside \myverb{\index}, strange things may happen.

If \myverb{\inputencodingname} is \emph{not} present, the package checks whether
the current engine is |luatex| or |xetex| and patches the \myverb{\index} macro
itself. All umlauts inside the argument of \myverb{\index} are replaced by their
|babel| shorthand codes using \LaTeX3 token replacement.
This increases compilation time considerably compared to the \myverb{\@sanitize} hack
for |pdflatex|. A very rough figure is approximately a plus of 0.8 seconds per
10000 \myverb{\index} calls (will differ on other machines and other example code).


\clearpage
\section{Application Examples}%

\tcbinputlisting{docexample,listing file=german.ist,listing only,minted style=colorful,
  title={file |"german.ist"| for the examples}}


\begin{fullexample}{run arara}
% !TeX encoding=UTF-8
% arara: pdflatex
% arara: makeindex: { style: german.ist, german: true }
% arara: pdflatex
\documentclass[a4paper,12pt]{article}
\usepackage[T1]{fontenc}
%\usepackage[utf8]{inputenc}   % utf8 is default now
\usepackage[ngerman]{babel}
\usepackage{makeidx}
\usepackage{sanitize-umlaut}
\makeindex
\begin{document}
\section{Basic Example}
Test äöüÄÖÜß.
\index{Aber}   \index{Arg}   \index{Ärger}
\index{Ofen}   \index{Ö - wie schön}   \index{oberhalb}
\index{Ufer}   \index{Übermaß}
\index{Latex=\LaTeX}   \index{Ärger>Index}
Test äöüÄÖÜß.
\printindex
\end{document}
\end{fullexample}


\begin{fullexample}{run arara}
% !TeX encoding=UTF-8
% arara: pdflatex
% arara: makeindex: { style: german.ist, german: true }
% arara: pdflatex
\documentclass[a4paper,12pt]{article}
\usepackage[T1]{fontenc}
%\usepackage[utf8]{inputenc}   % utf8 is default now
\usepackage[ngerman]{babel}
\usepackage{makeidx}
\usepackage{sanitize-umlaut}
\usepackage[hyperindex,colorlinks]{hyperref}
\makeindex
\begin{document}
\section{Example with hyperref}
Test äöüÄÖÜß.
\index{Aber}   \index{Arg}   \index{Ärger}
\index{Ofen}   \index{Ö - wie schön}   \index{oberhalb}
\index{Ufer}   \index{Übermaß}
\index{Latex=\LaTeX}   \index{Ärger>Index}
Test äöüÄÖÜß.
\printindex
\end{document}
\end{fullexample}


\begin{fullexample}{run arara}
% !TeX encoding=UTF-8
% arara: pdflatex
\documentclass[a4paper,12pt]{article}
\usepackage[T1]{fontenc}
%\usepackage[utf8]{inputenc}   % utf8 is default now
\usepackage[ngerman]{babel}
\usepackage[makeindex]{imakeidx}
\makeindex[options=-s german.ist -g]
\usepackage{sanitize-umlaut}
\begin{document}
\section{Example with imakeidx}
Test äöüÄÖÜß.
\index{Aber}   \index{Arg}   \index{Ärger}
\index{Ofen}   \index{Ö - wie schön}   \index{oberhalb}
\index{Ufer}   \index{Übermaß}
\index{Latex=\LaTeX}   \index{Ärger>Index}
Test äöüÄÖÜß.
\printindex
\end{document}
\end{fullexample}


\begin{fullexample}{run arara}
% !TeX encoding=UTF-8
% arara: pdflatex
\documentclass[a4paper,12pt]{article}
\usepackage[T1]{fontenc}
%\usepackage[utf8]{inputenc}   % utf8 is default now
\usepackage[ngerman]{babel}
\usepackage[makeindex]{imakeidx}
\makeindex[options=-s german.ist -g]
\usepackage{sanitize-umlaut}
\usepackage[hyperindex,colorlinks]{hyperref}
\begin{document}
\section{Example with imakeidx and hyperref}
Test äöüÄÖÜß.
\index{Aber}   \index{Arg}   \index{Ärger}
\index{Ofen}   \index{Ö - wie schön}   \index{oberhalb}
\index{Ufer}   \index{Übermaß}
\index{Latex=\LaTeX}   \index{Ärger>Index}
Test äöüÄÖÜß.
\printindex
\end{document}
\end{fullexample}


\begin{fullexample}{run arara}
% !TeX encoding=UTF-8
% arara: pdflatex
\documentclass[a4paper,12pt]{article}
\usepackage[T1]{fontenc}
%\usepackage[utf8]{inputenc}   % utf8 is default now
\usepackage[ngerman]{babel}
\usepackage[makeindex]{imakeidx}
\indexsetup{level=\section*,noclearpage}
\makeindex[name=personen,title=Personenregister,options=-s german.ist -g]
\makeindex[name=allgemein,title=Allgemeines Register,options=-s german.ist -g]
\usepackage{sanitize-umlaut}
\begin{document}
\section{Example with multiple indexes}
Test äöüÄÖÜß.
\index[personen]{Huber, Hans}    \index[personen]{Hübner, Jörg}
\index[allgemein]{Aber}          \index[allgemein]{Arg}
\index[allgemein]{Ärger}         \index[allgemein]{Ofen}
\index[allgemein]{Ö - wie schön} \index[allgemein]{oberhalb}
\index[allgemein]{Ufer}          \index[allgemein]{Übermaß}
\index[allgemein]{Latex=\LaTeX}  \index[allgemein]{Ärger>Index}
Test äöüÄÖÜß.
\clearpage
\printindex[allgemein]
\printindex[personen]
\end{document}
\end{fullexample}


\begin{fullexample}{run arara}
% !TeX encoding=UTF-8
% arara: lualatex
\documentclass[a4paper,12pt]{article}
\usepackage{fontspec}
\usepackage[ngerman]{babel}
\usepackage[makeindex]{imakeidx}
\indexsetup{level=\section*,noclearpage}
\makeindex[name=personen,title=Personenregister,options=-s german.ist -g]
\makeindex[name=allgemein,title=Allgemeines Register,options=-s german.ist -g]
\usepackage{sanitize-umlaut}
\begin{document}
\section{Example with multiple indexes for lualatex}
Test äöüÄÖÜß.
\index[personen]{Huber, Hans}    \index[personen]{Hübner, Jörg}
\index[allgemein]{Aber}          \index[allgemein]{Arg}
\index[allgemein]{Ärger}         \index[allgemein]{Ofen}
\index[allgemein]{Ö - wie schön} \index[allgemein]{oberhalb}
\index[allgemein]{Ufer}          \index[allgemein]{Übermaß}
\index[allgemein]{Latex=\LaTeX}  \index[allgemein]{Ärger>Index}
Test äöüÄÖÜß.
\clearpage
\printindex[allgemein]
\printindex[personen]
\end{document}
\end{fullexample}


\end{document}