In many situations text compression is carried out with a previously
formed fixed dictionary (code book) expressing those often-occurring
substrings of a text which are to be replaced by code words. The
problem of encoding a text in a space-optimal manner is equivalent to
the problem of finding a shortest path between a given pair of
vertices in an acyclic and bandwidth-limited network. By combining an
algorithm for finding shortest paths with the string matching
algorithm of Aho and Corasick, a time-efficient approximation
algorithm for the space-optimal encoding is obtained. The performance
of the approximation algorithm depends on the amount of storage space
available in the fast memory of a computer. With an unrestricted,
though at most linear working storage on the length of the input text,
a space-optimal encoding is obtained. However, even a fixed internal
memory of moderate size guarantees almost optimal compression, and in
spite of this the running time of the algorithm is comparable to that
of the longest match heuristic.
@article{KR1989J,
author = {J. Katajainen and T. Raita},
title = {An approximation algorithm for space-optimal encoding of a text},
journaltitle = {The Computer Journal},
volume = {32},
number = {3},
year = {1989},
pages = {228--237},
}