\documentclass[a3paper,landscape,english]{article}
% $Id: regex.tex,v 1.0 2004-05-24 01:12:46 adrian Exp $
% Copyright 2004 Adrian Sai-wah TAM
% Permission is granted to copy, distribute and/or modify this
% document under the terms of the GNU Free Documentation License,
% Version 1.1 or any later version published by the Free Software
% Foundation.

% \bs: a literal backslash, used to write regex escapes such as \bs n.
% The glyph is taken from math mode.
\newcommand{\bs}{$\backslash$}
% \remark{<codes>}: small sans-serif tag listing the tools that support an
% entry; the letter codes are explained in the legend at the foot of the sheet.
% \normalfont\sffamily is what the obsolete two-letter \sf switch expands to
% in text mode, so the output is unchanged.
\newcommand{\remark}[1]{{\scriptsize\normalfont\sffamily{#1}}}

\usepackage{multicol}

% --- Page geometry: 420mm x 297mm sheet with a 20mm margin on every side ---
% TeX puts the text origin 1in from the top-left corner of the sheet, so an
% offset of 20mm - 1.0in = -5.4mm leaves a 20mm margin at the top and left.
\setlength{\hoffset}{-5.4mm}
\setlength{\voffset}{-5.4mm}
% Text block: the sheet minus 20mm on each side.
\setlength{\textwidth}{380mm}
\setlength{\textheight}{257mm}
% No extra margins on top of the offsets above.
\setlength{\oddsidemargin}{0mm}
\setlength{\evensidemargin}{0mm}
\setlength{\topmargin}{0mm}
% No running header, so it occupies no vertical space.
\setlength{\headheight}{0pt}
\setlength{\headsep}{0in}
\setlength{\footskip}{10mm}

% Paragraphs start flush left.
\setlength{\parindent}{0in}

% Tell the PDF back end the physical sheet size (420mm x 297mm).
% pdfTeX and XeTeX expose it as \pdfpagewidth/\pdfpageheight; current LuaTeX
% names the same registers \pagewidth/\pageheight.  If neither pair exists
% the size is left to the DVI driver.
\ifx \pdfpagewidth \undefined
 \ifx \pagewidth \undefined
 \else
  \pagewidth=420mm      % page width of PDF output (LuaTeX)
  \pageheight=297mm     % page height of PDF output (LuaTeX)
 \fi
\else
 \pdfpagewidth=420mm   % page width of PDF output
 \pdfpageheight=297mm  % page height of PDF output
\fi

% \ctrl: expands to the text "C-"; defined here but not used anywhere in this
% file (presumably a prefix for Ctrl-key notation -- confirm before removing).
\newcommand*{\ctrl}{C-}

\begin{document}
% No header or footer on the sheet; body text is 10pt on an 11pt baseline.
\thispagestyle{empty}
\fontsize{10}{11}\selectfont

% \key{<description>}{<syntax>}: one entry of the reference.  The description
% is set flush left and the syntax flush right in typewriter type; the entry
% ends its own paragraph.  Deliberately not starred: descriptions contain \par
% to continue on a second line.
\newcommand{\key}[2]{%
  #1 \hfill\texttt{#2}\par%
}
% \head{<title>}: underlined, large italic heading that opens a group of
% entries, followed by a line break.
\newcommand{\head}[1]{%
  \underline{\large\textit{#1}}\\%
}

\begin{multicols}{4}
{\Large\bf Regular Expression Quick Reference}
\vskip 5pt
{\par\normalsize Version 1.02. Jun 1, 2004}

\vskip 20pt

\head{POSIX classes}
\key{Letters and digits}{[:alnum:]}
\key{Letters}{[:alpha:]}
\key{Space or tab}{[:blank:]}
\key{Control characters}{[:cntrl:]}
\key{Decimal digits}{[:digit:]}
\key{Printing chars, excl.\ space}{[:graph:]}
\key{Lowercase letters}{[:lower:]}
\key{Printing chars, incl.\ space}{[:print:]}
\key{Printing chars, excl.\ letters \& digits}{[:punct:]}
\key{Whitespaces}{[:space:]}
\key{Uppercase letters}{[:upper:]}
\key{Hex digits}{[:xdigit:]}

\vskip 20pt

\head{Standard Unicode Properties \remark{p}}
\key{Letters}{\bs p\{L\}}
\key{Lowercase letters}{\bs p\{Ll\}}
\key{Modifier letters}{\bs p\{Lm\}}
\key{Other letters (not {\tt Ll} and not {\tt Lm})}{\bs p\{Lo\}}
\key{Titlecase letters}{\bs p\{Lt\}}
\key{Uppercase letters}{\bs p\{Lu\}}
\key{Ctrl codes \& chars not in other categories}{\bs p\{C\}}
\key{ASCII and Latin-1 ctrl chars}{\bs p\{Cc\}}
\key{Non-visible formatting chars}{\bs p\{Cf\}}
\key{Unassigned code points}{\bs p\{Cn\}}
\key{Private use, such as company logos}{\bs p\{Co\}}
\key{Surrogates}{\bs p\{Cs\}}
\key{Marks to combine w/base chars, e.g.\ accents}{\bs p\{M\}}
\key{Mod.\ chars w/their own space, e.g.\ ``vowel signs''}{\bs p\{Mc\}}
\key{Marks enclose other chars, such as circles}{\bs p\{Me\}}
\key{Chars to mod.\ other chars, e.g.\ accents and umlauts}{\bs p\{Mn\}}
\key{Numeric characters}{\bs p\{N\}}
\key{Decimal digits in various scripts}{\bs p\{Nd\}}
\key{Letters that are numbers, e.g.\ roman numerals}{\bs p\{Nl\}}
\key{Superscripts, symbols, or non-digit char rep.\ nums}{\bs p\{No\}}
\key{Punctuation}{\bs p\{P\}}
\key{Connecting punctuation, such as an underscore}{\bs p\{Pc\}}
\key{Dashes and hyphens}{\bs p\{Pd\}}
\key{Closing punctuation complementing \bs p\{Ps\}}{\bs p\{Pe\}}
\key{Initial punctuation, such as opening quotes}{\bs p\{Pi\}}
\key{Final punctuation, such as closing quotes}{\bs p\{Pf\}}
\key{Other punctuation marks}{\bs p\{Po\}}
\key{Opening punctuation, such as opening parentheses}{\bs p\{Ps\}}
\key{Symbols}{\bs p\{S\}}
\key{Currency}{\bs p\{Sc\}}
\key{Combining chars represented as individual char}{\bs p\{Sk\}}
\key{Math symbols}{\bs p\{Sm\}}
\key{Other symbols}{\bs p\{So\}}
\key{Separating chars with no visual representation}{\bs p\{Z\}}
\key{Line separators}{\bs p\{Zl\}}
\key{Paragraph separators}{\bs p\{Zp\}}
\key{Space characters}{\bs p\{Zs\}}

\vskip 20pt

\head{Character representations}
\key{Alert (bell) \remark{pyas}}{\bs a}
\key{Backspace, \bs x08; supported only in char class \remark{pyma}}{\bs b}
\key{ESC character, \bs x1B \remark{pm}}{\bs e}
\key{Newline, ({\sf p}: \bs x0D on MacOS 9) \remark{pym}}{\bs n}
\key{Carriage return, ({\sf p}: \bs x0A on MacOS 9) \remark{pym}}{\bs r}
\key{Form feed, \bs x0C \remark{pyas}}{\bs f}
\key{Horizontal tab, \bs x09 \remark{pymas}}{\bs t}
\key{Vertical tab, \bs x0B \remark{yas}}{\bs v}
\key{Char specified by 1- to 3-digit octal code \remark{s}}{\bs o{\it octal}}
\key{Char specified by 2- or 3-digit octal code\par\ \ \  ({\sf a}: 1- to 3-) \remark{pya}}{\bs {\it octal}}
\key{Char specified by 1- or 2-digit hex code ({\sf y}: 2-digit) \remark{pyas}}{\bs x{\it hex}}
\key{Char specified by 1- to 3-digit decimal code \remark{as}}{\bs d{\it dec}}
\key{Char specified by any hex code \remark{p}}{\bs x\{{\it hex}\}}
\key{Char specified by a 4-digit hex \remark{y}}{\bs u{\it hhhh}}
\key{Char specified by an 8-digit hex \remark{y}}{\bs U{\it hhhhhhhh}}
\key{Ctrl-char, char suggested in uppercase \remark{pas}}{\bs c{\it char}}
\key{Escape the meta character \remark{asg}}{\bs {\it metachar}}
\key{A named char in the Unicode std or listed in \par\ \ \ {\tt\$PERLLIB/unicode/Names.txt}.\par\ \ \ Reqs {\tt use charnames ':full'}. \remark{p}}{\bs N\{{\it name}\}}

\vskip 20pt

\head{Char classes and class constructs}
\key{A single char listed \remark{pyvasg}}{[...]}
\key{A single char not listed \remark{pyvasg}}{[\^\@...]}
\key{POSIX-style char class,\par\ \ \ valid only inside a regex char class \remark{pvas}}{[:{\sf class}:]}
\key{Any char except newline ({\sf v}: unless /s mode) \remark{pyvasg}}{.}
\key{One byte (corrupts Unicode character stream) \remark{p}}{\bs C}
\key{Base char followed by any number of Unicode\par\ \ \ combining characters \remark{p}}{\bs X}
\key{(non)Word char \remark{pymsg}}{\bs w{\rm\ or }\bs W}
\key{\ \ \ ({\sf p}: \bs p\{IsWord\}) ({\sf ym}: [a-zA-Z0-9\_])}{}
\key{(non)Letter character [a-zA-Z] \remark{m}}{\bs a{\rm\ or }\bs A}
\key{(non)Head of word character [a-zA-Z\_] \remark{m}}{\bs h{\rm\ or }\bs H}
\key{(non)Digit char, ({\sf p}: \bs p\{IsDigit\}) ({\sf ym}: [0-9]) \remark{pym}}{\bs d{\rm\ or }\bs D}
\key{(non)Whitespace \remark{pym}}{\bs s{\rm\ or }\bs S}
\key{\ \ \ ({\sf p}: \bs p\{IsSpace\}) ({\sf ym}: [ \bs t\bs n\bs r\bs f\bs v])}{}
\key{(non)Hex digit [a-fA-F0-9] \remark{m}}{\bs x{\rm\ or }\bs X}
\key{(non)Octal digit [0-7] \remark{m}}{\bs o{\rm\ or }\bs O}
\key{(non)Lowercase letter [a-z] \remark{m}}{\bs l{\rm\ or }\bs L}
\key{(non)Uppercase letter [A-Z] \remark{m}}{\bs u{\rm\ or }\bs U}
\key{Identifier character defined by isident \remark{m}}{\bs i}
\key{Any non-digit identifier character \remark{m}}{\bs I}
\key{Keyword characters defined by iskeyword, often\par\ \ \  set by language modes. \remark{m}}{\bs k}
\key{Any non-digit keyword character \remark{m}}{\bs K}
\key{Filename character defined by isfname, OS dependent \remark{m}}{\bs f}
\key{Any non-digit filename character \remark{m}}{\bs F}
\key{Printable character defined by isprint, usually\par\ \ \ 0x20-0x7E \remark{m}}{\bs p}
\key{Any non-digit printable character \remark{m}}{\bs P}
\key{Char (not) contained by given Unicode property,\par\ \ \ script, or block \remark{p}}{\bs p\{{\it prop}\}{\rm\ or }\bs P\{{\it prop}\}}

\vskip 20pt

\head{Anchors and zero-width tests}
\key{Start of str, or after any newline in multiline mode\par\ \ \ ({\tt /m}) \remark{pyvasg}}{\^\@}
\key{Start of search str, in all match modes \remark{py}}{\bs A}
\key{End of str (excl.\ newline), or b4 any newline in {\tt /m} \remark{pyvasg}}{\$}
\key{End of str (excl.\ newline), in any match mode \remark{py}}{\bs Z}
\key{End of str, in any match mode \remark{p}}{\bs z}
\key{Beginning of current search \remark{p}}{\bs G}
\key{Word-boundary \remark{py}}{\bs b}
\key{Not-word-boundary \remark{py}}{\bs B}
\key{Beginning of word boundary (positions between a\par\ \ \ punc/space and a word char) \remark{vg}}{\bs $<$}
\key{End of word boundary \remark{vg}}{\bs $>$}
\key{Positive lookahead \remark{py}}{(?=...)}
\key{Negative lookahead \remark{py}}{(?!...)}
\key{Positive lookbehind; fixed-length only \remark{py}}{(?$<$=...)}
\key{Negative lookbehind; fixed-length only \remark{py}}{(?$<$!...)}

\vskip 20pt

\head{Mode modifiers}
\key{Case-insensitive matching}{\remark{p }/i}
\key{}{\remark{y }I{\rm/}i}
\key{}{\remark{v }:set ic}
\key{}{\remark{s }i{\rm\ or }I}
\key{}{\remark{a }IGNORECASE=1}
\key{}{\remark{g }-i{\rm\ option}}
\key{Case-sensitive matching}{\remark{v }:set noic}
\key{Cause {\tt\bs w},{\tt\bs W}, {\tt\bs b}, {\tt\bs B} to use current locale's definition\par\ \ \ of alphanumeric}{\remark{y }L}
\key{Cause {\tt\bs w},{\tt\bs W}, {\tt\bs b}, {\tt\bs B} to use Unicode's definition of\par\ \ \ alphanumeric}{\remark{y }U{\rm/}u}
\key{{\tt\^\@} and {\tt\$} match next to embedded {\tt\bs n}}{\remark{p }/m}
\key{}{\remark{y }M{\rm/}m}
\key{{\tt .} matches newline}{\remark{p }/s}
\key{}{\remark{y }S{\rm/}s}
\key{Ignore whitespace and allow comments (\#) in pattern}{\remark{p }/x}
\key{}{\remark{y }X{\rm/}x}
\key{Compile pattern only once}{\remark{p }/o}
\key{Turn listed modes ({\tt xsmi}) on for the rest of the\par\ \ \ subexpression}{\remark{py }(?{\it mode})}
\key{Turn listed modes ({\tt xsmi}) off for the rest of the\par\ \ \ subexpression}{\remark{p }(?-{\it mode})}
\key{Turn listed modes ({\tt xsmi}) on within {\tt()}}{\remark{p }(?{\it mode}:...)}
\key{Turn listed modes ({\tt xsmi}) off within {\tt()}}{\remark{p }(?-{\it mode}:...)}
\key{Treat substring as a comment}{\remark{py }(?\#...)}
\key{Treat rest of line as a comment in {\tt/x} mode}{\remark{py }\#...}
\key{Force next character to uppercase}{\remark{ps }\bs u}
\key{Force next character to lowercase}{\remark{ps }\bs l}
\key{Force all following chars to uppercase}{\remark{ps }\bs U}
\key{Force all following chars to lowercase}{\remark{ps }\bs L}
\key{Escape all following regex metacharacters}{\remark{p }\bs Q}
\key{End a span started with {\tt\bs U}, {\tt\bs L}, or {\tt\bs Q}}{\remark{s }\bs e}
\key{}{\remark{ps }\bs E}

\vskip 20pt

\head{Grouping, capturing, conditional, \& control}
\key{Group subpattern and capture submatch}{\remark{pa }(...)}
\key{\ \ \ into \bs 1, \bs 2, ... ({\sf p}: and \$1, \$2, ...)}{\remark{vsb }\bs(...\bs)}
\key{Group \& capture into named capture group}{\remark{y }(?P$<${\it name}$>$...)}
\key{Match text matched by earlier named\par\ \ \ capture group}{\remark{y }(?P={\it name})}
\key{Contains text matched by the {\it n}th capture group}{\remark{pvs }\bs{\it n}}
\key{In replacement string, evaluates to the matched text}{\remark{v }\&}
\key{Groups subpattern, but does not capture\par\ \ \ submatch}{\remark{p }(?:...)}
\key{Disallow backtracking for text matched by\par\ \ \ subpattern}{\remark{p }(?>...)}
\key{Try subpatterns in alternation}{\remark{pvsag }...|...}
\key{}{\remark{b }...\bs |...}
\vskip 3pt
\key{Match 0 or more times}{\remark{pvsag }*}
\key{Match 1 or more times}{\remark{psag }+}
\key{}{\remark{vb }\bs +}
\key{Match 0 or 1 times}{\remark{psag }?}
\key{}{\remark{v }\bs =}
\key{}{\remark{b }\bs ?}
\key{Match exactly {\it n} times}{\remark{p }\{{\it n}\}}
\key{}{\remark{vb }\bs\{{\it n}\}}
\key{}{\remark{sg }\bs\{{\it n}\bs\}}
\key{Match at least {\it n} times}{\remark{p }\{{\it n},\}}
\key{}{\remark{vb }\bs\{{\it n},\}}
\key{}{\remark{sg }\bs\{{\it n},\bs\}}
\key{Match at most {\it n} times}{\remark{p }\{,{\it n}\}}
\key{}{\remark{vb }\bs\{,{\it n}\}}
\key{Match for {\it x} to {\it y} times (inclusive)}{\remark{p }\{{\it x},{\it y}\}}
\key{}{\remark{vb }\bs\{{\it x},{\it y}\}}
\key{}{\remark{sg }\bs\{{\it x},{\it y}\bs\}}
\vskip 3pt
\key{Match 0 or more times, as few as possible}{\remark{p }*?}
\key{Match 1 or more times, as few as possible}{\remark{p }+?}
\key{Match 0 or 1 times, as few as possible}{\remark{p }??}
\key{Match at least {\it n} times, as few as possible}{\remark{p }\{{\it n},\}?}
\key{Match for {\it x} to {\it y} times (inclusive), as few as\par\ \ \ possible}{\remark{p }\{{\it x},{\it y}\}?}
\vskip 3pt
\key{Match w/if-then-else pattern where {\it cond} is an integer referring to either a backreference or a lookaround assertion}{}
\key{}{(?({\it cond})...|...)}
\key{Match w/if-then pattern}{(?({\it cond})...)}
\key{Exec.\ embedded Perl code}{(?\{{\it code}\})}
\key{Match regex from embedded Perl code}{(??\{{\it code}\})}

\vskip 20pt

\head{Perl 5.8 After-match variables}
\key{Captured submatches}{\$1, \$2, ...}
\key{Array with {\tt\$-[0]}=offset of start of match;\par\ \ \ and {\tt\$-[n]}=offset of start of {\tt\$n}}{@-}
\key{Array with {\tt\$+[0]}=offset of end of match;\par\ \ \ and {\tt\$+[n]}=offset of end of {\tt\$n}}{@+}
\key{Last parenthesized match}{\$+}
\key{Text before match (slow).\par\ \ \ Same as {\tt substr(\$input,0,\$-[0])}}{\$`}
\key{Text of match (slow).\par\ \ \ Same as {\tt substr(\$input,\$-[0],\$+[0]-\$-[0])}}{\$\&}
\key{Text after match (slow).\par\ \ \ Same as {\tt substr(\$input,\$+[0])}}{\$'}
\key{Text of most recently closed capturing parentheses}{\$\^\@N}
\key{If true, {\tt /m} is assumed for all matches without a {\tt /s}}{\$*}
\key{Result of the most-recent exec.\ construct within a match}{\$\^\@R}

\vskip 20pt

\head{Perl 5.8 Unicode support}
\key{{\tt [\bs x00-\bs x7f]}}{\bs p\{IsASCII\}}
\key{{\tt [\bs p\{Ll\}\bs p\{Lu\}\bs p\{Lt\}\bs p\{Lo\}\bs p\{Nd\}]}}{\bs p\{IsAlnum\}}
\key{{\tt [\bs p\{Ll\}\bs p\{Lu\}\bs p\{Lt\}\bs p\{Lo\}]}}{\bs p\{IsAlpha\}}
\key{{\tt \bs p\{C\}}}{\bs p\{IsCntrl\}}
\key{{\tt \bs p\{Nd\}}}{\bs p\{IsDigit\}}
\key{{\tt [\^\@\bs p\{C\}\bs p\{Space\}]}}{\bs p\{IsGraph\}}
\key{{\tt \bs p\{Ll\}}}{\bs p\{IsLower\}}
\key{{\tt \bs P\{C\}}}{\bs p\{IsPrint\}}
\key{{\tt \bs p\{P\}}}{\bs p\{IsPunct\}}
\key{{\tt [\bs t\bs n\bs f\bs r\bs p\{Z\}]}}{\bs p\{IsSpace\}}
\key{{\tt [\bs p\{Lu\}\bs p\{Lt\}]}}{\bs p\{IsUpper\}}
\key{{\tt [\_\bs p\{Ll\}\bs p\{Lu\}\bs p\{Lt\}\bs p\{Lo\}\bs p\{Nd\}]}}{\bs p\{IsWord\}}
\key{{\tt [0-9a-fA-F]}}{\bs p\{IsXDigit\}}

\end{multicols}

\vspace{\fill}
\hfill Application code: {\sf Perl 5.8 (p), Python (y), vim and vi (v), vim (m), sed (s), awk (a), egrep (g), grep and basic regex (b)}
\vskip 3pt
\par\hfill Copyright \copyright\ 2004 Adrian Sai-wah TAM. Permission is granted to use this document under the terms of the GNU Free Documentation License, Version 1.2 or any later version published by the Free Software Foundation.
\end{document}
