# User:TarmstroBot/user-fixes.py (from Wikisource)
# -*- coding: utf-8  -*-

#FDSYS FIXES.  Used for volumes 65-116 of the U.S. Statutes at Large
#to correct common mistakes introduced by the GPO's OCR.
fixes['fdsys'] = {
    'regex': True,
    'msg': {
        '_default':u'Robot:fixing common errors in FDsys OCR',
    },
    'replacements': [
# June 2012: further OCR corrections based on results of April 2012 run.
# Estimated number of changes to dataset resulting from this run: 110,500
#
## undoing overzealous edits from earlier run
    (u"\\bacompany(i?ni?g|ii\\^)", u"accompanying"),
    (u"internationaly", u"internationally"),
    (u"\\b([Rr])enlist", u"\\1eenlist"),
    (u"([A-Z]) ([A-Z]) TO F(?= [A-HJ-TV-Z])", u"\\1\\2T OF"),
## character substitutions (scannos)
### “£m” -> “an”, sometimes
    (u"£md", u"and"),
### “£ui” -> “an”
    (u"£ui", u"an"),
### “3d|5d” -> “yi”, sometimes
    (u"[35]dng", u"ying"),
    (u"[35]d(?=e)", u"yi"),
### “a” -> “e”, sometimes
    (u"\\btha\\b", u"the"),
    (u"\\b([Oo])thar", u"\\1ther"),
    (u"anothar", u"another"),
    (u"([Hh])aad", u"\\1ead"),
### “a” -> “m”, sometimes
    (u"\\bl?aore", u"more"),
### “a” -> “s”, sometimes
    (u"\\bahall", u"shall"),
### “B|E|K” -> “R”, sometimes
    (u"T?[BEK](?=evis)", u"R"),
    (u"[BEK](?=evenu)", u"R"),
    (u"[BEK](?=ep[oru])", u"R"),
### “d” -> “i”, sometimes
    (u"[vw]dth", u"with"),
### “e” -> “c”, sometimes
    (u"\\be(?=ont)", u"c"),
    (u"\\beons\\B", u"cons"),
### “ei” -> “a”, sometimes
    (u"\\bei(?=(fte|g[aegr]|mend|mo|n[adnty]|rm|ss))", u"a"),
    (u"([^b][Ll])eind", u"\\1and"),
    (u"([a-z])Ei(?=[gik-nrsx])", u"\\1a"),
    (u"([Pp])(a|ei)r(a|ei)", u"\\1ara"),
### “em” -> “an”, sometimes
    (u"emd\\b", u"and"),
### “Eui” -> “an”
    (u"Eui", u"an"),
### “h” -> “b”, sometimes
    (u"semhle", u"semble"),
### “h” -> “li”, sometimes
    (u"\\b([ae])h(?=[gm])", u"\\1li"),
    (u"([aou]b)H(?=[a-z])", u"\\1li"),
### “(I|L)^” -> “U”, sometimes
    (u"\\b[IL]\^+(?=(ni|rb|\\.S))", u"U"),
### “i” -> “r”, sometimes
    (u"aiy\\b", u"ary"),
### “ii” -> “n”, sometimes
    (u"aiid", u"and"),
### “ii” -> “u”, sometimes
    (u"fii(?=(n[cd]|r[bnt][^']))", u"fu"),
    (u"\\bii(?=nde)", u"u"),
### “im” -> “un”, sometimes
    (u"([Cc])ommimit", u"\\1ommunit"),
    (u"\\bimless\\b", u"unless"),
    (u"([Ff])oimd", u"\\1ound"),
    (u"\\bim\^?der", u"under"),
    (u"coimt", u"count"),
    (u"\\Btimit", u"tunit"),
    (u"Jime\\b", u"June"),
### “imi” -> “um”, sometimes
    (u"\\bnimi(?=[^s])", u"num"),
### “iu-” -> “ur”, sometimes
    (u"iu[^t](?=ance)", u"ur"),
### “i'” -> “r”, sometimes
    (u"i'(?=(a|e[^cdnorsH]|i[a-gi-km-z]|o[^enryU]))", u"r"),
### “Ji” -> “h”, sometimes
    (u"tJi", u"th"),
### “jr” -> “y”, sometimes
    (u"([bcdghlmstBLKS]|[^wr ]a|[^bdrwLR][eo]|[^p]f|[^io]n|[^mo ]p|[^eo]r)jr", u"\\1y"),
### “j^” -> “p”, sometimes
    (u"j\\^(?=r)", u"p"),
### “j^” -> “y”, sometimes
    (u"([Pp])aj\\^", u"\\1ay"),
### “k)” -> “o”, sometimes
    (u"([CG])k\\)", u"\\1o"),
### “l)” -> “b”, sometimes
    (u"l\\)(?=(a[^a-eilmnrtvyzB (]|e[^cnoprtv]|i[^fimnpsM .]|o[^cflrswx]|u[^bir]|y[^el]))", u"b"),
### “li” -> “h”, sometimes
    (u"\\b([CcSsTtWw])li(?=([io]|a[^n]|e[^dinrs]|u[^r]))", u"\\1h"),
    (u"([einr])tli\\b", u"\\1th"),
    (u"([Oo])tlier", u"\\1ther"),
### “m” -> “in”, sometimes
    (u"\\bm(?=s([eu]r|t[^or]))", u"in"),
### “m” -> “rn”, sometimes
    (u"([Gg])r?ov[ec]m", u"\\1overn"),
    (u"bom\\b", u"born"),
### “n” -> “h”, sometimes
    (u"sucn", u"such"),
    (u"witn\\b", u"with"),
    (u"\\bTne\\b", u"The"),
### “o” -> “b”, sometimes
    (u" oy\\b", u" by"),
    (u"assemol", u"assembl"),
### “o)r|o}r” -> “oy”, sometimes
    (u"o[)}]r(?=(i[^a]|[ms]))", u" oy"),
### “sd” -> “al”, sometimes
    (u"(ib|[cegmnpr-w])sd\\b", u"\\1al"),
### “si” -> “a”, sometimes
    (u"parsi(?=g)", u"para"),
### “sm” -> “an”, sometimes
    (u"smd", u"and"),
### “U” -> “li”, sometimes
    (u"U(?=e[fu])", u"li"),
    (u"earUer", u"earlier"),
### “U” -> “LI”, sometimes
    (u"(E|OB)UG", u"\\1LIG"),
    (u"BIUTY", u"BILITY"),
    (u"\\bUAB\\B", u"LIAB"),
    (u"([^BEGLRS])UFE", u"\\1LIFE"),
    (u"MIUT", u"MILIT"),
    (u"ELUG", u"ELLIG"),
    (u"\\b([A-Z].[A-IK-SU-Z][A-Z])UNE\\b", u"\\1LINE"),
### “U” -> “ll”, sometimes
    (u"U(?=(ee|o[mw]))", u"ll"),
    (u"\\baU\\b", u"all"),
    (u"([ou])U(?=er)", u"\\1ll"),
### “v” -> “y”, sometimes
    (u"davs\\b", u"days"),
    (u"\\bvear", u"year"),
    (u"dav\\b", u"day"),
    (u"\\bmav", u"may"),
    (u"tv\\b", u"ty"),
### “v^” -> “w”, sometimes
    (u"([Ll])av\^+r?", u"\\1aw"),
    (u"v[iv]\^(?=(a[nr]|[eh]|i[nt]|n[^go]|s[^o ]))", u"w"),
### “v/” -> “w”, sometimes
    (u"v/\\^?(?=[^r'*,])", u"w"),
### “w” -> “v”, sometimes
    (u"twes\\b", u"tives"),
### “X” -> “)(”, sometimes
    (u"\\(IX([A-E])\\)", u"(I)(\\1)"),
##
## general misspellings
    (u"asse...bled", u"assembled"),
    (u"emplo..[mni]ent", u"employment"),
    (u"Secretan[jr]", u"Secretary"),
    (u"[C({f][^Cc]{1,2}ongress", u"Congress"),
    (u"([Pp])ro[mwnH]ded", u"\\1rovided"),
    (u"\\be..a[ce]ted", u"enacted"),
    (u"[(<{][^Gg]r?eneral", u"General"),
    (u"([Ss])ec[^a-z]{1,2}ion", u"\\1ection"),
    (u"se[^msv]{2,3}[bh]led", u"sembled"),
    (u"throu\\^h?", u"through"),
    (u"C.[^m]mm(?=(e[^d]|[iou]))", u"Comm"),
    (u"\\bb\\^+p?inn", u"beginn"),
    (u"Act[^ ]?of", u"Act of"),
    (u"([Oo])f[li][[^i]ce", u"\\1ffice"),
    (u"([Pp])ub[^i]c", u"\\1ublic"),
    (u"\\bRE[^C]ORD", u"RECORD"),
    (u"[ILTUZfrt*][il'\"^]+?\\^+nited", u"United"),
    (u"([^s])a(u|ii)[ce]{2}\\b", u"\\1ance"),
    (u"\\b[ao]\\^+s+em", u"assem"),
    (u"fi.aud", u"fraud"),
    (u"Hou[^s]e\\b", u"House"),
    (u"Hou.{1,2}se", u"House"),
    (u"\\b([Pp])r.[^d](?=ams?)\\b", u"\\1rogr"),
    (u"\\bsh[^eov][^w]ll?\\b", u"shall"),
    (u"sh.{1,2}dl", u"shall"),
    (u"d[fltI]ng\\b", u"ding"),
    (u"\\b[A-Z].pon\\b", u"Upon"),
    (u"([Nn])\\^(?=otia)", u"\\1eg"),
    (u"\\bva [l1I]", u"val"),
##
## proper names
    (u"WILL[^I]+(?= J\\.)", u"WILLIAM"),
##
## letters for numbers
    (u"\(lO\)", u"(10)"),
    (u"lO ?[Uu][Ss][Cce]", u"10 USC"),
    (u"SO ?(?=USC)", u"50 "),
    (u"\\b[Iil](?=[89][0-9][0-9])", u"1"),
    (u"\\biioi\\b", u"1101"),
##
## excess word spacing (with some spelling corrections also)
    (u"a.?p.?[pn].?[lf].?i.?[cpr].?[aecH].?[bohD].?l[^cfrF]?e", u"applicable"),
    (u"\\b([Pp])[^aer]?[rvftiTfnw][^aeiou]?[oadpbgh][^cfmnpv]?[vrocyniuT][^emoru]?[iltvJ'][^s]?[dao][^e]?[easciftzB][^aen]?(d|c[li])\\b", u"\\1rovided"),
    (u"([Hh]).?[ea][^wz]?a[^i]?[dao][^z]?[il].?(n|u|i[ir])[^gt]?[gqo]\\b", u"\\1eading"),
    (u"A.?[MH].?E.?[RBEK].?[Ii].?C.?A", u"AMERICA"),
    (u"[RBEK].?[ec].?[vyr].?[eco][^o]?(n|u|ii).?u.?[ec]", u"Revenue"),
    (u"S T A T E S\\b", u"STATES"),
    (u"U[^A]?[NX][^L^S]?[IFTtfC][^N]?T[^J]?[EBO].?(D|t\\))", u"UNITED"),
    (u"\\b([Aa])[^fn]?f[^afir]?[tbi][^bilno]?[eaE][^acr]?[ri]\\b", u"\\1fter"),
    (u"\\bT[^HO]?[RBEKH][^R]?A[^N]?D.?E", u"TRADE"),
    (u"\\bca se", u"case"),
    (u"(\^|J )UNE\\b", u"JUNE"),
    (u"\\bPag.?e\\b", u"Page"),
    (u"\\bO F(?= [A-Z] [A-Z])", u"OF"),
    (u"([Aa]).?[pPDdo].?[pj].?[riFT].?[oepQ].?[ri].?[ilfr].?[an].?[tsl].?[if].?o.?([nuh]|r?i+)[^']?s\\b", u"\\1ppropriations"),
    (u"A.?[Pr].?P.?[RBEK].?O.?[PF].?[RBEKH].?[IL].?A.?T.?I.?O.?[NXK][^']?S\\b", u"APPROPRIATIONS"),
    (u"([Aa]).?[pPDdo].?[pj].?[riFT][^cv]?[oepQ][^cl]?[ri].?[ilfr][^gluzCN]?[an].?[tsl].?[if].?o.?([nuh]|r?i+)", u"\\1ppropriation"),
    (u"A.?[Pr].?P.?[RBEK].?O.?[PF].?[RBEKH].?[IL].?A.?T.?I.?O.?[NXK]", u"APPROPRIATION"),
    (u"Conser[vy]ati[op]n", u"Conservation"),
    (u"COKSERVATION", u"CONSERVATION"),
    (u"r.?e.?i.?[mn].?[boB].?u.?r.?[se].?[es].?m.?e.?n.?[tl].?s\\b", u"reimbursements"),
    (u"K E I M B U R S E M E N T S\\b", u"REIMBURSEMENTS"),
    (u"([Rr]).?e.?i.?[mn].?[boB].?u.?r.?[se].?[es].?m.?e.?n.?[tl]\\b", u"\\1eimbursement"),
    (u"[RK].?E.?I.?M.?B.?U.?R.?S.?E.?M.?E.?N.?T\\b", u"REIMBURSEMENT"),
    (u"([Pp]).?[riTF].?[og].?[ce].?[litIJ].?[ae].?[manseM].?a.?[tl].?i.?[oa].?([nu]|[ir]i)[^']?s\\b", u"\\1roclamations"),
    (u"P.?[RBEK].?[OC].?[CO].?[LI].?A.?M.?A.?T.?I.?O.?(N|iS')[^']?S\\b", u"PROCLAMATIONS"),
    (u"([Pp]).?[riTF].?[og][^tv]?[ce].?[litIJ].?[ae].?[manseM][^N]?a.?[tl].?i.?[oa].?([nu]|[irn]i)", u"\\1roclamation"),
    (u"P.?[RBEK].?[OC].?[CO].?[LI].?A.?M.?A.?T.?I.?O.?(N|iS')", u"PROCLAMATION"),
    (u"([Cc]).?([obp]|cJ).?[rit][^e]?[pog].?[oa].?[rifl][^z]?a.?t.?[i'].?[oc].?([nu]|[rj]i|i[vlL])[^']?s\\b", u"\\1orporations"),
    (u"C.?[OC].?[RBEK].?[PF].?O.?[RBEK].?A.?T.?[ITil].?O.?[NXJ][^']?S\\b", u"CORPORATIONS"),
    (u"([Cc]).?([obp]|cJ)[^r]?[rit][^emr]?[pog].?[oa].?[rifl][^z]?a.?t.?[i'].?[oc].?([nu]|[rj]i|i[vlL])", u"\\1orporation"),
    (u"C.?[OC].?[RBEK].?[PF].?O.?[RBEK].?A.?T.?[ITil].?O.?[NXJ]", u"CORPORATION"),
    (u"([Ee]).?x.?p[^l]?[op].?[ri].?t.?[ae].?t.?i.?o.?n", u"\\1xportation"),
    (u"E X P O R T A T I O N", u"EXPORTATION"),
    (u"([Ii]).?m.?[mn].?i.?[geop].?[riT].?a.?t.?i.?o.?([nH]|f?i:?!?)", u"\\1mmigration"),
    (u"I.?(M|BII).?M.?I.?G.?R.?A.?T.?I.?O.?N", u"IMMIGRATION"),
    (u"impartation", u"importation"),
    (u"([Ll]).?e.?[goq].?i.?[se].?[lih].?a.?[ti].?i.?o.?(n|[iU]j|fv)", u"\\1egislation"),
    (u"L E G I S L A T I O N", u"LEGISLATION"),
    (u"([Ii]).?(m|in).?[pD].?r.?o.?v.?e.?m.?e.?[nu].?t[^']?s\\b", u"\\1mprovements"),
    (u"I.?M.?P.?[RBEK].?O.?V.?[EK].?M.?E.?[NX].?[Tl].?S\\b", u"IMPROVEMENTS"),
    (u"([Ii]).?(m|in).?[pD].?r.?o.?v.?e.?m.?e.?[nu].?t", u"\\1mprovement"),
    (u"I.?M.?P.?[RBEK].?O.?V.?[EK].?M.?E.?[NX].?(T|l')", u"IMPROVEMENT"),
    (u"([Rr]).?[ecot].?[saB].?[tf].?[ri].?i.?c.?[ts].?[if].?[oe].?[nuh][^']?s\\b", u"\\1estrictions"),
    (u"[RBEK].?E.?S.?T.?[RBEK].?I.?C.?T.?I.?O.?[NX][^']?S\\b", u"RESTRICTIONS"),
    (u"([Rr]).?[ecot][^r]?[saB].?[tf].?[ri].?i.?c.?[ts].?[if][^v]?[oe].?[nuh]", u"\\1estriction"),
    (u"[BK]ESTRICTION", u"RESTRICTION"),
    (u"([Aa]).?[dau].?[jiy].?u.?[sa].?t.?m.?[ec].?(n|u|ii).?t[^']?s\\b", u"\\1djustments"),
    (u"A.?D.?J.?U.?S.?T.?M.?E.?N.?T.?S\\b", u"ADJUSTMENTS"),
    (u"A.?D.?J.?U.?S.?T.?M.?E.?N.?T", u"ADJUSTMENT"),
    (u"([Aa]).?[dau].?[jiy].?u.?[sa].?t.?m.?[ec].?(n|u|ii).?t", u"\\1djustment"),
    (u"e?m?([Aa]).?[mi].?m.?u.?n.?i.?t.?i.?o.?n", u"\\1mmunition"),
    (u"DEPARTMENT S\\b", u"DEPARTMENTS"),
    (u"D.?[Et].?[PrF].?A.?[RBEK].?T.?M.?E.?[NX].?T", u"DEPARTMENT"),
    (u"i ?n ?s ?p ?e ?c ?tions", u"inspections"),
    (u"I N S P E C T I O N S\\b", u"INSPECTIONS"),
    (u"([Ii]).?([nA]|ii).?s[^u]?[pn][^b]?[ecs].?c.?[ti].?i.?o.?[nuQ]", u"\\1nspection"),
    (u"I.?[NX].?S.?P.?E.?C.?T.?I.?O.?[NX]", u"INSPECTION"),
    (u"([Ii]).?n.?v.?e.?s.?t.?m e n t s\\b", u"\\1nvestments"),
    (u"I.?N.?V.?E.?S.?T.?M.?E.?N.?T[^']?S\\b", u"INVESTMENTS"),
    (u"([Ii]).?(n|u|ii).?v.?[ec].?[siT].?t.?m.?[ec].?(n|u|A|ii).?t", u"\\1nvestment"),
    (u"I.?[NX].?V.?E.?S.?T.?M.?E.?[NX].?T", u"INVESTMENT"),
    (u"s.?e.?t.?t l e m e n t s\\b", u"settlements"),
    (u"S.?E[^N]?[TI].?T.?L.?E.?M.?E.?N.?T", u"SETTLEMENT"),
    (u"([Ss]).?e[^n]?t.?t.?[li].?e.?m.?e.?[nuh].?t", u"\\1ettlement"),
    (u"([Aa]).?l.?l.?o.?t.?m.?[ec].?n.?[ti][^']?s\\b", u"\\1llotments"),
    (u"A.?L.?[LX].?O.?T.?M.?E.?[NX].?I?T[^']?S\\b", u"ALLOTMENTS"),
    (u"([Aa]).?l.?l.?o.?t.?m.?[ec].?n.?[ti]", u"\\1llotment"),
    (u"A.?L.?[LX].?O.?T.?M.?E.?[NX].?T", u"ALLOTMENT"),
    (u"E N D O W M E N T", u"ENDOWMENT"),
    (u"([Ee]).?n.?d.?o.?w.?m.?e.?n.?t", u"\\1ndowment"),
    (u"([Ee]).?x.?[co].?[ec].?p.?t.?i.?[oa].?[nu][^']?s\\b", u"\\1xceptions"),
    (u"E X C E P T I O N S", u"EXCEPTIONS"),
    (u"([Ee]).?x.?[co].?[ec].?p.?t.?i.?[oa].?([nu]|fti|iii)", u"\\1xception"),
    (u"\\bE.?X.?C.?E.?[PI].?T.?[IT].?O.?N", u"EXCEPTION"),
    (u"E A S E M E N T S", u"EASEMENTS"),
    (u"([Ee]).?(a|ei).?s.?e.?m.?e.?n.?t[^'(]?s\\b", u"\\1asements"),
    (u"([Ee]).?(a|ei).?s.?e.?m.?e.?n.?t", u"\\1asement"),
    (u"E ?l ?e ?c ?t ?i ?o ?n ?s\\b", u"Elections"),
    (u"E L E C T I O N S", u"ELECTIONS"),
    (u"\\b([Ee])[^f]?[li].?[ec].?c[^a]?[tc][^r]?i[^-]?[oc].?n", u"\\1lection"),
    (u"E.?L.?[EB].?C.?T.?[Ii].?O.?[NX]", u"ELECTION"),
    (u"([Mm]).?o.?n.?u.?m.?e.?n.?t[^']?s\\b", u"\\1onuments"),
    (u"M O N U M E N T S\\b", u"MONUMENTS"),
    (u"([Mm]).?o.?[nJ].?u.?m.?e.?n.?t", u"\\1onument"),
    (u"M O N U M E N T", u"MONUMENT"),
    (u"([Mm]).?[oc].?[vr].?[ev].?m.?e.?n.?[ti][^']?s\\b", u"\\1ovements"),
    (u"\\b([Mm]).?[oc].?[vr].?[ev].?m.?e.?n.?[ti]\\b", u"\\1ovement"),
    (u"M O V E M E N T", u"MOVEMENT"),
    (u"\\bC.?[EK].?[RBEKS].?T.?A.?I.?[NXK]\\b", u"CERTAIN"),
    (u"([Cc]) ?[ec] ?[ri] ?[tc] ?[ae] ?[id] ?(n|u|[ri]?i|h)\\b", u"\\1ertain"),
##
## number spacing
    (u"\\b([0-9]) ([0-9])(?= USC)", u"\\1\\2"),
    (u"(tions? [0-9]+) (?=\\([a-z0-9]\\))", u"\\1"),
##
## insufficient spacing 
    (u"USC(?=[a-z])", u"USC "),
    (u"\\bofthe\\b", u"of the"),
    (u"\\bofthis", u"of this"),
##
## superfluous characters (should be deleted, not substituted with another)
### caret (^)
    (u"\\^+(?=[A-Z][a-z])", u""),
    (u"A?\\^nte,", u"Ante,"),
    (u"o[f|]\\^the", u"of the"),
### comma (,)
    (u",tion", u"tion"),
### misc.
    (u"\\bfi[^a-z ]o?m", u"from"),
    (u"\\ba[^a-z]nd\\b", u"and"),
    (u"\\b(An|no).te\\b", u"\\1te"),
    (u"\\bfi[^a-z]ee", u"free"),
    (u"([Dd])iu.ing", u"\\1uring"),
# end of June 2012 run
###################################################################################################
# April 2012: more OCR corrections based on results of September 2011 run.
# Estimated number of changes to dataset resulting from this run: 37,000
#
## undo overzealous corrections from prior run
### “acompany” -> “accompany”
    (u"\\bacompany\\b", u"accompany"),
### “generaly” -> “generally” (also in uppercase)
    (u"\\bgeneraly\\b", u"generally"),
    (u"\\bGENERALY\\b", u"GENERALLY"),
    (u"\\bgenerallv\\b", u"generally"),
### “iregular” -> “irregular”
    (u"\\biregular\\b", u"irregular"),
### “mistatement” -> “misstatement”
    (u"\\mistatement\\b", u"misstatement"),
### "ofunds" -> "of funds"
    (u"\\bofunds\\b", u"of funds"),
### "remployment" -> "reemployment"
    (u"\\bremployment\\b", u"reemployment"),
    (u"\\bREMPLOYMENT\\b", u"REEMPLOYMENT"),
### letter-spaced headings in which “T O F” was wrongly closed up to “TO F” (should be “T OF”)
    (u"E F F E C TO F", u"EFFECT OF"),
    (u"D I S T R I C TO F", u"DISTRICT OF"),
    (u"L I S TO F", u"LIST OF"),
    (u"P R E S I D E N TO F", u"PRESIDENT OF"),
##
## close up spacing and correct additional misspellings
## ordered roughly by length of word(s) to match
    (u"\\bA N ACT\\b", u"AN ACT"),
    (u"([Ll])abor.?a.?t.?o.?r.?y", u"\\1aboratory"),
    (u"([Mm]) and a to r y", u"\\1andatory"),
    (u"\\bA.?D.?M.?I.?[NX].?I.?S.?T.?[RBEK].?A.?T.?I.?O.?[NX]\\b", u"ADMINISTRATION"),
    (u"\\b([Aa]).?[da].?[mn].?[i'].?[nua].?i.?[seB].?[tfr].?[ri].?a.?[tsv][^o]?[ir].?o.?n", u"\\1dministration"),
    (u"\\bO.?R.?G.?A.?[NX].?[Ii].?[Zz].?A.?T.?[Ii].?O.?[NX][^']?S\\b", u"ORGANIZATIONS"),
    (u"\\b([Oo]).?[riz].?[gc].?a.?[nmp].?[ix].?[za].?a.?t.?i.?o.?[nu][^'^(]?s\\b", u"\\1rganizations"),
    (u"O.?[RBEK].?[GO].?A.?[NX].?[Ii].?[Zz].?A.?[TI].?[Ii].?O.?[NX]", u"ORGANIZATION"),
    (u"\\b([Oo]).?[riz].?[gc].?a.?[nmp].?[ix].?[za].?a.?t.?i.?o.?[nu]", u"\\1rganization"),
    (u"[REK].?E.?L.?A.?T.?I.?O.?N.?S.?H.?I.?P[^']?S\\b", u"RELATIONSHIPS"),
    (u"\\br.?e.?l.?a.?t.?i.?o.?n.?s.?h.?i.?[po].?s\\b", u"relationships"),
    (u"[REK].?E.?L.?A.?T.?I.?O.?N.?S.?H.?I.?P", u"RELATIONSHIP"),
    (u"\\b([Rr]).?e.?l.?a.?t.?i.?o.?[nu].?s.?[hn].?i.?(p|o|i\\)|j\\))", u"\\1elationship"),
    (u"A.?[ULT].?[TI].?H.?O.?[RBEKS].?I.?Z.?A.?[TI].?I.?O.?[NX]", u"AUTHORIZATION"),
    (u"([Aa]).?[uni].?[tf].?[hna].?[oc].?r.?[if].?z.?[anu].?[ti].?[i'].?[opabq].?[nu]", u"\\1uthorization"),
    (u"M A N U F ACT U R I N G", u"MANUFACTURING"),
    (u"([Mm]).?[as].?[nm].?u.?[fi].?[ao].?c.?t.?u.?r.?i.?n.?g", u"\\1anufacturing"),
    (u"V.?O.?L.?U.?N.?T.?A.?R.?I.?L.?Y", u"VOLUNTARILY"),
    (u"v[od]luntaril[yv]", u"voluntarily"),
    (u"V.?O.?L.?U.?N.?T.?A.?[REK].?Y", u"VOLUNTARY"),
    (u"([Vv]).?o.?l.?u.?[nu].?t.?[aoe].?[ri][^l]?[yv]\\b", u"\\1oluntary"),
    (u"A.?P.?P.?O.?I.?N.?T.?M.?E.?[NX].?T.?S\\b", u"APPOINTMENTS"),
    (u"([Aa]).?p.?p.?o.?i.?[nu].?t.?m.?e.?n.?t.?s\\b", u"\\1ppointments"),
    (u"A.?P.?P.?O.?I.?[NX].?T.?M.?E.?[NX].?T", u"APPOINTMENT"),
    (u"([Aa]).?[pd].?[png].?[od].?i.?[nu].?t.?m.?e.?[nu].?t\\b", u"\\1ppointment"),
    (u"A S S O C I A T I O N S\\b", u"ASSOCIATIONS"),
    (u"([Aa]).?s.?s.?o.?c.?i.?a.?t.?i.?o.?[nu][^']?s\\b", u"\\1ssociations"),
    (u"A.?S.?S.?O.?C.?[IL].?A.?T.?I.?O.?[NX]", u"ASSOCIATION"),
    (u"([Aa]).?s.?s.?.o.?c.?i.?a.?t.?i.?[oa].?n\\b", u"\\1ssociation"),
    (u"E N TITLE M E N T S\\b", u"ENTITLEMENTS"),
    (u"e.?n.?t.?i.?t.?l.?e.?m.?e.?n.?t.?s\\b", u"entitlements"),
    (u"E.?N.?T.?[IT].?T.?[LU].?E.?M.?E.?N.?T", u"ENTITLEMENT"),
    (u"([Ee]).?[nu].?t.?i.?t.?[li].?e.?m.?e.?[nu].?t\\b", u"\\1ntitlement"),
    (u"E X A M I N A T I O N S\\b", u"EXAMINATIONS"),
    (u"([Ee]).?x.?a.?m.?i.?n.?a.?t.?i.?o.?n.?s\\b", u"\\1xaminations"),
    (u"E.?X.?A.?M.?I.?[NX].?A.?T.?I.?O.?[NX]\\b", u"EXAMINATION"),
    (u"([Ee]).?x.?a.?m.?i.?[nu].?[an].?[ts].?[if].?o.?(n|u|ri)\\b", u"\\1xamination"),
    (u"E.?X.?P.?E.?[NX].?D.?[IF].?T.?U.?[RBEK].?E.?S\\b", u"EXPENDITURES"),
    (u"([Ee]).?x.?p.?[efs].?[nu].?d.?i[^s]?[td].?u.?[ri].?e.?s\\b", u"\\1xpenditures"),
    (u"E.?X.?P.?E.?N.?D.?[IF].?T.?U.?[RB].?E\\b", u"EXPENDITURE"),
    (u"([Ee]).?x.?p.?e.?[nu].?d.?i.?t.?u.?r.?[eo]\\b", u"\\1xpenditure"),
    (u"DESIGNATIONXS", u"DESIGNATIONS"),
    (u"([Dd]).?e.?s.?i.?g.?n.?a.?t.?i.?o.?n.?s\\b", u"\\1esignations"),
    (u"D.?E.?S.?I.?G.?N.?A.?T.?[Ii].?O.?[NX]\\b", u"DESIGNATION"),
    (u"([Dd]).?[eT].?[sT].?[if].?[goc].?[na].?[ai].?t.?[ix].?[op].?n\\b", u"\\1esignation"),
    (u"I.?N.?S.?T.?A.?L.?L.?M.?[EK].?N.?T.?S\\b", u"INSTALLMENTS"),
    (u"([Ii]).?[np].?s.?t.?[ao].?l.?l.?m.?e.?n.?t.?s\\b", u"\\1nstallments"),
    (u"I N S T A L L M E N T\\b", u"INSTALLMENT"),
    (u"([Ii]).?(n|ii).?s.?t.?a.?[ld].?l.?m.?e.?n.?t\\b", u"\\1nstallment"),
    (u"I.?N.?S.?T.?[IF].?T.?U.?T.?I.?O.?N[^']?S\\b", u"INSTITUTIONS"),
    (u"([Ii]).?n.?s.?t.?i.?t.?[un].?t.?i.?o.?n[^']?s\\b", u"\\1nstitutions"),
    (u"I.?N.?S.?T.?[IFT].?T.?U.?T.?I.?O.?[NX]\\b", u"INSTITUTION"),
    (u"([Ii]).?(n|u|q|ii).?s.?[tl].?i.?t.?u.?t.?i.?[op].?n\\b", u"\\1nstitution"),
    (u"I N S T R U C T I O N A L\\b", u"INSTRUCTIONAL"),
    (u"I N S T R U C T I O N S\\b", u"INSTRUCTIONS"),
    (u"\\binst.?r.?uctions\\b", u"instructions"),
    (u"I.?N.?S.?T.?R.?U.?C.?T.?I.?O.?N\\b", u"INSTRUCTION"),
    (u"\\b([Ii])[^o]?n.?s[^a]?t.?[ri].?u.?c.?t.?[i'].?o.?(n|ii)", u"\\1nstruction"),
    (u"[RBEK].?E.?Q.?U.?[Ii].?[RE].?E.?M.?[EK].?[NX].?T.?S\\b", u"REQUIREMENTS"),
    (u"\\b([Rr]).?[ea].?[qaig].?[uno].?[iu].?[ri].?[eoa].?[ml].?e.?(n|ii).?[til].?s\\b", u"\\1equirements"),
    (u"R E Q U I R E M E N TO F ", u"REQUIREMENT OF "),
    (u"[RBEK].?E.?Q.?U.?[Ii].?[RBEK].?E.?M.?[EF].?N.?T", u"REQUIREMENT"),
    (u"([Rr]).?e.?[qao].?u.?i.?r.?e.?m.?e.?(n|u|ii).?t\\b", u"\\1equirement"),
    (u"J.?U.?[REK].?I.?S.?D.?I.?[CG].?T.?I.?O.?N", u"JURISDICTION"),
    (u"([Jj]).?u.?[rix].?[il].?s.?[da].?i.?[ceo].?t.?i.?o.?n", u"\\1urisdiction"),
    (u"R E A S S I G N M E N T S", u"REASSIGNMENTS"),
    (u"R E A S S I G N M E N T", u"REASSIGNMENT"),
    (u"R e a s s i g n m e n t", u"Reassignment"),
    (u"E N L I S T M E N T S", u"ENLISTMENTS"),
    (u"([Ee])[^e]?n.?l.?i.?s.?t.?m.?e.?n.?t.?s\\b", u"\\1nlistments"),
    (u"E.?N.?L.?I.?S.?T.?M.?E.?N.?T", u"ENLISTMENT"),
    (u"([Ee])[^e]?n.?[li].?i.?s.?t.?m.?[eo].?[nr].?t", u"\\1nlistment"),
    (u"A.?C.?Q.?U.?I.?S.?I.?T.?[IT].?O.?[NX]", u"ACQUISITION"),
    (u"([Aa]).?c.?[qauog].?u.?i.?[sa].?i.?t.?[ij].?o.?[nuUD]", u"\\1cquisition"),
    (u"I M P O R T A T I O N", u"IMPORTATION"),
    (u"([Ii]).?(m|in).?p.?o.?r.?t.?[aeo].?[ti].?i.?o.?n\\b", u"\\1mportation"),
    (u"([Rr]).?e.?s.?[el].?r.?v.?a.?t.?i.?o.?n", u"\\1eservation"),
    (u"I.?N.?S.?T.?I.?T.?U.?T.?E[^']?S\\b", u"INSTITUTES"),
    (u"([Ii]).?n.?s.?t.?i.?t.?u.?t.?e[^']?s\\b", u"\\1nstitutes"),
    (u"I.?N.?S.?T.?[IFT].?T.?U.?T.?E", u"INSTITUTE"),
    (u"([Ii]).?n.?s.?t.?[il].?t.?u.?t.?e", u"\\1nstitute"),
    (u"O.?P.?E.?[RK].?A.?T.?I.?O.?[NX][^']?S\\b", u"OPERATIONS"),
    (u"\\b([Oo]).?p.?e.?[ri].?a.?[tl].?i.?[oq].?[nu][^']?s\\b", u"\\1perations"),
    (u"O[^O]?[Pr].?[EK].?[RBEK].?A.?T.?[Ii].?O.?[NX]", u"OPERATION"),
    (u"([Oo])[^O^o^m]?p.?e.?[ri].?[ak][^c]?[tli].?i.?[opdq].?[nu]", u"\\1peration"),
    (u"A.?M.?M.?U.?[NM].?I.?T.?I.?[OG].?N", u"AMMUNITION"),
    (u"\\b([Aa]).?[mn].?m.?u.?[nu].?t.?i.?o.?[nh]", u"\\1mmunition"),
    (u"C.?O.?M.?M.?[IL].?S.?S.?[Ir].?O.?[NX].?E.?R[^']?S\\b", u"COMMISSIONERS"),
    (u"C.?O.?M.?M.?[IL].?S.?S.?[Ir].?O.?[NX].?E.?R", u"COMMISSIONER"),
    (u"C.?O.?M.?M.?[IL].?S.?S.?[Ir].?O.?[NX]", u"COMMISSION"),
    (u"([Cc])[^u]?[obcp].?[mn].?[mM][^o]?[ilsrx][^c^N]?[saG].?[stgp].?[i'].?[opac].?[nu]", u"\\1ommission"),
    (u"[RBEK].?E[^Q]?T.?[IE].?[RBEK].?E.?M.?[EK].?N.?T", u"RETIREMENT"),
    (u"([Rr]).?e[^q]?t.?i.?[ri].?e.?m.?e.?(n|u|r|ii).?[tcU]", u"\\1etirement"),
    (u"C.?[Oo].?N.?T.?[RBA].?A.?C.?T[^']?S\\b", u"CONTRACTS"),
    (u"\\b([Cc]).?[ocp].?[npu][^r]?[tf][^e]?[ri].?[aer].?[co].?t[^']?s\\b", u"\\1ontracts"),
    (u"C.?O.?[NX].?T[^E]?[RBEAO][^N^R]?A.?[CP].?T", u"CONTRACT"),
    (u"\\b([Cc]).?[oj][^u]?[nu][^n^s]?[ti].?[reij].?[aeus].?[cod].?[tT]\\b", u"\\1ontract"),
    (u"P.?[RB].?I.?S.?O.?N.?E.?[RBE][^']?S\\b", u"PRISONERS"),
    (u"([Pp]).?r.?i.?s.?o.?n.?e.?r[^']?s\\b", u"\\1risoners"),
    (u"prisouer", u"prisoner"),
    (u"P R I S O N S\\b", u"PRISONS"),
    (u"T.?[RBEK].?A.?[NX].?S.?F.?E.?[RBK][^']?S\\b", u"TRANSFERS"),
    (u"([Tt]).?r.?a.?[nmu].?s.?[fji].?e.?[riT][^']?s\\b", u"\\1ransfers"),
    (u"T.?[RBKO].?A.?[NX].?S.?[FEP].?E[^R]?[RBK]", u"TRANSFER"),
    (u"\\b([Tt]).?[rifaV][^e^w]?a[^i]?[nmhirou][^l]?[srei][^u]?[fijltr][^t]?e[^r]?r", u"\\1ransfer"),
    (u"C O M M U N I T I E S", u"COMMUNITIES"),
    (u"([Cc]).?o.?[mn].?[mn].?[un].?i.?t.?i.?e.?s", u"\\1ommunities"),
    (u"C.?O.?M.?M.?U.?[NXK].?[IFT][^S]?T.?[YT]", u"COMMUNITY"),
    (u"([Cc]).?[ob].?m.?m.?[un].?[num].?[it].?[tlf].?[yvj]", u"\\1ommunity"),
    (u"E.?Q.?U.?I.?P.?M.?E.?[NI].?T", u"EQUIPMENT"),
    (u"([Ee]).?[qao].?[ui].?i.?p[^I]?[mn].?[ec].?(n|u|ii).?t\\b", u"\\1quipment"),
    (u"\\b([Vv]).?[id][^t]?[oa].?[li].?a.?[ti].?i.?o.?n", u"\\1iolation"),
    (u"VI O L A T I O N S", u"VIOLATIONS"),
    (u"VIOLAT I O N", u"VIOLATION"),
    (u"A.?C.?C.?O.?U.?[NX].?T.?I.?[NX].?G", u"ACCOUNTING"),
    (u"\\b([Aa]).?[cot].?c.?o.?[un].?[nu].?t.?i.?n.?g", u"\\1ccounting"),
    (u"A C C O U N T S\\b", u"ACCOUNTS"),
    (u"\\b([Aa]).?c.?c.?o.?u.?n.?t[^']?s\\b", u"\\1ccounts"),
    (u"\\bA.?[CG].?C.?O.?U.?[NX].?T", u"ACCOUNT"),
    (u"\\b([Aa]).?[co].?[cs].?[op].?(u|n|ii).?[nu].?t", u"\\1ccount"),
    (u"([Cc]) o m m and s:", u"\\1ommands:"),
    (u"C O M M A N D", u"COMMAND"),
    (u"([Cc]).?o.?m.?m[^i]?a.?[nu].?d", u"\\1ommand"),
    (u"S.?[EKB].?[RBEK].?V.?[IiE].?[CO].?E[^'^R]?S\\b", u"SERVICES"),
    (u"\\b([Ss]).?[ecfos].?[rif].?[vVxy].?[ildfj].?[cep].?[ecot][^'^r]?s\\b(?=[^^])", u"\\1ervices"),
    (u"\\bS.?[EK].?[RBEK].?V.?[IiE].?[CO].?[EKB]", u"SERVICE"),
    (u"\\b([Ss])[^e]?[ec][^c^x]?[riTtfp][^o]?[vyxrA][^a^o]?[ildnjf].?[coeq][^m^-]?[eB]\\b", u"\\1ervice"),
    (u"([Uu]).?(n|u|ii).?i.?fo[ri].?m[^']?s\\b", u"\\1niforms"),
    (u"U[^R]?[NX].?I.?FO[RBEK].?M", u"UNIFORM"),
    (u"([Uu])[^r]?(n|ii).?i[^n]?f.?o.?[rif].?(m|[aun]i|jn)", u"\\1niform"),
    (u"P.?[RBEK].?O.?P.?E.?[RBEK].?T.?[YTVy]", u"PROPERTY"),
    (u"\\b([Pp]).?[ri].?o[^s]?[pnu].?[er].?[rin][^e^s]?[tfi][^n]?(y|v|T|j[rTji'^]?)", u"\\1roperty"),
    (u"F.?[EK].?D.?[EK].?[RBEK].?A[^L^T]?[LIi]", u"FEDERAL"),
    (u"([Ff])[^i^r]?[ec][^c^l]?[dao][^l^n]?[ecmo].?[ricd][^i]?[ak][^l]?[lL]", u"\\1ederal"),
    (u"T[IF]TLE[ ]?S(?=(:| [I5]))", u"TITLES"),
    (u"([Tt]) i t l e s\\b", u"\\1itles"),
    (u"\\bT.?[ITfJ].?T.?L.?E", u"TITLE"),
    (u"\\b([Tt])[^h^r]?[il][^s^y]?t.?[li][^v]?[ec]\\b", u"\\1itle"),
## scannos
### “eh.” for “ch.”
    (u"\\beh\\. ", u"ch. "),
### “sees.” for “secs.”
    (u"\\b([Ss])ees\\. ", u"\\1ecs. "),
### “IT” or “IJ” for “U” in “U.S.C.”
    (u"I[JT]\. ?S\. ?C\.", u"U.S.C."),
### “ho..ever” (any two characters in place of the “w”) for “however”
    (u"\\bho..ever\\b", u"however"),
### “mg” at the end of a word for “ing”
    (u"([b-hklnoprstvwyz])(m|iii|iu)g\\b", u"\\1ing"),
### “asse...led” for “assembled”
    (u"asse...led", u"assembled"),
### “however^” -> “however,”
    (u"ho(w|..)ever\\^", u"however,"),
### miscellaneous character scannos
    (u"£i(?=([a-mo-z]|n[^c]))", u"a"),
    (u"\\b[sgilbfoJna^](?=tat\\.)", u"S"),
    (u"([0-9])['\"* ^]?usc", u"\\1 USC"),
    (u"\\b([Aa])(n|u|ii)[vy]\\b ", u"\\1ny "),
    (u"^(?<![(])\^(?=[A-Za-z0-9]{1,3}\) )", u"("),
    (u"[<*^]?\\^E[CO]", u"SEC"),
    (u"\\ban\^ ", u"any "),
    (u"\\bTTie", u"The"),
    (u"0(?=(MB|PM))", u"O"),
    (u"[EK](?=el(a|o|i[^i^m]))", u"R"),
    (u"([Ss])iii(?=(ch|h|j|s))", u"\\1ub"),
    (u"\\boi\\^(?=a)", u"org"),
    (u"wii(?=ic(h|[li]i))", u"wh"),
    (u" \\^(?=cal)", u" fis"),
    (u"\\boi\\^(?=[ep])", u"op"),
# end of April 2012 run
###################################################################################################
# September 2011: more corrections resulting from identifying mis-OCR'ed text, plus
# further elimination of excess whitespace. Begin to standardize citation forms
# to Statutes at Large and U.S. Code. Dehyphenate lines to improve accuracy of 
# concordance.
# Estimated number of changes to dataset resulting from this run: 880,000
#
# character substitution/scannos:
## a
    (u"\\bStete", u"State"),
    (u"Ei(?=nd)", u"a"),
    (u"\\befter\\b", u"after"),
## al
    (u"shedl", u"shall"),
## B
    (u"H(?=e it)", u"B"),
## b
    (u"J\\)(?=y)", u"b"),
## C
    (u"\\bSE[^C]\.(?= [0-9])", u"SEC."),
    (u"\\(['^](?=[hio][a-z][a-z][^)])", u"C"),
    (u"\\beomm\\b", u"Comm"),
    (u"\\bCJo(?=[^v])", u"Co"),
    (u"\\bUSe\\b", u"USC"),
    (u"[eO](?=ongress)", u"C"),
    (u"[eO](?=ONGRESS)", u"C"),
    (u"\\bO(?=o[mn])", u"C"),
## c
    (u"se[a-z][':;bcjtC^]tion(?=[^e])", u"section"),
## D
    (u" I\\)\. (?=[A-Z])", u" D. "),
## E
    (u"([CR])P\\^S", u"\\1ES"),
## e
    (u"\\b([Tt])(h|li)c\\b", u"\\1he"),
## ee
    (u"\\bF[ir]( a|a )a", u"Free"),
    (u"\\bF[ir]w\\b", u"Free"),
## f
    (u"([EeOo])fiic", u"\\1ffic"),
## fi
    (u"([EeOo])fHc", u"\\1ffic"),
## G
    (u"\\bCJ(?=ov)", u"G"),
## gr
    (u"para\\^aph", u"paragraph"),
## h
    (u"([^a^c^g^l^C^K])li(?=(arb|av|ere|ea|el[^m]|i[grs]|osp|ous|uma))", u"\\1h"),
    (u"\\bs(ii|u)cli\\b", u"such"),
    (u"tli(?=(ir|r|o[a-z]))", u"th"),
    (u"\\beacli\\b", u"each"),
    (u"\\bwliicli\\b", u"which"),
    (u"([Cc])liapt[ce]r", u"\\1hapter"),
    (u"\\b([Ww])ne(?=[^d^'][^s])", u"\\1he"),
    (u"liundred", u"hundred"),
    (u"(m|ui|lu)ontlis\\b", u"months"),
    (u"cliange", u"change"),
    (u"liigli", u"high"),
    (u"([Ff])ra[nu]cliise", u"\\1ranchise"),
    (u"\\blliis\\b", u"this"),
## i
    (u"([st])lon", u"\\1ion"),
## J
    (u"\\.l(?=(anu|un))", u"J"),
## l
    (u"\\btitie\\b", u"title"),
## LI
    (u"\\bAPPU", u"APPLI"),
    (u"E[Ss]TABUSH", u"ESTABLISH"),
    (u"\\bUMIT", u"LIMIT"),
    (u"P[UuV]BUC", u"PUBLIC"),
    (u"\\bFACIU", u"FACILI"),
    (u"\\bQUAUF", u"QUALIF"),
    (u"POUCY", u"POLICY"),
    (u"VAUD", u"VALID"),
    (u"ABOUSH", u"ABOLISH"),
## li
    (u"([bdip])U", u"\\1li"),
    (u"Uc\\b", u"lic"),
    (u"eU(?=[gv])", u"eli"),
    (u"Umit(?=[^.])", u"limit"),
## ll
    (u"shaU", u"shall"),
    (u"U(?=ment)", u"ll"),
    (u"wiU\\b", u"will"),
## m
    (u"\\bfroa\\b", u"from"),
    (u"([Dd])epart..(i?)ent", u"\\1epartment"),
    (u"\\b([Aa])iiie", u"\\1me"),
## n
    (u"\\bfollowlii[gs]\\b", u"following"),
    (u"([Uu])poii", u"\\1pon"),
## of
    (u"\\boif(?= )", u"of"),
    (u"\\boif(?=-)", u"off"),
## p
    (u"[[ij|}][)>](?=[a-z])", u"p"),
## q
    (u"([Ee])\\(j(?=[a-z])", u"\\1q"),
## r
    (u"\\bi-(?=[aeiou])", u"r"),
    (u"([aeou])i['*](?=[^i])\\b", u"\\1r"),
## U
    (u"[LITtifXUJr]'(?=(nited|NITED))", u"U"),
## u
    (u"fiill", u"full"),
    (u"\\b([Ccf]?)ovir", u"\\1our"),
## un
    (u"\\bcoimt", u"count"),
    (u"\\bimtil\\b", u"until"),
    (u"himdred", u"hundred"),
    (u"Fimd", u"Fund"),
    (u"moimt", u"mount"),
    (u"\\bluider\\b", u"under"),
    (u"([Cc])oimcil", u"\\1ouncil"),
## ur
    (u"\\boiu[^a-z^ ]", u"our"),
    (u"[li]u.ban(?=[^d])", u"urban"),
## w
    (u"\\bw\\^", u"w"),
    (u"([aeiotu])w\\^", u"\\1w"),
    (u"Avith", u"with"),
    (u"Av(h|li)ic(h|li)", u"which"),
    (u"Av(?=o[^c^d^i^n^y])", u"w"),
    (u"([aeiou])Av", u"\\1w"),
## y
    (u"pa[j)]([a-z]+)ents\\b", u"payments"),
    (u"pa[j)]([a-z]+)ent\\b", u"payment"),
    (u"([Aa]n|a[rv]|b|[Dd]a|el|it|[Mm]a|n[clt]|or|r[mt]|tl|u[dr])j\\^ ", u"\\1y "),
## yi
    (u"jd(?=ng)", u"yi"),
## symbols
    (u"(\\([A-Z])X(?=[ivx])", u"\\1)("),
#
# dehyphenation
    (u'([^-][A-Za-z][a-eg-xz])-\r\n([a-z])', u'\\1\\2'),
#
# close up spacing:
## target words of about 15 letters
    (u"([Nn]).?o.?t.?w.?[it].?t.?[hnH].?[sS].?t.?[aeo].?[nm].?[dao].?[ij].?[nhpaQ^].?[gof]\\b", u"\\1otwithstanding"),
    (u"[EKR].?E.?P.?[EKR].?E.?S.?E.?N.?T.?A.?T.?I.?V.?E[^(]S", u"REPRESENTATIVES"),
    (u"\\b([Rr]).?[et].?[pfvynD].?[rTfvti'y].?e.?[sHnxa].?[ec&].?[nmu].?[tif].?[aeondA].?[tif].?[ivtl/'].?[vrcoeym\].?e[^']?s\\b", u"\\1epresentatives"),
## target words of about 14 letters
    (u"A.?D.?M.?[IE].?[NX].?[I1].?[S8].?T.?[RKOBE].?A.?[TI].?[IT].?V.?[EB]", u"ADMINISTRATIVE"),
    (u"([Aa]).?[daoH].?[mn].?i.?[nou].?i.?s.?t.?[rio].?a.?[tc].?i.?[vyr].?[eG©s]", u"\\1dministrative"),
    (u"\\b([Rr]).?e.?p.?[ri].?e.?[saH].?e.?n.?[ts].?[aeo].?t.?i.?[vrc].?e\\b", u"\\1epresentative"),
## target words of about 13 letters
    (u"\\bD.?E.?T.?E.?[RHEK].?M.?I.?[NX].?A.?T.?I.?O.?[NX]", u"DETERMINATION"),
    (u"([Dd]).?e[^x]?t.?e.?[ri].?[mn].?[ij].?[nao].?a.?t.?[-ij].?[ocQ].?n\\b", u"\\1etermination"),
    (u"E.?[NX].?V.?I.?[RKBE].?O.?[NX].?M.?E.?[NX].?T.?A.?L", u"ENVIRONMENTAL"),
    (u"([Ee]).?n.?[vy].?i.?r.?[ob].?[nh].?m.?e.?n.?t.?[aeo][^n]?[lid]\\b", u"\\1nvironmental"),
    (u"E.?S.?T.?A.?B.?L.?I.?S.?H.?M.?E.?[NX].?T", u"ESTABLISHMENT"),
    (u"E S T A B U S H M E N T", u"ESTABLISHMENT"),
    (u"([Ee]).?[si].?t.?[aeo].?[bho].?[lfiU].?[i'].?s.?[hbn].?m.?e.?[nu].?t", u"\\1stablishment"),
    (u"I.?[NX].?T.?E.?[RBEK].?[NX].?A.?T.?I.?O.?[NX].?A.?L", u"INTERNATIONAL"),
    (u"\\b([Ii]).?n.?t[^h]?e.?r.?[nau].?a.?t.?[ij].?o.?n.?[aso].?[lLF]", u"\\1nternational"),
    (u"Q U A R T E R M A S T E R", u"QUARTERMASTER"),
    (u"([Qq]).?[nu].?a.?r.?t.?e.?r.?[mn].?a.?s.?t.?e.?r", u"\\1uartermaster"),
## target words of about 12 letters
    (u"A.?M.?O.?[RE].?T.?I.?Z.?A.?T.?I.?O.?N", u"AMORTIZATION"),
    (u"a m o r t i z a t.?i.?o.?n\\b", u"amortization"),
    (u"C.?O.?[NX].?S.?E.?[RBK].?V.?A.?T.?I.?O.?[NX]", u"CONSERVATION"),
    (u"([Cc]).?o.?[nu].?s.?[ec].?[ri].?v.?a.?[tf].?i.?o.?[nu]\\b", u"\\1onservation"),
    (u"C O N S T I T U T I O N", u"CONSTITUTION"),
    (u"([Cc]).?o.?[nu].?s.?t.?i.?t.?u.?t.?[il].?o.?[nr]", u"\\1onstitution"),
    (u"N O T I F I C A T I O N", u"NOTIFICATION"),
    (u"([Nn]).?[oq].?t.?i.?[ft].?[il].?c.?a.?t.?i.?o.?n\\b", u"\\1otification"),
    (u"P.?A.?[RB].?T.?I.?C.?I.?P.?A.?T.?I.?O.?[NX]", u"PARTICIPATION"),
    (u"\\b([Pp]).?a.?r.?t.?[i\]].?c.?i.?p.?[aM].?t.?i.?o.?n\\b", u"\\1articipation"),
    (u"P.?[RBEX].?E.?S.?E.?[RB].?V.?A.?T.?I.?O.?[NX]", u"PRESERVATION"),
    (u"([Pp]).?[ri].?e.?s.?e.?r.?v.?a.?t.?i.?o.?n\\b", u"\\1reservation"),
    (u"([Ss]).?[up].?[boD][^-]?[pa].?[an].?[rip].?[ae].?g.?[ri].?a.?[po].?[hnk]", u"\\1ubparagraph"),
    (u"U.?N.?E.?M.?P.?L.?O.?Y.?M.?E.?[NX].?T", u"UNEMPLOYMENT"),
    (u"([Uu]).?n[^d]?e.?[mn].?p.?[l'].?o.?[yjv].?[mn].?[ce].?[nu].?t", u"\\1nemployment"),
## target words of about 11 letters
    (u"A.?G.?[REK].?I.?C.?U.?L.?T.?U.?[RBEK].?E", u"AGRICULTURE"),
    (u"([Aa]).?[g^].?[riTF].?i.?c.?[uo].?[-l].?t.?[um].?[rTiF].?[ec]\\b", u"\\1griculture"),
    (u"A.?P.?P.?(U|L.?I).?C.?A.?T.?I.?O.?[NX]", u"APPLICATION"),
    (u"([Aa]).?p.?[pn].?[lift].?i.?[ce].?[ao].?t.?[il].?o.?n", u"\\1pplication"),
    (u"C.?O.?[RB].?[RB].?E.?C.?T.?I.?O.?[NX].?S", u"CORRECTIONS"),
    (u"([Cc]).?o.?[ri].?r.?e.?c.?t.?[il].?o.?n.?s\\b", u"\\1orrections"),
    (u"D.?E.?F.?I.?[NX].?I.?T.?I.?O.?[NX].?S", u"DEFINITIONS"),
    (u"\\b([Dd]).?[ec].?[filrt].?[ilt].?[nau].?i.?t.?i.?o.?[nu].?[sa]\\b", u"\\1efinitions"),
    (u"([Dd]).?e.?s.?i.?[ge].?[np].?a.?t.?[ij].?n.?[ge]\\b", u"\\1esignating"),
    (u"D.?E.?T.?E.?R.?M.?I.?N.?I.?N.?G", u"DETERMINING"),
    (u"([Dd]).?e.?t[^h]?[eac].?[rni].?m.?i.?n.?i.?n.?g", u"\\1etermining"),
    (u"D.?E.?V.?[EB].?L.?O.?P.?M.?E.?[NX].?T", u"DEVELOPMENT"),
    (u"([Dd]).?[ec].?v.?[ec].?[li].?o.?p.?m.?[ec].?[nu].?t\\b", u"\\1evelopment"),
    (u"E.?[NX].?F.?O.?[EKR].?C.?E.?M.?E.?[NX].?T", u"ENFORCEMENT"),
    (u"([Ee])[^i]?[nr].?[firt].?o.?[rfi].?[coe][^l]?e.?m.?[ec].?[nu].?t\\b", u"\\1nforcement"),
    (u"\\be.?[sx].?t.?[aeo].?[boh].?[lf].?i.?[sa&].?[hnt].?[ec].?d\\b", u"established"),
    (u"I.?[NX].?F.?O.?[EKR].?M.?A.?T.?I.?O.?[NX]", u"INFORMATION"),
    (u"([Ii]) n f o r m a t i o n", u"\\1nformation"),
    (u"([Ii])[nu][fitl]o[rif][ma][an]ti[op]n", u"\\1nformation"),
    (u"L.?E.?G.?I.?S.?[LI].?A.?T.?[IJ].?V.?E", u"LEGISLATIVE"),
    (u"([Ll]).?e.?[gs].?i.?s.?l.?a.?t.?i.?v.?e", u"\\1egislative"),
    (u"P.?A.?[RB].?T.?N.?E.?[RB].?S.?H.?I.?P", u"PARTNERSHIP"),
    (u"([Pp]).?a.?[ri].?t.?[na].?e.?[rif][^-]?[se].?[hn].?i.?[pj]\\b", u"\\1artnership"),
    (u"\\b([Rr]).?e.?[aA].?[d4].?[ji].?u.?s.?t.?m.?e.?n.?t\\b", u"\\1eadjustment"),
    (u"[RBEK].?E.?S.?E.?[RBEK].?V.?A.?T.?I.?O.?[NX]", u"RESERVATION"),
    (u"\\b([Rr]).?e.?[si].?[el3].?r.?[vs].?a.?t.?i.?[obc].?n\\b", u"\\1eservation"),
    (u"T E M P O R A R I L Y", u"TEMPORARILY"),
    (u"([Tt]).?e.?[mn].?p.?o.?[ri].?a.?r.?i.?l.?[yv]", u"\\1emporarily"),
    (u"T.?E.?[RBEK].?M.?I.?[NX].?A.?T.?I.?O.?[NX]", u"TERMINATION"),
    (u"\\b([Tt]).?[ec].?[ri].?[mn].?[ij].?[naou].?a.?[tf].?[ij].?[ocQ].?n", u"\\1ermination"),
## 10
    (u"A.?[MH].?E.?[NX].?D.?M.?[EP].?[NX].?T.?S", u"AMENDMENTS"),
    (u"([Aa]).?[mn].?[ecio].?[nu].?[daou].?[mn].?[ec].?[nadhu].?[td][^']?[se]\\b", u"\\1mendments"),
    (u"C.?O.?N.?F.?O.?[EKR].?M.?I.?N.?G", u"CONFORMING"),
    (u"([Cc]).?o.?[nm].?[fr].?o.?[rnfi].?[maD].?i.?n.?g", u"\\1onforming"),
    (u"D E D U C T I O N S", u"DEDUCTIONS"),
    (u"\\b([Dd]).?[ec].?d.?u.?c.?t.?[il'].?o.?[nu].?s\\b", u"\\1eductions"),
    (u"D.?E.?F.?I.?[NX].?I.?T.?I.?O.?[NX]", u"DEFINITION"),
    (u"([Dd]).?[ea].?[filtr].?[ilt][^r]?[nha][^v]?[il][^c]?t.?i.?[oc].?n", u"\\1efinition"),
    (u"D E T E R M I N E D", u"DETERMINED"),
    (u"d.?e.?t.?e.?[rin].?[ma].?i.?[nau].?e.d\\b", u"determined"),
    (u"\\bD.?I.?S.?A.?B.?I.?L.?[IT].?T.?[YT]", u"DISABILITY"),
    (u"([Dd])[^m^v]?i[^n^t]?[sa].?a.?[bo].?[id].?[lf].?i.?t.?[yv]", u"\\1isability"),
    (u"E[^E]?M.?P.?L.?O.?Y.?M.?E.?[NX].?T", u"EMPLOYMENT"),
    (u"([Ee])[^e]?[mn].?p.?[li'].?[oc].?[yjvi].?[mn].?[eoc].?[nu].?[tf]", u"\\1mployment"),
    (u"\\b([Pp]).?r[^C]?o.?p.?o[^s]?[ri].?t.?i.?o.?n\\b", u"\\1roportion"),
    (u"P.?[REK].?O.?T.?[EB].?C.?T.?I.?O.?[NX]", u"PROTECTION"),
    (u"\\b([Pp]).?[rit].?[oa].?t.?[eolG].?[cdeij].?[tl].?i.?[ob].?[nfH]\\b", u"\\1rotection"),
    (u"P.?[RBEK].?O.?V.?I.?S.?[IT].?O.?[NX][^']?S", u"PROVISIONS"),
    (u"\\b([Pp])[^e^p]?[rip][^m^p]?[oa][^c^p]?[vyTxtrV].?[ilFL][^d^n^z]?[sgaer].?[il'].?[oa][^a^n]?[nuar][^n]?s\\b", u"\\1rovisions"),
    (u"\\b([Qq]).?u.?[ae].?n.?t.?i.?t.?[il].?e.?s\\b", u"\\1uantities"),
    (u"\\b[RBE].?E.?S.?O.?L.?U.?T.?I.?O.?[NX]", u"RESOLUTION"),
    (u"\\b([Rr]).?[eca].?s.?[ou].?l.?(u|y|ii).?t.?[ilj].?o.?(n|u|li|ri)", u"\\1esolution"),
    (u"PE[BE]MITTING", u"PERMITTING"),
    (u"\\b([Pp]).?e.?r.?m.?i.?t.?t.?i.?n.?g\\b", u"\\1ermitting"),
    (u"P U R C H A S I N G", u"PURCHASING"),
    (u"([Pp]) u r c h a s i n g", u"\\1urchasing"),
    (u"S.?U.?B.?C.?H.?A.?P.?T.?[EB].?[RBEK]", u"SUBCHAPTER"),
    (u"([Ss]).?[uad].?[bD].?c.?[hn].?a.?p.?t.?[ec].?[rct]", u"\\1ubchapter"),
    (u"t h r o u g h o u t", u"throughout"),
## 9 
    (u"A.?U.?T.?H.?O.?[RBEK].?I.?T.?Y", u"AUTHORITY"),
    (u"\\b([Aa]).?[unJ].?[t ][^M]?[hnH].?[obpq].?r.?[ifl][^z]?[tlfi].?[yv/^]\\b", u"\\1uthority"),
    (u"A.?M.?E.?[NX].?D.?M.?E.?[NX].?T", u"AMENDMENT"),
    (u"([Aa])[^m]?m[^m]?[eoc][^u]?[nuh][^n]?[daoTt].?m.?[esc][^h]?[nuhHa].?t", u"\\1mendment"),
    (u"C.?O.?M.?M.?I.?T.?T.?E.?[EB]", u"COMMITTEE"),
    (u"([Cc]).?o.?[mn].?[mn].?[i'].?[t'].?t.?[ecio].?e\\b", u"\\1ommittee"),
    (u"\\b([DR]).?E.?D.?U.?C[^A]?T.?I.?O.?[NX]", u"\\1EDUCTION"),
    (u"\\b([DdRr])[^e]?e[^e]?d.?u.?c[^a]?t.?[il'].?o.?[nu]", u"\\1eduction"),
    (u"D E F E N D A N T", u"DEFENDANT"),
    (u"\\b([Dd]).?e.?[fri].?e.?[nu].?d.?a.?[nu].?t", u"\\1efendant"),
    (u"D.?E.?T.?E.?[RB].?M.?I.?N.?E", u"DETERMINE"),
    (u"([Dd]).?e.?t.?[ce].?r.?[mn].?i.?n.?e", u"\\1etermine"),
    (u"\\b([Ee]).?[damMuo].?[una].?[co].?[au].?[tsl(][^o]?[ir].?[oci9].?[nuiajUt»^]\\b", u"\\1ducation"),
    (u"E.?F.?F.?E.?C.?T.?I.?V.?E", u"EFFECTIVE"),
    (u"([Ee])[^a^c^f^i^l^n^s^t]?[fitl][^a^i^l^t]?[fTitFrjEC][^t^x]?[ecs].?[co].?[tcf].?[ilY][^a^e^o]?[vxAyt].?e", u"\\1ffective"),
    (u"E.?X.?E.?C.?U.?T.?I.?V.?E", u"EXECUTIVE"),
    (u"([Ee]).?x.?[es].?[co].?[un].?t.?[il].?v.?[ec]", u"\\1xecutive"),
    (u"i m p o r t e r s", u"importers"),
    (u"I.?[NX].?S.?U.?[RBEK].?A.?[NX].?C.?E", u"INSURANCE"),
    (u"([Ii])[^g]?[nu][^']?[sgt][^a^e^o]?[umv][^a]?[ri][^d^e]?[as].?[numap].?c.?[ect]", u"\\1nsurance"),
    (u"n o r t h e r l y", u"northerly"),
    (u"O.?P.?E.?[RK].?A.?T.?I[^O]?[NX].?G", u"OPERATING"),
    (u"([Oo])[^o]?p.?[ce].?[ri].?a.?[tf].?i[^o]?n.?g", u"\\1perating"),
    (u"P.?A.?[RE].?A.?G.?R.?A.?P.?H", u"PARAGRAPH"),
    (u"([Pp]).?a.?[ri].?a.?[-efgio^].?[ri].?a.?[pgo].?(h|n|[jl]i)", u"\\1aragraph"),
    (u"P E R F O R M E D", u"PERFORMED"),
    (u"p.?[ea].?[rf].?[fr].?o.?r.?[ma][^t]?e[^n]?d\\b", u"performed"),
    (u"P E R M I T T E D", u"PERMITTED"),
    (u"\\b([Pp]).?e.?r.?m.?i.?t.?t.?e.?d\\b", u"\\1ermitted"),
    (u"R E I N S U R E R", u"REINSURER"),
    (u"r e i n s u r e r", u"reinsurer"),
    (u"S.?E.?[Ce].?[RBEK].?E.?T.?A.?[RBEK].?Y", u"SECRETARY"),
    (u"\\b([Ss])[^l^p^t]?e.?[cop][^u]?[rilxfT].?[ecv].?[tfilU].?[aoen'][^r]?[rixutzjy].?[yijv/-})>35,^]\\b", u"\\1ecretary"),
    (u"([Ss]).?o.?u.?t.?h.?e.?r[^n]?l.?y", u"\\1outherly"),
    (u"S T A T E M E N T", u"STATEMENT"),
    (u"([Ss])[^s]?t.?[aeos].?t.?[ec].?[mn].?[ec].?[nu].?t\\b", u"\\1tatement"),
    (u"\\b([Ss]).?[ui].?p.?p.?l.?[yv].?i.?n.?[go]\\b", u"\\1upplying"),
    (u"T E M P O R A R Y", u"TEMPORARY"),
    (u"([Tt]).?e.?[mna].?p.?o.?[ri].?a.?[ri][^l]?[yv]", u"\\1emporary"),
    (u"([TW]) H E [RBEK] E F O [RBEK] E", u"\\1HEREFORE"),
## 8
    (u"\\b([Aa]).?m.?e.?n.?d.?i.?[nu].?g\\b", u"\\1mending"),
    (u"([Aa]).?r.?t.?i.?c.?l.?e[^']?s\\b", u"\\1rticles"),
    (u"([Cc]).?[ob].?m[^a]?[mn].?[e4].?r.?[coe].?[ec]\\b", u"\\1ommerce"),
    (u"\\b([Dd]).?[ilL].?[sh^].?[tfI].?[rit][^l]?[ilf][^l]?[cbter][^u]?[-trc]\\b", u"\\1istrict"),
    (u"D.?R.?A.?W.?B.?A.?C.?K", u"DRAWBACK"),
    (u"\\be.?a.?s.?t.?e.?r[^n]?l.?y", u"easterly"),
    (u"\\b([Ii]).?m.?p.?o.?r.?t.?e.?r\\b", u"\\1mporter"),
    (u"([Ii]).?n.?t.?[ec].?r.?[i.][^r]?[orQ)][^r]?[ri]\\b", u"\\1nterior"),
    (u"\\b([Nn]).?[an ].?[tl].?[if].?[op].?[nuaHort].?a[^n]?[lidfrLJT]\\b", u"\\1ational"),
    (u"\\b([Nn]).?o.?r.?t.?[hn].?e.?r.?n\\b", u"\\1orthern"),
    (u"\\bO.?F.?F.?[Ii].?C.?E.?R[^']?S\\b", u"OFFICERS"),
    (u"\\b([Pp]).?[a^].?[yvr][^e^t]?m.?e.?[numh].?t.?s\\b", u"\\1ayments"),
    (u"\\b([Pp]).?r[^R]?e.?s.?e.?r.?v.?e\\b", u"\\1reserve"),
    (u"\\b([Pp]).?[ri].?e.?v.?i.?o.?u.?[s^]\\b", u"\\1revious"),
    (u"\\b([Pp]).?u.?r.?[ce].?h.?a.?s.?[ce]\\b", u"\\1urchase"),
    (u"([Pp]).?[nu].?r.?p.?o.?s.?[ce].?s\\b", u"\\1urposes"),
    (u"\\b([Pp])[^e]?[un].?[ri].?[seag].?[unai].?[ast^].?[num].?[td]\\b", u"\\1ursuant"),
    (u"\\b([Qq]).?[uji].?a.?n.?t.?[-i].?t.?[yvj]\\b", u"\\1uantity"),
    (u"\\b([Ss])[^m]?a.?l.?a.?[ri].?i.?[ec].?s\\b", u"\\1alaries"),
    (u"\\b([Ss])[esofc][rif7][vxVy./\][ilfdj][cep][eco]s\\b", u"\\1ervices"),
    (u"([Ss]) e r v i c e s", u"\\1ervices"),
    (u"([Ss]).?o.?u.?t.?h.?e.?r.?n\\b", u"\\1outhern"),
    (u"([Ss]).?t.?and.?a.?[ri].?d", u"\\1tandard"),
    (u"\\b([Ss]).?t.?a.?t.?[una][^e]?t.?[eco].?[st]\\b", u"\\1tatutes"),
    (u"([Ss]).?u.?p.?e.?r[^v]?i.?o.?r", u"\\1uperior"),
    (u"\\b([Ss]).?u.?p.?p.?l.?i.?e.?(?=[rd])", u"\\1upplie"),
    (u"([Tt]).?[rVi].?[ec].?a.?s.?u.?[ritT].?[yv]\\b", u"\\1reasury"),
    (u"([Vv]).?e.?t.?e.?[ri].?a.?[nu][^']?s\\b", u"\\1eterans"),
    (u"w.?e.?s.?t.?e.?r[^n]?l.?y", u"westerly"),
## 7
    (u"\\b([Aa]).?[fi£].?[fTiEGlC].?[aoe].?[il].?r[^e]?s\\b", u"\\1ffairs"),
    (u"([Aa]).?[mn].?[ce].?[nu].?d.?[ce].?d\\b", u"\\1mended"),
    (u"\\bA.?[REBK].?[TI].?I.?C.?[LUX].?[EJK]\\b", u"ARTICLE"),
    (u"([Cc]).?[hn].?a.?p.?t.?[ce].?r", u"\\1hapter"),
    (u"([Cc])[^c]?o.?[mn].?p.?a.?[nu].?y", u"\\1ompany"),
    (u"D A M A G E S", u"DAMAGES"),
    (u"\\b([Dd]).?[el?][^v]?[fir].?[e&].?[nup].?s.?[ecBQ]\\b", u"\\1efense"),
    (u"\\b([Ee]).?a.?s.?t.?e.?r.?n", u"\\1astern"),
    (u"\\be.?n.?t.?e.?r.?e.?d\\b", u"entered"),
    (u"F.?O.?[EKR].?E.?I.?G.?[NX]", u"FOREIGN"),
    (u"([Ff]).?u.?r.?n.?i.?s.?h", u"\\1urnish"),
    (u"G.?E.?N.?E.?R.?A[^L]?L", u"GENERAL"),
    (u"\\b([Hh]).?o.?u.?[so].?[il].?[nuh^].?[gc]\\b", u"\\1ousing"),
    (u"\\bI n d i a n ([as])\\b", u"Indian\\1"),
    (u"\\b([Ii])[^t]?s.?l.?and[^']?s\\b", u"\\1slands"),
    (u"([Jj]).?u.?s.?t.?i.?c.?[eco]\\b", u"\\1ustice"),
    (u"M.?A.?X.?I.?M.?U.?M", u"MAXIMUM"),
    (u"([Mm]).?a.?x.?i.?[mn].?[nu].?[mn]", u"\\1aximum"),
    (u"M.?I.?N.?I.?M.?U.?M", u"MINIMUM"),
    (u"\\b([Mm]).?i.?n.?i.?[mn].?[nu].?[mn]", u"\\1inimum"),
    (u"\\b([Mm]).?o.?[nm].?t[^i]?[hnb].?l.?y\\b", u"\\1onthly"),
    (u"([Nn]) o t h i n g", u"\\1othing"),
    (u"([Pp]) a r t i a l", u"\\1artial"),
    (u"\\b([Pp]).?a.?[y35vs}^][^e]?m.?e.?[nmu].?[t1]\\b", u"\\1ayment"),
    (u"\\b([Pp]).?e.?r.?m.?i.?t.?s\\b", u"\\1ermits"),
    (u"P.?R.?I.?V.?A.?T.?E", u"PRIVATE"),
    (u"\\b([Pp]).?[rti].?o[^c]?[geK].?[riT].?[a&].?[mnoe)^]\\b", u"\\1rogram"),
    (u"([Rr])[^r]?e.?g.?[nu].?[li].?a.?r", u"\\1egular"),
    (u"\\b([Ss]).?t.?a.?t.?u[^e]?t.?e\\b", u"\\1tatute"),
    (u"\\b([Ss]).?u.?[bDQ].?[po].?a.?[ri].?t\\b", u"\\1ubpart"),
    (u"([Tt]).?e.?x.?t.?i.?l.?e\\b", u"\\1extile"),
    (u"the r e i n", u"therein"),
    (u"the r e t o", u"thereto"),
    (u"([Tt]).?h[^o]?r.?o.?u.?g.?h\\b", u"\\1hrough"),
    (u"\\bW.?e.?s.?t.?e.?r.?n\\b", u"Western"),
    (u"\\bW.?H.?E.?[EKR].?E.?A.?S\\b", u"WHEREAS"),
    (u"\\bW.?H.?E.?[EKR].?E.?O.?F\\b", u"WHEREOF"),
    (u"\\b([Ww]).?r.?i.?t[^h]?i.?[nhQ].?[gao]\\b", u"\\1riting"),
## 6 
    (u"ACT.?I.?O.?[NX]", u"ACTION"),
    (u"\\ba.?m.?e.?n.?d.?s\\b", u"amends"),
    (u"\\b([Aa])[^d^s]?[gse^][^e^h^n^s^t]?[eac][^c^i]?[nur].?[cpqge(][^x]?[yjv35]\\b", u"\\1gency"),
    (u"\\b([Aa]).?[nu][^o]?n.?u.?a.?l\\b", u"\\1nnual"),
    (u"\\b([Cc])[^h]?e.?[nu].?[tl][^r]?e.?r\\b", u"\\1enter"),
    (u"\\b([Dd]).?[es].?[po].?u.?t.?[yv]\\b", u"\\1eputy"),
    (u"\\b([Dd]).?[un][^j]?[ri].?[il].?[nug].?[gj]\\b", u"\\1uring"),
    (u"([Ee]).?[nu].?e.?[ri].?g.?[yvj3^]\\b", u"\\1nergy"),
    (u"([Ff]) o r m e r", u"\\1ormer"),
    (u"([Hh]).?[ce].?[a«].?[ld].?[tf].?[hnk|]\\b", u"\\1ealth"),
    (u"I.?n.?d.?[iU].?a.?[numh]\\b", u"Indian"),
    (u"([Ii]) n t e n t", u"\\1ntent"),
    (u"\\bI[^t]?s.?l.?and\\b", u"Island"),
    (u"\\bm.?o.?[nm].?t[^i]?[hnb][^']?s\\b", u"months"),
    (u"\\b([Nn]).?[opQ].?t.?[iV].?[coe][^r]?[epoc]\\b", u"\\1otice"),
    (u"\\bO.?[FPE].?[FTPE7][^E^R]?[Ii][^J]?[CO].?[EB]\\b", u"OFFICE"),
    (u"\\b([Pp])[^r]?[eaoc].?[ritljf^].?[itlj].?[op].?d\\b", u"\\1eriod"),
    (u"\\b([Pp]).?e.?r.?m.?i.?t\\b", u"\\1ermit"),
    (u"([Pp]).?o[^f^F]?u.?n.?d.?s\\b", u"\\1ounds"),
    (u"\\b([Rr]).?[ecaf][^a^k^s^x]?p[^e]?[oc°^].?[ri'][^a^n]?[tl1]\\b", u"\\1eport"),
    (u"\\b([Ss]).?a[^n]?[li][^v]?a.?[ri].?[yvj]\\b", u"\\1alary"),
    (u"\\b([Tt])[^h^r]?a[^c^l^s]?([kx]).?i.?n.?g\\b", u"\\1a\\2ing"),
    (u"([Tt]) h a n k s", u"\\1hanks"),
    (u"\\b([Ww]).?ithin\\b", u"\\1ithin"),
## 5
    (u"([Aa]) m e n d\\b", u"\\1mend"),
    (u"\\b([Aa]).?m.?[oQ].?n.?[gj]\\b", u"\\1mong"),
    (u"\\b([Dd])[^i^r^u]?a[^m^r]?t.?e[^f]?([ds])\\b", u"\\1ate\\2"),
    (u"\\b([Dd]).?e[^p]?a.?t.?[hnk]\\b", u"\\1eath"),
    (u"E a r t h", u"Earth"),
    (u"\\b([Ee])[^i^r]?n.?t[^h]?[ec].?r\\b", u"\\1nter"),
    (u"\\b([Ee]).?n.?[tf][^o]?[ri][^t]?[yv]\\b", u"\\1ntry"),
    (u"([Ff])[^f]?u.?n.?d[^']?s\\b", u"\\1unds"),
    (u"\\b([Hh])[^a^e]?u[^l^-]?[mna].?a.?[nm]\\b", u"\\1uman"),
    (u"\\b([Ii])[^n]?t[^d^i]?[ce][^o]?[mn][^(^'^d^i]?s\\b", u"\\1tems"),
    (u"([Jj]).?o.?i.?[nu].?t\\b", u"\\1oint"),
    (u"\\b([Ll])[^l]?[a^][^g^r]?[boD].?[o6].?[rT]\\b", u"\\1abor"),
    (u"\\bl and s\\b", u"lands"),
    (u"\\b([Mm]).?e[^g]?a[^r]?[mn].?s\\b", u"\\1eans"),
    (u"\\b([Mm]).?o.?[nm].?t[^i]?[hnb]\\b", u"\\1onth"),
    (u"\\b([Nn])[^f^w]?o.?r.?t[^o]?[hn]\\b", u"\\1orth"),
    (u"\\bO.?T.?H.?E.?[EKR]\\b", u"OTHER"),
    (u"([Pp]) a r t y", u"\\1arty"),
    (u"([Qq]).?[uj].?[opO].?[tf].?a\\b", u"\\1uota"),
    (u"R e a d y", u"Ready"),
    (u"\\b[EKR].?U.?L.?E.?S", u"RULES"),
    (u"\\bT.?[IiFJ].?[TLiU^].?[LUEX].?[EPBFK]\\b", u"TITLE"),
    (u"\\b([Tt])[^e]?[rVinYTt^][^e]?[amd][^e]?d[^l]?e\\b", u"\\1rade"),
    (u"\\b([Uu])[^q^s]?[nuimrhdaHD^][^c^f^T]?[daouj^][^T]?[ecp6si^].?r\\b", u"\\1nder"),
    (u"([Uu]) n d u e", u"\\1ndue"),
    (u"\\b([Uu]).?[rif][^s]?[bh].?[an][^i]?[nu]\\b", u"\\1rban"),
    (u"\\b([Ww]).?[hnmfbK%][^l]?[ila'^][^l]?[cdpoi;].?h\\b", u"\\1hich"),
## 4
    (u"\\b([Dd])[^i^r^u]?a[^m^n^r^t]?t.?e\\b", u"\\1ate"),
    (u"([Dd]) e b t", u"\\1ebt"),
    (u"F o r t\\b", u"Fort"),
    (u"\\b([Ff])[^o]?r.?o.?m\\b", u"\\1rom"),
    (u"\\b([Ii])[^n]?t[^i]?[ce][^a^o]?m\\b", u"\\1tem"),
    (u"\\b([Ll])[^i^o]?[ae^].?w[^e^i^n^'^(]?s\\b", u"\\1aws"),
    (u"\\b([Ll])[^i^o]?e[^a^d^i^n]?s.?s\\b", u"\\1ess"),
    (u"\\b([Mm])[^o]?o[^h^n^o^u]?r[^i^s]?e\\b", u"\\1ore"),
    (u"([Nn]) o t e\\b", u"\\1ote"),
    (u"\\b([Pp]).?a.?r.?t\\b", u"\\1art"),
    (u"\\br.?e[^g^l^m^p^w^U]?a[^n^r]?[d4]\\b", u"read"),
    (u"\\b[EKR].?U.?L.?E\\b", u"RULE"),
    (u"([Tt]) e r m", u"\\1erm"),
    (u"\\b([Tt]).?e.?x.?t\\b", u"\\1ext"),
    (u"\\bt.?h.?a.?n\\b", u"than"),
    (u"\\b([Tt]).?h[^e^w]?a.?t\\b", u"\\1hat"),
    (u"([Uu]) p o n", u"\\1pon"),
    (u"\\bv.?a.?l.?\.", u"val."),
    (u"\\b([Yy])[^c^/]?[ea6sg^][^d^g^h^m^s^u^v]?[ae&n^].?[r]\\b", u"\\1ear"),
## 3
    (u"\\b([Aa]) [nu] y\\b", u"\\1ny"),
    (u"\\b([Aa])( n|n )y\\b", u"\\1ny"),
    (u"\\b([Ff]) o r\\b", u"\\1or"),
    (u"\\b([Ff])( o|o )r\\b", u"\\1or"),
    (u"\\bF O R\\b", u"FOR"),
    (u"\\bh a s\\b", u"has"),
    (u"\\b([Ll]) a w\\b", u"\\1aw"),
    (u"\\bL aw\\b", u"Law"),
    (u"\\bm a [vy]\\b", u"may"),
    (u"\\bm( a|a )y\\b", u"may"),
    (u"\\b([Nn]) o t\\b", u"\\1ot"),
    (u"\\b([Nn])( o|o )t\\b", u"\\1ot"),
    (u"\\bN O W\\b", u"NOW"),
    (u"\\b([Pp]) a j", u"\\1ay"),
    (u"\\bT H E\\b", u"THE"),
    (u"T( h|h )e\\b", u"The"),
    (u"\\bT( H|H )E\\b", u"THE"),
    (u"T lie", u"The"),
## 2
    (u"\\bB Y\\b", u"BY"),
    (u"\\b([Bb]) [yv]\\b", u"\\1y"),
    (u"I N(?= GENERAL)", u"IN"),
    (u"(DEPARTMENT|STATE) O F", u"\\1 OF"),
    (u"\\b([Tt]) o\\b", u"\\1o"),
    (u"\\bT O\\b", u"TO"),
## symbols only
    (u"([A-Za-z0-9]\\)) \\((?=[A-Za-z0-9]\\))", u"\\1("),
    (u"([^-]) (?=[.,;:?)])", u"\\1"),
    (u'^" (?=[A-Z(])', u'"'),
#
# proper names:
    (u"FRANKUN", u"FRANKLIN"),
    (u"\\bT.?R.?U.?M.?A.?N", u"TRUMAN"),
    (u"\\bH.?A.?R[^N^V]?[EKR].?Y", u"HARRY"),
    (u"\\bD.?W.?I.?G.?H.?T", u"DWIGHT"),
    (u"E.?I.?S.?E.?N.?H.?O.?W.?E.?R", u"EISENHOWER"),
    (u"J.?O.?H.?N.?S[^T]?O.?N", u"JOHNSON"),
    (u"L Y N D O N", u"LYNDON"),
    (u"J.?O.?H.?N", u"JOHN"),
    (u"F I T Z G E R A L D", u"FITZGERALD"),
    (u"K.?E.?N.?N.?E.?D.?Y", u"KENNEDY"),
    (u"N.?I.?X.?O.?N", u"NIXON"),
    (u"[EKR].?I.?C.?H.?A.?[EKR].?D", u"RICHARD"),
    (u"G.?E.?R.?A.?L.?D", u"GERALD"),
    (u"\. F O R D ,", u". FORD,"),
    (u"J I M M Y", u"JIMMY"),
    (u"C A R T E R", u"CARTER"),
    (u"R O N A L D", u"RONALD"),
    (u"R E A G A N", u"REAGAN"),
    (u"GEORGE B U S H", u"GEORGE BUSH"),
#
# months:
    (u"J.?a.?n.?u.?a.?r.?y", u"January"),
    (u"J[^e^u]?a.?n.?\.", u"Jan."),
    (u"F.?e.?b.?r.?u.?a.?r.?y", u"February"),
    (u"F.?e.?b.?\.", u"Feb."),
    (u"M.?a.?r.?[^a^e^i^o^u]?c.?h", u"March"),
    (u"M[^c^e^i^u]?a[^d^i^s^u^v^y]?r.?\.", u"Mar."),
    (u"A.?p[^a]?r.?i.?l", u"April"),
    (u"A p r \.", u"Apr."),
    (u"M a y\\b", u"May"),
    (u"\\bMa[Vv]\\b", u"May"),
    (u"\\bJ[^e^i^o]?u[^a]?n[^d^g^k]?e\\b", u"June"),
    (u"J u n \.", u"Jun."),
    (u"J.?u.?[lI].?y", u"July"),
    (u"A.?u.?g.?u.?s.?t", u"August"),
    (u"Augu[^s]t", u"August"),
    (u"Au[^g]ust", u"August"),
    (u"A.?u.?g[^u]?\.", u"Aug."),
    (u"S.?[ce].?p.?t.?[ce].?[mn].?b.?[ce].?r", u"September"),
    (u"S e p t \.", u"Sept."),
    (u"O.?[ce].?t.?o.?[bh].?[ce].?r", u"October"),
    (u"O c t \.", u"Oct."),
    (u"[O0]c[']?t[-']?\.", u"Oct."),
    (u"N.?o.?v.?[ce].?[mn].?b.?[ce].?r", u"November"),
    (u"N.?o.?v.?\.", u"Nov."),
    (u"D.?[ce].?[ce].?[ce].?[mn].?[bh].?[ce].?r", u"December"),
    (u"D e c \.", u"Dec."),
    (u"D[ft]?e[ce]\.", u"Dec."),
#
# military:
    (u"F.?[ao].?r.?c.?[ce].?s\\b", u"Forces"),
    (u"F o r c e", u"Force"),
    (u"A.?r[^a]?[mn].?y", u"Army"),
    (u"N.?a.?v.?y\\b", u"Navy"),
    (u"N.?a.?v.?a.?l", u"Naval"),
    (u"M a r i n e s", u"Marines"),
    (u"M a r i n e", u"Marine"),
    (u"C.?o.?r.?p[^u]?s", u"Corps"),
#
# increase spacing:
    (u"([0-9],)(?=[12][0-9]{3})", u"\\1 "),
    (u"([123]) ([0-9])(?=,[0-9]{3})", u"\\1\\2"),
    (u"([Ss])ection(?=[0-9])", u"\\1ection "),
    (u"([0-9])(USC|usc)", u"\\1 USC"),
    (u"(USC|usc)(?=[0-9])", u"USC "),
    (u"ofA(?=[a-z])", u"of A"),
    (u"\\bterm'", u"term '"),
    (u"ofthe(?=[A-Z])", u"of the "),
    (u"PUBLICLAW", u"PUBLIC LAW"),
#
# citation:
    (u"[Uu][ ]?\.[ ]?[Ss][ ]?\.[ ]?[Cc][ ]?[.,]", u"U.S.C."),
    (u"S.?t[^r]?a[^r]?t[^e]?\.", u"Stat."),
    (u"([0-9])(?=Stat)", u"\\1 "),
    (u"Stat\.(?=[0-9])", u"Stat. "),
    (u"([0-9])U.(S| S)", u"\\1 U.S"),
#
# legislative terms:
    (u"assemblea", u"assembled"),
    (u"assembled\^", u"assembled,"),
    (u"assern[boh]", u"assemb"),
    (u"f[un]rth[ce]r\^", u"further,"),
    (u"([Pp])rovided\^", u"\\1rovided,"),
    (u"U.?[nu][^l]?i.?t.?[ce].?d", u"United"),
    (u"[A-Z][A-Z](m|ni)t[ce]d", u"United"),
    (u"S.?[ft].?a.?[ft][^i^u]?[ce][^']?s\\b", u"States"),
    (u"S[^e^i]?[ft][^r]?a.?[ft][^i^u^.]?[ce]\\b", u"State"),
    (u"A.?[mn].?[ce].?r.?i.?[ce].?a", u"America"),
    (u"eiiact", u"enact"),
    (u"it ena[^c]t", u"it enact"),
    (u"\\bit en.{1,3}ted\\b", u"it enacted"),
    (u"\\bA c t\\b", u"Act"),
#
# Roman numerals
    (u"I ([IVX])\\b", u"I\\1"),
    (u"\\bV I", u"VI"),
# end of September 2011 run
###################################################################################################
# August 2011: begin fixing errors identified by running a concordance of the text through a spell checker.
# estimated number of changes to dataset resulting from this run: 260,000
#
## excess whitespace:
        (u"\\( ([a-z0-9A-Z]) \\)", u"(\\1)"),
        (u"\\b([Tt]) h e\\b", u"\\1he"),
        (u"\\b([Aa]) n d\\b", u"\\1nd"),
        (u"([Ff])re e", u"\\1ree"),
        (u"([Ff])r ee", u"\\1ree"),
        (u"\\b([Ff]) ree", u"\\1ree"),
        (u"\\bA C T\\b", u"ACT"),
        (u"\\b1 9 ([0-9]) ([0-9])\\b", u"19\\1\\2"),
        (u"\\bt a x\\b", u"tax"),
        (u"\\bS E C \.", u"SEC."),
        (u"([Nn]) a tion", u"\\1ation"),
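### whitespace fixes that will also catch transposition errors or simple misspellings:
### NOTE(review): the "eduction" and "ublication" patterns in this group are written
### with a single-backslash "\b" inside a non-raw string. Python reads that as a
### backspace character (U+0008), not a regex word boundary, so those two patterns
### cannot match ordinary text. The neighbouring entries use the escaped "\\b" form.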
        (u"A.?p.?p.?r.?o.?p.?r.?i.?a.?t.?i.?o.?n\\b", u"Appropriation"),
        (u"\\b([Aa]).?t.?t.?e.?n.?d.?a.?n.?[ce].?e\\b", u"\\1ttendance"),
        (u"\\b([Aa]).?s.?s.?i.?s.?t.?a.?n.?[ce].?e\\b", u"\\1ssistance"),
        (u"([Cc]).?o.?n.?s.?t.?r.?u.?[ce].?t.?i.?o.?n\\b", u"\\1onstruction"),
        (u"\\bC.?O.?N.?C.?U.?R.?R.?E.?N.?T\\b", u"CONCURRENT"),
        (u"\b([DdRr]).?e.?d.?u.?[ce].?t.?i.?o.?n\b", u"\\1eduction"),
        (u"\\bD.?e.?p.?a.?r.?t.?m.?e.?n.?t\\b", u"Department"),
        (u"\\bD.?E.?P.?A.?R.?T.?M.?E.?N.?T\\b", u"DEPARTMENT"),
        (u"d.?e.?s.?i.?g.?n.?a.?t.?e.?d\\b", u"designated"),
        (u"\\b([Ee]).?m.?p.?l.?o.?y.?m.?[ce].?n.?t\\b", u"\\1mployment"),
        (u"\\bf.?o.?l.?l.?o.?[vw].?i.?n.?g\\b", u"following"),
        (u"\\b([Ff]).?o.?r.?e.?i.?g.?n\\b", u"\\1oreign"),
        (u"\\b([Gg]).?o.?v.?[ce].?r.?n.?m.?[ce].?n.?t\\b", u"\\1overnment"),
        (u"\\b([Hh]).?e.?a.?d.?i.?n.?g\\b", u"\\1eading"),
        (u"\\b([Ii]).?n.?s.?e.?r.?t.?i.?n.?g\\b", u"\\1nserting"),
        (u"\\bJ.?O.?I.?N.?T\\b", u"JOINT"),
        (u"\\b([Mm]).?a[^r^s]?k.?i.?n.?g\\b", u"\\1aking"),
        (u"([Pp]).?a.?r.?a.?g.?r.?a.?p.?h\\b", u"\\1aragraph"),
        (u"\\bp.?r.?e.?[ce].?e.?d.?i.?n.?g\\b", u"preceding"),
        (u"\b([Pp]).?u.?b.?l.?i.?[ce].?a.?t.?i.?o.?n\b", u"\\1ublication"),
        (u"\\bR.?E.?S.?O.?L.?U.?T.?I.?O.?N\\b", u"RESOLUTION"),
        (u"\\b([Oo])[^c]?t.?h.?e.?r\\b", u"\\1ther"),
        (u"\\bs.?t.?r.?i.?k.?i.?n.?g\\b", u"striking"),
        (u"([Tt]).?r.?a.?n.?s.?p.?o.?r.?t.?a.?t.?i.?o.?n\\b", u"\\1ransportation"),
## character substitution errors in OCR transcription:
        (u"\\bh[vy]\\b", u"by"),
        (u"\\bbv\\b", u"by"),
        (u"FrM", u"Free"),
        (u"\\bFraa\\b", u"Free"),
        (u"\\bimder\\b", u"under"),
        (u"\\b([Tt])[ilr]ie\\b", u"\\1he"),
        (u"\\bt[bn]e\\b", u"the"),
        (u"\\bPub.c\\b", u"Public"),
        (u"([^K][aeioudg])i-(?=[a-z])", u"\\1r"),
        (u"([^K][aeiou])i'(?=[a-hj-z])", u"\\1r"),
        (u"\\bOt[>)]", u"(b"),
        (u"assemhl", u"assembl"),
        (u"nnent\\b", u"ment"),
        (u"emment\\b", u"ernment"),
        (u"\\bPUB[OU]C\\b", u"PUBLIC"),
        (u"\\]\\)(?=[a-z])", u"p"),
        (u"\\bfimd", u"fund"),
        (u"\\bGreneral\\b", u"General"),
        (u"\\bamoimt\\b", u"amount"),
        (u"\\bcanying\\b", u"carrying"),
        (u"\\bslia", u"sha"),
        (u"t[li]o(u|ii)\\b", u"tion"),
        (u"([Tt])[li]iere", u"\\1here"),
        (u"\\busc of\\b", u"use of"),
        (u"[)}]dng\\b", u"ying"),
        (u"([^e])nient\\b", u"\\1ment"),
        (u"([Ss])cct", u"\\1ect"),
        (u"\\([JT]ove", u"Gove"),
        (u"d\^.?ree", u"degree"),
        (u"e<.(?=[ot])", u"ec"),
        (u"\\bt[il]ic\\b", u"the"),
        (u'\u00A3uid', u'and'),
### numbering
        (u"\\(D ", u"(1) "),
        (u"\.SX", u".5%"),
        (u"\\bi([0-9] USC)", u"1\\1"),
## citations
        (u"U\.S \.C\.", u"U.S.C."),
        (u"U\.S \.C \.", u"U.S.C."),
# end of August 2011 run
#
# July 2011: initial series of replacements
        (u"\\(([a-z0-9])X", u"(\\1)("),
        (u"([0-9]) us[ce]\\b", u"\\1 USC"),
        (u"([0-9]) u s [ce]\\b", u"\\1 USC"),
        (u"tiou\\b", u"tion"),
        (u"([Ss])cction", u"\\1ection"),
    ]
}

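# TESSERACT FIXES. Used for early volumes of the U.S. Statutes at Large
# NOTE(review): several patterns in this list (e.g. "\bgg\b", "tl1\b", '\baud\b')
# write "\b" with a single backslash in a non-raw string. Python turns that into a
# backspace character (U+0008) rather than a regex word boundary, so such patterns
# will not anchor on word edges. Use "\\b" or a raw string (ur'...') if a word
# boundary is intended.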
fixes['tesseract'] = {
    'regex': True,
    'msg': {
        '_default':u'Robot:correcting common OCR errors',
    },
    'replacements': [
# dehyphenation. This will correct errors like “hun” and “dred”
# being separately identified as misspelled when they appear on
# different lines separated by a hyphen. It will introduce new 
# errors where hyphens appear inside, or just before, a marginal
# note in the text, but these should be substantially outnumbered
# by the errors fixed from rejoining hyphenated words.
        (u'([a-zA-Z][a-xz])[-–•]\r\n([a-z])', u'\\1\\2'),
# deleting surplusage
        (u" [`'] ", u" "),
        (u"\.\.\.\.", u""),
        (u" _([a-z])", u" \\1"),
        (u"\bgg\b", u""),
        (u"if'\b", u"if"),
        (u"if' ", u"if "),
## garbage strings at front and end of line
        (u'^(¤|°|;|:|\'|"|¥|\||,|\.)+', u''),
        (u'(¤|°|;|:|\'|"|¥|\||,|\.)+$', u''),
# number ranges
        (u"ty(—|\u00B7)(?=[efnost])", u"ty-"),
# common "scannos" (incorrect character recognition)
        (u'f`', u'f'),
        (u' _f', u' f'),
        (u' 0f', u' of'),
        (u" of[`‘’_'] ", u' of '),
        (u" ot[`‘’'] ", u' of '),
        (u" qf([a-zA-Z0-9])", u' of \\1'),
        (u' 1n ', u' in '),
        (u'{i', u'fi'),
        (u' VV', u' W'),
        (u'1\u00B7 ', u'r '),
        (u'1\u00B7(?=[a-z1])', u'r'),
        (u'\b[Il]\)(?=[^ ])', u'D'),
        (u'\./[Iil1]', u'A'),
        (u'\.0(?=n)', u'A'),
        (u"{'(?=[aeioru])", u"f"),
        (u"[it{]`", u'f'),
        (u"f'(?=[a-rt-z01])", u"f"),
        (u"\.[dH](?=[cen])", u"A"),
        (u"tl1\b", u"th"),
# ligatures seem to be particularly difficult for Tesseract
        (u' oif', u' off'),
        (u'suihc', u'suffic'),
        (u'Ojiice', u'Office'),
        (u'Q[fj]ic', u'Offic'),
        (u'o[dHj]ice', u'office'),
        (u'o[fit]hce', u'office'),
        (u'O[dHhj]ice', u'Office'),
        (u'of[A-Z]ce', u'office'),
        (u'oiii[ceo]', u'offic'),
        (u'qyic', u'offic'),
        (u'otfic', u'offic'),
        (u' o ce', u' office'),
        (u' o ence', u' offence'),
        (u'afh', u'affi'),
        (u'[BH]rst', u'first'),
        (u'speciii', u'specifi'),
        (u'([eo])tli[ce]', u'\\1ffic'),
        (u'oilic[ce]', u'office'),
        (u'eiiec', u'effec'),
        (u'iift([ehy])', u'fift\\1'),
        (u'(f1|ii)v[ce]', u'five'),
        (u' tive ', u' five '),
        (u'aliirm', u'affirm'),
        (u'Hft', u'fift'),
        (u'Hv[ce]', u'five'),
        (u"eilect", u"effect"),
        (u"\bilft", u"fift"),
        (u'[lI1][iIl1]fty', u'fifty'),
        (u"ty[-–—]liv", u"ty-fiv"),
        (u"ti[ft]t", u"fift"),
        (u"oihoe", u"office"),
        (u"iil(?=(ed|ing))", u"fil"),
        (u"liv[ce](?= (dol|hun|thou))", u"five"),
# "e" is frequently mis-OCRed as "c"
        (u'\bctc\.', ur'etc.'),
        (u'rcs', u'res'),
        (u'([Rr])cp', u'\\1ep'),
        (u'([lt])cd\b', u'\\1ed'),
        (u'mcnt', u'ment'),
        (u'([Pp])rcs', u'\\1res'),
        (u'tivc', u'tive'),
        (u"cxt", u"ext"),
        (u"livcs", u"tives"),
        (u"\b[ce]nt[ce]r", u"enter"),
        (u"\b([bBhHwW])c\b", u"\\1e"),
        (u"([dD])c(?=[np])", u"\\1e"),
        (u"cth\b", u"eth"),
        (u"Scp(?=[t.])", u"Sep"),
        (u"\b[ce][nu]..[nu][ce][ce]r", u"engineer"),
# "i" is frequently mis-OCRed as the number "1" or lowercase "l"
        (u'[lI1]ng\b', u'ing'),
        (u'h[l1](?=[bms])', u'hi'),
        (u'w[l1]th', u'with'),
        (u"th1[sS]", u"this"),
        (u"sh1p", u"ship"),
        (u"([Cc])h1[ce]f", u"\\1hief"),
        (u"w1s[ce]", u"wise"),
        (u"h1gh", u"high"),
        (u'mach1ne', u'machine'),
        (u's 1p\b', u'ship'),
        (u'([st])[iIl1][oO0]n', u'\\1ion'),
        (u's[oO0][lI1]di?[ce]r(?=[ a-z])', u'soldier'),
        (u'cr[il1]on', u'ction'),
        (u'th[lI1]r(?=[dt])', u'thir'),
# "n" is frequently mis-OCRed as "u"
        (u'\baud\b', u'and'),
# "o" is frequently mis-OCRed as the number "0"
        (u'0ther', u'other'),
        (u't0wn', u'town'),
        (u'0h1[co]', u'Ohio'),
        (u'([Ff])0', u'\\1o'),
        (u'pr[O0]v', u'prov'),
        (u'\b[o0]n[ce]\b', u'one'),
# "y" is frequently mis-OCRed as "v"
        (u'trv', u'try'),
        (u'monev', u'money'),
        (u' anv ', u' any '),
        (u'\bmav\b', u'may'),
# "rn" and "in" are sometimes mis-OCRed as "m"
        (u'govem', u'govern'),
        (u'ordam', u'ordain'),
        (u'mcreas', u'increas'),
# directions
        (u'(we|We|ea|Ea)stem', u'\\1stern'),
        (u'(nor|Nor|sou|Sou)them', u'\\1thern'),
# other common substitutions of one character for another
# that cause misspellings
##and
        (u'\bund\b', u'and'),
        (u'amd', u'and'),
# but the preceding fix can be overzealous sometimes, so:
        (u'Canden', u'Camden'),
## for
        (u' ibr', u' for'),
        (u' (fb|to)r ', u' for '),
        (u'_/br', u'for'),
## further
        (u'[fj]i[erstxz]rt[lh][ce]r', u'further'),
        (u'furlher', u'further'),
        (u'fuiither', u'further'),
        (u'fwther', u'further'),
        (u'j(h|ia)rther', u'further'),
        (u'jirr.?.?.?er', u'further'),
## in
        (u' iu ', u' in '),
        (u' [t1]n ', u' in '),
        (u'_1n', u'in'),
        (u' 1n', u' in'),
## is
        (u' [il1][sS] ', u' is '),
## of
        (u' [cegprqsu]f ', u' of '),
        (u' [o0][filty]` ', u' of '),
        (u',[o0][filty]` ', u', of '),
        (u" q[`'] ", u' of '),
        (u"of_([a-zA-Z0-9])", u'of \\1'),
        (u" [ce]y[`'] ", u' of '),
        (u'1y"', u'of'),
        (u'\bot? th[a-z]\b', u'of the'),
        (u'\bo(ft)? e\b', u'of the'),
        (u'\bot all\b', u'of all'),
        (u' Q" ', u' of '),
        (u'\b[o0] tate', u'of State'),
        (u'\b0 the\b', u'of the'),
        (u'q/`', u'of'),
        (u'1y`', u'of'),
        (u'0 R e', u'of Re'),
## or
        (u' 0r', u' or'),
        (u',0r', u', or'),
## shall
        (u' s a l ', u' shall '),
        (u' sha l', u' shall'),
        (u'\bs all\b', u'shall'),
## the
        (u't[l/]ze', u'the'),
        (u'th[cg]', u'the'),
        (u' tl e', u' the'),
        (u' t[hl][l1]e', u' the'),
        (u'\btl[a-z1]e\b', u'the'),
        (u' of th ', u' of the '),
        (u'\btot e\b', u'to the'),
        (u' in th ', u' in the '),
        (u'{he', u'the'),
## with
        (u' mth ', u' with '),
        (u'w[nx]th', u'with'),
##
        (u' Jet ', u' Act '),
        (u' [eu]ct ', u' act '),
        (u' Au ', u' An '),
        (u'Jn ', u'An '),
#        (ur'" An', ur'"An'),
        (u' dn Act', u' An Act'),
        (u'afl[ce]r', u'after'),
        (u' unend', u' amend'),
        (u'arnend', u'amend'),
        (u' bo ', u' be '),
        (u'wuse', u'cause'),
        (u'cer [i1] ca e', u'certificate'),
        (u'c[il1]t[il1]z[ce]n', u'citizen'),
        (u'ddllar', u'dollar'),
        (u"do[l1][l1]ar", u"dollar"),
        (u'g1ve', u'give'),
        (u'[ti]iirn', u'furn'),
        (u' i[fily]` ', u' if '),
        (u'irnp', u'imp'),
        (u'im o(?=(rt|se))', u'impo'),
        (u'([a-z])mg ', u'\\1ing '),
        (u' mten([dt])', u' inten\\1'),
        (u' sai ', u' said '),
        (u'samc', u'same'),
        (u'And e it', u'And be it'),
        (u'Arrnov[mn]n', u'Approved'),
        (u'authoriae', u'authorize'),
        (u'authomze', u'authorize'),
        (u'D[il1]str[il1]ct', u'District'),
        (u'enawt', u'enact'),
        (u'ena[ce]t[ce][dj]', u'enacted'),
#
        (u'intitulcd', u'intituled'),
        (u'prescri e', u'prescribe'),
        (u'([Pp])rovi e', u'\\1rovide'),
        (u'sect[i1][o0]n', u'section'),
        (u'therc', u'there'),
        (u' t is act', u' this act'),
        (u' t0 ', u' to '),
        (u' te the', u' to the'),
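# numbers
# NOTE(review): in a non-raw literal such as u'\bmne', the sequence \b is a
# backspace character (U+0008), not the regex word-boundary assertion, so
# patterns in this list written that way will not anchor on word boundaries
# as intended. Entries near the top of this file spell it u"\\b" instead.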
        (u'tw0', u'two'),
        (u'thxrd', u'third'),
        (u'\bt r[ce][ce]', u'three'),
        (u'thr[ce][ce]', u'three'),
        (u'f[co]u(r|1•)', u'four'),
        (u's1x', u'six'),
        (u"  ix", u" six"),
        (u's[ce]v[ce][nu]', u'seven'),
        (u'[ce][i1]ght', u'eight'),
        (u'e[i1] ht', u'eight'),
        (u'n1n[ce]', u'nine'),
        (u'\bmne', u'nine'),
        (u'tcn', u'ten'),
        (u'[ce]i ht[ce][ce][nu]', u'eighteen'),
        (u'twquty', u'twenty'),
        (u"tw[a-z][a-z]ty", u"twenty"),
        (u'twent -', u'twenty-'),
        (u't[^ ][^ ]ir(?=[dt])', u'thir'),
        (u"\bsix y\b", u"sixty"),
        (u"\bseven ty", u"seventy"),
        (u'eigh ty', u'eighty'),
        (u'\beig.?.?ty', u'eighty'),
        (u'[^e^E]ig[^h]ty', u'eighty'),
        (u'nincty', u'ninety'),
        (u"\bnine y", u"ninety"),
        (u'hnmdred', u'hundred'),
        (u'\bun red?\b', u'hundred'),
        (u'h[un][un]d([^r]e|r[^e]|[^r][^e])d', u'hundred'),
        (u'himdred', u'hundred'),
        (u'\bhundre\b', u'hundred'),
        (u't ousan ', u'thousand '),
        (u't ousand', u'thousand'),
        (u' ousan ', u' thousand '),
        (u'\bthousan\b', u'thousand'),
        (u'1[Ss](?=[0-9][0-9])', u'18'),
#
        (u'c0untry', u'country'),
        (u'[ce]na[ce]t[ce]d', u'enacted'),
# legislature
        (u'Am[ce][a-z][a-z][co]a', u'America'),
        (u'Am[ce][a-z][co]a', u'America'),
        (u'Arr([a-z01>]+), (?=[A-Z])', ur'Approved, '),
        (u'(II|H)[oO0]us[ce]', u'House'),
        (u' Hm[a-z][ce] ', u' House '),
        (u'Hausa', u'House'),
        (u'I[a-z]us[ce]', u'House'),
        (u'and H[a-z][a-z][a-z][a-z]e ', u'and House '),
        (u'and I[a-zI][a-z][a-z][a-z][ce] ', u'and House '),
        (u'Senate and H[a-z][a-z][a-z][ce] ', u'Senate and House '),
        (u'Senate and Hom[ce] ', u'Senate and House '),
        (u' Slate ', u' State '),
        (u'U[a-z][a-z][a-z][ce]d', u'United'),
        (u'Umked', u'United'),
        (u' nite tates', u' United States'),
        (u"\bnit?e? S?tates?", u"United States"),
        (u'Sncr[a-z][a-z][a-z] ', u'Section '),
        (u'Sncr[a-z][a-z][a-z][a-z] ', u'Section '),
        (u'O[co]([a-z]+)gr([a-z]{0,4})\b', u'Congress'),
        (u'S[amn]c\.', ur'Sec.'),
        (ur'S[an]o\.', ur'Sec.'),
        (u'SE[CG]\.', ur'Sec.'),
        (u'S[amn]ss\.', ur'Sess.'),
        (u'SESS\.', ur'Sess.'),
        (u'C[nuH]\.', ur'Ch.'),
        (u'ta[^t]iv', u'tativ'),
        (u'Am[ce]rwa', u'America'),
        (u'(en|m)ac[^t]ed', u'enacted'),
# fixes for erroneously inserted or deleted whitespace
        (u' t e', u' the'),
        (u'shallbe', u'shall be'),
        (u'[oq]fth[ce]', u'of the'),
        (u'bythe', u'by the'),
        (u'inthe', u'in the'),
        (u'tothe', u'to the'),
        (u'An[au]ct', u'An act'),
        (u'B e it', u'Be it'),
        (u'itenact', u'it enact'),
        (u'i[ft]further', u'it further'),
        (u'it-further', u'it further'),
        (u'thereforefurther', u'therefore further'),
        (u"ji([a-z]+)h[ce]r", u"further"),
        (u'[aoq]fRep', u'of Rep'),
        (u',in', u', in'),
        (u'Actfor', u'Act for'),
        (u'havethe', u'have the'),
        (u'atleast', u'at least'),
        (u'andfor', u'and for'),
        (u" ofR ", u" of R"),
        (u' [“"] (?=[A-Z])', ur' “'),
        (u'ofAmer', u'of Amer'),
# months and years
        (u'Jan\.(?=[0-9])', ur'Jan. '),
        (u'F[co]b\.', ur'Feb.'),
        (u'Feb\.(?=[0-9])', ur'Feb. '),
        (u'Mar\.(?=[0-9])', ur'Mar. '),
        (u'April(?=[0-9])', ur'April '),
        (u'M[a-z]y(?=[0-9])', ur'May '),
        (u'J [nu](?=(ne|ly))', ur'Ju'),
        (u'J[un]n[ce](?=[0-9])', ur'June '),
        (u'J[un]ly(?=[0-9])', ur'July '),
        (u'A[nu]g\.(?=[0-9])', ur'Aug. '),
        (u'S[ce][p ]t\.(?=[0-9])', ur'Sept. '),
        (u'O[co]t\.(?=[0-9])', ur'Oct. '),
        (u'Nov\.(?=[0-9])', ur'Nov. '),
        (u'D[a-z][a-z]\.(?=[0-9])', ur'Dec. '),
        (u'([0-9],)(?=1[78][0-9][0-9])', u'\\1 '),
# character omissions
        (u"accor mg", u"according"),
        (u"ap oin", u"appoin"),
        (u"ap int", u"appoint"),
        (u'ap rop', u'approp'),
        (u"ap? r[co] ri", u"appropri"),
        (u'ap rov', u'approv'),
        (u"assemb e", u"assemble"),
        (u'aut or', u'author'),
        (u"\bui ing", u"building"),
        (u"com le", u"comple"),
        (u"Com trol", u"Comptrol"),
        (u"\bde enden", u"dependen"),
        (u"\bdis ur", u"disbur"),
        (u"\bdis os", u"dispos"),
        (u'di trict', u'district'),
        (u'\bistrict', u'district'),
        (u'd[o0] ars', u'dollars'),
        (u' [o0] ars', u' dollars'),
        (u' dolars', u' dollars'),
        (u'do(l+) ar', u'dollar'),
        (u'emp oy', u'employ'),
        (u" em lo ", u" employ"),
        (u'entit[^l]e', u'entitle'),
        (u'exce t', u'except'),
        (u'gnmt', u'grant'),
        (u"\berein\b", u"herein"),
        (u"\bim ris", u"impris"),
        (u"\bim os", u"impos"),
        (u'\bim ro', u'impro'),
        (u"inc u (in|m)g", u"including"),
        (u"ju ge", u"judge"),
        (u'juri diction', u'jurisdiction'),
        (u' aws', u' laws'),
        (u"li ht", u"light"),
        (u"li uor", u"liquor"),
        (u"\b0 ve\b", u"of five"),
        (u' ot er', u' other'),
        (u"\bot [1inwz]er", u"other"),
        (u"\bp ace", u"place"),
        (u"\bro er", u"proper"),
        (u"resi en", u"residen"),
        (u"ri ht", u"right"),
        (u'Re ublic', u'Republic'),
        (u' ublic ', u' public '),
        (u"\bpu ic\b", u"public"),
        (u'p.ension', u'pension'),
        (u"pu is e", u"publishe"),
        (u"\bro rata", u"pro rata"),
        (u're pect', u'respect'),
        (u'regi t', u'regist'),
        (u"scri e", u"scribe"),
        (u"sen ence", u"sentence"),
        (u'\bshal\b', u'shall'),
        (u"\bsu (?=(ject|mit))", u"sub"),
        (u'subscrib r', u'subscriber'),
        (u'\bsuc ', u'such '),
        (u"\bsu ple", u"supple"),
        (u"\bsu[p ][p ]ort", u"support"),
        (u"\bsu reme", u"supreme"),
        (u"\btra e", u"trade"),
        (u'assembled, hat', u'assembled, That'),
        (u'assembled, T t', u'assembled, That'),
        (u' T at', u' That'),
        (u'u on\b', u'upon'),
        (u'ves el', u'vessel'),
        (u'\bw ic\b', u'which'),
        (u'I.?.?antry', u'Infantry'),
        (u'([pP])ubhc', u'\\1ublic'),
# extra characters in otherwise correctly spelled words
        (u'C.?o.?n.?g.?r.?e.?s.?s\b', u'Congress'),
        (u'S.?e.?n.?a.?t.?e\b', u'Senate'),
        (u'H.?o.?u.?s.?e\b', u'House'),
        (u'A.?m.?e.?r.?i.?c.?a\b', u'America'),
        (u'U.?n.?i.?t.?e.?d\b', u'United'),
        (u'S.?e.?c.?r.?e.?t.?a.?r.?y\b', u'Secretary'),
        (u'R.?e.?p.?r.?e.?s.?e.?n.?t', u'Represent'),
        (u'\bd.?o.?l.?l.?a.?r\b', u'dollar'),
        (u'\bs.?h.?a.?l.?l\b', u'shall'),
    ]
}