package Lingua::DE::ASCII;

use 5.006;
use strict;
use warnings;

require Exporter;

our @ISA = qw(Exporter);

our @EXPORT = qw(to_ascii to_latin1);
our %EXPORT_TAGS = ( 'all' => [ @EXPORT ]);
our @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } );
our $VERSION = '0.03';

# Transliteration table: Latin-1 (ISO-8859-1) character => ASCII spelling.
#
# The keys are written as \x escapes instead of raw 8-bit literals, so the
# table no longer depends on the encoding this source file is stored in (a
# copy whose high bytes got lost silently degrades to_ascii() into "delete
# every non-ASCII character").  The values are unchanged and appear in
# code-point order.
my %ascii = (
    # 0xC0 - 0xDF: upper-case letters (plus the multiplication sign) and sharp s
    "\xC0" => 'A',  "\xC1" => 'A',  "\xC2" => 'A',  "\xC3" => 'A',
    "\xC4" => 'Ae', "\xC5" => 'A',  "\xC6" => 'Ae', "\xC7" => 'C',
    "\xC8" => 'E',  "\xC9" => 'E',  "\xCA" => 'E',  "\xCB" => 'E',
    "\xCC" => 'I',  "\xCD" => 'I',  "\xCE" => 'I',  "\xCF" => 'I',
    "\xD0" => 'D',  "\xD1" => 'N',  "\xD2" => 'O',  "\xD3" => 'O',
    "\xD4" => 'O',  "\xD5" => 'O',  "\xD6" => 'Oe', "\xD7" => 'x',
    "\xD8" => 'Oe', "\xD9" => 'U',  "\xDA" => 'U',  "\xDB" => 'U',
    "\xDC" => 'Ue', "\xDD" => 'Y',  "\xDE" => 'Th', "\xDF" => 'ss',

    # 0xE0 - 0xFF: lower-case letters (plus the division sign)
    # NOTE(review): 0xF0 => 'p' and 0xF7 => 'o' look odd but are what the
    # original value list yields in code-point order -- confirm the intent.
    "\xE0" => 'a',  "\xE1" => 'a',  "\xE2" => 'a',  "\xE3" => 'a',
    "\xE4" => 'ae', "\xE5" => 'a',  "\xE6" => 'ae', "\xE7" => 'c',
    "\xE8" => 'e',  "\xE9" => 'e',  "\xEA" => 'e',  "\xEB" => 'e',
    "\xEC" => 'i',  "\xED" => 'i',  "\xEE" => 'i',  "\xEF" => 'i',
    "\xF0" => 'p',  "\xF1" => 'n',  "\xF2" => 'o',  "\xF3" => 'o',
    "\xF4" => 'o',  "\xF5" => 'o',  "\xF6" => 'oe', "\xF7" => 'o',
    "\xF8" => 'oe', "\xF9" => 'u',  "\xFA" => 'u',  "\xFB" => 'u',
    "\xFC" => 'ue', "\xFD" => 'y',  "\xFE" => 'th', "\xFF" => 'y',

    # Symbols.
    # NOTE(review): these keys were reconstructed from the order of the
    # values and from the reverse mapping in to_latin1() -- please confirm,
    # especially 0xB5 (micro sign), 0xB4 (acute accent) and 0xB8 (cedilla).
    "\xB1" => '+-',
    "\xB2" => '^2',
    "\xB3" => '^3',
    "\xB5" => 'ue',
    "\xB6" => 'P',
    "\xB7" => '.',
    "\xB9" => '^1',
    "\xBB" => '>>',
    "\xAB" => '<<',
    "\xB4" => "'",
    "\xB8" => ",",
    "\xAE" => "(R)",
    "\xA9" => "(C)",
);

# Every remaining character in 0x80 - 0xFF has no ASCII spelling and is
# removed, so the lookup in to_ascii() never yields undef.
for my $char (map { chr } 128 .. 255) {
    $ascii{$char} = '' unless defined $ascii{$char};
}

# Matches exactly one character in the range 0x80 - 0xFF.
my $non_ascii_char = qr/[\x80-\xFF]/;

# to_ascii($text)
#
# Returns a copy of $text in which every character in the range 0x80 - 0xFF
# is replaced by its ASCII spelling from %ascii (or removed when there is
# none).  Takes exactly one scalar (enforced by the prototype).
#
# Returns nothing (undef in scalar context, the empty list in list context)
# when $text is undef or the empty string.  The string "0" is ordinary text
# and is returned unchanged; the former "shift or return" dropped it.
sub to_ascii($) {
    my $text = shift;
    return unless defined $text && length $text;
    $text =~ s/($non_ascii_char)/$ascii{$1}/g;
    return $text;
}

# ASCII digraph => Latin-1 umlaut.  Used as the replacement side of the
# generic ae/oe/ue substitutions in to_latin1().  The umlauts are written as
# \x escapes so the table does not depend on the encoding of this file (with
# the raw literals lost, qw() paired the keys with each other).
my %mutation = (
    ae => "\xE4",
    Ae => "\xC4",
    oe => "\xF6",
    Oe => "\xD6",
    ue => "\xFC",
    Ue => "\xDC",
);

# Character classes used by the substitution rules.
# NOTE(review): only ASCII letters are listed here; if umlauts were meant to
# count as vowels/letters they are missing -- confirm against the upstream
# source.
my $vocal = qr/[aeiouAEIOU]/;
my $consonant = qr/[bcdfghjklmnpqrstvwxzBCDFGHJKLMNPQRSTVWXZ]/;
my $letter = qr/[abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ]/;

# Word prefixes (verb particles, adjectives, numerals) after which a new word
# stem starts.  The empty alternatives in the [Bb]e and [Dd]a groups are kept
# exactly as they were.  Non-ASCII letters are spelled as \x escapes:
# \xFC = u-umlaut, \xF6 = o-umlaut, \xDF = sharp s, \xDC = U-umlaut.
my $prefix = qr/(?:[Aa](?:[nb]|u[fs]|bend)|
                   [Bb]e(?:reit|i||isammen|vor|)|
                   [Dd](?:a(?>f\xFCr|neben|rum|r|)|
                       icke?|
                       rin|
                       urch|
                       rei
                    )|
                   [Ee]in|
                   [Ee]nt|
                   [Ee]r|
                   [Ff]e(?:hl|st)|
                   [Ff]rei|
                   (?:[Gg](?:erade|
                             leich|
                             ro\xDF|
                             ross)
                   )|
                   [Ll]os|
                   [Gg]e(?:heim(?:nis)?)?|
                   [Gg]enug|
                   [Gg]ut|
                   [Hh](?:alb|eraus|erum|in(?:(?:un)?ter)?)|
                   [Kk]rank|
                   [Kk]und|
                   [Mm]ehr|
                   [Mm]it|
                   [Nn]ach|
                   [Nn]icht|
                   [Nn]eun|
                   (?:[Ss](?:ch\xF6n|till|tramm))|
                   [Tt]ot|
                   [Uu]m|
                   [Vv][eo]r|
                   [Vv]ier(?:tel)?|
                   [Ww]e[gh]|
                   [Ww]ichtig|
                   [Uu]n|
                   [Zz]u(?:r\xFCck|sammen)?|
                   [Zz]wei|
                   [\xDC\xFC]ber
                )
               /x;

# Place names without their final "a": to_latin1() uses them to turn e.g.
# "Fuld<a-umlaut>r" back into "Fuldaer".
my $town_with_a = qr/[Ff]uld|
                     [Aa]lton|
                     [Gg]han|
                     [Gg]oth|
                     [Ll]ausch|
                     [Mm]oden|
                     [Nn]izz|
                     [Pp]anam|
                     [Pp]arm|
                     [Rr]ig|
                     [Ss]myrn|
                     [Ss]ofi/x;

# Place names without their final "o", used the same way for "...oer".
my $town_with_o = qr/[Kk]air|
                     [Oo]sl|
                     [Tt]og|
                     [Tt]oki/x;
                     
                     
# to_latin1($text)
#
# Tries to turn ASCII-transliterated German text (digraphs "ae", "oe", "ue",
# "ss", and spellings such as "(R)", "(C)", "<<...>>", "^2") back into its
# Latin-1 spelling and returns the converted string.
#
# The argument is copied into a localized $_ and every substitution below
# operates on that copy.  A false argument (undef, "", "0") makes the sub
# return immediately without a value.
#
# Structure: one guarded section per digraph ("ae", "oe", "ue", "ss"), each
# made of a general substitution hedged by look-behind/look-ahead exceptions
# followed by word-specific corrections; then symbols, quotation marks and a
# long list of foreign words and individual exceptions.  The statements are
# presumably order-dependent (later rules appear to correct the output of
# earlier ones), so they should not be reordered.
#
# NOTE(review): in this copy many patterns and replacement sides look as if
# their non-ASCII literal characters were lost (empty replacement sides,
# empty character classes "[]", empty patterns "//").  Perl re-uses the last
# successfully matched pattern for an empty pattern, so those statements do
# not do what their comments describe.  Confirm the file encoding against the
# upstream source before relying on this sub.
sub to_latin1($) {
    local $_ = shift or return;

    # --- "ae" section: only entered when the text contains "ae" or "Ae" ---
	if (/[Aa]e/) {
	    s/ (?<! [Gg]al)               # Galaempfnge
    	   (?<! [Jj]en)               # Jenaer Glas  
           (?<! Dek)                  # Dekaeder
           (?<! [^n]dek)
           (?<! [Hh]ex)
           (?<! [Ii]kos)
           (?<! [Tt]etr)
           (?<! [Oo]kt)
           (?<! [Mm]eg)
           (?<!  Pent)                # upper case, because of Gruppentter
           (?<! [Ss]of)               # Sofaecke
           ae
           (?!rleb)                   # e.g. Ahaerlebnis
           (?!rreg[^i])               #      Malariaerreger
           (?=\w)                     # no  at the end of a word
           (?!n\b)                    # even not if in plural
           (?!pid)                    # Choleraepidemie
           (?!in)                     # Kameraeinstellung
           (?!lit)                    # dingsda-elit
           (?!lem)                    # ...element
     	 //gx;
        s/(?<=[rtz])ae(?=n\b)//g;                # Eozn, Kapitn, Souvern
        s/phorae/phor/g;             # Epiphor
        s/kenae/ken/g;               # Myken
        s/ovae\b/ov/g;
        s/($town_with_a)r/$1aer/g;
        s/(?<=[mr])(?=ls?\b|li)/ae/g; # Mari
        s/(?<=\b[Pp]r)ae//g;         # Pr...
        s/(?<=bd)ae(?=n)//g;         # Molybdn
        s/(?<=[^Aa]ns)
          (?<!kens)
          (?<!eins)
          
          (?![uetg])      # Mensaessen != Ameisensure, Ansen, Ansgen
          (?![fm]t|         # Bratensfte, ...mter
              ngs|        # ...-ngste
              r[gz]|         # ...-srge, ...-rzte
              c[kh]|         # ...-scke
              n[dgk]e|         # ...-nderung, ...-nge, ... -snke
              [lh]e          #sle, ... she
          )
         /ae/gx; 
        
        s/\bAe (?!r[oiu])//gx;         # Ae at beginning of a word, like Aerobic != ra, ren
    }

    # --- "oe" section: only entered when the text contains "oe" or "Oe" ---
	if (/[Oo]e/) {
	    # oe => 
    	s/(?<! [bB]enz )             # Benzoesure 
	      (?<! [Bb]ru tt)            # Bruttoertrge
	      (?<! [Nn]e tt)             # Nettoertrge
	      (?<! [^e]ot)               # Fotoelektrizitt != Stereotne
	      (?<! iez)                  # Piezoelektronik
		  (?<! [Tt]herm)                 # Thermoelektrizitt
          (?<! [Bb]i)                # Bio...
          (?<!  ktr)                 # Elektro..., 
          (?<! [Gg]astr)               
          (?<! [Mm]ikr)              # Mikro...
          (?<! [Rr]homb)
          (?<! [Tt]rapez)
	      ( [oO] e )
	      (?! u)
          (?!ffi[^gn])                     # Koeffizent  != Hffige, Schffin, ...effekt
          (?!ffek)                   # ..effekt
          (?!rot)                    # rot
          (?!lem)                    # element
          (?!rgo)                    # ergometer
          (?!mpf)                    # empfang
    	 /$mutation{$1}/egx;
         
         s/($town_with_o)r/$1oer/g;
	 }
    
    # --- "ue" section: only entered when the text contains "ue" or "Ue" ---
	if (/[Uu]e/) {
        # NOTE(review): looks like a leftover debugging statement.  With an
        # empty pattern Perl re-uses the last successful match (here the
        # /[Uu]e/ test above), so as written this dies, with the text as the
        # message, for every input that reaches this point -- confirm and
        # remove or replace.
        die $_ if //;
	    # ue => , but take care for 'eue','ue', 'aue', 'que'
    	s/(?:(?<![oaeAEqQzZ]) | 
        	 (?<=nde) | 
	         (?<=ga)  |                 # Jogabung
    	     (?<=era) |                 # kameraberwachte
	    	 (?<=ve)  |                 # Reservebung
             (?<=deo) |                 # videober...
             (?<=ldo) |                 # Saldober...
   	 	     (?<=(?<![eEfFgGtT])[rR]e) |	  	# Ressieren, but not treuem
	         (?<=$vocal ne)|             # Routineberprfung 
             (?<=[Vv]orne)              # vorneber
    	   )
           (?<![Ss]tat)              # Statue
           (?<!x)                    # Sexuelle
           ( [uU] e )
           (?= [\w\-])                   # no  at the end of a word
	       (?! i)                    # Zueilende - -...
           (?! llst\w)                 # Spirituellste
         /$mutation{$1}/egx;
         
        s/(?<=[Zz])ue(?=g | n[dfgs] | c[hk] | be[lr] | rn[^t] | ri?ch | bl)//gx; 
        s/(?<=z)ue(?=rnte?[mnrs]?t?\b)//g;
        s/(?<=\b[Aa]bz)(?=rnt)/ue/g;        # Abzuerntende
        s/vn\b/vuen/g;
        s/(?<=ga)(?=r(in)?)\b/ue/g;      # ...gauer like Argauer, Thurgauer, ...
         
        # Warnings are switched off here because $1 is undefined whenever
        # the optional leading group does not take part in the match.
        {no warnings;
         s/((?:${prefix}|en)s)?(([tT])n(de?|\b))(?!chen|lein|lich)
          /$1 ? "$1$2" : "$3uen$4"/xgeo;# Grotuende, but abstnde, Stndchen
        }
        s/($prefix s? t)(r(ische?[mnrs]?|
                           i?[ns](nen)?)?\b)/$1ue$2/gx;
        s/($prefix t)(?=s?t\b|risch)/$1 ? "$1ue" : "$1"/gxe;  # zurcktuest, grotuerisch 
        s/grnz/gruenz/g;
        s/(?<!en)(s?)(?!\w)/ue$1/g;   # German words do not end in u-umlaut (presumably; the character is missing in this copy), apart from exceptions
        s/z(?!rich)([rs][befhiosz])/zue$1/g; # Zuerzhlende != zricherisch
    
        s/([uU] e) (?=bt)/$mutation{$1}/egx;  # bte
        s/(?<=[Dd])(?=ll)/ue/g;              # Duell
        s/ert/euert/g;   # geneuert
        s/re(?=[nv]|s?t)/reue/g;   # reuen
        
        s/([Au]ssen|
           [Dd]oppel|
           [Dd]reh|
           [Ee]ingangs|
           [Ee]ntree|
           [Ee]tagen|
           [Ff]all|
           [Gg]eheim|
           [Hh]aus|
           [Hh]inter|
           [Kk]eller|
           [Kk]irchen|
           [Kk]orridor|
           [Nn]ot|
           [Oo]fen|
           [Pp]endel|
           [Ss]aal|
           (?:[Ss]ch(?:iebe|
                       rank|
                       wing))|
           [Ss]eiten|
           [Tt]apeten|
           [Vv]erbindungs|
           [Vv]order|
           [Ww]agen|
           [Ww]ohnungs|
           [Zz]wischen) tuer (?!isch)/$1tr/gx;
    }
	
    # --- "ss" section: only entered when the text contains "ss" ---
	if (/ss/) {
   	     # russ => ru
    	 s/(?<=(?<![dD])(?<!sau)(?<![Vv]i)[rRfF][u])  # Brachosaurusses, Virusses
	       ss 
    	   (?! el) (?! le)                    # Brssel, Brssler
      	   (?! isch)                          # Russisch
           (?! land)                          # Ruland
           (?! tau)
           (?! o)
          //gx;
          
         # ss =>  with many exceptions
         s/(?<= $letter{2})
           (?<! $consonant $consonant)
           (?<! (?<! [bBfFmMsSeE] ) [u] )  # ben, Fu, ..., but Fluss
           (?<! [Mm] u)   # musst, musste, ...
           (?<! su)
           (?<! [bBdDfFhgGHkKlLmMnNpPrRsStTuUvVwWzZ] i )   # 'wissen', -nisse,
           (?<! [dgsklnt] )
           (?<! [bBdDfFgGhHiIjJkKnNtTwWlLpP] a )     # is a short vocal
           (?<! (?<![Ss]t) (?<![fF]) [rR]a)                # Rasse != Strae, fraen
           (?<! [Qq]u a)
           (?<! [bBfFgGhHlLnNpPsSwW] )          # (short vocal) Ablsse, 
           (?<! [cCdDfFgGhHjJlLmMnNpPrsStTwWzZ] e )           # is very short vocal
           (?<! sae)                             # Mensaessen
           (?<! ion )                            # Direktionssekretrin
           (?<! en )                             # dingenssachen 
           (?<! [fFhHoO] l o)
           (?<! (?<![gG]) [rR] o)                # Ross-Schlchter, but Baumgroe          
           (?<! [bBdDgGkKnNpPzZ] [o])
           (?<! [sS]chl )
           (?<! [bBkKuU]e)                       # Kessel
           (?<! [yj])
	       (?<! [br]r $vocal)
           (?<! [Pp]r ei)

           ss

           (?! ch )
           (?! isch )                        # genssisch
           (?! t[o])                   
           (?! tr[ao])   # Davisstrae, but Schweitreibende, ...-stroh
           (?! treif)
           (?! tur)   # Eissturm, but Schweituch
	       (?! t(?:ck|[hr]))  # Beweisstck,  Bischofssthle, Kursstrze, but Schweitcher
	       (?! tau?[bd])   # Preisstabilitt, ...-stadt
           (?! ist)   # Ditassistentin
           (?! te[pu])   # ...steppe, ...steuern
           (?! eins?\b)   # ...-sein
           (?! eit)
           (?! i[vl])    # Massiv, Fossil
           (?! l?ich)  # grsslich  ...sicherung
	       (?! ge)   # Kreissge
	       (?! [tu])    # Siegessule, Tagesstze
           (?! ier)   # Krassier
           (?! ag)   # Massage, lossagen
           (?! ard)   # Bussard
           (?! p [i]) # Ks-sptzle, # ...-spitze
           (?! pr[eai])                   # lossprche, sprechen, sprach
           (?! [oy])
           (?! eh)    # ...-seh, setzen
           (?! itz)   # ...-sitz
           (?! ist)
           (?! ees?\b) # ... -see
           (?! aise)  # foreign words don't have an 
           (?! age)
           (?! agte)  # ...-sagte
           (?! upp)   # ...-suppe
           (?! anc) # Renaissance
           (?! egn)  # ...-segne
	      //gxo;
          
          s/(?<= [AaEe]u)                        # drauen
	        ss 
            (?! []) 
            (?! e[ehg])                             # Chaussee, ...seh
            (?=\b|e|l)
		  //gxo;                    # scheulich 

         s/((?<=[fs][]) |
            (?<=[Ss]p[a])    
		   )                      # ends on long vocal plus ss, like
           ss                                  # Gef != Schluss
          (?! [])
          (?! er)                           # Gefe != Fsser
          (?! iv)
          (?=\b|e|$consonant)                 # end of word or plural or new composite (Gefverschluss)
         //gxo;
         
         s/(?<=verg[a])ss(?=e|\b)//g;  # verge

        s/(?<!chlo)                                # Schloss
          (?<! (?<![gG]) [rR] o)
          (?<! [bBpPgG] o )  # goss, Boss
          ((?<=o) |(?<=ie))          # Flo, gro, Griebrei, Nu, but no Ross-Schlchter 
          ss
          (?! ch)
          (?! t? [])
          (?! teu)
          (?! pr[eai])                   # lossprche
          (?=\b|es|$consonant)
        //gxo;
        s/(u|(?<!chl))sschen/$1chen/go;
        
        s/(?<=[bBeEnN][Ss]a)ss(?=\b|en)//g; # absa, beisammensaen
        s/($prefix)sass/$1sa/g;

        s/(?:(?<=[mM][ai])|(?<=[Ss])|(?<=[Ss]t)|(?<=[Ww]ei))ss(?=ge|lich)//go;
        
        s/(?<=[Gg]ro) ss (?=t|$vocal) (?!ist)//gx;   # grotte, gro-o...
        s/(?<=[Ss]pa) ss (?!ion) (?!age) (?!iv)//gx;         # spaig, but not Matthuspassion

        
        # NOTE(review): empty pattern -- Perl re-uses the last successfully
        # matched pattern here.  The block below reads like a list of
        # corrections that put "ss" back where a previous rule went too far;
        # the intended condition is presumably "text contains a sharp s" --
        # confirm.
        if (//) {
            s/(?<=[mM][u])(?=te|en|er)/ss/go;
            s/($prefix|en)?([Ss]a)([ea])/$1 ? "$1$2$3" : "$2ss$3"/goe;  
                     # Gefngnisinsasse, Sassafra != aufsaen, beisammensaen

            s/(?<=[rR] [a]) (?<![Gg]r)  (?=l |e [rl](?!$vocal) | chen)/ss/gxo;      # Rsser, Rssel
    
	        s/(?<=(?<![GgPp])
	              (?<![Bb]e)
		          (?<![Ee]nt)
    		      (?<![Vv]er)
	    	      [Rr]u
	          )
    	      
	          (?=[ei](?![sg])(?>nnen|n|)(\b|\S{5,}))
	        /ss/gxo;  # Russe, Russin, != Prue, != Gru, != Beruen, != Entruen, != Rues, != Ruige
            s/Ruki/Russki/g;
            
            #s/(?<=[rb])(?=[tpy])/ss/g;
            s/(?<=$consonant)(?=$consonant|y)/ss/g;
            s/(?<=[^i]e)(?=en)/ss/g;
            s/(?<=[Aa]u)(?=end(?!i)|etz)/ss/g;
            s/(?<!)u(?=el|lig)/uss/;  # Fussel
            s/(?<=[gG]lo)/ss/g;
            s/(?<=sa)(?=in)/ss/g;
            s/(?<=M[aou])(?=$vocal)/ss/g;  # Massai, Massaker, Massel, Mossul, Musselin
            s/ma(?=el|ak|ig)/mass/g;
            s/\bma(?=en\w)/mass/g;
            s/((?:\b|$prefix)flo)/$1ss/g;
        }
        
        s/($prefix)?scho(ss|)/$1 ? "$1schoss" : "scho"/ge;
	}
    
    # The remaining substitutions run unconditionally, whatever digraphs the
    # text contains.
    # symbols
    s/\(R\)//g;
    s/\(C\)//g;

    # special characters
    s/<<(\D*?)>>/$1/g;    # if there are numbers between,
    s/>>(\D*?)<</$1/g;    # it could be also a mathematical/physical equation

    # foreign words
    s/cademie/cadmie/g;
    s/rancais/ranais/g;
    s/leen/len/g;
    s/grement/grment/g;
    s/lencon/lenon/g;
    s/Ancien Regime/Ancien Rgime/g;
    s/Andre(?=s?\b)/Andr/g;
    s/Apercu/Aperu/g;
    s/([aA])pres/$1prs/g;
    s/Apero/Apro/g;
    s/Aragon/Aragn/g;
    s/deco/dco/g;
    s/socie/soci/g;
    s/([aA])suncion/$1suncin/g;
    s/([aA])ttache/$1ttach/g;
    s/Balpare/Balpar/g;
    s/Bartok/Bartk/g;
    s/Baumegrad/Baumgrad/g;
    s/Beaute/Beaut/g;
    s/Epoque/poque/g;
    s/Bjrnson/Bjrnson/g;
    s/Bogota/Bogot/g;
    s/Bokmal/Bokml/g;
    s/Boucle/Boucl/g;
    s/rree/rre/g;
    s/Bruyere/Bruyre/g;
    s/Bebe/Bb/g;
    s/echamel/chamel/g;
    s/Beret/Bret/g;
    s/([cC])afe/$1af/g;
    s/([cC])reme/$1rme/g;
    s/alderon/aldern/g;
    s/Cams/Cames/g;
    s/anape/anap/g;
    s/Canoa/Canossa/g;
    s/celebre/clbre/g;
    s/tesimo/tsimo/g;
    s/eparee/pare/g;
    s/Elysee/lyse/g;
    s/onniere/onnire/g;
    s/Charite/Charit/g;
    s/inee/ine/g;
    s/hicoree/hicore/g;
    s/Chateau/Chteau/g;
    s/Cigany/Cigny/g;
    s/Cinecitta/Cinecitt/g;
    s/Cliche/Clich/g;
    s/Cloisonne/Cloisonn/g;
    s/Cloque/Cloqu/g;
    s/dell\'Arte/dellArte/g;
    s/Communique/Communiqu/g;
    s/Consomme/Consomm/g;
    s/d\'Ampezzo/dAmpezzo/g;
    s/d\'Etat/dEtat/g;
    s/Coupe/Coup/g;
    s/Cox\'Z/Cox/g;
    s/Craquele/Craquel/g;
    s/roise/rois/g;
    s/(?<! l)
      (?<! pap)
      iere\b
     /ire/g;

    s/([cC])reme/$1rme/g;
    s/fraiche/frache/g;
    s/Crepe/Crpe/g;
    s/Csikos/Csiks/g;
    s/Csardas/Csrds/g;
    s/Cure/Cur/g;
    s/Cadiz/Cdiz/g;
    s/Centimo/Cntimo/g;
    s/Cezanne/Czanne/g;
    s/Cordoba/Crdoba/g;

    s/Dauphine/Dauphin/g;
    s/Dekollete/Dekollet/g;
    s/ieces/ices/g;
    s/trochu/trochuss/g;
    s/Drape/Drap/g;
    s/m(?=[et])/mss/g;
    s/Dvorak/Dvork/g;
    s/([dD])eja/$1j/g;
    s/habille/habill/g;
    s/Detente/Dtente/g;

    s/Ekarte/Ekart/g;
    s/El Nino/El Nio/g;
    s/Epingle/Epingl/g;
    s/Expose/Expos/g;
    s/Faure/Faur/g;
    s/Filler/Fillr/g;
    s/Siecle/Sicle/g;
    s/lel/lssel/g;
    s/Bergere/Bergre/g;
    s/Fouche/Fouch/g;
    s/Fouque/Fouqu/g;
    s/elementaire/lmentaire/g;
    s/ternite(s?)\b/ternit$1/g;
    s/risee/rise/g;
    s/roi(|ss)e/roiss/g;
    s/\bFrotte(?=\b|s\b)/Frott/g;
    s/Fume/Fum/g;
    s/([Gg])arcon/$1aron/g;
    s/([Gg])efss/$1ef/g;
    s/Gemechte/Gemchte/g;
    s/Geneve/Genve/g;
    s/Glace/Glac/g;
    s/Godemiche/Godemich/g;
    s/Godthab/Godthb/g;
    s/(?<=[Gg])(?=th)/oe/g;
    s/lame(?=\b|s)/lam/g;
    s/uyere/uyre/g;
    s/Grege/Grge/g;
    s/Gulyas/Gulys/g;
    s/abitue/abitu/g;
    s/Haler/Halr/g;
    s/ornuss/ornu/g;
    s/Horvath/Horvth/g;
    s/Hottehue/Hotteh/g;
    s/Hacek/Hcek/g;
    s/matozn/matozoen/g;
    s/chlosse(?![rsn])/chloe/g;
    s/doree/dore/g;
    s/Jerome/Jrme/g;
    s/Kodaly/Kodly/g;
    s/rzitiv/oerzitiv/g;
    s/nique/niqu/g;
    s/Kalman/Klmn/g;
    s/iberte/ibert/g;
    s/Egalite/galit/g;
    s/Linne/Linn/g;
    s/([fF])asss/$1as/g;
    s/Lome/Lom/g;
    s/Makore/Makor/g;
    s/Mallarme/Mallarm/g;
    s/aree/are/g;
    s/Maitre/Matre/g;
    s/([Mm]$vocal)liere\b/$1lire/g;
    s/Mouline/Moulin/g;
    s/Mousterien/Moustrien/g;
    s/Malaga/Mlaga/g;
    s/Meche/Mche/g;
    s/erimee/rime/g;
    s/eglige/eglig/g;
    s/eaute/eaut/g;
    s/egritude/gritude/g;
    s/anache/anach/g;
    s/Pappmache/Pappmach/g;
    s/Parana/Paran/g;
    s/Pathetique/Pathtique/g;
    s/Merite/Mrite/g;
    s/([Pp])reuss/$1reu/g;
    s/otege/oteg/g;
    s/recis/rcis/g;
    s/Prilitt/Puerilitt/g;
    s/Ratine/Ratin/g;
    s/Raye/Ray/g;
    s/Renforce/Renforc/g;
    s/Rene/Ren/g;
    s/Rev/Revue/g;
    s/Riksmal/Riksml/g;
    s/xupery/xupry/g;
    s/S(?:|ae)ns/Sans/g;
    s/Jose(?=s?\b)/Jos/g;
    s/bernaise/brnaise/g;
    s/Sassnitz/Sanitz/g;
	s/Saone/Sane/g;
	s/Schntr/Schntuer/g;   # more probable
	s/chling/chssling/g;
	s/Senor/Seor/g;
	s/Skues/Sks/g;
	s/Souffle(?=s|\b)/Souffl/g;
	s/Spass/Spa/g;
	s/(?<=[Cc])oupe/oup/g;
	s/Stl\b/Stal/g;
	s/Suarez/Surez/g;
	s/Sao\b/So/g;
	s/Tome(?=s|\b)/Tom/g;
	s/Seance/Sance/g;
	s/Serac/Srac/g;
	s/Sevres/Svres/g;
	s/Stassfurt/Stafurt/g;
	s/Troms/Troms/g;
	s/Trouvere/Trouvre/g;
	s/Tnder/Tnder/g;
	s/ariete/ariet/g;
	s/Welline/Wellin/g;
	s/Yucatan/Yucatn/g;
	s/((?<!\w)$prefix g)ass(?!$vocal)/$1a/gx;
	s/((?<!\w)$prefix)ass/$1a/gx;
    s/((?<!\w)$prefix)sse/$1e/g;
    s/(\A|\W)sse/$1e/g;
	s/($prefix) (?<![Ee]in)    # != einflen
                (?<![Ee]inzu)  #    einzuflen
       fl(e(n?|s?t))\b
      /$1flss$2/gx;   # exception of rule
    s/(${prefix}|\b)sche/$1schsse/go; # also an exception
    {no warnings; s/($prefix)?spre/$1sprsse/go;}
    s/($prefix)dre/$1drsse/g;
	s/\bass(?=\b|en\b)/a/go;  # a
    s/\^2//go;
    s/\^3//go;
    s/gemecht/gemcht/go;
    s/(?<=[Hh])ue\b//g;
    s/aelbe/asselbe/g;
    s/linnesch/linnsch/g;
    s/(?<=\b[Mm]u)ss(?=t?\b)//g;
    s/mech(?=e|s?t)/mch/g;
    s/metallise/mtallis/g;
    s/la(\W+)la/l$1l/g;
    s/(?<=\b[Oo]l)e\b//g;
    s/peu(\W+)a(\W+)peu/peu$1$2peu/g;
    s/reussisch/reuisch/g;
    s/sans gene\b/sans gne/g;
    s/(?<=\b[Ss]a)ss(?=(en|es?t)\b)//g;
    s/\bskal\b/skl/g;
    s/(?<=\bst)ue(?=nde)//g;
    s/(?<=[Tt]sch)ue(?=s)//g;
    s/([Tt])ete-a-([Tt])ete/$1te--$2te/g;
    s/voila/voil/g;
    s/Alandinseln/landinseln/g;
    s/Angstrm/ngstrm/g;
    s/Egalite/galit/g;
    s/(?<=[Ll]and)bue/busse/g;
    s/\ba(?=\W+(?:condition|deux mains|fonds perdu|gogo|jour|la))//g;
    s/a discretion/ discrtion/g;
    s/(?<=[Bb]ai)(?=e)/ss/g;
    s/(?<=[Hh]au)(?=e)/ss/g;
    s/\bue\././g;
    s/berflo/berfloss/g;
    s/chm/Aechm/g;  # e.g. Aechmea
    s/(?<=[Aa]n)(?=ro)/ae/g;
    s/prter/praeter/g;
    s/Anaphorae/Anaphor/g;
    s/Bdeker/Baedeker/g;
    s/Aspiratae/Aspirat/g;
    s/hamr(?=(?:[sn]|in|innen)?\b)/hamaer/g;   # Bahamer, Bahamerin and similar
    s/(?<=[Pp])(?=se)/ae/g;  # Bel Paese
    s/Clius/Caelius/g;
    s/(?<=Famul)ae\b//g;
    s/(?<=F)(?=ce)/ae/g;  # Faeces
    s/((Gan)?[Gg])raen/$1rn/g;
    s/(?<=[gG]r)(?=c(?:um|as))/ae/g;
    s/Hckel/Haeckel/g;
    s/Intimae/Intim/g;
    s/Kannae/Kann/g;
    s/Klavikulae/Klavikul/g;
    s/Kolossae/Koloss/g;
    s/Konjunktivae/Konjunktiv/g;
    s/Lrtes/Laertes/g;
    s/ariae\b/ari/g;
    s/\bMst(?![eu])/Maest/g;
    s/rcox/raecox/g;
    s/ichl/ichael/g;
    s/(?<=[Ss])ae(?=nger)//g;
    s/(?<=[Pp])(?=lla)/ae/g;
    s/Pht/Phaet/g;
    s/(?<=\b[Rr]a)                         # Raphael, Raffael ...
      (ff?|ph)l/$1ael/gx;       # != Niagarafllen
    s/($prefix)saesse/$1se/g;
    s/(?<!ph)(?=ro[bds])/ae/g;
    s/Tkwondo/Taekwondo/g;
    s/mondaen/mondn/g;
    s/o\.ae\./o../g;
    s/Al/Aloe/g;
    s/Apn/Apnoe/g;
    s/Bing/Boeing/g;
    s/c\./oec./g;
    s/Her/Heroe/g;
    s/Hk\b/Hoek/g;
    s/zn\b/zoen/g;
    s/obszoen/obszn/g;
    s/Itzeh/Itzehoe/g;
    s/Jl/Joel/g;
    s/(?<=[Kk])(?=du|x)/oe/g;   # Koedukation, ...
    s/Ob/Oboe/g;
    s/(?<=i)oe(?=se?[mnr]?)//g;
    s/(?<=\b[Pp])(?=bene|[mt]|sie)(?!tt)/oe/g;
    s/($prefix)p(?=bene|[mt]|sie)(?!tt)/$1poe/g;
    s/Pr(?=[^bps])/Proe/g;
    s/stroeme/strme/g;
    s/Crus/Crusoe/g;
    s/Z(?!\w)/Zoe/g;
    s/sben/soeben/g;
    s/Airbue/Airbusse/g;
    s/pioes/piosses/g;
    s/Cottbu/Cottbuss/g;
    s/Globu/Globuss/g;
    s/Beisae/Beisasse/g;
    s/Boruia/Borussia/g;
    s/Bra/Brass/g;
    s/Caia/Caissa/g;
    s/(?<=c$vocal)(?=$vocal)/ss/g;
    s/(?<=[Bb]u)(?=erl)/ss/g;
    s/(Cr?a)(?=ata|i|us)/$1ss/g;
    s/(?<=[CZ]erberu)/ss/g;
    s/Croiant/Croissant/g;
    s/Digloie/Diglossie/g;
    s/(?<=\b[Ee]i)(?=\w)/ss/g;
    s/Elsa/Elsass/g;
    # NOTE(review): empty pattern -- Perl re-uses the last successfully
    # matched pattern, so what this statement replaces depends on the rules
    # that matched before it.  Confirm the intended pattern.
    s//ss/g;
    s/rimae/rimasse/g;
    s/olo/oloss/g;
    s/(?<=[Ll]ai)/ss/g;  # Laissez-faire
    s/aachu/assachu/g;
    s/fu(?=l[ei])/fuss/g;
    s/groo/grosso/g;
    s/ktl/ktuel/g;
    s/(?<=nn)(?=lle)/ue/g;
    s/jz\b/juez/g;
    s/BDUe/BD/g;
    s/nn\b/nuen/g;
    s/g(tt|rr)e/gue$1e/g;
    s/Bl(?=chip|jean|movie)/Blue/g;
    s/(?<=[Mm]en)ue//g;
    s/Bnos/Buenos/g;
    s/Deng/Dengue/g;
    s/nndo(?=\b|s)/nuendo/g;
    s/(?<=b)(?=nt([ei]n(nen)?)?\b)/ue/g;
    s/Dnja/Duenja/g;
    s/(?<=[Dd])tt/uett/g;
    s/manl/manuel/g;
    s/(?<=[Ff]ond)/ue/g;
    s/Frte/Fuerte/g;
    s/Gricke/Guericke/g;
    s/(?<=[Gg])(?=rill)/ue/g;
    s/Grnica/Guernica/g;
    s/(?<=[vs]id)(?=n)/ue/g;
    s/flnz/fluenz/g;
    s/ongrn/ongruen/g;
    s/tnte/tuente/g;
    s/tlle/tuelle/g;
    s/([\w   ]+t)ll/$1 eq lc($1) ? "$1uell" : "$1ll"/gex; #eventuell != Hkeltll    
    s/Eventll/Eventuell/g;
    s/Lang/Langue/g;
    s/Manl/Manuel/g;
    s/Migl/Miguel/g;
    s/entt/enuett/g;
    s/grite/guerite/g;
    s/innd/inuend/g;
    s/(?<=[Gg])(?=st)/ue/g;
    s/Pblo/Pueblo/g;
    s/(?<=[Pp])(?=rto)/ue/g;
    s/Re(?=[nv])/Reue/g;
    s/Saml/Samuel/g;
    s/Sve/Sueve/g;
    s/Sz/Suez/g;
    s/(?<=[Tt])(?=rei)/ue/g;
    s/rdingen/Uerdingen/g;
    s/cker/Uecker/g;
    s/sll/suell/g;
    s/nnll/nnuell/g;

    s/\beinzng/einzueng/g;
    s/\bt(?=s?t\b)/tue/g;
    return $_;
}

1;
__END__

=head1 NAME

Lingua::DE::ASCII - Perl extension to convert german umlauts to and from ascii

=head1 SYNOPSIS

  use Lingua::DE::ASCII;
  print to_ascii("Umlaute wie \xE4,\xF6,\xFC,\xDF oder auch \xE9 usw. " .
                 "sind nicht im ASCII Format " .
                 "und werden deshalb umgeschrieben");
  print to_latin1("Dies muesste auch rueckwaerts funktionieren ma cherie");
                 

=head1 DESCRIPTION

This module enables conversion from and to the ASCII format of german texts.

It exports two functions, C<to_ascii> and C<to_latin1>, which do exactly what
they say.

Please note that both functions take exactly one scalar as argument and
not a whole list.

=head2 EXPORT

to_ascii($string)
to_latin1($string)

=head1 BUGS

That's only a stupid computer program, faced with a very hard AI problem.
So there will be some words that will always be hard to retranslate from ASCII
to Latin-1 encoding. A known example is the difference between
"MaE<szlig>(einheit)" and "Masseentropie" or similar. Other examples are
"flE<ouml>sse" and "FlE<ouml>E<szlig>e", or "(Der Schornstein) ruE<szlig>e"
and "Russe", "Geheimtuer(isch)" and "GeheimtE<uuml>r",
"anzu-ecken" and "anzE<uuml>cken".
Also, it's hard to find the right spelling for the prefixes "miss-" or
"miE<szlig>-".
When in doubt, I tried to use the more common word.
I tried it with a huge list of German words, but please tell me if you find a
serious back-translation bug.

This module is intended for ANSI code that is e.g. different from windows coding.

Misspelled words will cause a lot of extra mistakes by the program.
When in doubt, it's better to write with the new Rechtschreibung.

The C<to_latin1> method is not very quick,
it's programmed to handle as many exceptions as possible.

I avoided localizations for character handling
(thus it should work on every computer),
but the price is that in some rare cases of words with multiple umlauts
(like "HE<auml>keltE<uuml>lle") some buggy conversions can occur.
Please tell me if you find such words.

=head1 AUTHOR

Janek Schleicher, <bigj@kamelfreund.de>

=head1 SEE ALSO

Lingua::DE::Sentence   (another cool module)

=cut
