##---------------------------------------------------------------------------##
##  File:
##      @(#) iso8859.pl 2.4 99/08/13 22:10:35
##  Author:
##      Earl Hood       mhonarc@pobox.com
##  Description:
##      Routines to process data encoded in iso8859 character sets.
##---------------------------------------------------------------------------##
##    Copyright (C) 1996-1999   Earl Hood, mhonarc@pobox.com
##
##    This program is free software; you can redistribute it and/or modify
##    it under the terms of the GNU General Public License as published by
##    the Free Software Foundation; either version 2 of the License, or
##    (at your option) any later version.
##
##    This program is distributed in the hope that it will be useful,
##    but WITHOUT ANY WARRANTY; without even the implied warranty of
##    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
##    GNU General Public License for more details.
##
##    You should have received a copy of the GNU General Public License
##    along with this program; if not, write to the Free Software
##    Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
##    02111-1307, USA
##---------------------------------------------------------------------------##

package iso_8859;

###############################################################################
##      Mapping arrays for characters to entity references
###############################################################################
##  Every table below is a package hash keyed by the numeric character
##  code; the value is the entity reference text (including the leading
##  "&" and trailing ";") that stands for that character.

##---------------------------------------------------------------------------
##      US-ASCII/Common characters
##---------------------------------------------------------------------------
##  The three markup-significant ASCII characters plus NO-BREAK SPACE.
##  The values must be entity references, never the raw characters:
##  mapping "&" to "&" would leave the data unescaped.

%US_ASCII_To_Ent = (
    #--------------------------------------------------------------------------
    # Hex Code   Entity Ref     # ISO external entity and description
    #--------------------------------------------------------------------------
    0x26 => '&amp;',            # ISOnum : Ampersand
    0x3C => '&lt;',             # ISOnum : Less-than sign
    0x3E => '&gt;',             # ISOnum : Greater-than sign

    0xA0 => '&nbsp;',           # ISOnum : NO-BREAK SPACE
);
##---------------------------------------------------------------------------
##      ISO-8859-1: Latin-1
##---------------------------------------------------------------------------
##  Keys are ISO-8859-1 character codes 0xA1-0xFF; values are the
##  corresponding entity references.

%ISO_8859_1_To_Ent = (
    #--------------------------------------------------------------------------
    # Hex Code   Entity Ref     # ISO external entity and description
    #--------------------------------------------------------------------------
    0xA1 => '&iexcl;',          # ISOnum : INVERTED EXCLAMATION MARK
    0xA2 => '&cent;',           # ISOnum : CENT SIGN
    0xA3 => '&pound;',          # ISOnum : POUND SIGN
    0xA4 => '&curren;',         # ISOnum : CURRENCY SIGN
    0xA5 => '&yen;',            # ISOnum : YEN SIGN
    0xA6 => '&brvbar;',         # ISOnum : BROKEN BAR
    0xA7 => '&sect;',           # ISOnum : SECTION SIGN
    0xA8 => '&uml;',            # ISOdia : DIAERESIS
    0xA9 => '&copy;',           # ISOnum : COPYRIGHT SIGN
    0xAA => '&ordf;',           # ISOnum : FEMININE ORDINAL INDICATOR
    0xAB => '&laquo;',          # ISOnum : LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
    0xAC => '&not;',            # ISOnum : NOT SIGN
    0xAD => '&shy;',            # ISOnum : SOFT HYPHEN
    0xAE => '&reg;',            # ISOnum : REGISTERED SIGN
    0xAF => '&macr;',           # ISOdia : OVERLINE (MACRON)
    0xB0 => '&deg;',            # ISOnum : DEGREE SIGN
    0xB1 => '&plusmn;',         # ISOnum : PLUS-MINUS SIGN
    0xB2 => '&sup2;',           # ISOnum : SUPERSCRIPT TWO
    0xB3 => '&sup3;',           # ISOnum : SUPERSCRIPT THREE
    0xB4 => '&acute;',          # ISOdia : ACUTE ACCENT
    0xB5 => '&micro;',          # ISOnum : MICRO SIGN
    0xB6 => '&para;',           # ISOnum : PILCROW SIGN
    0xB7 => '&middot;',         # ISOnum : MIDDLE DOT
    0xB8 => '&cedil;',          # ISOdia : CEDILLA
    0xB9 => '&sup1;',           # ISOnum : SUPERSCRIPT ONE
    0xBA => '&ordm;',           # ISOnum : MASCULINE ORDINAL INDICATOR
    0xBB => '&raquo;',          # ISOnum : RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
    0xBC => '&frac14;',         # ISOnum : VULGAR FRACTION ONE QUARTER
    0xBD => '&frac12;',         # ISOnum : VULGAR FRACTION ONE HALF
    0xBE => '&frac34;',         # ISOnum : VULGAR FRACTION THREE QUARTERS
    0xBF => '&iquest;',         # ISOnum : INVERTED QUESTION MARK
    0xC0 => '&Agrave;',         # ISOlat1: LATIN CAPITAL LETTER A WITH GRAVE
    0xC1 => '&Aacute;',         # ISOlat1: LATIN CAPITAL LETTER A WITH ACUTE
    0xC2 => '&Acirc;',          # ISOlat1: LATIN CAPITAL LETTER A WITH CIRCUMFLEX
    0xC3 => '&Atilde;',         # ISOlat1: LATIN CAPITAL LETTER A WITH TILDE
    0xC4 => '&Auml;',           # ISOlat1: LATIN CAPITAL LETTER A WITH DIAERESIS
    0xC5 => '&Aring;',          # ISOlat1: LATIN CAPITAL LETTER A WITH RING ABOVE
    0xC6 => '&AElig;',          # ISOlat1: LATIN CAPITAL LETTER AE
    0xC7 => '&Ccedil;',         # ISOlat1: LATIN CAPITAL LETTER C WITH CEDILLA
    0xC8 => '&Egrave;',         # ISOlat1: LATIN CAPITAL LETTER E WITH GRAVE
    0xC9 => '&Eacute;',         # ISOlat1: LATIN CAPITAL LETTER E WITH ACUTE
    0xCA => '&Ecirc;',          # ISOlat1: LATIN CAPITAL LETTER E WITH CIRCUMFLEX
    0xCB => '&Euml;',           # ISOlat1: LATIN CAPITAL LETTER E WITH DIAERESIS
    0xCC => '&Igrave;',         # ISOlat1: LATIN CAPITAL LETTER I WITH GRAVE
    0xCD => '&Iacute;',         # ISOlat1: LATIN CAPITAL LETTER I WITH ACUTE
    0xCE => '&Icirc;',          # ISOlat1: LATIN CAPITAL LETTER I WITH CIRCUMFLEX
    0xCF => '&Iuml;',           # ISOlat1: LATIN CAPITAL LETTER I WITH DIAERESIS
    0xD0 => '&ETH;',            # ISOlat1: LATIN CAPITAL LETTER ETH (Icelandic)
    0xD1 => '&Ntilde;',         # ISOlat1: LATIN CAPITAL LETTER N WITH TILDE
    0xD2 => '&Ograve;',         # ISOlat1: LATIN CAPITAL LETTER O WITH GRAVE
    0xD3 => '&Oacute;',         # ISOlat1: LATIN CAPITAL LETTER O WITH ACUTE
    0xD4 => '&Ocirc;',          # ISOlat1: LATIN CAPITAL LETTER O WITH CIRCUMFLEX
    0xD5 => '&Otilde;',         # ISOlat1: LATIN CAPITAL LETTER O WITH TILDE
    0xD6 => '&Ouml;',           # ISOlat1: LATIN CAPITAL LETTER O WITH DIAERESIS
    0xD7 => '&times;',          # ISOnum : MULTIPLICATION SIGN
    0xD8 => '&Oslash;',         # ISOlat1: LATIN CAPITAL LETTER O WITH STROKE
    0xD9 => '&Ugrave;',         # ISOlat1: LATIN CAPITAL LETTER U WITH GRAVE
    0xDA => '&Uacute;',         # ISOlat1: LATIN CAPITAL LETTER U WITH ACUTE
    0xDB => '&Ucirc;',          # ISOlat1: LATIN CAPITAL LETTER U WITH CIRCUMFLEX
    0xDC => '&Uuml;',           # ISOlat1: LATIN CAPITAL LETTER U WITH DIAERESIS
    0xDD => '&Yacute;',         # ISOlat1: LATIN CAPITAL LETTER Y WITH ACUTE
    0xDE => '&THORN;',          # ISOlat1: LATIN CAPITAL LETTER THORN (Icelandic)
    0xDF => '&szlig;',          # ISOlat1: LATIN SMALL LETTER SHARP S (German)
    0xE0 => '&agrave;',         # ISOlat1: LATIN SMALL LETTER A WITH GRAVE
    0xE1 => '&aacute;',         # ISOlat1: LATIN SMALL LETTER A WITH ACUTE
    0xE2 => '&acirc;',          # ISOlat1: LATIN SMALL LETTER A WITH CIRCUMFLEX
    0xE3 => '&atilde;',         # ISOlat1: LATIN SMALL LETTER A WITH TILDE
    0xE4 => '&auml;',           # ISOlat1: LATIN SMALL LETTER A WITH DIAERESIS
    0xE5 => '&aring;',          # ISOlat1: LATIN SMALL LETTER A WITH RING ABOVE
    0xE6 => '&aelig;',          # ISOlat1: LATIN SMALL LETTER AE
    0xE7 => '&ccedil;',         # ISOlat1: LATIN SMALL LETTER C WITH CEDILLA
    0xE8 => '&egrave;',         # ISOlat1: LATIN SMALL LETTER E WITH GRAVE
    0xE9 => '&eacute;',         # ISOlat1: LATIN SMALL LETTER E WITH ACUTE
    0xEA => '&ecirc;',          # ISOlat1: LATIN SMALL LETTER E WITH CIRCUMFLEX
    0xEB => '&euml;',           # ISOlat1: LATIN SMALL LETTER E WITH DIAERESIS
    0xEC => '&igrave;',         # ISOlat1: LATIN SMALL LETTER I WITH GRAVE
    0xED => '&iacute;',         # ISOlat1: LATIN SMALL LETTER I WITH ACUTE
    0xEE => '&icirc;',          # ISOlat1: LATIN SMALL LETTER I WITH CIRCUMFLEX
    0xEF => '&iuml;',           # ISOlat1: LATIN SMALL LETTER I WITH DIAERESIS
    0xF0 => '&eth;',            # ISOlat1: LATIN SMALL LETTER ETH (Icelandic)
    0xF1 => '&ntilde;',         # ISOlat1: LATIN SMALL LETTER N WITH TILDE
    0xF2 => '&ograve;',         # ISOlat1: LATIN SMALL LETTER O WITH GRAVE
    0xF3 => '&oacute;',         # ISOlat1: LATIN SMALL LETTER O WITH ACUTE
    0xF4 => '&ocirc;',          # ISOlat1: LATIN SMALL LETTER O WITH CIRCUMFLEX
    0xF5 => '&otilde;',         # ISOlat1: LATIN SMALL LETTER O WITH TILDE
    0xF6 => '&ouml;',           # ISOlat1: LATIN SMALL LETTER O WITH DIAERESIS
    0xF7 => '&divide;',         # ISOnum : DIVISION SIGN
    0xF8 => '&oslash;',         # ISOlat1: LATIN SMALL LETTER O WITH STROKE
    0xF9 => '&ugrave;',         # ISOlat1: LATIN SMALL LETTER U WITH GRAVE
    0xFA => '&uacute;',         # ISOlat1: LATIN SMALL LETTER U WITH ACUTE
    0xFB => '&ucirc;',          # ISOlat1: LATIN SMALL LETTER U WITH CIRCUMFLEX
    0xFC => '&uuml;',           # ISOlat1: LATIN SMALL LETTER U WITH DIAERESIS
    0xFD => '&yacute;',         # ISOlat1: LATIN SMALL LETTER Y WITH ACUTE
    0xFE => '&thorn;',          # ISOlat1: LATIN SMALL LETTER THORN (Icelandic)
    0xFF => '&yuml;',           # ISOlat1: LATIN SMALL LETTER Y WITH DIAERESIS
);

# Alias: %LATIN1_To_Ent shares storage with %ISO_8859_1_To_Ent.
*LATIN1_To_Ent = *ISO_8859_1_To_Ent;

##---------------------------------------------------------------------------
##      ISO-8859-2: Latin-2
##---------------------------------------------------------------------------

%ISO_8859_2_To_Ent = (
    #--------------------------------------------------------------------------
    # Hex Code   Entity Ref     # ISO external entity and description
    #--------------------------------------------------------------------------
    0xA1 => '&Aogon;',          # ISOlat2: LATIN CAPITAL LETTER A WITH OGONEK
    0xA2 => '&breve;',          # ISOdia : BREVE
    0xA3 => '&Lstrok;',         # ISOlat2: LATIN CAPITAL LETTER L WITH STROKE
    0xA4 => '&curren;',         # ISOnum : CURRENCY SIGN
    0xA5 => '&Lcaron;',         # ISOlat2: LATIN CAPITAL LETTER L WITH CARON
    0xA6 => '&Sacute;',         # ISOlat2: LATIN CAPITAL LETTER S WITH ACUTE
    0xA7 => '&sect;',           # ISOnum : SECTION SIGN
    0xA8 => '&uml;',            # ISOdia : DIAERESIS
    0xA9 => '&Scaron;',         # ISOlat2: LATIN CAPITAL LETTER S WITH CARON
    0xAA => '&Scedil;',         # ISOlat2: LATIN CAPITAL LETTER S WITH CEDILLA
    0xAB => '&Tcaron;',         # ISOlat2: LATIN CAPITAL LETTER T WITH CARON
    0xAC => '&Zacute;',         # ISOlat2: LATIN CAPITAL LETTER Z WITH ACUTE
    0xAD => '&shy;',            # ISOnum : SOFT HYPHEN
    0xAE => '&Zcaron;',         # ISOlat2: LATIN CAPITAL LETTER Z WITH CARON
    0xAF => '&Zdot;',           # ISOlat2: LATIN CAPITAL LETTER Z WITH DOT ABOVE
    0xB0 => '&deg;',            # ISOnum : DEGREE SIGN
    0xB1 => '&aogon;',          # ISOlat2: LATIN SMALL LETTER A WITH OGONEK
    0xB2 => '&ogon;',           # ISOdia : OGONEK
    0xB3 => '&lstrok;',         # ISOlat2: LATIN SMALL LETTER L WITH STROKE
    0xB4 => '&acute;',          # ISOdia : ACUTE ACCENT
    0xB5 => '&lcaron;',         # ISOlat2: LATIN SMALL LETTER L WITH CARON
    0xB6 => '&sacute;',         # ISOlat2: LATIN SMALL LETTER S WITH ACUTE
    0xB7 => '&caron;',          # ISOdia : CARON
    0xB8 => '&cedil;',          # ISOdia : CEDILLA
    0xB9 => '&scaron;',         # ISOlat2: LATIN SMALL LETTER S WITH CARON
    0xBA => '&scedil;',         # ISOlat2: LATIN SMALL LETTER S WITH CEDILLA
    0xBB => '&tcaron;',         # ISOlat2: LATIN SMALL LETTER T WITH CARON
    0xBC => '&zacute;',         # ISOlat2: LATIN SMALL LETTER Z WITH ACUTE
    0xBD => '&dblac;',          # ISOdia : DOUBLE ACUTE ACCENT
    0xBE => '&zcaron;',         # ISOlat2: LATIN SMALL LETTER Z WITH CARON
    0xBF => '&zdot;',           # ISOlat2: LATIN SMALL LETTER Z WITH DOT ABOVE
    0xC0 => '&Racute;',         # ISOlat2: LATIN CAPITAL LETTER R WITH ACUTE
    0xC1 => '&Aacute;',         # ISOlat1: LATIN CAPITAL LETTER A WITH ACUTE
    0xC2 => '&Acirc;',          # ISOlat1: LATIN CAPITAL LETTER A WITH CIRCUMFLEX
    0xC3 => '&Abreve;',         # ISOlat2: LATIN CAPITAL LETTER A WITH BREVE
    0xC4 => '&Auml;',           # ISOlat1: LATIN CAPITAL LETTER A WITH DIAERESIS
    0xC5 => '&Lacute;',         # ISOlat2: LATIN CAPITAL LETTER L WITH ACUTE
    0xC6 => '&Cacute;',         # ISOlat2: LATIN CAPITAL LETTER C WITH ACUTE
    0xC7 => '&Ccedil;',         # ISOlat1: LATIN CAPITAL LETTER C WITH CEDILLA
    0xC8 => '&Ccaron;',         # ISOlat2: LATIN CAPITAL LETTER C WITH CARON
    0xC9 => '&Eacute;',         # ISOlat1: LATIN CAPITAL LETTER E WITH ACUTE
    0xCA => '&Eogon;',          # ISOlat2: LATIN CAPITAL LETTER E WITH OGONEK
    0xCB => '&Euml;',           # ISOlat1: LATIN CAPITAL LETTER E WITH DIAERESIS
    0xCC => '&Ecaron;',         # ISOlat2: LATIN CAPITAL LETTER E WITH CARON
    0xCD => '&Iacute;',         # ISOlat1: LATIN CAPITAL LETTER I WITH ACUTE
    0xCE => '&Icirc;',          # ISOlat1: LATIN CAPITAL LETTER I WITH CIRCUMFLEX
    0xCF => '&Dcaron;',         # ISOlat2: LATIN CAPITAL LETTER D WITH CARON
    0xD0 => '&Dstrok;',         # ISOlat2: LATIN CAPITAL LETTER D WITH STROKE
    0xD1 => '&Nacute;',         # ISOlat2: LATIN CAPITAL LETTER N WITH ACUTE
    0xD2 => '&Ncaron;',         # ISOlat2: LATIN CAPITAL LETTER N WITH CARON
    0xD3 => '&Oacute;',         # ISOlat1: LATIN CAPITAL LETTER O WITH ACUTE
    0xD4 => '&Ocirc;',          # ISOlat1: LATIN CAPITAL LETTER O WITH CIRCUMFLEX
    0xD5 => '&Odblac;',         # ISOlat2: LATIN CAPITAL LETTER O WITH DOUBLE ACUTE
    0xD6 => '&Ouml;',           # ISOlat1: LATIN CAPITAL LETTER O WITH DIAERESIS
    0xD7 => '&times;',          # ISOnum : MULTIPLICATION SIGN
    0xD8 => '&Rcaron;',         # ISOlat2: LATIN CAPITAL LETTER R WITH CARON
    0xD9 => '&Uring;',          # ISOlat2: LATIN CAPITAL LETTER U WITH RING ABOVE
    0xDA => '&Uacute;',         # ISOlat1: LATIN CAPITAL LETTER U WITH ACUTE
    0xDB => '&Udblac;',         # ISOlat2: LATIN CAPITAL LETTER U WITH DOUBLE ACUTE
    0xDC => '&Uuml;',           # ISOlat1: LATIN CAPITAL LETTER U WITH DIAERESIS
    0xDD => '&Yacute;',         # ISOlat1: LATIN CAPITAL LETTER Y WITH ACUTE
    0xDE => '&Tcedil;',         # ISOlat2: LATIN CAPITAL LETTER T WITH CEDILLA
    0xDF => '&szlig;',          # ISOlat1: LATIN SMALL LETTER SHARP S (German)
    0xE0 => '&racute;',         # ISOlat2: LATIN SMALL LETTER R WITH ACUTE
    0xE1 => '&aacute;',         # ISOlat1: LATIN SMALL LETTER A WITH ACUTE
    0xE2 => '&acirc;',          # ISOlat1: LATIN SMALL LETTER A WITH CIRCUMFLEX
    0xE3 => '&abreve;',         # ISOlat2: LATIN SMALL LETTER A WITH BREVE
    0xE4 => '&auml;',           # ISOlat1: LATIN SMALL LETTER A WITH DIAERESIS
    0xE5 => '&lacute;',         # ISOlat2: LATIN SMALL LETTER L WITH ACUTE
    0xE6 => '&cacute;',         # ISOlat2: LATIN SMALL LETTER C WITH ACUTE
    0xE7 => '&ccedil;',         # ISOlat1: LATIN SMALL LETTER C WITH CEDILLA
    0xE8 => '&ccaron;',         # ISOlat2: LATIN SMALL LETTER C WITH CARON
    0xE9 => '&eacute;',         # ISOlat1: LATIN SMALL LETTER E WITH ACUTE
    0xEA => '&eogon;',          # ISOlat2: LATIN SMALL LETTER E WITH OGONEK
    0xEB => '&euml;',           # ISOlat1: LATIN SMALL LETTER E WITH DIAERESIS
    0xEC => '&ecaron;',         # ISOlat2: LATIN SMALL LETTER E WITH CARON
    0xED => '&iacute;',         # ISOlat1: LATIN SMALL LETTER I WITH ACUTE
    0xEE => '&icirc;',          # ISOlat1: LATIN SMALL LETTER I WITH CIRCUMFLEX
    0xEF => '&dcaron;',         # ISOlat2: LATIN SMALL LETTER D WITH CARON
    0xF0 => '&dstrok;',         # ISOlat2: LATIN SMALL LETTER D WITH STROKE
    0xF1 => '&nacute;',         # ISOlat2: LATIN SMALL LETTER N WITH ACUTE
    0xF2 => '&ncaron;',         # ISOlat2: LATIN SMALL LETTER N WITH CARON
    0xF3 => '&oacute;',         # ISOlat1: LATIN SMALL LETTER O WITH ACUTE
    0xF4 => '&ocirc;',          # ISOlat1: LATIN SMALL LETTER O WITH CIRCUMFLEX
    0xF5 => '&odblac;',         # ISOlat2: LATIN SMALL LETTER O WITH DOUBLE ACUTE
    0xF6 => '&ouml;',           # ISOlat1: LATIN SMALL LETTER O WITH DIAERESIS
    0xF7 => '&divide;',         # ISOnum : DIVISION SIGN
    0xF8 => '&rcaron;',         # ISOlat2: LATIN SMALL LETTER R WITH CARON
    0xF9 => '&uring;',          # ISOlat2: LATIN SMALL LETTER U WITH RING ABOVE
    0xFA => '&uacute;',         # ISOlat1: LATIN SMALL LETTER U WITH ACUTE
    0xFB => '&udblac;',         # ISOlat2: LATIN SMALL LETTER U WITH DOUBLE ACUTE
    0xFC => '&uuml;',           # ISOlat1: LATIN SMALL LETTER U WITH DIAERESIS
    0xFD => '&yacute;',         # ISOlat1: LATIN SMALL LETTER Y WITH ACUTE
    0xFE => '&tcedil;',         # ISOlat2: LATIN SMALL LETTER T WITH CEDILLA
    0xFF => '&dot;',            # ISOdia : DOT ABOVE
);

# Alias: %LATIN2_To_Ent shares storage with %ISO_8859_2_To_Ent.
*LATIN2_To_Ent = *ISO_8859_2_To_Ent;

##---------------------------------------------------------------------------
##      ISO-8859-3: Latin-3
##---------------------------------------------------------------------------
##  Codes 0xA5, 0xAE, 0xBE, 0xC3, 0xD0, 0xE3 and 0xF0 have no entry.

%ISO_8859_3_To_Ent = (
    #--------------------------------------------------------------------------
    # Hex Code   Entity Ref     # ISO external entity and description
    #--------------------------------------------------------------------------
    0xA1 => '&Hstrok;',         # ISOlat2: LATIN CAPITAL LETTER H WITH STROKE
    0xA2 => '&breve;',          # ISOdia : BREVE
    0xA3 => '&pound;',          # ISOnum : POUND SIGN
    0xA4 => '&curren;',         # ISOnum : CURRENCY SIGN
    0xA6 => '&Hcirc;',          # ISOlat2: LATIN CAPITAL LETTER H WITH CIRCUMFLEX
    0xA7 => '&sect;',           # ISOnum : SECTION SIGN
    0xA8 => '&uml;',            # ISOdia : DIAERESIS
    0xA9 => '&Idot;',           # ISOlat2: LATIN CAPITAL LETTER I WITH DOT ABOVE
    0xAA => '&Scedil;',         # ISOlat2: LATIN CAPITAL LETTER S WITH CEDILLA
    0xAB => '&Gbreve;',         # ISOlat2: LATIN CAPITAL LETTER G WITH BREVE
    0xAC => '&Jcirc;',          # ISOlat2: LATIN CAPITAL LETTER J WITH CIRCUMFLEX
    0xAD => '&shy;',            # ISOnum : SOFT HYPHEN
    0xAF => '&Zdot;',           # ISOlat2: LATIN CAPITAL LETTER Z WITH DOT ABOVE
    0xB0 => '&deg;',            # ISOnum : DEGREE SIGN
    0xB1 => '&hstrok;',         # ISOlat2: LATIN SMALL LETTER H WITH STROKE
    0xB2 => '&sup2;',           # ISOnum : SUPERSCRIPT TWO
    0xB3 => '&sup3;',           # ISOnum : SUPERSCRIPT THREE
    0xB4 => '&acute;',          # ISOdia : ACUTE ACCENT
    0xB5 => '&micro;',          # ISOnum : MICRO SIGN
    0xB6 => '&hcirc;',          # ISOlat2: LATIN SMALL LETTER H WITH CIRCUMFLEX
    0xB7 => '&middot;',         # ISOnum : MIDDLE DOT
    0xB8 => '&cedil;',          # ISOdia : CEDILLA
    0xB9 => '&inodot;',         # ISOlat2: LATIN SMALL LETTER I DOTLESS
    0xBA => '&scedil;',         # ISOlat2: LATIN SMALL LETTER S WITH CEDILLA
    0xBB => '&gbreve;',         # ISOlat2: LATIN SMALL LETTER G WITH BREVE
    0xBC => '&jcirc;',          # ISOlat2: LATIN SMALL LETTER J WITH CIRCUMFLEX
    0xBD => '&frac12;',         # ISOnum : VULGAR FRACTION ONE HALF
    0xBF => '&zdot;',           # ISOlat2: LATIN SMALL LETTER Z WITH DOT ABOVE
    0xC0 => '&Agrave;',         # ISOlat1: LATIN CAPITAL LETTER A WITH GRAVE
    0xC1 => '&Aacute;',         # ISOlat1: LATIN CAPITAL LETTER A WITH ACUTE
    0xC2 => '&Acirc;',          # ISOlat1: LATIN CAPITAL LETTER A WITH CIRCUMFLEX
    0xC4 => '&Auml;',           # ISOlat1: LATIN CAPITAL LETTER A WITH DIAERESIS
    0xC5 => '&Cdot;',           # ISOlat2: LATIN CAPITAL LETTER C WITH DOT ABOVE
    0xC6 => '&Ccirc;',          # ISOlat2: LATIN CAPITAL LETTER C WITH CIRCUMFLEX
    0xC7 => '&Ccedil;',         # ISOlat1: LATIN CAPITAL LETTER C WITH CEDILLA
    0xC8 => '&Egrave;',         # ISOlat1: LATIN CAPITAL LETTER E WITH GRAVE
    0xC9 => '&Eacute;',         # ISOlat1: LATIN CAPITAL LETTER E WITH ACUTE
    0xCA => '&Ecirc;',          # ISOlat1: LATIN CAPITAL LETTER E WITH CIRCUMFLEX
    0xCB => '&Euml;',           # ISOlat1: LATIN CAPITAL LETTER E WITH DIAERESIS
    0xCC => '&Igrave;',         # ISOlat1: LATIN CAPITAL LETTER I WITH GRAVE
    0xCD => '&Iacute;',         # ISOlat1: LATIN CAPITAL LETTER I WITH ACUTE
    0xCE => '&Icirc;',          # ISOlat1: LATIN CAPITAL LETTER I WITH CIRCUMFLEX
    0xCF => '&Iuml;',           # ISOlat1: LATIN CAPITAL LETTER I WITH DIAERESIS
    0xD1 => '&Ntilde;',         # ISOlat1: LATIN CAPITAL LETTER N WITH TILDE
    0xD2 => '&Ograve;',         # ISOlat1: LATIN CAPITAL LETTER O WITH GRAVE
    0xD3 => '&Oacute;',         # ISOlat1: LATIN CAPITAL LETTER O WITH ACUTE
    0xD4 => '&Ocirc;',          # ISOlat1: LATIN CAPITAL LETTER O WITH CIRCUMFLEX
    0xD5 => '&Gdot;',           # ISOlat2: LATIN CAPITAL LETTER G WITH DOT ABOVE
    0xD6 => '&Ouml;',           # ISOlat1: LATIN CAPITAL LETTER O WITH DIAERESIS
    0xD7 => '&times;',          # ISOnum : MULTIPLICATION SIGN
    0xD8 => '&Gcirc;',          # ISOlat2: LATIN CAPITAL LETTER G WITH CIRCUMFLEX
    0xD9 => '&Ugrave;',         # ISOlat1: LATIN CAPITAL LETTER U WITH GRAVE
    0xDA => '&Uacute;',         # ISOlat1: LATIN CAPITAL LETTER U WITH ACUTE
    0xDB => '&Ucirc;',          # ISOlat1: LATIN CAPITAL LETTER U WITH CIRCUMFLEX
    0xDC => '&Uuml;',           # ISOlat1: LATIN CAPITAL LETTER U WITH DIAERESIS
    0xDD => '&Ubreve;',         # ISOlat2: LATIN CAPITAL LETTER U WITH BREVE
    0xDE => '&Scirc;',          # ISOlat2: LATIN CAPITAL LETTER S WITH CIRCUMFLEX
    0xDF => '&szlig;',          # ISOlat1: LATIN SMALL LETTER SHARP S (German)
    0xE0 => '&agrave;',         # ISOlat1: LATIN SMALL LETTER A WITH GRAVE
    0xE1 => '&aacute;',         # ISOlat1: LATIN SMALL LETTER A WITH ACUTE
    0xE2 => '&acirc;',          # ISOlat1: LATIN SMALL LETTER A WITH CIRCUMFLEX
    0xE4 => '&auml;',           # ISOlat1: LATIN SMALL LETTER A WITH DIAERESIS
    0xE5 => '&cdot;',           # ISOlat2: LATIN SMALL LETTER C WITH DOT ABOVE
    0xE6 => '&ccirc;',          # ISOlat2: LATIN SMALL LETTER C WITH CIRCUMFLEX
    0xE7 => '&ccedil;',         # ISOlat1: LATIN SMALL LETTER C WITH CEDILLA
    0xE8 => '&egrave;',         # ISOlat1: LATIN SMALL LETTER E WITH GRAVE
    0xE9 => '&eacute;',         # ISOlat1: LATIN SMALL LETTER E WITH ACUTE
    0xEA => '&ecirc;',          # ISOlat1: LATIN SMALL LETTER E WITH CIRCUMFLEX
    0xEB => '&euml;',           # ISOlat1: LATIN SMALL LETTER E WITH DIAERESIS
    0xEC => '&igrave;',         # ISOlat1: LATIN SMALL LETTER I WITH GRAVE
    0xED => '&iacute;',         # ISOlat1: LATIN SMALL LETTER I WITH ACUTE
    0xEE => '&icirc;',          # ISOlat1: LATIN SMALL LETTER I WITH CIRCUMFLEX
    0xEF => '&iuml;',           # ISOlat1: LATIN SMALL LETTER I WITH DIAERESIS
    0xF1 => '&ntilde;',         # ISOlat1: LATIN SMALL LETTER N WITH TILDE
    0xF2 => '&ograve;',         # ISOlat1: LATIN SMALL LETTER O WITH GRAVE
    0xF3 => '&oacute;',         # ISOlat1: LATIN SMALL LETTER O WITH ACUTE
    0xF4 => '&ocirc;',          # ISOlat1: LATIN SMALL LETTER O WITH CIRCUMFLEX
    0xF5 => '&gdot;',           # ISOlat2: LATIN SMALL LETTER G WITH DOT ABOVE
    0xF6 => '&ouml;',           # ISOlat1: LATIN SMALL LETTER O WITH DIAERESIS
    0xF7 => '&divide;',         # ISOnum : DIVISION SIGN
    0xF8 => '&gcirc;',          # ISOlat2: LATIN SMALL LETTER G WITH CIRCUMFLEX
    0xF9 => '&ugrave;',         # ISOlat1: LATIN SMALL LETTER U WITH GRAVE
    0xFA => '&uacute;',         # ISOlat1: LATIN SMALL LETTER U WITH ACUTE
    0xFB => '&ucirc;',          # ISOlat1: LATIN SMALL LETTER U WITH CIRCUMFLEX
    0xFC => '&uuml;',           # ISOlat1: LATIN SMALL LETTER U WITH DIAERESIS
    0xFD => '&ubreve;',         # ISOlat2: LATIN SMALL LETTER U WITH BREVE
    0xFE => '&scirc;',          # ISOlat2: LATIN SMALL LETTER S WITH CIRCUMFLEX
    0xFF => '&dot;',            # ISOdia : DOT ABOVE
);

# Alias: %LATIN3_To_Ent shares storage with %ISO_8859_3_To_Ent.
*LATIN3_To_Ent = *ISO_8859_3_To_Ent;

##---------------------------------------------------------------------------
##      ISO-8859-4: Latin-4
##---------------------------------------------------------------------------

%ISO_8859_4_To_Ent = (
    #--------------------------------------------------------------------------
    # Hex Code   Entity Ref     # ISO external entity and description
    #--------------------------------------------------------------------------
    0xA1 => '&Aogon;',          # ISOlat2: LATIN CAPITAL LETTER A WITH OGONEK
    0xA2 => '&kgreen;',         # ISOlat2: LATIN SMALL LETTER KRA (Greenlandic)
    0xA3 => '&Rcedil;',         # ISOlat2: LATIN CAPITAL LETTER R WITH CEDILLA
    0xA4 => '&curren;',         # ISOnum : CURRENCY SIGN
    0xA5 => '&Itilde;',         # ISOlat2: LATIN CAPITAL LETTER I WITH TILDE
    0xA6 => '&Lcedil;',         # ISOlat2: LATIN CAPITAL LETTER L WITH CEDILLA
    0xA7 => '&sect;',           # ISOnum : SECTION SIGN
    0xA8 => '&uml;',            # ISOdia : DIAERESIS
    0xA9 => '&Scaron;',         # ISOlat2: LATIN CAPITAL LETTER S WITH CARON
    0xAA => '&Emacr;',          # ISOlat2: LATIN CAPITAL LETTER E WITH MACRON
    0xAB => '&Gcedil;',         # ISOlat2: LATIN CAPITAL LETTER G WITH CEDILLA
    0xAC => '&Tstrok;',         # ISOlat2: LATIN CAPITAL LETTER T WITH STROKE
    0xAD => '&shy;',            # ISOnum : SOFT HYPHEN
    0xAE => '&Zcaron;',         # ISOlat2: LATIN CAPITAL LETTER Z WITH CARON
    0xAF => '&macr;',           # ISOdia : MACRON
    0xB0 => '&deg;',            # ISOnum : DEGREE SIGN
    0xB1 => '&aogon;',          # ISOlat2: LATIN SMALL LETTER A WITH OGONEK
    0xB2 => '&ogon;',           # ISOdia : OGONEK
    0xB3 => '&rcedil;',         # ISOlat2: LATIN SMALL LETTER R WITH CEDILLA
    0xB4 => '&acute;',          # ISOdia : ACUTE ACCENT
    0xB5 => '&itilde;',         # ISOlat2: LATIN SMALL LETTER I WITH TILDE
    0xB6 => '&lcedil;',         # ISOlat2: LATIN SMALL LETTER L WITH CEDILLA
    0xB7 => '&caron;',          # ISOdia : CARON
    0xB8 => '&cedil;',          # ISOdia : CEDILLA
    0xB9 => '&scaron;',         # ISOlat2: LATIN SMALL LETTER S WITH CARON
    0xBA => '&emacr;',          # ISOlat2: LATIN SMALL LETTER E WITH MACRON
    # NOTE(review): "gcedil" does not appear to be a standard ISOlat2/HTML
    # entity name (only "Gcedil" is); kept as found -- confirm consumers.
    0xBB => '&gcedil;',         # ISOlat2: LATIN SMALL LETTER G WITH CEDILLA
    0xBC => '&tstrok;',         # ISOlat2: LATIN SMALL LETTER T WITH STROKE
    0xBD => '&ENG;',            # ISOlat2: LATIN CAPITAL LETTER ENG (Lappish)
    0xBE => '&zcaron;',         # ISOlat2: LATIN SMALL LETTER Z WITH CARON
    0xBF => '&eng;',            # ISOlat2: LATIN SMALL LETTER ENG (Lappish)
    0xC0 => '&Amacr;',          # ISOlat2: LATIN CAPITAL LETTER A WITH MACRON
    0xC1 => '&Aacute;',         # ISOlat1: LATIN CAPITAL LETTER A WITH ACUTE
    0xC2 => '&Acirc;',          # ISOlat1: LATIN CAPITAL LETTER A WITH CIRCUMFLEX
    0xC3 => '&Atilde;',         # ISOlat1: LATIN CAPITAL LETTER A WITH TILDE
    0xC4 => '&Auml;',           # ISOlat1: LATIN CAPITAL LETTER A WITH DIAERESIS
    0xC5 => '&Aring;',          # ISOlat1: LATIN CAPITAL LETTER A WITH RING ABOVE
    0xC6 => '&AElig;',          # ISOlat1: LATIN CAPITAL LETTER AE
    0xC7 => '&Iogon;',          # ISOlat2: LATIN CAPITAL LETTER I WITH OGONEK
    0xC8 => '&Ccaron;',         # ISOlat2: LATIN CAPITAL LETTER C WITH CARON
    0xC9 => '&Eacute;',         # ISOlat1: LATIN CAPITAL LETTER E WITH ACUTE
    0xCA => '&Eogon;',          # ISOlat2: LATIN CAPITAL LETTER E WITH OGONEK
    0xCB => '&Euml;',           # ISOlat1: LATIN CAPITAL LETTER E WITH DIAERESIS
    0xCC => '&Edot;',           # ISOlat2: LATIN CAPITAL LETTER E WITH DOT ABOVE
    0xCD => '&Iacute;',         # ISOlat1: LATIN CAPITAL LETTER I WITH ACUTE
    0xCE => '&Icirc;',          # ISOlat1: LATIN CAPITAL LETTER I WITH CIRCUMFLEX
    0xCF => '&Imacr;',          # ISOlat2: LATIN CAPITAL LETTER I WITH MACRON
    0xD0 => '&Dstrok;',         # ISOlat2: LATIN CAPITAL LETTER D WITH STROKE
    0xD1 => '&Ncedil;',         # ISOlat2: LATIN CAPITAL LETTER N WITH CEDILLA
    0xD2 => '&Omacr;',          # ISOlat2: LATIN CAPITAL LETTER O WITH MACRON
    0xD3 => '&Kcedil;',         # ISOlat2: LATIN CAPITAL LETTER K WITH CEDILLA
    0xD4 => '&Ocirc;',          # ISOlat1: LATIN CAPITAL LETTER O WITH CIRCUMFLEX
    0xD5 => '&Otilde;',         # ISOlat1: LATIN CAPITAL LETTER O WITH TILDE
    0xD6 => '&Ouml;',           # ISOlat1: LATIN CAPITAL LETTER O WITH DIAERESIS
    0xD7 => '&times;',          # ISOnum : MULTIPLICATION SIGN
    0xD8 => '&Oslash;',         # ISOlat1: LATIN CAPITAL LETTER O WITH STROKE
    0xD9 => '&Uogon;',          # ISOlat2: LATIN CAPITAL LETTER U WITH OGONEK
    0xDA => '&Uacute;',         # ISOlat1: LATIN CAPITAL LETTER U WITH ACUTE
    0xDB => '&Ucirc;',          # ISOlat1: LATIN CAPITAL LETTER U WITH CIRCUMFLEX
    0xDC => '&Uuml;',           # ISOlat1: LATIN CAPITAL LETTER U WITH DIAERESIS
    0xDD => '&Utilde;',         # ISOlat2: LATIN CAPITAL LETTER U WITH TILDE
    0xDE => '&Umacr;',          # ISOlat2: LATIN CAPITAL LETTER U WITH MACRON
    0xDF => '&szlig;',          # ISOlat1: LATIN SMALL LETTER SHARP S (German)
    0xE0 => '&amacr;',          # ISOlat2: LATIN SMALL LETTER A WITH MACRON
    0xE1 => '&aacute;',         # ISOlat1: LATIN SMALL LETTER A WITH ACUTE
    0xE2 => '&acirc;',          # ISOlat1: LATIN SMALL LETTER A WITH CIRCUMFLEX
    0xE3 => '&atilde;',         # ISOlat1: LATIN SMALL LETTER A WITH TILDE
    0xE4 => '&auml;',           # ISOlat1: LATIN SMALL LETTER A WITH DIAERESIS
    0xE5 => '&aring;',          # ISOlat1: LATIN SMALL LETTER A WITH RING ABOVE
    0xE6 => '&aelig;',          # ISOlat1: LATIN SMALL LETTER AE
    0xE7 => '&iogon;',          # ISOlat2: LATIN SMALL LETTER I WITH OGONEK
    0xE8 => '&ccaron;',         # ISOlat2: LATIN SMALL LETTER C WITH CARON
    0xE9 => '&eacute;',         # ISOlat1: LATIN SMALL LETTER E WITH ACUTE
    0xEA => '&eogon;',          # ISOlat2: LATIN SMALL LETTER E WITH OGONEK
    0xEB => '&euml;',           # ISOlat1: LATIN SMALL LETTER E WITH DIAERESIS
    0xEC => '&edot;',           # ISOlat2: LATIN SMALL LETTER E WITH DOT ABOVE
    0xED => '&iacute;',         # ISOlat1: LATIN SMALL LETTER I WITH ACUTE
    0xEE => '&icirc;',          # ISOlat1: LATIN SMALL LETTER I WITH CIRCUMFLEX
    0xEF => '&imacr;',          # ISOlat2: LATIN SMALL LETTER I WITH MACRON
    0xF0 => '&dstrok;',         # ISOlat2: LATIN SMALL LETTER D WITH STROKE
    0xF1 => '&ncedil;',         # ISOlat2: LATIN SMALL LETTER N WITH CEDILLA
    0xF2 => '&omacr;',          # ISOlat2: LATIN SMALL LETTER O WITH MACRON
    0xF3 => '&kcedil;',         # ISOlat2: LATIN SMALL LETTER K WITH CEDILLA
    0xF4 => '&ocirc;',          # ISOlat1: LATIN SMALL LETTER O WITH CIRCUMFLEX
    0xF5 => '&otilde;',         # ISOlat1: LATIN SMALL LETTER O WITH TILDE
    0xF6 => '&ouml;',           # ISOlat1: LATIN SMALL LETTER O WITH DIAERESIS
    0xF7 => '&divide;',         # ISOnum : DIVISION SIGN
    0xF8 => '&oslash;',         # ISOlat1: LATIN SMALL LETTER O WITH STROKE
    0xF9 => '&uogon;',          # ISOlat2: LATIN SMALL LETTER U WITH OGONEK
    0xFA => '&uacute;',         # ISOlat1: LATIN SMALL LETTER U WITH ACUTE
    0xFB => '&ucirc;',          # ISOlat1: LATIN SMALL LETTER U WITH CIRCUMFLEX
    0xFC => '&uuml;',           # ISOlat1: LATIN SMALL LETTER U WITH DIAERESIS
    0xFD => '&utilde;',         # ISOlat2: LATIN SMALL LETTER U WITH TILDE
    0xFE => '&umacr;',          # ISOlat2: LATIN SMALL LETTER U WITH MACRON
    0xFF => '&dot;',            # ISOdia : DOT ABOVE
);

# Alias: %LATIN4_To_Ent shares storage with %ISO_8859_4_To_Ent.
*LATIN4_To_Ent = *ISO_8859_4_To_Ent;

##---------------------------------------------------------------------------
##      ISO-8859-5: Cyrillic
##---------------------------------------------------------------------------

%ISO_8859_5_To_Ent = (
    #--------------------------------------------------------------------------
    # Hex Code   Entity Ref     # ISO external entity and description
    #--------------------------------------------------------------------------
    0xA1 => '&IOcy;',           # ISOcyr1: CYRILLIC CAPITAL LETTER IO
    0xA2 => '&DJcy;',           # ISOcyr2: CYRILLIC CAPITAL LETTER DJE (Serbocroatian)
    0xA3 => '&GJcy;',           # ISOcyr2: CYRILLIC CAPITAL LETTER GJE (Macedonian)
    0xA4 => '&Jukcy;',          # ISOcyr2: CYRILLIC CAPITAL LETTER UKRAINIAN IE
    0xA5 => '&DScy;',           # ISOcyr2: CYRILLIC CAPITAL LETTER DZE (Macedonian)
    0xA6 => '&Iukcy;',          # ISOcyr2: CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
    0xA7 => '&YIcy;',           # ISOcyr2: CYRILLIC CAPITAL LETTER YI (Ukrainian)
    0xA8 => '&Jsercy;',         # ISOcyr2: CYRILLIC CAPITAL LETTER JE
    0xA9 => '&LJcy;',           # ISOcyr2: CYRILLIC CAPITAL LETTER LJE
    0xAA => '&NJcy;',           # ISOcyr2: CYRILLIC CAPITAL LETTER NJE
    0xAB => '&TSHcy;',          # ISOcyr2: CYRILLIC CAPITAL LETTER TSHE (Serbocroatian)
    0xAC => '&KJcy;',           # ISOcyr2: CYRILLIC CAPITAL LETTER KJE (Macedonian)
    0xAD => '&shy;',            # ISOnum : SOFT HYPHEN
    0xAE => '&Ubrcy;',          # ISOcyr2: CYRILLIC CAPITAL LETTER SHORT U (Byelorussian)
    0xAF => '&DZcy;',           # ISOcyr2: CYRILLIC CAPITAL LETTER DZHE
    0xB0 => '&Acy;',            # ISOcyr1: CYRILLIC CAPITAL LETTER A
    0xB1 => '&Bcy;',            # ISOcyr1: CYRILLIC CAPITAL LETTER BE
    0xB2 => '&Vcy;',            # ISOcyr1: CYRILLIC CAPITAL LETTER VE
    0xB3 => '&Gcy;',            # ISOcyr1: CYRILLIC CAPITAL LETTER GHE
    0xB4 => '&Dcy;',            # ISOcyr1: CYRILLIC CAPITAL LETTER DE
    0xB5 => '&IEcy;',           # ISOcyr1: CYRILLIC CAPITAL LETTER IE
    0xB6 => '&ZHcy;',           # ISOcyr1: CYRILLIC CAPITAL LETTER ZHE
    0xB7 => '&Zcy;',            # ISOcyr1: CYRILLIC CAPITAL LETTER ZE
    0xB8 => '&Icy;',            # ISOcyr1: CYRILLIC CAPITAL LETTER I
    0xB9 => '&Jcy;',            # ISOcyr1: CYRILLIC CAPITAL LETTER SHORT I
    0xBA => '&Kcy;',            # ISOcyr1: CYRILLIC CAPITAL LETTER KA
    0xBB => '&Lcy;',            # ISOcyr1: CYRILLIC CAPITAL LETTER EL
    0xBC => '&Mcy;',            # ISOcyr1: CYRILLIC CAPITAL LETTER EM
    0xBD => '&Ncy;',            # ISOcyr1: CYRILLIC CAPITAL LETTER EN
    0xBE => '&Ocy;',            # ISOcyr1: CYRILLIC CAPITAL LETTER O
    0xBF => '&Pcy;',            # ISOcyr1: CYRILLIC CAPITAL LETTER PE
    0xC0 => '&Rcy;',            # ISOcyr1: CYRILLIC CAPITAL LETTER ER
    0xC1 => '&Scy;',            # ISOcyr1: CYRILLIC CAPITAL LETTER ES
    0xC2 => '&Tcy;',            # ISOcyr1: CYRILLIC CAPITAL LETTER TE
    0xC3 => '&Ucy;',            # ISOcyr1: CYRILLIC CAPITAL LETTER U
    0xC4 => '&Fcy;',            # ISOcyr1: CYRILLIC CAPITAL LETTER EF
    0xC5 => '&KHcy;',           # ISOcyr1: CYRILLIC CAPITAL LETTER HA
    0xC6 => '&TScy;',           # ISOcyr1: CYRILLIC CAPITAL LETTER TSE
    0xC7 => '&CHcy;',           # ISOcyr1: CYRILLIC CAPITAL LETTER CHE
    0xC8 => '&SHcy;',           # ISOcyr1: CYRILLIC CAPITAL LETTER SHA
    0xC9 => '&SHCHcy;',         # ISOcyr1: CYRILLIC CAPITAL LETTER SHCHA
    0xCA => '&HARDcy;',         # ISOcyr1: CYRILLIC CAPITAL LETTER HARD SIGN
    0xCB => '&Ycy;',            # ISOcyr1: CYRILLIC CAPITAL LETTER YERU
    0xCC => '&SOFTcy;',         # ISOcyr1: CYRILLIC CAPITAL LETTER SOFT SIGN
    0xCD => '&Ecy;',            # ISOcyr1: CYRILLIC CAPITAL LETTER E
    0xCE => '&YUcy;',           # ISOcyr1: CYRILLIC CAPITAL LETTER YU
    0xCF => '&YAcy;',           # ISOcyr1: CYRILLIC CAPITAL LETTER YA
    0xD0 => '&acy;',            # ISOcyr1: CYRILLIC SMALL LETTER A
    0xD1 => '&bcy;',            # ISOcyr1: CYRILLIC SMALL LETTER BE
    0xD2 => '&vcy;',            # ISOcyr1: CYRILLIC SMALL LETTER VE
    0xD3 => '&gcy;',            # ISOcyr1: CYRILLIC SMALL LETTER GHE
    0xD4 => '&dcy;',            # ISOcyr1: CYRILLIC SMALL LETTER DE
    0xD5 => '&iecy;',           # ISOcyr1: CYRILLIC SMALL LETTER IE
    0xD6 => '&zhcy;',           # ISOcyr1: CYRILLIC SMALL LETTER ZHE
    0xD7 => '&zcy;',            # ISOcyr1: CYRILLIC SMALL LETTER ZE
    0xD8 => '&icy;',            # ISOcyr1: CYRILLIC SMALL LETTER I
    0xD9 => '&jcy;',            # ISOcyr1: CYRILLIC SMALL LETTER SHORT I
    0xDA => '&kcy;',            # ISOcyr1: CYRILLIC SMALL LETTER KA
    0xDB => '&lcy;',            # ISOcyr1: CYRILLIC SMALL LETTER EL
    0xDC => '&mcy;',            # ISOcyr1: CYRILLIC SMALL LETTER EM
    0xDD => '&ncy;',            # ISOcyr1: CYRILLIC SMALL LETTER EN
    0xDE => '&ocy;',            # ISOcyr1: CYRILLIC SMALL LETTER O
    0xDF => '&pcy;',            # ISOcyr1: CYRILLIC SMALL LETTER PE
    0xE0 => '&rcy;',            # ISOcyr1: CYRILLIC SMALL LETTER ER
    0xE1 => '&scy;',            # ISOcyr1: CYRILLIC SMALL LETTER ES
    0xE2 => '&tcy;',            # ISOcyr1: CYRILLIC SMALL LETTER TE
    0xE3 => '&ucy;',            # ISOcyr1: CYRILLIC SMALL LETTER U
    0xE4 => '&fcy;',            # ISOcyr1: CYRILLIC SMALL LETTER EF
    0xE5 => '&khcy;',           # ISOcyr1: CYRILLIC SMALL LETTER HA
    0xE6 => '&tscy;',           # ISOcyr1: CYRILLIC SMALL LETTER TSE
    0xE7 => '&chcy;',           # ISOcyr1: CYRILLIC SMALL LETTER CHE
    0xE8 => '&shcy;',           # ISOcyr1: CYRILLIC SMALL LETTER SHA
    0xE9 => '&shchcy;',         # ISOcyr1: CYRILLIC SMALL LETTER SHCHA
    0xEA => '&hardcy;',         # ISOcyr1: CYRILLIC SMALL LETTER HARD SIGN
    0xEB => '&ycy;',            # ISOcyr1: CYRILLIC SMALL LETTER YERU
    0xEC => '&softcy;',         # ISOcyr1: CYRILLIC SMALL LETTER SOFT SIGN
    0xED => '&ecy;',            # ISOcyr1: CYRILLIC SMALL LETTER E
    0xEE => '&yucy;',           # ISOcyr1: CYRILLIC SMALL LETTER YU
    0xEF => '&yacy;',           # ISOcyr1: CYRILLIC SMALL LETTER YA
    0xF0 => '&numero;',         # ISOcyr1: NUMERO SIGN
    0xF1 => '&iocy;',           # ISOcyr1: CYRILLIC SMALL LETTER IO
    0xF2 => '&djcy;',           # ISOcyr2: CYRILLIC SMALL LETTER DJE (Serbocroatian)
    0xF3 => '&gjcy;',           # ISOcyr2: CYRILLIC SMALL LETTER GJE (Macedonian)
    0xF4 => '&jukcy;',          # ISOcyr2: CYRILLIC SMALL LETTER UKRAINIAN IE
    0xF5 => '&dscy;',           # ISOcyr2: CYRILLIC SMALL LETTER DZE (Macedonian)
    0xF6 => '&iukcy;',          # ISOcyr2: CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
    0xF7 => '&yicy;',           # ISOcyr2: CYRILLIC SMALL LETTER YI (Ukrainian)
    0xF8 => '&jsercy;',         # ISOcyr2: CYRILLIC SMALL LETTER JE
    0xF9 => '&ljcy;',           # ISOcyr2: CYRILLIC SMALL LETTER LJE
    0xFA => '&njcy;',           # ISOcyr2: CYRILLIC SMALL LETTER NJE
    0xFB => '&tshcy;',          # ISOcyr2: CYRILLIC SMALL LETTER TSHE (Serbocroatian)
    0xFC => '&kjcy;',           # ISOcyr2: CYRILLIC SMALL LETTER KJE (Macedonian)
    0xFD => '&sect;',           # ISOnum : SECTION SIGN
    0xFE => '&ubrcy;',          # ISOcyr2: CYRILLIC SMALL LETTER SHORT U (Byelorussian)
    0xFF => '&dzcy;',           # ISOcyr2: CYRILLIC SMALL LETTER DZHE
);

##---------------------------------------------------------------------------
##      ISO-8859-6: Arabic
##---------------------------------------------------------------------------
##  Note: There are no ISO entities for Arabic characters.  Some of
##        the following are non-standard entity references.  "ISOarbc"
##        is used as the entity defining the Arabic entities in
##        anticipation that ISO will define such an entity.

##  Keys are ISO-8859-6 character codes; values are the (non-standard)
##  "ISOarbc" entity references used by this file for Arabic characters.
%ISO_8859_6_To_Ent = (
    #--------------------------------------------------------------------------
    # Hex Code   Entity Ref     # ISO external entity and description
    #--------------------------------------------------------------------------
    0xA4 => '&curren;',         # ISOnum : CURRENCY SIGN
    0xAC => '&arcomma;',        # ISOarbc: ARABIC COMMA
    0xAD => '&shy;',            # ISOnum : SOFT HYPHEN
    0xBB => '&arsemi;',         # ISOarbc: ARABIC SEMICOLON
    0xBF => '&arquest;',        # ISOarbc: ARABIC QUESTION MARK
    0xC1 => '&hamz;',           # ISOarbc: ARABIC LETTER HAMZA
    0xC2 => '&alefmadd;',       # ISOarbc: ARABIC LETTER ALEF WITH MADDA ABOVE
    0xC3 => '&alefhamz;',       # ISOarbc: ARABIC LETTER ALEF WITH HAMZA ABOVE
    0xC4 => '&wawhamz;',        # ISOarbc: ARABIC LETTER WAW WITH HAMZA ABOVE
    # NOTE(review): 0xC5 reuses the 0xC3 entity name, so HAMZA BELOW and
    # HAMZA ABOVE are indistinguishable in the output.  Kept as found
    # because the names are non-standard; confirm before renaming.
    0xC5 => '&alefhamz;',       # ISOarbc: ARABIC LETTER ALEF WITH HAMZA BELOW
    0xC6 => '&yehhamz;',        # ISOarbc: ARABIC LETTER YEH WITH HAMZA ABOVE
    0xC7 => '&alef;',           # ISOarbc: ARABIC LETTER ALEF
    0xC8 => '&beh;',            # ISOarbc: ARABIC LETTER BEH
    0xC9 => '&tehmarb;',        # ISOarbc: ARABIC LETTER TEH MARBUTA
    0xCA => '&teh;',            # ISOarbc: ARABIC LETTER TEH
    0xCB => '&theh;',           # ISOarbc: ARABIC LETTER THEH
    0xCC => '&jeem;',           # ISOarbc: ARABIC LETTER JEEM
    0xCD => '&hah;',            # ISOarbc: ARABIC LETTER HAH
    0xCE => '&khah;',           # ISOarbc: ARABIC LETTER KHAH
    0xCF => '&dal;',            # ISOarbc: ARABIC LETTER DAL
    0xD0 => '&thal;',           # ISOarbc: ARABIC LETTER THAL
    0xD1 => '&reh;',            # ISOarbc: ARABIC LETTER REH
    0xD2 => '&zain;',           # ISOarbc: ARABIC LETTER ZAIN
    0xD3 => '&seen;',           # ISOarbc: ARABIC LETTER SEEN
    0xD4 => '&sheen;',          # ISOarbc: ARABIC LETTER SHEEN
    0xD5 => '&sad;',            # ISOarbc: ARABIC LETTER SAD
    0xD6 => '&dad;',            # ISOarbc: ARABIC LETTER DAD
    0xD7 => '&tah;',            # ISOarbc: ARABIC LETTER TAH
    0xD8 => '&zah;',            # ISOarbc: ARABIC LETTER ZAH
    0xD9 => '&ain;',            # ISOarbc: ARABIC LETTER AIN
    0xDA => '&ghain;',          # ISOarbc: ARABIC LETTER GHAIN
    0xE0 => '&tatweel;',        # ISOarbc: ARABIC TATWEEL
    0xE1 => '&feh;',            # ISOarbc: ARABIC LETTER FEH
    0xE2 => '&qaf;',            # ISOarbc: ARABIC LETTER QAF
    0xE3 => '&kaf;',            # ISOarbc: ARABIC LETTER KAF
    0xE4 => '&lam;',            # ISOarbc: ARABIC LETTER LAM
    0xE5 => '&meem;',           # ISOarbc: ARABIC LETTER MEEM
    0xE6 => '&noon;',           # ISOarbc: ARABIC LETTER NOON
    0xE7 => '&heh;',            # ISOarbc: ARABIC LETTER HEH
    0xE8 => '&waw;',            # ISOarbc: ARABIC LETTER WAW
    0xE9 => '&alefmaks;',       # ISOarbc: ARABIC LETTER ALEF MAKSURA
    0xEA => '&yeh;',            # ISOarbc: ARABIC LETTER YEH
    0xEB => '&fathatan;',       # ISOarbc: ARABIC FATHATAN
    0xEC => '&dammatan;',       # ISOarbc: ARABIC DAMMATAN
    0xED => '&kasratan;',       # ISOarbc: ARABIC KASRATAN
    0xEE => '&fatha;',          # ISOarbc: ARABIC FATHA
    0xEF => '&damma;',          # ISOarbc: ARABIC DAMMA
    0xF0 => '&kasra;',          # ISOarbc: ARABIC KASRA
    0xF1 => '&shadda;',         # ISOarbc: ARABIC SHADDA
    0xF2 => '&sukun;',          # ISOarbc: ARABIC SUKUN
);

##---------------------------------------------------------------------------
##      ISO-8859-7: Greek
##---------------------------------------------------------------------------
##  Keys are ISO-8859-7 character codes; values are entity references
##  (ISOnum/ISOdia/ISOpub for symbols, ISOgrk1/ISOgrk2 for letters).

%ISO_8859_7_To_Ent = (
    #--------------------------------------------------------------------------
    # Hex Code   Entity Ref     # ISO external entity and description
    #--------------------------------------------------------------------------
    0xA1 => '&lsquo;',          # ISOnum : SINGLE HIGH-REVERSED-9 QUOTATION MARK
    0xA2 => '&rsquo;',          # ISOnum : RIGHT SINGLE QUOTATION MARK
    0xA3 => '&pound;',          # ISOnum : POUND SIGN
    0xA6 => '&brvbar;',         # ISOnum : BROKEN BAR
    0xA7 => '&sect;',           # ISOnum : SECTION SIGN
    0xA8 => '&uml;',            # ISOdia : DIAERESIS
    0xA9 => '&copy;',           # ISOnum : COPYRIGHT SIGN
    0xAB => '&laquo;',          # ISOnum : LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
    0xAC => '&not;',            # ISOnum : NOT SIGN
    0xAD => '&shy;',            # ISOnum : SOFT HYPHEN
    0xAF => '&mdash;',          # ISOpub : EM DASH
    0xB0 => '&deg;',            # ISOnum : DEGREE SIGN
    0xB1 => '&plusmn;',         # ISOnum : PLUS-MINUS SIGN
    0xB2 => '&sup2;',           # ISOnum : SUPERSCRIPT TWO
    0xB3 => '&sup3;',           # ISOnum : SUPERSCRIPT THREE
    0xB4 => '&acute;',          # ISOdia : ACUTE ACCENT
    0xB5 => '&diagr;',          # ISOgrk?: ACUTE ACCENT AND DIAERESIS (Tonos and Dialytika)
    0xB6 => '&Aacgr;',          # ISOgrk2: GREEK CAPITAL LETTER ALPHA WITH ACUTE
    0xB7 => '&middot;',         # ISOnum : MIDDLE DOT
    0xB8 => '&Eacgr;',          # ISOgrk2: GREEK CAPITAL LETTER EPSILON WITH ACUTE
    0xB9 => '&EEacgr;',         # ISOgrk2: GREEK CAPITAL LETTER ETA WITH ACUTE
    0xBA => '&Iacgr;',          # ISOgrk2: GREEK CAPITAL LETTER IOTA WITH ACUTE
    0xBB => '&raquo;',          # ISOnum : RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
    0xBC => '&Oacgr;',          # ISOgrk2: GREEK CAPITAL LETTER OMICRON WITH ACUTE
    0xBD => '&frac12;',         # ISOnum : VULGAR FRACTION ONE HALF
    0xBE => '&Uacgr;',          # ISOgrk2: GREEK CAPITAL LETTER UPSILON WITH ACUTE
    0xBF => '&OHacgr;',         # ISOgrk2: GREEK CAPITAL LETTER OMEGA WITH ACUTE
    0xC0 => '&idiagr;',         # ISOgrk2: GREEK SMALL LETTER IOTA WITH ACUTE AND DIAERESIS
    0xC1 => '&Agr;',            # ISOgrk1: GREEK CAPITAL LETTER ALPHA
    0xC2 => '&Bgr;',            # ISOgrk1: GREEK CAPITAL LETTER BETA
    0xC3 => '&Ggr;',            # ISOgrk1: GREEK CAPITAL LETTER GAMMA
    0xC4 => '&Dgr;',            # ISOgrk1: GREEK CAPITAL LETTER DELTA
    0xC5 => '&Egr;',            # ISOgrk1: GREEK CAPITAL LETTER EPSILON
    0xC6 => '&Zgr;',            # ISOgrk1: GREEK CAPITAL LETTER ZETA
    0xC7 => '&EEgr;',           # ISOgrk1: GREEK CAPITAL LETTER ETA
    0xC8 => '&THgr;',           # ISOgrk1: GREEK CAPITAL LETTER THETA
    0xC9 => '&Igr;',            # ISOgrk1: GREEK CAPITAL LETTER IOTA
    0xCA => '&Kgr;',            # ISOgrk1: GREEK CAPITAL LETTER KAPPA
    0xCB => '&Lgr;',            # ISOgrk1: GREEK CAPITAL LETTER LAMDA
    0xCC => '&Mgr;',            # ISOgrk1: GREEK CAPITAL LETTER MU
    0xCD => '&Ngr;',            # ISOgrk1: GREEK CAPITAL LETTER NU
    0xCE => '&Xgr;',            # ISOgrk1: GREEK CAPITAL LETTER XI
    0xCF => '&Ogr;',            # ISOgrk1: GREEK CAPITAL LETTER OMICRON
    0xD0 => '&Pgr;',            # ISOgrk1: GREEK CAPITAL LETTER PI
    0xD1 => '&Rgr;',            # ISOgrk1: GREEK CAPITAL LETTER RHO
    0xD3 => '&Sgr;',            # ISOgrk1: GREEK CAPITAL LETTER SIGMA
    0xD4 => '&Tgr;',            # ISOgrk1: GREEK CAPITAL LETTER TAU
    0xD5 => '&Ugr;',            # ISOgrk1: GREEK CAPITAL LETTER UPSILON
    0xD6 => '&PHgr;',           # ISOgrk1: GREEK CAPITAL LETTER PHI
    0xD7 => '&KHgr;',           # ISOgrk1: GREEK CAPITAL LETTER CHI
    0xD8 => '&PSgr;',           # ISOgrk1: GREEK CAPITAL LETTER PSI
    0xD9 => '&OHgr;',           # ISOgrk1: GREEK CAPITAL LETTER OMEGA
    0xDA => '&Idigr;',          # ISOgrk2: GREEK CAPITAL LETTER IOTA WITH DIAERESIS
    0xDB => '&Udigr;',          # ISOgrk2: GREEK CAPITAL LETTER UPSILON WITH DIAERESIS
    0xDC => '&aacgr;',          # ISOgrk2: GREEK SMALL LETTER ALPHA WITH ACUTE
    0xDD => '&eacgr;',          # ISOgrk2: GREEK SMALL LETTER EPSILON WITH ACUTE
    0xDE => '&eeacgr;',         # ISOgrk2: GREEK SMALL LETTER ETA WITH ACUTE
    0xDF => '&iacgr;',          # ISOgrk2: GREEK SMALL LETTER IOTA WITH ACUTE
    0xE0 => '&udiagr;',         # ISOgrk2: GREEK SMALL LETTER UPSILON WITH ACUTE AND DIAERESIS
    0xE1 => '&agr;',            # ISOgrk1: GREEK SMALL LETTER ALPHA
    0xE2 => '&bgr;',            # ISOgrk1: GREEK SMALL LETTER BETA
    0xE3 => '&ggr;',            # ISOgrk1: GREEK SMALL LETTER GAMMA
    0xE4 => '&dgr;',            # ISOgrk1: GREEK SMALL LETTER DELTA
    0xE5 => '&egr;',            # ISOgrk1: GREEK SMALL LETTER EPSILON
    0xE6 => '&zgr;',            # ISOgrk1: GREEK SMALL LETTER ZETA
    0xE7 => '&eegr;',           # ISOgrk1: GREEK SMALL LETTER ETA
    0xE8 => '&thgr;',           # ISOgrk1: GREEK SMALL LETTER THETA
    0xE9 => '&igr;',            # ISOgrk1: GREEK SMALL LETTER IOTA
    0xEA => '&kgr;',            # ISOgrk1: GREEK SMALL LETTER KAPPA
    0xEB => '&lgr;',            # ISOgrk1: GREEK SMALL LETTER LAMDA
    0xEC => '&mgr;',            # ISOgrk1: GREEK SMALL LETTER MU
    0xED => '&ngr;',            # ISOgrk1: GREEK SMALL LETTER NU
    0xEE => '&xgr;',            # ISOgrk1: GREEK SMALL LETTER XI
    0xEF => '&ogr;',            # ISOgrk1: GREEK SMALL LETTER OMICRON
    0xF0 => '&pgr;',            # ISOgrk1: GREEK SMALL LETTER PI
    0xF1 => '&rgr;',            # ISOgrk1: GREEK SMALL LETTER RHO
    0xF2 => '&sfgr;',           # ISOgrk1: GREEK SMALL LETTER FINAL SIGMA
    0xF3 => '&sgr;',            # ISOgrk1: GREEK SMALL LETTER SIGMA
    0xF4 => '&tgr;',            # ISOgrk1: GREEK SMALL LETTER TAU
    0xF5 => '&ugr;',            # ISOgrk1: GREEK SMALL LETTER UPSILON
    0xF6 => '&phgr;',           # ISOgrk1: GREEK SMALL LETTER PHI
    0xF7 => '&khgr;',           # ISOgrk1: GREEK SMALL LETTER CHI
    0xF8 => '&psgr;',           # ISOgrk1: GREEK SMALL LETTER PSI
    0xF9 => '&ohgr;',           # ISOgrk1: GREEK SMALL LETTER OMEGA
    0xFA => '&idigr;',          # ISOgrk2: GREEK SMALL LETTER IOTA WITH DIAERESIS
    0xFB => '&udigr;',          # ISOgrk2: GREEK SMALL LETTER UPSILON WITH DIAERESIS
    0xFC => '&oacgr;',          # ISOgrk2: GREEK SMALL LETTER OMICRON WITH ACUTE
    0xFD => '&uacgr;',          # ISOgrk2: GREEK SMALL LETTER UPSILON WITH ACUTE
    0xFE => '&ohacgr;',         # ISOgrk2: GREEK SMALL LETTER OMEGA WITH ACUTE
);

##---------------------------------------------------------------------------
##      ISO-8859-8: Hebrew
##---------------------------------------------------------------------------
##  Note: There is no ISO entities for
hebrew characters. ISOamso ## defines a few characters, but they are for math purposes. ## Some of the following are non-standard entity references. ## "ISOhbrw" is used as the entity defining the Hebrew entities ## in anticipation that ISO will define such an entity. %ISO_8859_8_To_Ent = ( #-------------------------------------------------------------------------- # Hex Code Entity Ref # ISO external entity and description #-------------------------------------------------------------------------- 0xA2, "¢", # ISOnum : CENT SIGN 0xA3, "£", # ISOnum : POUND SIGN 0xA4, "¤", # ISOnum : CURRENCY SIGN 0xA5, "¥", # ISOnum : YEN SIGN 0xA6, "¦", # ISOnum : BROKEN BAR 0xA7, "§", # ISOnum : SECTION SIGN 0xA8, "¨", # ISOdia : DIAERESIS 0xA9, "©", # ISOnum : COPYRIGHT SIGN 0xAA, "×", # ISOnum : MULTIPLICATION SIGN 0xAB, "«", # ISOnum : LEFT-POINTING DOUBLE ANGLE # QUOTATION MARK 0xAC, "¬", # ISOnum : NOT SIGN 0xAD, "­", # ISOnum : SOFT HYPHEN 0xAE, "®", # ISOnum : REGISTERED SIGN 0xAF, "¯", # ISOdia : OVERLINE (MACRON) 0xB0, "°", # ISOnum : DEGREE SIGN 0xB1, "±", # ISOnum : PLUS-MINUS SIGN 0xB2, "²", # ISOnum : SUPERSCRIPT TWO 0xB3, "³", # ISOnum : SUPERSCRIPT THREE 0xB4, "´", # ISOdia : ACUTE ACCENT 0xB5, "µ", # ISOnum : MICRO SIGN 0xB6, "¶", # ISOnum : PILCROW SIGN 0xB7, "·", # ISOnum : MIDDLE DOT 0xB8, "¸", # ISOdia : CEDILLA 0xB9, "¹", # ISOnum : SUPERSCRIPT ONE 0xBA, "÷", # ISOlat1: DIVISION SIGN 0xBB, "»", # ISOnum : RIGHT-POINTING DOUBLE ANGLE # QUOTATION MARK 0xBC, "¼", # ISOnum : VULGAR FRACTION ONE QUARTER 0xBD, "½", # ISOnum : VULGAR FRACTION ONE HALF 0xBE, "¾", # ISOnum : VULGAR FRACTION THREE QUARTERS 0xDF, "&dlowbar;", # ISOnum?: DOUBLE LOW LINE 0xE0, "&alef;", # ISOhbrw: HEBREW LETTER ALEF 0xE1, "&bet;", # ISOhbrw: HEBREW LETTER BET 0xE2, "ℷ", # ISOhbrw: HEBREW LETTER GIMEL 0xE3, "&dalet;", # ISOhbrw: HEBREW LETTER DALET 0xE4, "&he;", # ISOhbrw: HEBREW LETTER HE 0xE5, "&vav;", # ISOhbrw: HEBREW LETTER VAV 0xE6, "&zayin;", # ISOhbrw: HEBREW LETTER ZAYIN 0xE7, 
"&het;", # ISOhbrw: HEBREW LETTER HET 0xE8, "&tet;", # ISOhbrw: HEBREW LETTER TET 0xE9, "&yod;", # ISOhbrw: HEBREW LETTER YOD 0xEA, "&fkaf;", # ISOhbrw: HEBREW LETTER FINAL KAF 0xEB, "&kaf;", # ISOhbrw: HEBREW LETTER KAF 0xEC, "&lamed;", # ISOhbrw: HEBREW LETTER LAMED 0xED, "&fmem;", # ISOhbrw: HEBREW LETTER FINAL MEM 0xEE, "&mem;", # ISOhbrw: HEBREW LETTER MEM 0xEF, "&fnun;", # ISOhbrw: HEBREW LETTER FINAL NUN 0xF0, "&nun;", # ISOhbrw: HEBREW LETTER NUN 0xF1, "&samekh;", # ISOhbrw: HEBREW LETTER SAMEKH 0xF2, "&ayin;", # ISOhbrw: HEBREW LETTER AYIN 0xF3, "&fpe;", # ISOhbrw: HEBREW LETTER FINAL PE 0xF4, "&pe;", # ISOhbrw: HEBREW LETTER PE 0xF5, "&ftsadi;", # ISOhbrw: HEBREW LETTER FINAL TSADI 0xF6, "&tsadi;", # ISOhbrw: HEBREW LETTER TSADI 0xF7, "&qof;", # ISOhbrw: HEBREW LETTER QOF 0xF8, "&resh;", # ISOhbrw: HEBREW LETTER RESH 0xF9, "&shin;", # ISOhbrw: HEBREW LETTER SHIN 0xFA, "&tav;", # ISOhbrw: HEBREW LETTER TAV ); ##--------------------------------------------------------------------------- ## ISO-8859-9: Latin-5 ##--------------------------------------------------------------------------- %ISO_8859_9_To_Ent = ( #-------------------------------------------------------------------------- # Hex Code Entity Ref # ISO external entity and description #-------------------------------------------------------------------------- 0xA1, "¡", # ISOnum : INVERTED EXCLAMATION MARK 0xA2, "¢", # ISOnum : CENT SIGN 0xA3, "£", # ISOnum : POUND SIGN 0xA4, "¤", # ISOnum : CURRENCY SIGN 0xA5, "¥", # ISOnum : YEN SIGN 0xA6, "¦", # ISOnum : BROKEN BAR 0xA7, "§", # ISOnum : SECTION SIGN 0xA8, "¨", # ISOdia : DIAERESIS 0xA9, "©", # ISOnum : COPYRIGHT SIGN 0xAA, "ª", # ISOnum : FEMININE ORDINAL INDICATOR 0xAB, "«", # ISOnum : LEFT-POINTING DOUBLE ANGLE # QUOTATION MARK 0xAC, "¬", # ISOnum : NOT SIGN 0xAD, "­", # ISOnum : SOFT HYPHEN 0xAE, "®", # ISOnum : REGISTERED SIGN 0xAF, "¯", # ISOdia : OVERLINE (MACRON) 0xB0, "°", # ISOnum : DEGREE SIGN 0xB1, "±", # ISOnum : PLUS-MINUS SIGN 0xB2, 
"²", # ISOnum : SUPERSCRIPT TWO 0xB3, "³", # ISOnum : SUPERSCRIPT THREE 0xB4, "´", # ISOdia : ACUTE ACCENT 0xB5, "µ", # ISOnum : MICRO SIGN 0xB6, "¶", # ISOnum : PILCROW SIGN 0xB7, "·", # ISOnum : MIDDLE DOT 0xB8, "¸", # ISOdia : CEDILLA 0xB9, "¹", # ISOnum : SUPERSCRIPT ONE 0xBA, "º", # ISOnum : MASCULINE ORDINAL INDICATOR 0xBB, "»", # ISOnum : RIGHT-POINTING DOUBLE ANGLE # QUOTATION MARK 0xBC, "¼", # ISOnum : VULGAR FRACTION ONE QUARTER 0xBD, "½", # ISOnum : VULGAR FRACTION ONE HALF 0xBE, "¾", # ISOnum : VULGAR FRACTION THREE QUARTERS 0xBF, "¿", # ISOnum : INVERTED QUESTION MARK 0xC0, "À", # ISOlat1: LATIN CAPITAL LETTER A WITH GRAVE 0xC1, "Á", # ISOlat1: LATIN CAPITAL LETTER A WITH ACUTE 0xC2, "Â", # ISOlat1: LATIN CAPITAL LETTER A WITH # CIRCUMFLEX 0xC3, "Ã", # ISOlat1: LATIN CAPITAL LETTER A WITH TILDE 0xC4, "Ä", # ISOlat1: LATIN CAPITAL LETTER A WITH # DIAERESIS 0xC5, "Å", # ISOlat1: LATIN CAPITAL LETTER A WITH RING # ABOVE 0xC6, "Æ", # ISOlat1: LATIN CAPITAL LETTER AE 0xC7, "Ç", # ISOlat1: LATIN CAPITAL LETTER C WITH CEDILLA 0xC8, "È", # ISOlat1: LATIN CAPITAL LETTER E WITH GRAVE 0xC9, "É", # ISOlat1: LATIN CAPITAL LETTER E WITH ACUTE 0xCA, "Ê", # ISOlat1: LATIN CAPITAL LETTER E WITH # CIRCUMFLEX 0xCB, "Ë", # ISOlat1: LATIN CAPITAL LETTER E WITH # DIAERESIS 0xCC, "Ì", # ISOlat1: LATIN CAPITAL LETTER I WITH GRAVE 0xCD, "Í", # ISOlat1: LATIN CAPITAL LETTER I WITH ACUTE 0xCE, "Î", # ISOlat1: LATIN CAPITAL LETTER I WITH # CIRCUMFLEX 0xCF, "Ï", # ISOlat1: LATIN CAPITAL LETTER I WITH # DIAERESIS 0xD0, "Ğ", # ISOlat2: LATIN CAPITAL LETTER G WITH BREVE 0xD1, "Ñ", # ISOlat1: LATIN CAPITAL LETTER N WITH TILDE 0xD2, "Ò", # ISOlat1: LATIN CAPITAL LETTER O WITH GRAVE 0xD3, "Ó", # ISOlat1: LATIN CAPITAL LETTER O WITH ACUTE 0xD4, "Ô", # ISOlat1: LATIN CAPITAL LETTER O WITH # CIRCUMFLEX 0xD5, "Õ", # ISOlat1: LATIN CAPITAL LETTER O WITH TILDE 0xD6, "Ö", # ISOlat1: LATIN CAPITAL LETTER O WITH # DIAERESIS 0xD7, "×", # ISOnum : MULTIPLICATION SIGN 0xD8, "Ø", # ISOlat1: LATIN 
CAPITAL LETTER O WITH STROKE 0xD9, "Ù", # ISOlat1: LATIN CAPITAL LETTER U WITH GRAVE 0xDA, "Ú", # ISOlat1: LATIN CAPITAL LETTER U WITH ACUTE 0xDB, "Û", # ISOlat1: LATIN CAPITAL LETTER U WITH # CIRCUMFLEX 0xDC, "Ü", # ISOlat1: LATIN CAPITAL LETTER U WITH # DIAERESIS 0xDD, "İ", # ISOlat2: LATIN CAPITAL LETTER I WITH DOT # ABOVE 0xDE, "Ş", # ISOlat2: LATIN CAPITAL LETTER S WITH CEDILLA 0xDF, "ß", # ISOlat1: LATIN SMALL LETTER SHARP S (German) 0xE0, "à", # ISOlat1: LATIN SMALL LETTER A WITH GRAVE 0xE1, "á", # ISOlat1: LATIN SMALL LETTER A WITH ACUTE 0xE2, "â", # ISOlat1: LATIN SMALL LETTER A WITH CIRCUMFLEX 0xE3, "ã", # ISOlat1: LATIN SMALL LETTER A WITH TILDE 0xE4, "ä", # ISOlat1: LATIN SMALL LETTER A WITH DIAERESIS 0xE5, "å", # ISOlat1: LATIN SMALL LETTER A WITH RING ABOVE 0xE6, "æ", # ISOlat1: LATIN SMALL LETTER AE 0xE7, "ç", # ISOlat1: LATIN SMALL LETTER C WITH CEDILLA 0xE8, "è", # ISOlat1: LATIN SMALL LETTER E WITH GRAVE 0xE9, "é", # ISOlat1: LATIN SMALL LETTER E WITH ACUTE 0xEA, "ę", # ISOlat2: LATIN SMALL LETTER E WITH OGONEK 0xEB, "ë", # ISOlat1: LATIN SMALL LETTER E WITH DIAERESIS 0xEC, "ė", # ISOlat2: LATIN SMALL LETTER E WITH DOT ABOVE 0xED, "í", # ISOlat1: LATIN SMALL LETTER I WITH ACUTE 0xEE, "î", # ISOlat1: LATIN SMALL LETTER I WITH CIRCUMFLEX 0xEF, "ī", # ISOlat2: LATIN SMALL LETTER I WITH MACRON 0xF0, "ğ", # ISOlat2: LATIN SMALL LETTER G WITH BREVE 0xF1, "ñ", # ISOlat1: LATIN SMALL LETTER N WITH TILDE 0xF2, "ò", # ISOlat1: LATIN SMALL LETTER O WITH GRAVE 0xF3, "ó", # ISOlat1: LATIN SMALL LETTER O WITH ACUTE 0xF4, "ô", # ISOlat1: LATIN SMALL LETTER O WITH CIRCUMFLEX 0xF5, "õ", # ISOlat1: LATIN SMALL LETTER O WITH TILDE 0xF6, "ö", # ISOlat1: LATIN SMALL LETTER O WITH DIAERESIS 0xF7, "÷", # ISOlat1: DIVISION SIGN 0xF8, "ø", # ISOlat1: LATIN SMALL LETTER O WITH STROKE 0xF9, "ù", # ISOlat1: LATIN SMALL LETTER U WITH GRAVE 0xFA, "ú", # ISOlat1: LATIN SMALL LETTER U WITH ACUTE 0xFB, "û", # ISOlat1: LATIN SMALL LETTER U WITH CIRCUMFLEX 0xFC, "ü", # ISOlat1: 
LATIN SMALL LETTER U WITH DIAERESIS 0xFD, "ı", # ISOlat2: LATIN SMALL LETTER I DOTLESS 0xFE, "ş", # ISOlat2: LATIN SMALL LETTER S WITH CEDILLA 0xFF, "ÿ", # ISOlat1: LATIN SMALL LETTER Y WITH DIAERESIS ); *LATIN5_To_Ent = *ISO_8859_9_To_Ent; ##--------------------------------------------------------------------------- ## ISO-8859-10: Latin-6 ##--------------------------------------------------------------------------- %ISO_8859_10_To_Ent = ( #-------------------------------------------------------------------------- # Hex Code Entity Ref # ISO external entity and description #-------------------------------------------------------------------------- 0xA1, "Ą", # ISOlat1: LATIN CAPITAL LETTER A WITH OGONEK 0xA2, "Ē", # ISOlat2: LATIN CAPITAL LETTER E WITH MACRON 0xA3, "Ģ", # ISOlat2: LATIN CAPITAL LETTER G WITH CEDILLA 0xA4, "Ī", # ISOlat2: LATIN CAPITAL LETTER I WITH MACRON 0xA5, "Ĩ", # ISOlat2: LATIN CAPITAL LETTER I WITH TILDE 0xA6, "Ķ", # ISOlat2: LATIN CAPITAL LETTER K WITH CEDILLA 0xA7, "Ļ", # ISOlat2: LATIN CAPITAL LETTER L WITH CEDILLA 0xA8, "Ń", # ISOlat2: LATIN CAPITAL LETTER N WITH ACUTE 0xA9, "Ŗ", # ISOlat2: LATIN CAPITAL LETTER R WITH CEDILLA 0xAA, "Š", # ISOlat2: LATIN CAPITAL LETTER S WITH CARON 0xAB, "Ŧ", # ISOlat2: LATIN CAPITAL LETTER T WITH STROKE 0xAC, "Ž", # ISOlat2: LATIN CAPITAL LETTER Z WITH CARON 0xAD, "­", # ISOnum : SOFT HYPHEN 0xAE, "ĸ", # ISOlat2: LATIN SMALL LETTER KRA (Greenlandic) 0xAF, "&end;", # ISOlat?: LATIN SMALL LETTER END (Lappish) 0xB0, "đ", # ISOlat2: LATIN SMALL LETTER d WITH STROKE 0xB1, "ą", # ISOlat2: LATIN SMALL LETTER a WITH OGONEK 0xB2, "ē", # ISOlat2: LATIN SMALL LETTER e WITH MACRON 0xB3, "&gcedil;", # ISOlat2: LATIN SMALL LETTER g WITH CEDILLA 0xB4, "ī", # ISOlat2: LATIN SMALL LETTER i WITH MACRON 0xB5, "ĩ", # ISOlat2: LATIN SMALL LETTER i WITH TILDE 0xB6, "ķ", # ISOlat2: LATIN SMALL LETTER k WITH CEDILLA 0xB7, "ļ", # ISOlat2: LATIN SMALL LETTER l WITH CEDILLA 0xB8, "ń", # ISOlat2: LATIN SMALL LETTER n WITH ACUTE 
0xB9, "ŗ", # ISOlat2: LATIN SMALL LETTER r WITH CEDILLA 0xBA, "š", # ISOlat2: LATIN SMALL LETTER s WITH CARON 0xBB, "ŧ", # ISOlat2: LATIN SMALL LETTER t WITH STROKE 0xBC, "ž", # ISOlat2: LATIN SMALL LETTER z WITH CARON 0xBD, "§", # ISOnum : SECTION SIGN 0xBE, "ß", # ISOlat1: LATIN SMALL LETTER SHARP s (German) 0xBF, "ŋ", # ISOlat2: LATIN SMALL LETTER ENG (Lappish) 0xC0, "Ā", # ISOlat2: LATIN CAPITAL LETTER A WITH MACRON 0xC1, "Á", # ISOlat1: LATIN CAPITAL LETTER A WITH ACUTE 0xC2, "Â", # ISOlat1: LATIN CAPITAL LETTER A WITH # CIRCUMFLEX 0xC3, "Ã", # ISOlat1: LATIN CAPITAL LETTER A WITH TILDE 0xC4, "Ä", # ISOlat1: LATIN CAPITAL LETTER A WITH # DIAERESIS 0xC5, "Å", # ISOlat1: LATIN CAPITAL LETTER A WITH RING # ABOVE 0xC6, "Æ", # ISOlat1: LATIN CAPITAL LETTER AE 0xC7, "Į", # ISOlat2: LATIN CAPITAL LETTER I WITH OGONEK 0xC8, "Č", # ISOlat2: LATIN CAPITAL LETTER C WITH CARON 0xC9, "É", # ISOlat1: LATIN CAPITAL LETTER E WITH ACUTE 0xCA, "Ę", # ISOlat2: LATIN CAPITAL LETTER E WITH OGONEK 0xCB, "Ë", # ISOlat1: LATIN CAPITAL LETTER E WITH # DIAERESIS 0xCC, "Ė", # ISOlat2: LATIN CAPITAL LETTER E WITH # DOT ABOVE 0xCD, "Í", # ISOlat1: LATIN CAPITAL LETTER I WITH ACUTE 0xCE, "Î", # ISOlat1: LATIN CAPITAL LETTER I WITH # CIRCUMFLEX 0xCF, "Ï", # ISOlat1: LATIN CAPITAL LETTER I WITH # DIAERESIS 0xD0, "Đ", # ISOlat2: LATIN CAPITAL LETTER D WITH STROKE 0xD1, "Ņ", # ISOlat2: LATIN CAPITAL LETTER N WITH CEDILLA 0xD2, "Ō", # ISOlat2: LATIN CAPITAL LETTER O WITH MACRON 0xD3, "Ó", # ISOlat1: LATIN CAPITAL LETTER O WITH ACUTE 0xD4, "Ô", # ISOlat1: LATIN CAPITAL LETTER O WITH # CIRCUMFLEX 0xD5, "Õ", # ISOlat1: LATIN CAPITAL LETTER O WITH TILDE 0xD6, "Ö", # ISOlat1: LATIN CAPITAL LETTER O WITH # DIAERESIS 0xD7, "Ũ", # ISOlat2: LATIN CAPITAL LETTER U WITH TILDE 0xD8, "Ø", # ISOlat1: LATIN CAPITAL LETTER O WITH STROKE 0xD9, "Ų", # ISOlat2: LATIN CAPITAL LETTER U WITH OGONEK 0xDA, "Ú", # ISOlat1: LATIN CAPITAL LETTER U WITH ACUTE 0xDB, "Û", # ISOlat1: LATIN CAPITAL LETTER U WITH # CIRCUMFLEX 
0xDC, "Ü", # ISOlat1: LATIN CAPITAL LETTER U WITH # DIAERESIS 0xDD, "Ý", # ISOlat1: LATIN CAPITAL LETTER Y WITH ACUTE 0xDE, "Þ", # ISOlat1: LATIN CAPITAL LETTER THORN # (Icelandic) 0xDF, "Ū", # ISOlat2: LATIN CAPITAL LETTER U WITH MACRON 0xE0, "ā", # ISOlat2: LATIN SMALL LETTER a WITH MACRON 0xE1, "á", # ISOlat1: LATIN SMALL LETTER a WITH ACUTE 0xE2, "â", # ISOlat1: LATIN SMALL LETTER a WITH CIRCUMFLEX 0xE3, "ã", # ISOlat1: LATIN SMALL LETTER a WITH TILDE 0xE4, "ä", # ISOlat1: LATIN SMALL LETTER a WITH DIAERESIS 0xE5, "å", # ISOlat1: LATIN SMALL LETTER a WITH RING ABOVE 0xE6, "æ", # ISOlat1: LATIN SMALL LETTER ae 0xE7, "į", # ISOlat2: LATIN SMALL LETTER i WITH OGONEK 0xE8, "č", # ISOlat2: LATIN SMALL LETTER c WITH CARON 0xE9, "é", # ISOlat1: LATIN SMALL LETTER e WITH ACUTE 0xEA, "ę", # ISOlat2: LATIN SMALL LETTER e WITH OGONEK 0xEB, "ë", # ISOlat1: LATIN SMALL LETTER e WITH DIAERESIS 0xEC, "ė", # ISOlat2: LATIN SMALL LETTER e WITH DOT ABOVE 0xED, "í", # ISOlat1: LATIN SMALL LETTER i WITH ACUTE 0xEE, "î", # ISOlat1: LATIN SMALL LETTER i WITH CIRCUMFLEX 0xEF, "ï", # ISOlat1: LATIN SMALL LETTER i WITH DIAERESIS 0xF0, "ð", # ISOlat1: LATIN SMALL LETTER ETH (Icelandic) 0xF1, "ņ", # ISOlat2: LATIN SMALL LETTER n WITH CEDILLA 0xF2, "ō", # ISOlat2: LATIN SMALL LETTER o WITH MACRON 0xF3, "ó", # ISOlat1: LATIN SMALL LETTER o WITH ACUTE 0xF4, "ô", # ISOlat1: LATIN SMALL LETTER o WITH CIRCUMFLEX 0xF5, "õ", # ISOlat1: LATIN SMALL LETTER o WITH TILDE 0xF6, "ö", # ISOlat1: LATIN SMALL LETTER o WITH DIAERESIS 0xF7, "ũ", # ISOlat2: LATIN SMALL LETTER u WITH TILDE 0xF8, "ø", # ISOlat1: LATIN SMALL LETTER o WITH STROKE 0xF9, "ų", # ISOlat2: LATIN SMALL LETTER u WITH OGONEK 0xFA, "ú", # ISOlat1: LATIN SMALL LETTER u WITH ACUTE 0xFB, "û", # ISOlat1: LATIN SMALL LETTER u WITH CIRCUMFLEX 0xFC, "ü", # ISOlat1: LATIN SMALL LETTER u WITH DIAERESIS 0xFD, "ý", # ISOlat1: LATIN SMALL LETTER y WITH ACUTE 0xFE, "þ", # ISOlat1: LATIN SMALL LETTER THORN (Icelandic) 0xFF, "ū", # ISOlat2: LATIN 
SMALL LETTER u WITH MACRON ); *LATIN6_To_Ent = *ISO_8859_10_To_Ent; ############################################################################### ## Routines ############################################################################### ##---------------------------------------------------------------------------## ## str2sgml converts a string encoded by $charset to an sgml ## string where special characters are converted to entity ## references. ## ## $return_data = iso_8859'str2sgml($data, $charset, $only8bit); ## ## If $only8bit is non-zero, than only 8-bit characters are ## translated. ## sub str2sgml { local($data, $charset, $only8bit) = ($_[0], $_[1], $_[2]); local($ret, $offset, $len) = ('', 0, 0); # Get mapping (this method works for Perl 4 and 5) $charset =~ tr/a-z/A-Z/; $charset =~ tr/-/_/; local(*map) = ("${charset}_To_Ent"); # Convert string $len = length($data); while ($offset < $len) { $char = unpack("C", substr($data, $offset++, 1)); if ($only8bit && $char < 0xA0) { $ret .= pack("C", $char); } else { $ret .= ($map{$char} || $US_ASCII_To_Ent{$char} || pack("C", $char)); } } $ret; } ##---------------------------------------------------------------------------## 1;