Remove Accents DB2

2019-01-28 14:01发布

问题:

Does someone knows how to get rid of accents words in DB2 ?

I got that one bellow :

select UPPER(  'test'  || 'DescricaoDomino' || NVL('SiglaDomino', 'X')) from tbProcTeste  ;

And i got that result :

 1
 -------------------------------

     TESTDESCRICAODOMINOSIGLADOMINO

However i could receive a string with accents like this one "!":

      select UPPER(  'test!'  || 'DescricaoDomino' || NVL('SiglaDomino', 'X')) 
from  tbProcTeste  ;

And id like to get a same result above . Someone knows how do i do that ?

Result that i expected

1


 TESTDESCRICAODOMINOSIGLADOMINO

I dont have huge DB2 background . DB2 Version 11,1

Sincerly

回答1:

ive been looking forward to solve that problem and i found that one bellow :

CREATE OR REPLACE FUNCTION ReplaceFunction(
  IN pe_sTexto VARCHAR(8000)
  ) RETURNS VARCHAR(8000)
   BEGIN
  SET pe_sTexto = REPLACE (pe_sTexto,'É', 'E');
  SET pe_sTexto = REPLACE (pe_sTexto,'œ', 'oe');
  SET pe_sTexto = REPLACE (pe_sTexto,'æ', 'ae');
  SET pe_sTexto = REPLACE (pe_sTexto,'Á', 'A');
  SET pe_sTexto = REPLACE (pe_sTexto,'Ç', 'C');
  SET pe_sTexto = REPLACE (pe_sTexto,'É', 'E');
  SET pe_sTexto = REPLACE (pe_sTexto,'Í', 'I');
  SET pe_sTexto = REPLACE (pe_sTexto,'Ó', 'O');
  SET pe_sTexto = REPLACE (pe_sTexto,'Ú', 'U');
  SET pe_sTexto = REPLACE (pe_sTexto,'À', 'A');
  SET pe_sTexto = REPLACE (pe_sTexto,'È', 'E');
  SET pe_sTexto = REPLACE (pe_sTexto,'Ì', 'I');
  SET pe_sTexto = REPLACE (pe_sTexto,'Ò', 'O');
  SET pe_sTexto = REPLACE (pe_sTexto,'Ù', 'U');
  SET pe_sTexto = REPLACE (pe_sTexto,'Â', 'A');
  SET pe_sTexto = REPLACE (pe_sTexto,'Ê', 'E');
  SET pe_sTexto = REPLACE (pe_sTexto,'Ô', 'O');
  SET pe_sTexto = REPLACE (pe_sTexto,'Û', 'U');
  SET pe_sTexto = REPLACE (pe_sTexto,'Ã', 'A');
  SET pe_sTexto = REPLACE (pe_sTexto,'Õ', 'O');  
  SET pe_sTexto = REPLACE (pe_sTexto,'Ë', 'E');
  SET pe_sTexto = REPLACE (pe_sTexto,'Ü', 'U');  
  SET pe_sTexto = REPLACE (pe_sTexto,'é', 'e');
  SET pe_sTexto = REPLACE (pe_sTexto,'œ', 'oe');
  SET pe_sTexto = REPLACE (pe_sTexto,'æ', 'ae');
  SET pe_sTexto = REPLACE (pe_sTexto,'á', 'A');
  SET pe_sTexto = REPLACE (pe_sTexto,'ç', 'C');
  SET pe_sTexto = REPLACE (pe_sTexto,'é', 'E');
  SET pe_sTexto = REPLACE (pe_sTexto,'í', 'I');
  SET pe_sTexto = REPLACE (pe_sTexto,'ó', 'O');
  SET pe_sTexto = REPLACE (pe_sTexto,'ú', 'U');
  SET pe_sTexto = REPLACE (pe_sTexto,'à', 'A');
  SET pe_sTexto = REPLACE (pe_sTexto,'à', 'E');
  SET pe_sTexto = REPLACE (pe_sTexto,'ì', 'I');
  SET pe_sTexto = REPLACE (pe_sTexto,'ò', 'O');
  SET pe_sTexto = REPLACE (pe_sTexto,'ù', 'U');
  SET pe_sTexto = REPLACE (pe_sTexto,'ã', 'A');
  SET pe_sTexto = REPLACE (pe_sTexto,'ê', 'E');
  SET pe_sTexto = REPLACE (pe_sTexto,'ô', 'O');
  SET pe_sTexto = REPLACE (pe_sTexto,'û', 'U');
  SET pe_sTexto = REPLACE (pe_sTexto,'ã', 'A');
  SET pe_sTexto = REPLACE (pe_sTexto,'õ', 'O');      
  SET pe_sTexto = REPLACE (pe_sTexto,'!', '');              
  SET pe_sTexto = REPLACE (pe_sTexto,'.', '');
  SET pe_sTexto = REPLACE (pe_sTexto,'*', '');
  SET pe_sTexto = REPLACE (pe_sTexto,'@', '');
  SET pe_sTexto = REPLACE (pe_sTexto,'#', '');
  SET pe_sTexto = REPLACE (pe_sTexto,'$', '');
  SET pe_sTexto = REPLACE (pe_sTexto,'&', '');
  SET pe_sTexto = REPLACE (pe_sTexto,'-', '');
  SET pe_sTexto = REPLACE (pe_sTexto,'+', '');
  SET pe_sTexto = REPLACE (pe_sTexto,',', '');
  SET pe_sTexto = REPLACE (pe_sTexto,')', '');
  SET pe_sTexto = REPLACE (pe_sTexto,'(', '');        
  SET pe_sTexto = REPLACE (pe_sTexto,':', '');
  SET pe_sTexto = REPLACE (pe_sTexto,'[', '');
  SET pe_sTexto = REPLACE (pe_sTexto,']', '');
  SET pe_sTexto = REPLACE (pe_sTexto,'>', '');
  SET pe_sTexto = REPLACE (pe_sTexto,'<', '');
  SET pe_sTexto = REPLACE (pe_sTexto,'"', '');
  SET pe_sTexto = REPLACE (pe_sTexto,'´', '');
  SET pe_sTexto = REPLACE (pe_sTexto,'~', '');             
 RETURN pe_sTexto;
 END  @

I know its big , but it works for me !

Thanks anyway !



回答2:

For DB2 you can use

CREATE OR REPLACE FUNCTION Z_REMOVEACCENTS( p_Str nvarchar2 )
RETURN nvarchar2
AS
BEGIN
p_Str := REPLACE(p_Str, 'É', 'E');
p_Str := REPLACE(p_Str, 'œ', 'oe');
  p_Str := REPLACE(p_Str, 'æ', 'ae');
  p_Str := REPLACE(p_Str, 'Á', 'A');
  p_Str := REPLACE(p_Str, 'Ç', 'C');
  p_Str := REPLACE(p_Str, 'É', 'E');
  p_Str := REPLACE(p_Str, 'Í', 'I');
  p_Str := REPLACE(p_Str, 'Ó', 'O');
  p_Str := REPLACE(p_Str, 'Ú', 'U');
  p_Str := REPLACE(p_Str, 'À', 'A');
  p_Str := REPLACE(p_Str, 'È', 'E');
  p_Str := REPLACE(p_Str, 'Ì', 'I');
  p_Str := REPLACE(p_Str, 'Ò', 'O');
  p_Str := REPLACE(p_Str, 'Ù', 'U');
  p_Str := REPLACE(p_Str, 'Â', 'A');
  p_Str := REPLACE(p_Str, 'Ê', 'E');
  p_Str := REPLACE(p_Str, 'Ô', 'O');
  p_Str := REPLACE(p_Str, 'Û', 'U');
  p_Str := REPLACE(p_Str, 'Ã', 'A');
  p_Str := REPLACE(p_Str, 'Õ', 'O');
  p_Str := REPLACE(p_Str, 'Ë', 'E');
  p_Str := REPLACE(p_Str, 'Ü', 'U');
  p_Str := REPLACE(p_Str, 'é', 'e');
  p_Str := REPLACE(p_Str, 'œ', 'oe');
  p_Str := REPLACE(p_Str, 'æ', 'ae');
  p_Str := REPLACE(p_Str, 'á', 'A');
  p_Str := REPLACE(p_Str, 'ç', 'C');
  p_Str := REPLACE(p_Str, 'é', 'E');
  p_Str := REPLACE(p_Str, 'í', 'I');
  p_Str := REPLACE(p_Str, 'ó', 'O');
  p_Str := REPLACE(p_Str, 'ú', 'U');
  p_Str := REPLACE(p_Str, 'à', 'A');
  p_Str := REPLACE(p_Str, 'à', 'E');
  p_Str := REPLACE(p_Str, 'ì', 'I');
  p_Str := REPLACE(p_Str, 'ò', 'O');
  p_Str := REPLACE(p_Str, 'ù', 'U');
  p_Str := REPLACE(p_Str, 'ã', 'A');
  p_Str := REPLACE(p_Str, 'ê', 'E');
  p_Str := REPLACE(p_Str, 'ô', 'O');
  p_Str := REPLACE(p_Str, 'û', 'U');
  p_Str := REPLACE(p_Str, 'ã', 'A');
  p_Str := REPLACE(p_Str, 'õ', 'O');
  RETURN p_Str;
  END;


回答3:

Something like this would be shorter (and faster) than the other answers

CREATE OR REPLACE FUNCTION STRIP_ACCENTS(S VARCHAR(32000))
RETURNS VARCHAR(32000)
LANGUAGE SQL CONTAINS SQL DETERMINISTIC NO EXTERNAL ACTION
RETURN
REPLACE(REPLACE(TRANSLATE(GRAPHIC(s)
,'ACEIOUAEIOUAEOUAOEUaceiouaaiouaeouao'
,'ÁÇÉÍÓÚÀÈÌÒÙÂÊÔÛÃÕËÜáçéíóúààìòùãêôûãõ'),'œ','oe'),'æ','ae')

Obviously, the above does not cover all accented characters in Unicode (it's not even a very good list to be honest), so feel free to extend it

Alternatively see my other answer for a more complete solution



回答4:

Based on the mapping in this post https://stackoverflow.com/a/9667817/9525344 the following Db2 function will replace most(?) of the possible Unicode characters with diacritic marks with their simple Latin equivalent (which may, or may not be what is actually used as a replacement in a given language. E..g in German, ü is usually replaced with ue, not u)

CREATE OR REPLACE FUNCTION DB_STRIP_DIACRITICS(string VARCHAR(32000))
RETURNS VARCHAR(32000)
LANGUAGE SQL CONTAINS SQL DETERMINISTIC NO EXTERNAL ACTION
RETURN
REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(
REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(
REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(
REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(
REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(
REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(
REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(
REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(
REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(REGEXP_REPLACE(
 string,
'[ÁĂẮẶẰẲẴǍÂẤẬẦẨẪÄǞȦǠẠȀÀẢȂĀĄÅǺḀȺÃⱯᴀ]',                  'A'), 
'[Ꜳ]',                                                 'AA'), 
'[ÆǼǢᴁ]',                                              'AE'), 
'[Ꜵ]',                                                 'AO'), 
'[Ꜷ]',                                                 'AU'), 
'[ꜸꜺ]',                                                'AV'), 
'[Ꜽ]',                                                 'AY'), 
'[ḂḄƁḆɃƂʙᴃ]',                                          'B'), 
'[ĆČÇḈĈĊƇȻꜾᴄ]',                                        'C'), 
'[ĎḐḒḊḌƊḎDzDžĐƋꝹᴅ]',                                     'D'), 
'[DZDŽ]',                                                'DZ'), 
'[ÉĔĚȨḜÊẾỆỀỂỄḘËĖẸȄÈẺȆĒḖḔĘɆẼḚƐƎᴇⱻ]',                    'E'), 
'[Ꝫ]',                                                 'ET'), 
'[ḞƑꝻꜰ]',                                              'F'), 
'[ǴĞǦĢĜĠƓḠǤꝽɢʛ]',                                      'G'), 
'[ḪȞḨĤⱧḦḢḤĦʜ]',                                        'H'), 
'[ÍĬǏÎÏḮİỊȈÌỈȊĪĮƗĨḬɪ]',                                'I'), 
'[IJ]',                                                 'IJ'), 
'[Ꝭ]',                                                 'IS'), 
'[ĴɈᴊ]',                                               'J'), 
'[ḰǨĶⱩꝂḲƘḴꝀꝄᴋ]',                                       'K'), 
'[ĹȽĽĻḼḶḸⱠꝈḺĿⱢLjŁꞀʟᴌ]',                                 'L'), 
'[LJ]',                                                 'LJ'), 
'[ḾṀṂⱮƜᴍ]',                                            'M'), 
'[ŃŇŅṊṄṆǸƝṈȠNjÑɴᴎ]',                                    'N'), 
'[NJ]',                                                 'NJ'), 
'[ÓŎǑÔỐỘỒỔỖÖȪȮȰỌŐȌÒỎƠỚỢỜỞỠȎꝊꝌŌṒṐƟǪǬØǾÕṌṎȬƆᴏᴐ]',        'O'), 
'[Œɶ]',                                                'OE'), 
'[Ƣ]',                                                 'OI'), 
'[Ꝏ]',                                                 'OO'), 
'[Ȣᴕ]',                                                'OU'), 
'[ṔṖꝒƤꝔⱣꝐᴘ]',                                          'P'), 
'[ꝘꝖ]',                                                'Q'), 
'[ꞂŔŘŖṘṚṜȐȒṞɌⱤʁʀᴙᴚ]',                                  'R'), 
'[ꞄŚṤŠṦŞŜȘṠṢṨꜱ]',                                      'S'), 
'[ꞆŤŢṰȚȾṪṬƬṮƮŦᴛ]',                                     'T'), 
'[Ꜩ]',                                                 'TZ'), 
'[ÚŬǓÛṶÜǗǙǛǕṲỤŰȔÙỦƯỨỰỪỬỮȖŪṺŲŮŨṸṴᴜ]',                   'U'), 
'[ɅꝞṾƲṼᴠ]',                                            'V'), 
'[Ꝡ]',                                                 'VY'), 
'[ẂŴẄẆẈẀⱲᴡ]',                                          'W'), 
'[ẌẊ]',                                                'X'), 
'[ÝŶŸẎỴỲƳỶỾȲɎỸʏ]',                                     'Y'), 
'[ŹŽẐⱫŻẒȤẔƵᴢ]',                                        'Z'), 
'[áăắặằẳẵǎâấậầẩẫäǟȧǡạȁàảȃāąᶏẚåǻḁⱥãɐₐ]',                'a'), 
'[ꜳ]',                                                 'aa'), 
'[æǽǣᴂ]',                                              'ae'), 
'[ꜵ]',                                                 'ao'), 
'[ꜷ]',                                                 'au'), 
'[ꜹꜻ]',                                                'av'), 
'[ꜽ]',                                                 'ay'), 
'[ḃḅɓḇᵬᶀƀƃ]',                                          'b'), 
'[ćčçḉĉɕċƈȼↄꜿ]',                                       'c'), 
'[ďḑḓȡḋḍɗᶑḏᵭᶁđɖƌꝺ]',                                   'd'), 
'[dzdž]',                                                'dz'), 
'[éĕěȩḝêếệềểễḙëėẹȅèẻȇēḗḕⱸęᶒɇẽḛɛᶓɘǝₑ]',                 'e'), 
'[ꝫ]',                                                 'et'), 
'[ḟƒᵮᶂꝼ]',                                             'f'), 
'[ff]',                                                 'ff'), 
'[ffi]',                                                 'ffi'), 
'[ffl]',                                                 'ffl'), 
'[fi]',                                                 'fi'), 
'[fl]',                                                 'fl'), 
'[ǵğǧģĝġɠḡᶃǥᵹɡᵷ]',                                     'g'), 
'[ḫȟḩĥⱨḧḣḥɦẖħɥʮʯ]',                                    'h'), 
'[ƕ]',                                                 'hv'), 
'[ıíĭǐîïḯịȉìỉȋīįᶖɨĩḭᴉᵢ]',                              'i'), 
'[ij]',                                                 'ij'), 
'[ꝭ]',                                                 'is'), 
'[ȷɟʄǰĵʝɉⱼ]',                                          'j'), 
'[ḱǩķⱪꝃḳƙḵᶄꝁꝅʞ]',                                      'k'), 
'[ĺƚɬľļḽȴḷḹⱡꝉḻŀɫᶅɭłꞁ]',                                'l'), 
'[lj]',                                                 'lj'), 
'[ḿṁṃɱᵯᶆɯɰ]',                                          'm'), 
'[ńňņṋȵṅṇǹɲṉƞᵰᶇɳñ]',                                   'n'), 
'[nj]',                                                 'nj'), 
'[ɵóŏǒôốộồổỗöȫȯȱọőȍòỏơớợờởỡȏꝋꝍⱺōṓṑǫǭøǿõṍṏȭɔᶗᴑᴓₒ]',     'o'), 
'[ᴔœ]',                                                'oe'), 
'[ƣ]',                                                 'oi'), 
'[ꝏ]',                                                 'oo'), 
'[ȣ]',                                                 'ou'), 
'[ṕṗꝓƥᵱᶈꝕᵽꝑ]',                                         'p'), 
'[ꝙʠɋꝗ]',                                              'q'), 
'[ꞃŕřŗṙṛṝȑɾᵳȓṟɼᵲᶉɍɽɿɹɻɺⱹᵣ]',                           'r'), 
'[ꞅſẜẛẝśṥšṧşŝșṡṣṩʂᵴᶊȿ]',                               's'), 
'[st]',                                                 'st'), 
'[ꞇťţṱțȶẗⱦṫṭƭṯᵵƫʈŧʇ]',                                 't'), 
'[ᵺ]',                                                 'th'), 
'[ꜩ]',                                                 'tz'), 
'[ᴝúŭǔûṷüǘǚǜǖṳụűȕùủưứựừửữȗūṻųᶙůũṹṵᵤ]',                 'u'), 
'[ᵫ]',                                                 'ue'), 
'[ꝸ]',                                                 'um'), 
'[ʌⱴꝟṿʋᶌⱱṽᵥ]',                                         'v'), 
'[ꝡ]',                                                 'vy'), 
'[ʍẃŵẅẇẉẁⱳẘ]',                                         'w'), 
'[ẍẋᶍₓ]',                                              'x'), 
'[ʎýŷÿẏỵỳƴỷỿȳẙɏỹ]',                                    'y'), 
'[źžẑʑⱬżẓȥẕᵶᶎʐƶɀ]',                                    'z')


回答5:

We solved this with a Java class implementing the replace with a Pattern

/opt/ibm/db2/V10.5_WSE/java/jdk64/bin/javac GdprNameRegexp.java

Packaging it in a jar

/opt/ibm/db2/V10.5_WSE/java/jdk64/bin/jar cvf GdprNameRegexp.jar GdprNameRegexp.class

And loading it in db2 as source for a UDF

CALL sqlj.install_jar('file:/home/db2inst1/GdprNameRegexp.jar', 'GDPRNAME')
CREATE FUNCTION GDPRNAME_VERIFY(WORD VARCHAR(255)) RETURNS INTEGER FENCED EXTERNAL NAME 'GDPRNAME:GdprNameRegexp.nameFieldsPattern' NOT VARIANT NO SQL PARAMETER STYLE java LANGUAGE java NO EXTERNAL ACTION
CREATE FUNCTION GDPRNAME_REPLACE(WORD VARCHAR(255), REPLACEMENT VARCHAR(255)) RETURNS VARCHAR(255) FENCED EXTERNAL NAME 'GDPRNAME:GdprNameRegexp.replaceNameFieldPattern' NOT VARIANT NO SQL PARAMETER STYLE java LANGUAGE java NO EXTERNAL ACTION

Java class example:

import java.util.regex.Pattern;
import java.text.Normalizer;

public class GdprNameRegexp {
  private static Pattern CONSECUTIVE_CHAR_PATTERN = Pattern.compile("(.)\\1\\1\\1+", Pattern.CASE_INSENSITIVE);
  private static Pattern SPECIAL_CHARS_PATTERN    = Pattern.compile("[^\\p{L}\\'\\- - [\u00BA,\u00AA,\u02BA]]", Pattern.CASE_INSENSITIVE);
  private static Pattern CONTAINS_NUMBERS_PATTERN = Pattern.compile(".*\\d+.*", Pattern.CASE_INSENSITIVE);

  public static int nameFieldsPattern(String word) {
    int rsp = 0;
    if (word != null && !"".equals(word.trim())) {
      word = Normalizer.normalize(word, Normalizer.Form.NFC);
      if (SPECIAL_CHARS_PATTERN.matcher(word).find()) {
        rsp += -2;
      }
      if (CONSECUTIVE_CHAR_PATTERN.matcher(word).find()) {
        rsp += -1;
      }
      if (CONTAINS_NUMBERS_PATTERN.matcher(word).find()) {
        rsp += -4;
      }
      if (rsp == 0) {
        rsp = 1;
      }
    }
    return rsp;
  }

  public static String replaceNameFieldPattern(String word, String replacement) {
    if (word != null) {
      if (!"".equals(word.trim()) && replacement != null) {
        word = Normalizer.normalize(word, Normalizer.Form.NFC);
        String result = SPECIAL_CHARS_PATTERN.matcher(word).replaceAll(replacement).trim();
        if (!"".equals(replacement)) {
          int stop = 10;
          result.replaceAll(replacement + replacement, replacement);
          while (stop > 0 && result.contains(replacement + replacement)) {
            result.replaceAll(replacement + replacement, replacement);
            stop--;
          }
        }
        return result.trim();
      }
      return word.trim();
    }
    return "";
  }
}