I have a javascript case conversion problem which I cannot solve due to non-English letters. My main concern is the Turkish alphabet.
What I need to do is this:
- hello world => Hello World
- HELLO WORLD => Hello World
- hELLO wOrLd => Hello World
Here is what I've accomplished so far:
/**
 * Upper-cases the string using Turkish casing rules.
 *
 * The Turkish-specific lower-case letters are mapped through a lookup table
 * first (so that "i" becomes "İ" and "ı" becomes "I"); everything else is
 * then handled by the built-in toUpperCase().
 *
 * @returns {string} the upper-cased string
 */
String.prototype.turkishToUpper = function () {
  var upperByLower = { 'i': 'İ', 'ş': 'Ş', 'ğ': 'Ğ', 'ü': 'Ü', 'ö': 'Ö', 'ç': 'Ç', 'ı': 'I' };

  function mapLetter(letter) {
    return upperByLower[letter];
  }

  return this.replace(/(([iışğüçö]))/g, mapLetter).toUpperCase();
};
/**
 * Lower-cases the string using Turkish casing rules.
 *
 * The Turkish-specific upper-case letters are mapped through a lookup table
 * first (so that "İ" becomes "i" and "I" becomes "ı"); everything else is
 * then handled by the built-in toLowerCase().
 *
 * @returns {string} the lower-cased string
 */
String.prototype.turkishToLower = function () {
  var lowerByUpper = { 'İ': 'i', 'I': 'ı', 'Ş': 'ş', 'Ğ': 'ğ', 'Ü': 'ü', 'Ö': 'ö', 'Ç': 'ç' };

  function mapLetter(letter) {
    return lowerByUpper[letter];
  }

  return this.replace(/(([İIŞĞÜÇÖ]))/g, mapLetter).toLowerCase();
};
/**
 * Converts the string to proper (title) case with Turkish casing rules:
 * the first character of every word is upper-cased with turkishToUpper()
 * and the remainder is lower-cased with turkishToLower(). Both helpers must
 * already be defined on String.prototype.
 *
 * A word starts at a word character and runs until whitespace or one of the
 * stop characters ":", "-", "=" or "/", so "hello-world" gives "Hello-World".
 *
 * @returns {string} the proper-cased string
 */
String.prototype.toProperCase = function () {
  // `\w` alone only matches ASCII [A-Za-z0-9_], which made a word such as
  // "şeker" start at its first ASCII letter ("şEker"). The added range
  // \u00C0-\u024F (Latin-1 Supplement through Latin Extended-B) covers the
  // Turkish letters ç, ğ, ı, İ, ö, ş, ü and other accented Latin letters.
  return this.replace(/[\w\u00C0-\u024F][^\s:\-=\/]*/g, function (word) {
    return word.charAt(0).turkishToUpper() + word.slice(1).turkishToLower();
  });
};
But this does not give me the correct results, and I suspect the cause is that the regex used in replace only handles ASCII characters, not Unicode.
When I test with Turkish characters, I get wrong results.
- şeker becomes şEker instead of Şeker
- çoban ırmak becomes çOban ıRmak instead of Çoban Irmak
Also, if this can ever get resolved, I need an icing on the cake to separate words not only by spaces, but also by some other stop characters such as : - = / etc so that
- hello-world becomes Hello-World
- hello:world becomes Hello:World
I've read through many similar questions here on SO, but no luck so far.
Thanks
Note: I think this is called Title Case but some have argued that it is Pascal Case. To be frank, I am interested in resolving the unicode issue (which I believe is the root cause) rather than semantics, so please forgive me if I've used wrong terminology :)
Standalone function:
/**
 * Returns `s` with the first character of every word upper-cased and the
 * rest of the word lower-cased. A word is a maximal run of characters that
 * are neither whitespace nor one of the delimiters ":" and "-".
 *
 * @param {string} s - text to convert
 * @returns {string} the converted text
 */
function toProperCase(s) {
  var wordPattern = /([^\s:\-])([^\s:\-]*)/g;

  function capitalise(word, head, tail) {
    return head.toUpperCase() + tail.toLowerCase();
  }

  return s.replace(wordPattern, capitalise);
}
Or, to extend String.prototype instead:
/**
 * Returns the string with the first character of every word upper-cased and
 * the rest of the word lower-cased. Words are separated by whitespace, ":"
 * or "-".
 *
 * @returns {string} the converted text
 */
String.prototype.toProperCase = function () {
  var wordPattern = /([^\s:\-])([^\s:\-]*)/g;

  return this.replace(wordPattern, function (word, initial, remainder) {
    return initial.toUpperCase() + remainder.toLowerCase();
  });
};
// Usage example; the expected result is shown on the following line.
"çoban ırmak becomes çOban ıRmak intead of Çoban Irmak Hello-wOrld".toProperCase();
// "Çoban Irmak Becomes Çoban Irmak Intead Of Çoban Irmak Hello-World"
Update:
The following code uses custom mapping tables to convert locale-specific characters (only partially tested). It adds three functions to String.prototype: toLocaleProperCase2, toLocaleLowerCase2 and toLocaleUpperCase2.
(function () {
    // Locale-specific case mappings, applied before the built-in
    // toUpperCase()/toLowerCase() calls.
    // IMPORTANT: locale keys must be written in lower case
    // ("tr-tr" for the "tr-TR" locale); look-ups lower-case the requested name.
    // NOTE: `lower` maps lower-case letters to their upper-case form and
    // `upper` maps upper-case letters to their lower-case form.
    var localeInfos = {
        "tr-tr": {
            lower: { i: "İ", ı: "I", ş: "Ş", ğ: "Ğ", ü: "Ü", ç: "Ç", ö: "Ö" },
            upper: { İ: "i", I: "ı", Ş: "ş", Ğ: "ğ", Ü: "ü", Ç: "ç", Ö: "ö" }
        }
    };

    var hasOwn = Object.prototype.hasOwnProperty;

    // Word pattern: group 1 is the first character of a word, group 2 the rest.
    // Add further delimiter characters to the mask if needed.
    var mask = "\\s:\\-";
    var wordRegExp = new RegExp("([^" + mask + "])([^" + mask + "]*)", "g");

    // Pre-compute, per locale, one character-class regex for each mapping table.
    Object.keys(localeInfos).forEach(function (name) {
        var info = localeInfos[name];
        info.lowerSearchRegExp = new RegExp("[" + Object.keys(info.lower).join("") + "]", "g");
        info.upperSearchRegExp = new RegExp("[" + Object.keys(info.upper).join("") + "]", "g");
    });

    // Returns the mapping tables for `locale`, or undefined when the locale is
    // missing (undefined/null) or unknown. Only own keys are considered, so a
    // name such as "constructor" is not mistaken for a locale.
    function findLocaleInfo(locale) {
        if (locale === undefined || locale === null) {
            return undefined;
        }
        var key = String(locale).toLowerCase();
        return hasOwn.call(localeInfos, key) ? localeInfos[key] : undefined;
    }

    // Lower-cases `text`, applying the locale's upper-to-lower table first.
    function toLower(text, info) {
        if (info) {
            text = text.replace(info.upperSearchRegExp, function (ch) { return info.upper[ch]; });
        }
        return text.toLowerCase();
    }

    // Upper-cases `text`, applying the locale's lower-to-upper table first.
    function toUpper(text, info) {
        if (info) {
            text = text.replace(info.lowerSearchRegExp, function (ch) { return info.lower[ch]; });
        }
        return text.toUpperCase();
    }

    /**
     * Upper-cases the first character of every word and lower-cases the rest.
     * Words are separated by whitespace, ":" or "-".
     * @param {string} [locale] - locale name such as "tr-TR"; when omitted or
     *     unknown, only the built-in case conversion is used.
     * @returns {string}
     */
    String.prototype.toLocaleProperCase2 = function toLocaleProperCase2(locale) {
        var info = findLocaleInfo(locale);
        return this.replace(wordRegExp, function (word, head, tail) {
            return toUpper(head, info) + toLower(tail, info);
        });
    };

    /**
     * Lower-cases the string, honouring the given locale's special letters.
     * @param {string} [locale] - locale name such as "tr-TR"
     * @returns {string}
     */
    String.prototype.toLocaleLowerCase2 = function toLocaleLowerCase2(locale) {
        return toLower(String(this), findLocaleInfo(locale));
    };

    /**
     * Upper-cases the string, honouring the given locale's special letters.
     * @param {string} [locale] - locale name such as "tr-TR"
     * @returns {string}
     */
    String.prototype.toLocaleUpperCase2 = function toLocaleUpperCase2(locale) {
        return toUpper(String(this), findLocaleInfo(locale));
    };
})();
// Demo: print the sample string converted with each String.prototype helper,
// first with the Turkish locale and then without a locale.
var sss="çoban ırmak ibecıoimes çOban ıRmak intead of Çoban IrImaİk Hello-wOrld";
console.log("Origin: ", sss);
[
    ["Proper", "toLocaleProperCase2"],
    ["Lower", "toLocaleLowerCase2"],
    ["Upper", "toLocaleUpperCase2"]
].forEach(function (entry) {
    var label = entry[0];
    var method = entry[1];
    console.log(label + " TR: ", sss[method]("tr-TR"));
    console.log(label + ": ", sss[method]());
});
// Origin: çoban ırmak ibecıoimes çOban ıRmak intead of Çoban IrImaİk Hello-wOrld
// Proper TR: Çoban Irmak İbecıoimes Çoban Irmak İntead Of Çoban Irımaik Hello-World
// Proper: Çoban Irmak Ibecıoimes Çoban Irmak Intead Of Çoban Irimaik Hello-World
// Lower TR: çoban ırmak ibecıoimes çoban ırmak intead of çoban ırımaik hello-world
// Lower: çoban ırmak ibecıoimes çoban ırmak intead of çoban irimaik hello-world
// Upper TR: ÇOBAN IRMAK İBECIOİMES ÇOBAN IRMAK İNTEAD OF ÇOBAN IRIMAİK HELLO-WORLD
// Upper: ÇOBAN IRMAK IBECIOIMES ÇOBAN IRMAK INTEAD OF ÇOBAN IRIMAİK HELLO-WORLD
/**
 * Lower-cases the whole string, then upper-cases the first character of
 * every space-separated word.
 *
 * @returns {string} the converted text
 */
String.prototype.toProperCase = function () {
  var words = this.toLowerCase().split(' ');

  var capitalised = words.map(function (word) {
    return word.charAt(0).toUpperCase() + word.substr(1);
  });

  return capitalised.join(' ');
};
// Usage example: the call below yields "Çoban Irmak".
'çoban ırmak'.toProperCase();
Here is the JS function that will do the job.
/**
 * Returns `string` with the first character of every space-separated word
 * upper-cased and the remaining characters of the word lower-cased.
 *
 * @param {string} string - text to convert
 * @returns {string} the converted text
 */
function toProperCase(string) {
  // Locals are declared with `var`: the previous undeclared assignments
  // created global variables and throw a ReferenceError in strict mode.
  var words = string.split(' '); // split the string on single spaces
  var cased = [];
  for (var i = 0; i < words.length; i++) {
    cased.push(words[i].charAt(0).toUpperCase() + words[i].slice(1).toLowerCase());
  }
  return cased.join(' ');
}
Just another option but with the icing on the cake:
/**
 * Lower-cases `str`, then upper-cases every character that starts a word.
 * A word starts at the beginning of the string or right after one of the
 * stop characters " ", "-", ":", "=" and "/".
 *
 * @param {string} str - text to convert
 * @returns {string} the converted text
 */
function toProperCase( str ) {
    var stops = {
        " " : true,
        "-" : true,
        ":" : true,
        "=" : true,
        "/" : true
    };
    // Treat the start of the string as if it followed a stop character.
    var previousWasStop = true;

    return str.toLowerCase( ).split("").map(function( ch ) {
        var isStop = Boolean( stops[ ch ] );
        var startsWord = previousWasStop && !isStop;
        previousWasStop = isStop;
        return startsWord ? ch.toUpperCase( ) : ch;
    }).join("");
}
Hopefully that helps :)