I wrote this function that creates a random string of UTF-8 characters. It works well, but the regular expression [^\p{L}]
is not filtering all non-letter characters it seems. I can't think of a better way to generate the full range of unicode without non-letter characters.. short of manually searching for and defining the decimal letter ranges between 65 and 65533.
function rand_str($max_length, $min_length = 1, $utf8 = true) {
static $utf8_chars = array();
if ($utf8 && !$utf8_chars) {
for ($i = 1; $i <= 65533; $i++) {
$utf8_chars[] = mb_convert_encoding("&#$i;", 'UTF-8', 'HTML-ENTITIES');
}
$utf8_chars = preg_replace('/[^\p{L}]/u', '', $utf8_chars);
foreach ($utf8_chars as $i => $char) {
if (trim($utf8_chars[$i])) {
$chars[] = $char;
}
}
$utf8_chars = $chars;
}
$chars = $utf8 ? $utf8_chars : str_split('abcdefghijklmnopqrstuvwxyz');
$num_chars = count($chars);
$string = '';
$length = mt_rand($min_length, $max_length);
for ($i = 0; $i < $length; $i++) {
$string .= $chars[mt_rand(1, $num_chars) - 1];
}
return $string;
}