I need to format a number with a specific mask: 9.9.9.9.99.999, depending on the length of the number string.
For example:
- 123456789 => 1.2.3.4.56.789
- 123456 => 1.2.3.4.56
- 1234 => 1.2.3.4
- 123 => 1.2.3
- 12 => 1.2
A number string with 7 or 8 digits will never occur in the input.
How could that be implemented with regex, preferably in python?
Thanks in advance.
You can use this pattern:
(?:(?<=^\d)|(?<=^\d{2})|(?<=^\d{3})|(?<=^\d{4})|(?<=^\d{6}))(?=\d)
with .
as replacement.
example:
re.sub(r'(?:(?<=^\d)|(?<=^\d{2})|(?<=^\d{3})|(?<=^\d{4})|(?<=^\d{6}))(?=\d)', '.', yourstr)
Does it have to be a regular expression?
num = "123456789"
def fmt(num):
    """Format a number with the mask ``9.9.9.9.99.999``.

    The first four digits are emitted one per group, digits 5-6 form the
    next group, and everything from the 7th digit onward forms the last
    group. Groups for which the input has no digits are left out, so
    shorter inputs yield shorter results (``"1234"`` -> ``"1.2.3.4"``).

    ``num`` may be a digit string or an integer; it is converted with
    ``str`` before slicing.
    """
    num = str(num)
    groups = list(num[:4])
    # Explicit branches instead of the fragile ``cond and x or y`` idiom.
    if len(num) > 4:
        groups.append(num[4:6])
    if len(num) > 6:
        groups.append(num[6:])
    return ".".join(groups)
print fmt(num)
I'm confident I'll still understand that in two years. Not so sure about the regex.
Something more general without help of regexp:
from itertools import islice
def formatn(n, pat='9.9.9.9.99.999', sep='.'):
    """Format ``n`` according to a separator-delimited mask.

    ``pat`` is split on ``sep``; the length of each resulting field is the
    number of characters of ``str(n)`` that go into the corresponding
    group. Groups that end up empty are dropped, and characters beyond the
    total width of the mask are not emitted. The groups are joined with
    ``sep``.
    """
    fields = pat.split(sep)
    text = str(n)
    groups = []
    start = 0
    for field in fields:
        stop = start + len(field)
        group = text[start:stop]
        if group:
            groups.append(group)
        start = stop
    return sep.join(groups)
Demo:
>>> formatn(1234)
'1.2.3.4'
>>> formatn(123456789)
'1.2.3.4.56.789'
A non-regexp way: (this reminds me that I should learn regexp asap)
def get_formated_number(num, split_at):
    """Insert dots into ``str(num)`` at table-driven positions.

    ``split_at`` maps a digit count to the indices at which a ``'.'`` is
    inserted. The entry is looked up by the length of ``str(num)``, so a
    length that is missing from the table raises ``KeyError``.
    """
    chars = list(str(num))
    positions = sorted(split_at[len(chars)])
    # Work from the highest index down so that earlier insertions do not
    # shift the positions that are still pending.
    while positions:
        chars.insert(positions.pop(), '.')
    return ''.join(chars)
# Demo driver (Python 2 print statement): format one sample per supported length.
nums = [12, 123, 1234, 123456, 123456789]
# Maps the number of digits to the indices where a '.' is inserted.
# Only lengths 2, 3, 4, 6 and 9 have an entry.
split_at = {2: [1],
3: [1, 2],
4: [1, 2, 3],
6: [1, 2, 3, 4],
9: [1, 2, 3, 4, 6]}
for num in nums:
print get_formated_number(num, split_at)
Output
1.2
1.2.3
1.2.3.4
1.2.3.4.56
1.2.3.4.56.789
EDIT 2
I found a solution that is 2 times faster than my regex solution, which had been the fastest one so far.
And it doesn't need a regex:
def fmt3(num):
    """Format a digit string with the mask ``9.9.9.9.99.999`` by slicing.

    All six groups are always sliced out of ``num``; groups past the end
    of the string come out empty, and the trailing dots they leave behind
    after joining are stripped from the right.
    """
    # (start, stop) bounds of each group; None means "to the end".
    bounds = ((0, 1), (1, 2), (2, 3), (3, 4), (4, 6), (6, None))
    joined = '.'.join(num[start:stop] for start, stop in bounds)
    return joined.rstrip('.')
I think it's because access to elements of a string is extremely fast.
.
It can be generalized, as alko did, while keeping an acceptable execution time, similar to the other solutions; alko's solution, by contrast, is 10 times slower than all the other solutions.
def fmt4(num, mask='9.9.9.9.99.999'):
    """Format the string ``num`` according to a dot-separated ``mask``.

    Every character of ``mask`` other than ``'.'`` reserves one character
    of ``num``; each ``'.'`` closes the current group. The groups are
    joined with ``'.'`` and dots at either end of the result are stripped,
    so leading/trailing dots in the mask and groups left empty by a short
    ``num`` do not show up at the ends. Consecutive dots inside the mask
    are kept. Characters of ``num`` beyond the width of the mask are
    dropped.
    """
    def gen(mask):
        # Slice bounds of the group currently being collected. They are
        # plain locals; the generator takes no state through parameters.
        start = end = 0
        for c in mask:
            if c == '.':
                yield num[start:end]
                start = end
            else:
                end += 1
        # Last group: the mask does not need to end with a dot.
        yield num[start:end]
    return '.'.join(gen(mask)).strip('.')
# Demo calls (Python 2 print statements).
# Default mask with inputs of decreasing length.
print fmt4('123456789')
print fmt4('123456')
print fmt4('1234')
print fmt4('123')
print fmt4('12')
# Bare print: emits an empty line between the two groups of examples.
print
# Custom masks: leading dot, plain, repeated dots, trailing dot, long group.
print fmt4('123456789',mask = '.9.99.9.99.99.9')
print fmt4('123456789',mask = '9.99.9.99.99.9')
print fmt4('123456789',mask = '9...99.9.99.99.9')
print fmt4('123456789',mask = '9.99.9.99.99.9.')
print fmt4('123456789',mask = '9.99.99999.9')
result
1.2.3.4.56.789
1.2.3.4.56
1.2.3.4
1.2.3
1.2
1.23.4.56.78.9
1.23.4.56.78.9
1...23.4.56.78.9
1.23.4.56.78.9
1.23.45678.9
MY INITIAL ANSWER
My following solution,
with pat1 = '(\d)(\d)?(\d)?(\d)?(\d\d)?(\d\d\d)?'
and '.'.join(filter(None,r1.match(thestring).groups('')))
seems to be the fastest;
import re
from time import clock
from itertools import islice
def formatn(n, pat='9.9.9.9.99.999', sep='.'):
    """Format ``n`` according to a separator-delimited mask.

    The length of each ``sep``-separated field of ``pat`` is the number of
    characters taken from ``str(n)`` for the matching group. Empty groups
    are skipped, characters beyond the mask width are not emitted, and the
    remaining groups are joined with ``sep``.
    """
    widths = [len(field) for field in pat.split(sep)]
    digits = iter(str(n))
    groups = []
    for width in widths:
        # islice consumes up to ``width`` characters from the shared iterator.
        group = ''.join(islice(digits, width))
        if group:
            groups.append(group)
    return sep.join(groups)
def fmt(num):
    """Format a digit string with the mask ``9.9.9.9.99.999``.

    The first four characters become one group each, characters 5-6 form
    the next group, and the rest forms the last group. The last two groups
    are only added when the string is longer than 4 and 6 characters
    respectively.
    """
    size = len(num)
    middle = [num[4:6]] if size > 4 else []
    tail = [num[6:]] if size > 6 else []
    return ".".join(list(num[:4]) + middle + tail)
# Both patterns are raw string literals so that the regex escape \d is not
# run through Python's own string-escape processing.

# One capture group per field of the 9.9.9.9.99.999 mask; every group
# after the first is optional.
pat1 = r'(\d)(\d)?(\d)?(\d)?(\d\d)?(\d\d\d)?'
r1 = re.compile(pat1)
# Zero-width match right after the 1st, 2nd, 3rd, 4th or 6th leading
# digit, provided another digit follows.
pat2 = r'(?:(?<=^\d)|(?<=^\d{2})|(?<=^\d{3})|(?<=^\d{4})|(?<=^\d{6}))(?=\d)'
r2 = re.compile(pat2)
# Benchmark, part 1: the 9-digit input '123456789'.
# Python 2 syntax (print statement, xrange). Each candidate is called
# `iterat` times; the elapsed clock() time is printed, followed by the
# formatted result.
iterat = 20000
# Candidate 1: match with r1 and join the non-empty captured groups.
te = clock()
for i in xrange(iterat):
'.'.join(filter(None,r1.match('123456789').groups('')))
print clock()-te
print ' ','.'.join(filter(None,r1.match('123456789').groups('')))
# Candidate 2: lookbehind substitution with r2.
te = clock()
for i in xrange(iterat):
r2.sub('.','123456789')
print clock()-te
print ' ',r2.sub('.','123456789')
# Candidate 3: slicing-based fmt().
te = clock()
for i in xrange(iterat):
fmt('123456789')
print clock()-te
print ' ',fmt('123456789')
# Candidate 4: mask-driven formatn().
te = clock()
for i in xrange(iterat):
formatn('123456789')
print clock()-te
print ' ',formatn('123456789')
print '-----------------------------'
# Benchmark, part 2: the 6-digit input '123456' (same four candidates).
# Candidate 1: groups() is called without a default here, so unmatched
# groups are None; filter(None, ...) drops them.
te = clock()
for i in xrange(iterat):
'.'.join(filter(None,r1.match('123456').groups()))
print clock()-te
print ' ','.'.join(filter(None,r1.match('123456').groups()))
# Candidate 2: lookbehind substitution with r2.
te = clock()
for i in xrange(iterat):
r2.sub('.','123456')
print clock()-te
print " ",r2.sub('.','123456')
# Candidate 3: slicing-based fmt().
te = clock()
for i in xrange(iterat):
fmt('123456')
print clock()-te
print ' ',fmt('123456')
# Candidate 4: mask-driven formatn().
# NOTE(review): the other three candidates in this part use '123456', but
# formatn is called with '123456789' here -- looks like a copy-paste slip;
# confirm before comparing this timing with the others.
te = clock()
for i in xrange(iterat):
formatn('123456789')
print clock()-te
print ' ',formatn('123456789')
print '-----------------------------'
# Benchmark, part 3: the 4-digit input '1234' (same four candidates).
# Candidate 1: match with r1; unmatched groups are None and get filtered out.
te = clock()
for i in xrange(iterat):
'.'.join(filter(None,r1.match('1234').groups()))
print clock()-te
print ' ','.'.join(filter(None,r1.match('1234').groups()))
# Candidate 2: lookbehind substitution with r2.
te = clock()
for i in xrange(iterat):
r2.sub('.','1234')
print clock()-te
print ' ',r2.sub('.','1234')
# Candidate 3: slicing-based fmt().
te = clock()
for i in xrange(iterat):
fmt('1234')
print clock()-te
print ' ',fmt('1234')
# Candidate 4: mask-driven formatn().
te = clock()
for i in xrange(iterat):
formatn('1234')
print clock()-te
print ' ',formatn('1234')
result
0.186308036357
1.2.3.4.56.789
0.397971250536
1.2.3.4.56.789
0.258452959804
1.2.3.4.56.789
1.9979410791
1.2.3.4.56.789
-----------------------------
0.208518959812
1.2.3.4.56
0.319339748488
1.2.3.4.56
0.247042291688
1.2.3.4.56
1.97725548918
1.2.3.4.56.789
-----------------------------
0.179872581571
1.2.3.4
0.273376644238
1.2.3.4
0.207427200943
1.2.3.4
1.9792909434
1.2.3.4
EDIT
Inspired by Lukas Graf's answer:
def fmt2(num):
    """Format a digit string with the mask ``9.9.9.9.99.999``.

    The first four characters are dot-separated individually; characters
    5-6 and the remainder are appended as further groups only when they
    are non-empty.
    """
    head = '.'.join(num[:4])
    middle, tail = num[4:6], num[6:]
    if tail:
        return '%s.%s.%s' % (head, middle, tail)
    if middle:
        return head + '.' + middle
    return head