Format number with specific mask regex python

2019-06-20 08:58发布

问题:

I need to format a number with a specific mask: 9.9.9.9.99.999, depending on the length of the number string.
For example:

- 123456789 => 1.2.3.4.56.789
- 123456    => 1.2.3.4.56
- 1234      => 1.2.3.4
- 123       => 1.2.3
- 12        => 1.2

A number string with 7 or 8 digits will not occur in the input.

How could that be implemented with regex, preferably in python?

Thanks in advance.

回答1:

You can use this pattern:

(?:(?<=^\d)|(?<=^\d{2})|(?<=^\d{3})|(?<=^\d{4})|(?<=^\d{6}))(?=\d)

with . as replacement.

example:

re.sub(r'(?:(?<=^\d)|(?<=^\d{2})|(?<=^\d{3})|(?<=^\d{4})|(?<=^\d{6}))(?=\d)', '.', yourstr)


回答2:

Does it have to be a regular expression?

num = "123456789"

def fmt(num):
    """Format a digit string with the mask 9.9.9.9.99.999.

    The first four characters each become their own field, characters
    5-6 form one field, and everything from the 7th character onward
    forms the last field. Fields are joined with '.'; fields for which
    the input is too short are simply omitted.

    Args:
        num: the digits as a string (the value is sliced, so pass a
            string rather than an int).

    Returns:
        The dotted string, e.g. '123456' -> '1.2.3.4.56'.
    """
    fields = list(num[:4])
    # Explicit conditionals instead of the fragile `cond and x or y` idiom.
    if len(num) > 4:
        fields.append(num[4:6])
    if len(num) > 6:
        fields.append(num[6:])
    return ".".join(fields)

print fmt(num)

I'm confident I'll still understand that in two years. Not so sure about the regex.



回答3:

Something more general without help of regexp:

from itertools import islice
def formatn(n, pat='9.9.9.9.99.999', sep='.'):
    """Format ``n`` according to a separator-delimited mask.

    The mask is split on ``sep``; the length of each mask field says how
    many characters of ``str(n)`` go into the corresponding output field.
    Fields that end up empty (input exhausted, or a zero-length mask
    field) are left out, and the remaining ones are joined with ``sep``.
    Characters beyond the total width of the mask are not emitted.
    """
    remaining = iter(str(n))
    chunks = []
    for field in pat.split(sep):
        # islice consumes up to len(field) characters from the shared iterator.
        chunk = ''.join(islice(remaining, len(field)))
        if chunk:
            chunks.append(chunk)
    return sep.join(chunks)

Demo:

>>> formatn(1234)
'1.2.3.4'
>>> formatn(123456789)
'1.2.3.4.56.789'


回答4:

A non-regexp way: (this reminds me that I should learn regexp asap)

def get_formated_number(num, split_at):
    """Insert '.' separators into ``str(num)`` at table-driven positions.

    ``split_at`` is indexed by the number of characters in ``str(num)``
    and must yield the indices (relative to the undotted string) before
    which a '.' is inserted. With a dict, a length that has no entry
    raises KeyError.
    """
    chars = list(str(num))
    positions = sorted(split_at[len(chars)])
    # Work from the highest index down so earlier insertions do not shift
    # the positions that are still pending.
    while positions:
        chars.insert(positions.pop(), '.')
    return ''.join(chars)

# Sample inputs: one number for each digit count that split_at has an entry for.
nums = [12, 123, 1234, 123456, 123456789]
# Maps digit count -> indices (into the undotted digit string) before which
# get_formated_number inserts a '.'.
split_at = {2: [1], 
            3: [1, 2],
            4: [1, 2, 3],
            6: [1, 2, 3, 4],
            9: [1, 2, 3, 4, 6]}

# Python 2 print statement: writes one formatted number per line.
for num in nums:
    print get_formated_number(num, split_at)

Output

1.2
1.2.3
1.2.3.4
1.2.3.4.56
1.2.3.4.56.789


回答5:

EDIT 2

I found a solution that is twice as fast as my regex solution, which had been the fastest one.
And it doesn't need a regex:

def fmt3(num):
    """Format a digit string with the fixed mask 9.9.9.9.99.999.

    Slices ``num`` into the fields [0:1], [1:2], [2:3], [3:4], [4:6] and
    [6:], joins them with '.', and strips the trailing dots left behind
    by fields that came out empty because the input was short.
    """
    bounds = (0, 1, 2, 3, 4, 6)
    fields = [num[lo:hi] for lo, hi in zip(bounds, bounds[1:])]
    fields.append(num[6:])  # last field takes everything that is left
    dotted = '.'.join(fields)
    return dotted.rstrip('.')

I think it's because access to elements of a string is extremely fast.

.

It can be generalized, as alko did, while keeping an acceptable execution time, similar to the other solutions, whereas alko's solution is 10 times slower than all the other solutions.

def fmt4(num,mask = '9.9.9.9.99.999'):
    """Format ``num`` (a string) according to a dot-separated mask.

    Every non-dot character of ``mask`` stands for one character of
    ``num``; every '.' in the mask closes the current field. The fields
    are joined with '.', then leading and trailing dots are stripped, so
    a mask that starts or ends with '.' (or an input shorter than the
    mask) leaves no dangling separators. Consecutive dots inside the mask
    are kept in the output.
    """
    pieces = []
    start = 0
    for field in mask.split('.'):
        # Each mask field consumes as many characters as it is long.
        stop = start + len(field)
        pieces.append(num[start:stop])
        start = stop
    return '.'.join(pieces).strip('.')

# Default mask, one call per expected input length (Python 2 print statements).
print fmt4('123456789')
print fmt4('123456')
print fmt4('1234')
print fmt4('123')
print fmt4('12')
# Bare print: emits an empty line between the two groups of results.
print
# Custom masks. A leading or trailing '.' in the mask produces an empty
# edge field whose separator fmt4 removes with strip('.'); consecutive dots
# inside the mask are preserved in the output.
print fmt4('123456789',mask = '.9.99.9.99.99.9')
print fmt4('123456789',mask = '9.99.9.99.99.9')
print fmt4('123456789',mask = '9...99.9.99.99.9')
print fmt4('123456789',mask = '9.99.9.99.99.9.')
print fmt4('123456789',mask = '9.99.99999.9')

result

1.2.3.4.56.789
1.2.3.4.56
1.2.3.4
1.2.3
1.2

1.23.4.56.78.9
1.23.4.56.78.9
1...23.4.56.78.9
1.23.4.56.78.9
1.23.45678.9

MY INITIAL ANSWER

My following solution ,
with pat1 = '(\d)(\d)?(\d)?(\d)?(\d\d)?(\d\d\d)?'
and '.'.join(filter(None,r1.match(thestring).groups('')))
seems to be the fastest;

import re
from time import clock

from itertools import islice
def formatn(n, pat='9.9.9.9.99.999', sep='.'):
    """Format ``n`` according to a separator-delimited mask.

    Each field of ``pat`` (split on ``sep``) takes as many characters of
    ``str(n)`` as the field is long. Empty fields are dropped and the
    rest are joined with ``sep``; characters beyond the total mask width
    are not emitted.
    """
    source = iter(str(n))
    widths = [len(field) for field in pat.split(sep)]
    pieces = []
    for width in widths:
        # All fields draw from the same iterator, so each islice call
        # continues where the previous one stopped.
        piece = ''.join(islice(source, width))
        if piece:
            pieces.append(piece)
    return sep.join(pieces)

def fmt(num):
    """Format a digit string with the mask 9.9.9.9.99.999.

    The first four characters each become their own field, characters
    5-6 form one field, and everything from the 7th character onward
    forms the last field. Fields the input is too short for are omitted.

    Args:
        num: the digits as a string (the value is sliced).

    Returns:
        The fields joined with '.', e.g. '123456789' -> '1.2.3.4.56.789'.
    """
    fields = list(num[:4])
    # Plain `if` statements replace the `cond and x or y` idiom, which
    # silently misbehaves whenever x is falsy.
    if len(num) > 4:
        fields.append(num[4:6])
    if len(num) > 6:
        fields.append(num[6:])
    return ".".join(fields)

# Capturing approach: one group per mask field (1, 1, 1, 1, 2 and 3 digits);
# every group after the first is optional.
# NOTE(review): not a raw string; '\d' only works because Python leaves
# unrecognized escapes untouched — r'...' would be the safer spelling.
pat1 = '(\d)(\d)?(\d)?(\d)?(\d\d)?(\d\d\d)?'
r1 = re.compile(pat1)

# Substitution approach: a zero-width match located right after the 1st,
# 2nd, 3rd, 4th or 6th digit from the start of the string, and only when
# another digit follows; each such position is replaced by '.'.
pat2 = '(?:(?<=^\d)|(?<=^\d{2})|(?<=^\d{3})|(?<=^\d{4})|(?<=^\d{6}))(?=\d)'
r2 = re.compile(pat2)

# Number of repetitions of each timed loop.
iterat = 20000

# --- 9-digit input -------------------------------------------------------
# Each stanza: time `iterat` calls, print the elapsed time, then print the
# result once so the output can be checked by eye (Python 2 print statements).

# r1: groups('') substitutes '' for unmatched groups; filter(None, ...)
# then drops those empty entries before joining.
te = clock()
for i in xrange(iterat):
    '.'.join(filter(None,r1.match('123456789').groups('')))
print clock()-te
print '  ','.'.join(filter(None,r1.match('123456789').groups('')))

# r2: lookbehind-based substitution.
te = clock()
for i in xrange(iterat):
    r2.sub('.','123456789')
print clock()-te
print '  ',r2.sub('.','123456789')

# fmt: plain slicing.
te = clock()
for i in xrange(iterat):
    fmt('123456789')
print clock()-te
print '  ',fmt('123456789')

# formatn: generic mask-driven version.
te = clock()
for i in xrange(iterat):
    formatn('123456789')
print clock()-te
print '  ',formatn('123456789')

print '-----------------------------'

# --- 6-digit input -------------------------------------------------------

# r1: groups() is called without a default here, so unmatched groups come
# back as None; filter(None, ...) drops them just the same.
te = clock()
for i in xrange(iterat):
    '.'.join(filter(None,r1.match('123456').groups()))
print clock()-te
print '  ','.'.join(filter(None,r1.match('123456').groups()))

te = clock()
for i in xrange(iterat):
    r2.sub('.','123456')
print clock()-te
print "  ",r2.sub('.','123456')

te = clock()
for i in xrange(iterat):
    fmt('123456')
print clock()-te
print '  ',fmt('123456')

# NOTE(review): this stanza times and prints formatn('123456789'), not
# '123456' like the other three stanzas of this section — looks like a
# copy-paste slip, so this timing is not comparable with its neighbours.
te = clock()
for i in xrange(iterat):
    formatn('123456789')
print clock()-te
print '  ',formatn('123456789')

print '-----------------------------'

# --- 4-digit input -------------------------------------------------------
# Same four candidates as above: r1 (capturing groups), r2 (substitution),
# fmt (slicing) and formatn (generic mask).

# Unmatched optional groups are None here and are removed by filter(None, ...).
te = clock()
for i in xrange(iterat):
    '.'.join(filter(None,r1.match('1234').groups()))
print clock()-te
print '  ','.'.join(filter(None,r1.match('1234').groups()))

te = clock()
for i in xrange(iterat):
    r2.sub('.','1234')
print clock()-te
print '  ',r2.sub('.','1234')

te = clock()
for i in xrange(iterat):
    fmt('1234')
print clock()-te
print '  ',fmt('1234')

te = clock()
for i in xrange(iterat):
    formatn('1234')
print clock()-te
print '  ',formatn('1234')

result

0.186308036357
   1.2.3.4.56.789
0.397971250536
   1.2.3.4.56.789
0.258452959804
   1.2.3.4.56.789
1.9979410791
   1.2.3.4.56.789
-----------------------------
0.208518959812
   1.2.3.4.56
0.319339748488
   1.2.3.4.56
0.247042291688
   1.2.3.4.56
1.97725548918
   1.2.3.4.56.789
-----------------------------
0.179872581571
   1.2.3.4
0.273376644238
   1.2.3.4
0.207427200943
   1.2.3.4
1.9792909434
   1.2.3.4

EDIT

Inspired by Lukas Graf's answer:

def fmt2(num):
    """Format a digit string with the mask 9.9.9.9.99.999.

    The first four characters are dotted individually, characters 5-6
    form the next field and the remainder forms the last field. Only the
    fields that are actually non-empty appear in the result.
    """
    head = '.'.join(num[:4])
    middle = num[4:6]
    tail = num[6:]
    if tail:
        return '%s.%s.%s' % (head, middle, tail)
    if middle:
        return head + '.' + middle
    return head