Date conversion in pandas csv reader

2019-08-11 03:08发布

问题:

from StringIO import StringIO
import datetime as dt
import pandas as pd

def addtime(temp):
    """Return ``temp`` shifted forward by 16 hours.

    ``temp`` is any value that supports ``+`` with a
    ``datetime.timedelta`` (the sample passes an already-parsed date).
    """
    return temp + dt.timedelta(hours=16)

data = """\
    '12-31-2012',5100, 5200
    '01/1/2013',5300,5400"""

tdata = pd.read_csv(StringIO(data),
    names =    ['date', 'field1', 'field'], index_col = None,
    parse_dates =['date'], header= None)

print tdata

old_date = tdata.ix[0,'date']
print 'old date =',old_date
new_date = addtime(old_date)
print 'new date =',new_date

#                 date  field1  field
#0  2012-12-31 00:00:00    5100   5200
#1  2013-01-01 00:00:00    5300   5400
#old date = 2012-12-31 00:00:00
#new date = 2012-12-31 16:00:00

I would like to add 16 hours to each date as part of the pandas CSV reader, but I can't figure out how to supply a "parse_dates=" argument that works. The sample code performs the operation correctly, just not in the desired manner: it shifts a single value after loading instead of shifting every date while the file is being read. Help would be appreciated.

回答1:

Use the date_parser parameter of read_csv. Ordinarily it defaults to dateutil.parser.parse, but you can specify a custom function (such as addtime, below) that not only parses the date string but also adds a timedelta.

from StringIO import StringIO
import datetime as dt
import pandas as pd
import dateutil.parser as parser

def addtime(temp):
    """Parse the date string ``temp`` and return it shifted by 16 hours.

    The string is parsed with ``dateutil.parser.parse`` and a
    ``datetime.timedelta`` of 16 hours is added to the result.
    """
    parsed = parser.parse(temp)
    offset = dt.timedelta(hours=16)
    return parsed + offset

data = """\
    '12-31-2012',5100, 5200
    '01/1/2013',5300,5400"""

tdata = pd.read_csv(StringIO(data),
    names = ['date', 'field1', 'field'], index_col = None,
    parse_dates =['date'], header= None, date_parser=addtime)

print tdata

yields

                  date  field1  field
0  2012-12-31 16:00:00    5100   5200
1  2013-01-01 16:00:00    5300   5400