I've written a Python script to insert some data (300 million rows) into a MySQL table:
#!/usr/bin/python
import os
import MySQLdb
from multiprocessing import Pool

class DB(object):
    def __init__(self):
        self.conn = MySQLdb.connect(host='localhost', user='root', passwd='xxx',
                                    db='xdd', port=3306)
        self.cur = self.conn.cursor()

    def insert(self, arr):
        self.cur.execute('insert into RAW_DATA values(null,%s,%s,%s,%s,%s,%s,%s)', arr)

    def close(self):
        self.conn.commit()
        self.cur.close()
        self.conn.close()

def Import(fname):
    db = DB()
    print 'importing ', fname
    with open('data/' + fname, 'r') as f:
        for line in f:
            arr = line.split()
            db.insert(arr)
    db.close()

if __name__ == '__main__':
    # 800+ files
    files = [d for d in os.listdir('data') if d[-3:] == 'txt']
    pool = Pool(processes=10)
    pool.map(Import, files)
The problem is that the script runs very, very slowly. Are there any obvious mistakes in my use of multiprocessing?
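For reference, I suspect the one-row-at-a-time execute() calls might be the bottleneck, so here is a batched variant of Import I'm considering (an untested sketch; the 10000-row batch size is an arbitrary guess on my part):

def ImportBatched(fname):
    # Same credentials and schema as the DB class above.
    conn = MySQLdb.connect(host='localhost', user='root', passwd='xxx',
                           db='xdd', port=3306)
    cur = conn.cursor()
    sql = 'insert into RAW_DATA values(null,%s,%s,%s,%s,%s,%s,%s)'
    batch = []
    with open('data/' + fname, 'r') as f:
        for line in f:
            batch.append(line.split())
            if len(batch) >= 10000:          # assumed batch size
                cur.executemany(sql, batch)  # one call per batch instead of per row
                batch = []
    if batch:                                # flush any remaining rows
        cur.executemany(sql, batch)
    conn.commit()
    cur.close()
    conn.close()

Would batching with executemany like this make a significant difference, or is the real problem elsewhere?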