We are running a Tornado 3.0 service on a RedHat OS and getting the following error:
[E 140102 17:07:37 ioloop:660] Exception in I/O handler for fd 11
Traceback (most recent call last):
File "/usr/local/lib/python2.7/dist-packages/tornado/ioloop.py", line 653, in start
self._handlers[fd](fd, events)
File "/usr/local/lib/python2.7/dist-packages/tornado/stack_context.py", line 241, in wrapped
callback(*args, **kwargs)
File "/usr/local/lib/python2.7/dist-packages/tornado/netutil.py", line 136, in accept_handler
connection, address = sock.accept()
File "/usr/lib/python2.7/socket.py", line 202, in accept
error: [Errno 24] Too many open files
But we couldn't figure out what that means.
Our Tornado code is as follows:
import sys
from tornado.ioloop import IOLoop
from tornado.options import parse_command_line, define, options
from tornado.httpserver import HTTPServer
from tornado.netutil import bind_sockets
import tornado
sys.path.append("..")
from tornado.web import RequestHandler, Application
from shared.bootstrap import *
from time import time
from clients import ClientFactory
from shared.configuration import Config
from shared.logger import Logger
from algorithms.neighborhood.application import NeighborhoodApplication
import traceback
# Command-line options; read back through `options.port` / `options.debug`
# once parse_command_line() has run.
define('port', type=int, default=8000, help="Run on the given port")
define('debug', type=bool, default=True, help="Run application in debug mode")
class WService(RequestHandler):
    """Handler that answers GET requests with SKU recommendations as JSON.

    The response body always has the shape ``{'skus': [...]}``; the list is
    empty when the recommendation step raises.
    """

    # Algorithm name -> client object. Declared on the class, so the same
    # dict (and therefore the same clients) is shared by every handler
    # instance.
    _clients = {}

    def prepare(self):
        """Remember when the request started so on_finish can log its latency."""
        self._start_time = time()
        RequestHandler.prepare(self)

    def get(self, algorithm=None):
        """Write the recommendations for ``algorithm`` to the response.

        ``algorithm`` is the path fragment captured by the URL pattern; an
        empty or missing value falls back to ``'neighborhood'``. Relies on
        the module-level ``app`` object for the actual recommendation call.
        Any exception raised while building the recommendations is logged
        and an empty ``skus`` list is returned instead.
        """
        # set_header replaces the value; add_header would append a second
        # Content-Type next to the one write() sets for dict bodies.
        self.set_header('Content-type', 'application/json')
        response = {'skus': []}
        algorithm = algorithm or 'neighborhood'
        try:
            if algorithm not in self._clients:
                self._clients[algorithm] = ClientFactory.get_instance(algorithm)
            expected = self._clients[algorithm].get_expected_arguments()
            arguments = self.get_arguments_by_client(expected)
            response['skus'] = app.get_manager().make_recommendations(arguments)
        except Exception as err:
            # Best effort: log the failure and fall through with the empty
            # default response.
            error("Erro: " + str(err))
        self.write(response)

    def get_arguments_by_client(self, expected_arguments):
        """Collect the request arguments named in ``expected_arguments``.

        Each key is looked up in the request; the value stored under that
        key in ``expected_arguments`` is used as the default when the
        request does not supply it. Returns a new dict.
        """
        return {
            key: self.get_argument(key, expected_arguments[key])
            for key in expected_arguments
        }

    def on_connection_close(self):
        """Finish the request with an empty result when the client goes away."""
        # finish() raises if the request was already finished, so only call
        # it while the request is still open. `_finished` is Tornado's own
        # RequestHandler flag -- NOTE(review): private attribute, confirm on
        # upgrade.
        if not self._finished:
            self.finish({'skus': []})
        RequestHandler.on_connection_close(self)

    def on_finish(self):
        """Log status, request summary and elapsed time in milliseconds."""
        response_time = 1000.0 * (time() - self._start_time)
        log("%d %s %.2fms" % (self.get_status(), self._request_summary(), response_time))
        RequestHandler.on_finish(self)
def handling_exception(signal, frame):
    """Log where the IOLoop was stuck when the blocking threshold fired.

    Takes the ``(signal, frame)`` pair of a signal handler and is registered
    through ``io_loop.set_blocking_signal_threshold``. It only reports: the
    configured threshold and the innermost three stack entries of ``frame``
    are sent to ``error``; nothing is interrupted or cancelled here.
    """
    threshold = io_loop._blocking_signal_threshold
    innermost_frames = traceback.format_stack(frame)[-3:]
    stack_text = ''.join(innermost_frames)
    error('IOLoop blocked for %s seconds in\n%s\n\n' % (threshold, stack_text))
if __name__ == "__main__":
    # Load configuration and set up logging before anything else logs.
    configuration = Config()
    Logger.configure(configuration.get_configs('logger'))

    # `app` is deliberately a module-level name: WService.get reads it as a
    # global to reach the recommendation manager.
    app = NeighborhoodApplication({
        'application': configuration.get_configs('neighborhood'),
        'couchbase': configuration.get_configs('couchbase'),
        'stock': configuration.get_configs('stock'),
    })
    app.run()
    log("Neighborhood Matrices successfully created...")

    log("Initiating Tornado Service...")
    parse_command_line()
    application = Application(
        [
            (r'/(favicon.ico)', tornado.web.StaticFileHandler, {"path": "./images/"}),
            (r"/(.*)", WService),
        ],
        debug=options.debug
    )

    sockets = bind_sockets(options.port, backlog=1024)
    # `xheaders` is an HTTPServer argument; passing 'x-headers' as an
    # Application setting (as before) is silently ignored by Tornado.
    server = HTTPServer(application, xheaders=True)
    server.add_sockets(sockets)

    # `io_loop` is also read as a global by handling_exception.
    io_loop = IOLoop.instance()
    # Diagnostic only: when the loop is blocked for more than 50 ms,
    # handling_exception logs a stack trace. This does not time out or abort
    # the request that is blocking the loop.
    io_loop.set_blocking_signal_threshold(.05, handling_exception)
    io_loop.start()
It's a very basic script: it gets the URL, processes it in the make_recommendations
function, and sends back the response.
We've tried to set a tornado timeout of 50 ms through the io_loop.set_blocking_signal_threshold
function as sometimes the processing of the URL might take this long.
The system receives around 8000 requests per minute and it worked fine for about 30 minutes, but after that it started throwing the "Too many open files" error and broke down. In general the requests were taking about 20 ms to get processed, but when the error started happening the time consumed suddenly increased to seconds.
We tried to see how many connections the port 8000 had and it had several open connections all with the "ESTABLISHED" status.
Is there something wrong in our Tornado script? We believe our timeout function is not working properly, but from what we've researched so far everything seems to be OK.
If you need more info please let me know.
Thanks in advance,