SQLAlchemy/dataset how to specify created table

2019-08-29 06:22发布

I'm playing around a little bit with dataset.readthedocs.io which is an API on top of SQLAlchemy, but I'm not able to generically create the tables in an easy way. I know that it's possible to write create table statements and connect to the table, but I'm wondering if there is a more convenient way.

Example code below:

Pipeline code:

  class DBWritePipeline(object):
      def __init__(self, dataset_uri, dataset_table):
          self.dataset_uri = dataset_uri
          self.dataset_table = dataset_table

      @classmethod
      def from_crawler(cls, crawler):
          return cls(
              dataset_uri=crawler.settings.get('CONNECTION_STRING'),
              dataset_table=crawler.settings.get('DATASET_TABLE', 'itabone')
          )

      def open_spider(self, spider):
          self.db = dataset.connect(self.dataset_uri)

      def close_spider(self, spider):
          pass

      def process_item(self, item, spider):
          if hasattr(item,"key"):
              self.db[item.table].upsert(item,item.key)
          else:
              self.db[item.table].insert(item)

          #self.db[self.dataset_table].insert(item)
          return item

settings:

  # Database settings
  MYSQL_HOST = 'localhost'
  MYSQL_PORT = 3306
  MYSQL_USER = 'scrapy'
  MYSQL_PASSWORD = 'scrapy'
  MYSQL_DB = 'scrapy'
  MYSQL_UPSERT = True

  CONNECTION_STRING = "{drivername}://{user}:{passwd}@{host}:{port}/{db_name}".format(
      drivername="mysql",
      user=MYSQL_USER,
      passwd=MYSQL_PASSWORD,
      host=MYSQL_HOST,
      port=MYSQL_PORT,
      db_name=MYSQL_DB,)

example items:

  class Itabone(scrapy.Item):
      name = scrapy.Field(type="String(32)")
      val = scrapy.Field()
      table = "itabone"
      key = "name"

  class Itabthree(scrapy.Item):
      name = scrapy.Field()
      val = scrapy.Field()
      table = "itabthree"

  class Itabtwo(scrapy.Item):
      name = scrapy.Field(max_length=30,key=True)
      val = scrapy.Field()
      table = "itabtwo"
      key = ["name","val"]

Spider:

  import scrapy
  from myproject.items import Itabone
  from myproject.items import Itabtwo
  from myproject.items import Itabthree

  class ItemTest(scrapy.Spider):
      name = 'itempiptest'
      start_urls = ['https://www.example.com']

      def parse(self, response):
          i1 = Itabone(name='tsts',val=1)
          i2 = Itabtwo(name='vcvcv',val=2)
          i3 = Itabthree(name='vcvcv',val=2)

          yield i1
          yield i2
          yield i3

What should I write in the item.py so the tables are created and updated correctly?

0条回答
登录 后发表回答