Source code for dbprocessing.tables

#!/usr/bin/env python
"""Table definitions for dbprocessing.

Contains definitions for all database tables, keyed by table name and
suitable for passing as arguments to the :class:`~sqlalchemy.schema.Table`
constructor.
"""

from sqlalchemy import schema, types


names = ['mission', 'satellite', 'instrument', 'product',
         'instrumentproductlink', 'process', 'productprocesslink',
         'file', 'unixtime', 'filefilelink', 'code', 'processqueue',
         'filecodelink', 'release', 'logging', 'logging_file', 'inspector',
         ]
"""Names of tables, in order they should be created (as a table should
be defined before it's linked to), although a table does not necessarily
depend on *all* tables coming before it in this list."""

# NOTE: if one stops using sqlite then change file_id, logging_id and file_logging_id
#       to BigIntegers (sqlite doesn't know BigInteger)
# TODO this can/should all be redone using the new syntax and relations
# see: http://docs.sqlalchemy.org/en/rel_0_7/orm/relationships.html# for
# some examples.
# TODO: It would also be useful to be able to look up a table name and get
# all the tables it requires.
# This is a function rather than dict so that separate column objects are
# created on each call; otherwise reusing the object if e.g. opening two
# databases without quitting Python causes problems (since it gets
# associated with a particular database).
[docs]def definition(name): """Get definition of a table Parameters ---------- name : :class:`str` Name of the table to get definition for, see :data:`names`. Returns ------- :class:`tuple` Arguments for :class:`~sqlalchemy.schema.Table` constructor, such that it can be called ``Table(name, metadata, *dbprocessing.tables.definition[name])`` Raises ------ ValueError if the name is not recognized """ if name == 'mission': return ( schema.Column('mission_id', types.Integer, autoincrement=True, primary_key=True, nullable=False), schema.Column('mission_name', types.String(20), nullable=False, unique=True), schema.Column('rootdir', types.String(250), nullable=False, ), schema.Column('incoming_dir', types.String(250), nullable=False, ), schema.Column('codedir', types.String(250), nullable=True, ), schema.Column('inspectordir', types.String(250), nullable=True, ), schema.Column('errordir', types.String(250), nullable=True, ) ) elif name == 'satellite': return ( schema.Column('satellite_id', types.Integer, autoincrement=True, primary_key=True, nullable=False), schema.Column('satellite_name', types.String(20), nullable=False), # hmm long enough? schema.Column('mission_id', types.Integer, schema.ForeignKey('mission.mission_id'), nullable=False, ), schema.UniqueConstraint('satellite_name', 'mission_id', name='unique_pairs_satellite') ) elif name == 'instrument': return ( schema.Column('instrument_id', types.Integer, autoincrement=True, primary_key=True, nullable=False), schema.Column('instrument_name', types.String(20), nullable=False), # hmm long enough? schema.Column('satellite_id', types.Integer, schema.ForeignKey('satellite.satellite_id'), nullable=False, ), schema.UniqueConstraint('instrument_name', 'satellite_id', name='unique_pairs_instrument') ) elif name == 'product': return ( schema.Column('product_id', types.Integer, autoincrement=True, primary_key=True, nullable=False, index=True), schema.Column('product_name', types.String(100), nullable=False, index=True), # hmm long enough? schema.Column('instrument_id', types.Integer, schema.ForeignKey('instrument.instrument_id'), nullable=False, ), schema.Column('relative_path', types.String(250), nullable=False), # hmm long enough? schema.Column('level', types.Float, nullable=False), schema.Column('format', types.Text, nullable=False), # hmm long enough? schema.Column('product_description', types.Text, nullable=True), # hmm long enough? schema.UniqueConstraint('product_name', 'instrument_id', 'relative_path', name='unique_triplet_product') ) elif name == 'instrumentproductlink': return ( schema.Column('instrument_id', types.Integer, schema.ForeignKey('instrument.instrument_id'), nullable=False), schema.Column('product_id', types.Integer, schema.ForeignKey('product.product_id'), nullable=False), schema.PrimaryKeyConstraint('instrument_id', 'product_id') ) elif name == 'process': return ( schema.Column('process_id', types.Integer, autoincrement=True, primary_key=True, nullable=False, index=True), schema.Column('process_name', types.String(50), nullable=False), # hmm long enough? schema.Column('output_product', types.Integer, schema.ForeignKey('product.product_id'), nullable=True, index=True), schema.Column('output_timebase', types.String(10), nullable=True, index=True), schema.Column('extra_params', types.Text, nullable=True), schema.UniqueConstraint('process_name', 'output_product') ) elif name == 'productprocesslink': return ( schema.Column('process_id', types.Integer, schema.ForeignKey('process.process_id'), nullable=False), schema.Column('input_product_id', types.Integer, schema.ForeignKey('product.product_id'), nullable=False), schema.Column('optional', types.Boolean, nullable=False), schema.Column('yesterday', types.Integer, nullable=False), schema.Column('tomorrow', types.Integer, nullable=False), schema.PrimaryKeyConstraint('process_id', 'input_product_id') ) elif name == 'file': return ( # this was a bigint, sqlalchemy doesn't seem to like this... think here schema.Column('file_id', types.Integer, autoincrement=True, primary_key=True, nullable=False, index=True), schema.Column('filename', types.String(250), nullable=False, unique=True, index=True), schema.Column('utc_file_date', types.Date, nullable=True, index=True), schema.Column('utc_start_time', types.DateTime, nullable=True, index=True), schema.Column('utc_stop_time', types.DateTime, nullable=True, index=True), schema.Column('data_level', types.Float, nullable=False, index=True), schema.Column('interface_version', types.SmallInteger, nullable=False), schema.Column('quality_version', types.SmallInteger, nullable=False), schema.Column('revision_version', types.SmallInteger, nullable=False), schema.Column('verbose_provenance', types.Text, nullable=True), schema.Column('check_date', types.DateTime, nullable=True), schema.Column('quality_comment', types.Text, nullable=True), schema.Column('caveats', types.Text, nullable=True), schema.Column('file_create_date', types.DateTime, nullable=False), schema.Column('met_start_time', types.Float, nullable=True), schema.Column('met_stop_time', types.Float, nullable=True), schema.Column('exists_on_disk', types.Boolean, nullable=False), schema.Column('quality_checked', types.Boolean, nullable=True, default=False), schema.Column('product_id', types.Integer, schema.ForeignKey('product.product_id'), nullable=False), schema.Column('shasum', types.String(40), nullable=True), schema.Column('process_keywords', types.Text, nullable=True), schema.CheckConstraint('utc_stop_time is not NULL OR met_stop_time is not NULL'), schema.CheckConstraint('utc_start_time is not NULL OR met_start_time is not NULL'), schema.CheckConstraint('met_start_time <= met_stop_time'), # in case of one entry schema.CheckConstraint('utc_start_time <= utc_stop_time'), # in case of one entry schema.CheckConstraint('interface_version >= 1'), schema.UniqueConstraint('utc_file_date', 'product_id', 'interface_version', 'quality_comment', 'revision_version', name='Unique file tuple'), schema.Index('ix_file_big', 'filename', 'utc_file_date', 'utc_start_time', 'utc_stop_time', unique=True), ) elif name == 'unixtime': return ( schema.Column('file_id', types.Integer, schema.ForeignKey('file.file_id'), primary_key=True, index=True), schema.Column('unix_start', types.Integer, index=True), schema.Column('unix_stop', types.Integer, index=True), schema.CheckConstraint('unix_start <= unix_stop'), ) elif name == 'filefilelink': return ( schema.Column('source_file', types.Integer, schema.ForeignKey('file.file_id'), nullable=False, index=True), schema.Column('resulting_file', types.Integer, schema.ForeignKey('file.file_id'), nullable=False, index=True), schema.PrimaryKeyConstraint('source_file', 'resulting_file'), schema.CheckConstraint('source_file <> resulting_file'), # TODO this is supposed to be more general than != ) elif name == 'code': return ( schema.Column('code_id', types.Integer, autoincrement=True, primary_key=True, nullable=False, index=True), schema.Column('filename', types.String(250), nullable=False, unique=False), schema.Column('relative_path', types.String(250), nullable=False), schema.Column('code_start_date', types.Date, nullable=False), schema.Column('code_stop_date', types.Date, nullable=False), schema.Column('code_description', types.Text, nullable=False), schema.Column('process_id', types.Integer, schema.ForeignKey('process.process_id'), nullable=False, index=True), schema.Column('interface_version', types.SmallInteger, nullable=False), schema.Column('quality_version', types.SmallInteger, nullable=False), schema.Column('revision_version', types.SmallInteger, nullable=False), schema.Column('output_interface_version', types.SmallInteger, nullable=False), schema.Column('active_code', types.Boolean, nullable=False, default=False), schema.Column('date_written', types.Date, nullable=False), schema.Column('shasum', types.String(40), nullable=True), schema.Column('newest_version', types.Boolean, nullable=False), schema.Column('arguments', types.Text, nullable=True), schema.Column('ram', types.Float, nullable=True), # amanount of ram used in Gigs schema.Column('cpu', types.SmallInteger, nullable=True), # number of cpus used schema.CheckConstraint('code_start_date <= code_stop_date'), schema.CheckConstraint('interface_version >= 1'), schema.CheckConstraint('output_interface_version >= 1'), ) elif name == 'processqueue': return ( schema.Column('file_id', types.Integer, schema.ForeignKey('file.file_id'), primary_key=True, nullable=False, unique=True, index=True), schema.Column('version_bump', types.SmallInteger, nullable=True), schema.CheckConstraint('version_bump is NULL or version_bump < 3'), ) elif name == 'filecodelink': return ( schema.Column('resulting_file', types.Integer, schema.ForeignKey('file.file_id'), nullable=False), schema.Column('source_code', types.Integer, schema.ForeignKey('code.code_id'), nullable=False), schema.PrimaryKeyConstraint('resulting_file', 'source_code') ) elif name == 'release': return ( schema.Column('file_id', types.Integer, schema.ForeignKey('file.file_id'), nullable=False, ), schema.Column('release_num', types.String(20), nullable=False), schema.PrimaryKeyConstraint('file_id', 'release_num') ) elif name == 'logging': return ( schema.Column('logging_id', types.Integer, autoincrement=True, primary_key=True, nullable=False), schema.Column('currently_processing', types.Boolean, nullable=False, default=False), schema.Column('pid', types.Integer, nullable=True), schema.Column('processing_start_time', types.DateTime, nullable=False), # might have to be a TIMESTAMP schema.Column('processing_end_time', types.DateTime, nullable=True), schema.Column('comment', types.Text, nullable=True), schema.Column('mission_id', types.Integer, schema.ForeignKey('mission.mission_id'), nullable=False), schema.Column('user', types.String(30), nullable=False), schema.Column('hostname', types.String(100), nullable=False), # schema.PrimaryKeyConstraint('logging_id'), schema.CheckConstraint('processing_start_time < processing_end_time'), ) elif name == 'logging_file': return ( schema.Column('logging_file_id', types.Integer, autoincrement=True, primary_key=True, nullable=False), schema.Column('logging_id', types.Integer, schema.ForeignKey('logging.logging_id'), nullable=False), schema.Column('file_id', types.Integer, schema.ForeignKey('file.file_id'), nullable=False), schema.Column('code_id', types.Integer, schema.ForeignKey('code.code_id'), nullable=False), schema.Column('comments', types.Text, nullable=True), # schema.PrimaryKeyConstraint('logging_file_id'), ) elif name == 'inspector': return ( schema.Column('inspector_id', types.Integer, autoincrement=True, primary_key=True, nullable=False, index=True), schema.Column('filename', types.String(250), nullable=False, unique=False), schema.Column('relative_path', types.String(250), nullable=False), schema.Column('description', types.Text, nullable=False), schema.Column('interface_version', types.SmallInteger, nullable=False), schema.Column('quality_version', types.SmallInteger, nullable=False), schema.Column('revision_version', types.SmallInteger, nullable=False), schema.Column('output_interface_version', types.SmallInteger, nullable=False), schema.Column('active_code', types.Boolean, nullable=False, default=False, index=True), schema.Column('date_written', types.Date, nullable=False), schema.Column('shasum', types.String(40), nullable=True), schema.Column('newest_version', types.Boolean, nullable=False, index=True), schema.Column('arguments', types.Text, nullable=True), schema.Column('product', types.Integer, schema.ForeignKey('product.product_id'), nullable=False), schema.CheckConstraint('interface_version >= 1'), schema.CheckConstraint('output_interface_version >= 1'), ) else: raise ValueError('Unknown table {}'.format(name))