Source code for dbprocessing.DBstrings
#!/usr/bin/env python
"""String handling for dbprocessing.
Contains functions/objects useful in handling strings related to
dbprocessing: parsing, formatting, etc.
"""
from __future__ import print_function
__author__ = 'Jonathan Niehof <jniehof@lanl.gov>'
import string
class DBformatter(string.Formatter):
    """String formatter extended/modified for DButils

    Fields named in :data:`SPECIAL_FIELDS` get a default format spec
    when none is given (:meth:`format`) and may be replaced by a regular
    expression when no value is supplied (:meth:`re`).

    Notes
    -----
    Expansion happens in two passes: :meth:`expand_format` rewrites the
    format string, then the base class formats the result. Literal text
    and substituted regular expressions have their braces doubled in the
    first pass so that ``{{`` and ``}}`` in the input survive as literal
    ``{`` and ``}`` in the output of the second pass.
    """

    SPECIAL_FIELDS = {
        'Y': ('{Y:04d}', r'(19|2\d)\d\d'),
        'm': ('{m:02d}', r'(0\d|1[0-2])'),
        'b': ('{b}', 'Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec'),
        'd': ('{d:02d}', r'[0-3]\d'),
        'y': ('{y:02d}', r'\d\d'),
        'j': ('{j:03d}', r'[0-3]\d\d'),
        'H': ('{H:02d}', r'[0-2]\d'),
        'M': ('{M:02d}', r'[0-6]\d'),
        'S': ('{S:02d}', r'[0-6]\d'),
        'MILLI': ('{MILLI:03d}', r'\d{3}'),
        'MICRO': ('{MICRO:03d}', r'\d{3}'),
        'QACODE': ('{QACODE}', 'ok|ignore|problem'),
        'VERSION': ('{VERSION}', r'\d+\.\d+\.\d+'),
        'DATE': ('{DATE}', r'(19|2\d)\d\d(0\d|1[0-2])[0-3]\d'),
        'datetime': ('{datetime}', r'(19|2\d)\d\d(0\d|1[0-2])[0-3]\d'),
        'mday': ('{mday:d}', r'-?\d+'),
        'APID': ('{APID:x}', r'[\da-fA-F]+'),
        '??': ('{??}', '..'),
        '???': ('{???}', '...'),
        '????': ('{????}', '....'),
        'nn': ('{nn}', r'\d\d'),
        'nnn': ('{nnn}', r'\d\d\d'),
        'nnnn': ('{nnnn}', r'\d\d\d\d'),
    }
    """indexed by field name; each element contains a fully-formatted
    representation of the field and a regular expression that should match
    it. (:class:`dict`)"""

    def format(self, format_string, *args, **kwargs):
        """Expand base format to handle datetime and special dbp keywords

        This is the top-level function to call.

        Parameters
        ----------
        format_string : :class:`str`
            String with format specifiers.

        Other Parameters
        ----------------
        *args, **kwargs
            Values to substitute, as for :meth:`string.Formatter.format`.
            A ``datetime`` keyword is expanded by :meth:`expand_datetime`.

        Returns
        -------
        :class:`str`
            The formatted string.
        """
        self.expand_datetime(kwargs)
        return super(DBformatter, self).format(
            self.expand_format(format_string), *args, **kwargs)

    def re(self, format_string, *args, **kwargs):
        """Format with regexp for unspecified fields.

        Similar to :meth:`format`, but any fields which are not specified
        are replaced with regular expressions according to
        :data:`SPECIAL_FIELDS`.

        Parameters
        ----------
        format_string : :class:`str`
            String with format specifiers.

        Other Parameters
        ----------------
        *args, **kwargs
            Values to substitute, as for :meth:`format`. Special fields
            absent from ``kwargs`` become parenthesized regular
            expressions.

        Returns
        -------
        :class:`str`
            The formatted string.
        """
        self.expand_datetime(kwargs)
        return super(DBformatter, self).format(
            self.expand_format(format_string, kwargs), *args, **kwargs)

    def expand_datetime(self, kwargs):
        """Expands datetime keyword into special keywords. Helper function!

        A single datetime keyword may be provided to :meth:`format`; this
        function expands that datetime keyword into all the fields that
        may be provided by the datetime object and inserts those keywords
        into ``kwargs``. Keywords already present are never overwritten.
        Date fields are added only if the object has a ``year``
        attribute, and time fields only if it has an ``hour`` attribute.

        Parameters
        ----------
        kwargs : :class:`dict`
            keywords passed to :meth:`format`. Updated in place.

        Examples
        --------
        >>> kwargs = { 'datetime': datetime.datetime(2010, 1, 1) }
        >>> dbprocessing.DBstrings.DBformatter().expand_datetime(kwargs)
        >>> kwargs
        {'DATE': '20100101',
         'H': 0,
         'M': 0,
         'MICRO': 0,
         'MILLI': 0,
         'S': 0,
         'Y': 2010,
         'b': 'Jan',
         'd': 1,
         'datetime': datetime.datetime(2010, 1, 1, 0, 0),
         'j': 1,
         'm': 1,
         'y': 10}
        """
        if 'datetime' not in kwargs:
            return
        dt = kwargs['datetime']
        # Values are wrapped in callables so that nothing (strftime in
        # particular) is evaluated for a keyword the caller already gave.
        derived = []
        if hasattr(dt, 'year'):
            derived.extend([
                ('Y', lambda: dt.year),
                ('m', lambda: dt.month),
                ('d', lambda: dt.day),
                ('y', lambda: dt.year % 100),
                ('j', lambda: int(dt.strftime('%j'))),
                ('DATE', lambda: dt.strftime('%Y%m%d')),
                ('b', lambda: dt.strftime('%b')),
            ])
        if hasattr(dt, 'hour'):
            derived.extend([
                ('H', lambda: dt.hour),
                ('M', lambda: dt.minute),
                ('S', lambda: dt.second),
                # Whole milliseconds, and the microseconds left over.
                ('MILLI', lambda: dt.microsecond // 1000),
                ('MICRO', lambda: dt.microsecond % 1000),
            ])
        for key, compute in derived:
            if key not in kwargs:
                kwargs[key] = compute()

    @staticmethod
    def _escape_braces(text):
        """Double every brace in ``text`` so formatting emits it literally."""
        return text.replace('{', '{{').replace('}', '}}')

    def expand_format(self, format_string, kwargs=None):
        """Add formatting codes to 'special' fields in format string.

        Helper function!

        For every field defined in :data:`SPECIAL_FIELDS`, if there is no
        format spec nor conversion specified, replace it on the output
        with the full format spec in :data:`SPECIAL_FIELDS`.

        If the format spec/conversion is not provided or matches that in
        :data:`SPECIAL_FIELDS`, and the field is not found in ``kwargs``,
        replace with the regular expression from :data:`SPECIAL_FIELDS`.

        Everything else is returned verbatim; literal braces are
        re-escaped so they survive the subsequent formatting pass.

        Parameters
        ----------
        format_string : :class:`str`
            The format string to convert.
        kwargs : :class:`dict`
            Provided keywords to check for existence. If not supplied, do
            no regex substitution.

        Returns
        -------
        :class:`str`
            ``format_string`` with the fields defined in :data:`SPECIAL_FIELDS`
            expanded to full format specifiers and replaced by regular
            expressions, as desired.
        """
        pieces = []
        for literal, field, spec, conversion in self.parse(format_string):
            # parse() has already turned '{{'/'}}' into single braces;
            # double them again or the next pass reads them as fields.
            pieces.append(self._escape_braces(literal))
            orig = self.assemble('', field, spec, conversion)
            if field not in self.SPECIAL_FIELDS:
                pieces.append(orig)
                continue
            full_spec, regex = self.SPECIAL_FIELDS[field]
            bare = not spec and not conversion
            if kwargs is None or field in kwargs:
                # assume field is provided
                pieces.append(full_spec if bare else orig)
            elif bare or full_spec == orig:
                # field not provided, put in regex instead; braces in the
                # regex (e.g. \d{3}) must survive the formatting pass
                pieces.append('(' + self._escape_braces(regex) + ')')
            else:
                pieces.append(orig)
        return ''.join(pieces)

    def assemble(self, literal, field, format, conversion):
        """Assembles components of a field specification

        Converse of parse. Takes literal text, field name, format spec,
        and conversion and assembles into a full field spec. ``literal``
        is emitted as given (braces in it are not escaped).

        Parameters
        ----------
        literal : :class:`str`
            any literal text preceding the field definition
        field : :class:`str`
            name of the field; ``None`` if there is no field at all, empty
            string for an auto-numbered field (``{}``)
        format : :class:`str`
            format specification to apply to ``field``
        conversion : :class:`str`
            conversion to apply to ``field``

        Returns
        -------
        :class:`str`
            A full format spec that will parse into ``literal``, ``field``,
            ``format``, ``conversion``

        Examples
        --------
        >>> f = dbprocessing.DBstrings.DBformatter()
        >>> parsed = list(f.parse('The year is {Y}'))
        >>> parsed[0]
        ('The year is ', 'Y', '', None)
        >>> f.assemble(*parsed[0])
        'The year is {Y}'
        """
        # parse() reports "no field here" as field=None; an empty string is
        # an auto-numbered field and must still produce '{}'.
        if field is None and not conversion and not format:
            return literal
        parts = [literal, '{']
        if field:
            parts.append(field)
        if conversion:
            parts.append('!' + conversion)
        if format:
            # The ':' is required even with an empty field name; without
            # it '{:03d}' would come back as '{03d}', a field named '03d'.
            parts.append(':' + format)
        parts.append('}')
        return ''.join(parts)