LSSTApplications  1.1.2+25,10.0+13,10.0+132,10.0+133,10.0+224,10.0+41,10.0+8,10.0-1-g0f53050+14,10.0-1-g4b7b172+19,10.0-1-g61a5bae+98,10.0-1-g7408a83+3,10.0-1-gc1e0f5a+19,10.0-1-gdb4482e+14,10.0-11-g3947115+2,10.0-12-g8719d8b+2,10.0-15-ga3f480f+1,10.0-2-g4f67435,10.0-2-gcb4bc6c+26,10.0-28-gf7f57a9+1,10.0-3-g1bbe32c+14,10.0-3-g5b46d21,10.0-4-g027f45f+5,10.0-4-g86f66b5+2,10.0-4-gc4fccf3+24,10.0-40-g4349866+2,10.0-5-g766159b,10.0-5-gca2295e+25,10.0-6-g462a451+1
LSSTDataManagementBasePackage
Public Member Functions | Private Types | Private Member Functions | Static Private Member Functions | Private Attributes | Static Private Attributes | List of all members
lsst::ap::utils::CsvReader Class Reference

#include <Csv.h>

Public Member Functions

 CsvReader (std::string const &path, CsvControl const &control, bool namesInFirstRecord=false)
 
 CsvReader (std::istream &in, CsvControl const &control, bool namesInFirstRecord=false)
 
 ~CsvReader ()
 
CsvControl const & getControl () const
 
size_t getNumLines () const
 
size_t getNumRecords () const
 
std::vector< std::string > const & getFieldNames () const
 
void setFieldNames (std::vector< std::string > const &names)
 
void setFieldNames (std::string const &names, std::string const &regex, bool stripWhitespace=true)
 
bool isDone () const
 
void nextRecord ()
 
int getIndexOf (std::string const &name) const
 
int getIndexOf (char const *name) const
 
Access fields in the current record
int getNumFields () const
 
bool isNull (int i) const
 
bool isNull (std::string const &name) const
 const More...
 
bool isNull (char const *name) const
 const More...
 
std::string const get (int i) const
 
std::string const get (std::string const &name) const
 const More...
 
std::string const get (char const *name) const
 const More...
 
template<typename T >
T const get (int i) const
 
template<typename T >
T const get (std::string const &name) const
 
template<typename T >
T const get (char const *name) const
 

Private Types

enum  State {
  START_RECORD = 0, START_FIELD, IN_FIELD, IN_QUOTED_FIELD,
  INITIAL_ESCAPE, ESCAPE, EMBEDDED_QUOTE
}
 
typedef
std::tr1::unordered_map
< std::string, int > 
FieldIndexes
 

Private Member Functions

 CsvReader (CsvReader const &)
 
CsvReader & operator= (CsvReader const &)
 
void _ioError (char const *msg) const
 
void _runtimeError (char const *msg) const
 
bool _readLine (int offset)
 
void _readRecord ()
 
void _checkWhitespace (char const *s, char const *msg) const
 
template<typename T >
T _get (char const *value) const
 
template<>
bool _get (char const *field) const
 
template<>
char _get (char const *field) const
 
template<>
signed char _get (char const *field) const
 
template<>
short _get (char const *field) const
 
template<>
int _get (char const *field) const
 
template<>
long _get (char const *field) const
 
template<>
long long _get (char const *field) const
 
template<>
unsigned char _get (char const *field) const
 
template<>
unsigned short _get (char const *field) const
 
template<>
unsigned int _get (char const *field) const
 
template<>
unsigned long _get (char const *field) const
 
template<>
unsigned long long _get (char const *field) const
 
template<>
float _get (char const *field) const
 
template<>
double _get (char const *field) const
 
template<>
long double _get (char const *field) const
 
template<>
char const * _null ()
 
template<>
bool _null ()
 
template<>
char _null ()
 
template<>
signed char _null ()
 
template<>
short _null ()
 
template<>
int _null ()
 
template<>
long _null ()
 
template<>
long long _null ()
 
template<>
unsigned char _null ()
 
template<>
unsigned short _null ()
 
template<>
unsigned int _null ()
 
template<>
unsigned long _null ()
 
template<>
unsigned long long _null ()
 
template<>
float _null ()
 
template<>
double _null ()
 
template<>
long double _null ()
 
template<>
char const * _get (char const *field) const
 

Static Private Member Functions

template<typename T >
static T _null ()
 

Private Attributes

std::string _path
 File name. More...
 
CsvControl _control
 File format. More...
 
std::vector< std::string > _names
 Field names in order of occurrence. More...
 
FieldIndexes _indexes
 Field name to index map. More...
 
boost::scoped_ptr< std::ifstream > _stream
 File stream. More...
 
std::istream * _in
 Input stream. More...
 
size_t _numLines
 1-based index of current line. More...
 
size_t _numRecords
 1-based index of current record. More...
 
boost::scoped_array< char > _record
 Data for a single record. More...
 
int _capacity
 Capacity of _record. More...
 
bool _done
 Finished reading file? More...
 
std::vector< int > _fields
 

Static Private Attributes

static std::string const WHITESPACE
 
static int const MAX_RECORD_LENGTH = 4*1024*1024
 
static int const DEFAULT_CAPACITY = 128*1024
 

Detailed Description

A class for reading records from Character-Separated-Value files in sequential order.

Field Access

Fields may be accessed either by name or by index - performance sensitive code should favor the latter. Values can be retrieved as std::string objects via get(int) const. Alternatively, to avoid the cost of memory allocation, they can be retrieved via get<char const *>(int) const, which returns a null-terminated C string. But beware: the C string is located in the reader's internal decoded character buffer and is invalidated by the next call to nextRecord()!

The get<T>(int) const method can be used to retrieve and cast a field value simultaneously. The supported types are:
bool
Values that are case insensitive matches to "y", "t", "yes", "true", or "1" are mapped to true. Case insensitive matches to "n", "f", "no", "false", "0" are mapped to false. Any other value results in an exception. Leading and trailing whitespace is permitted.
char
Field values must contain exactly one character. Any other value results in an exception. Leading or trailing whitespace is illegal.
integral types
If the field is a decimal representation of an integer that fits in the requested type, it is converted to a binary value and returned. If the decimal value overflows the range of the type, cannot be converted, or contains extraneous characters other than leading or trailing whitespace, an exception is thrown.
floating point types
If the field value is a floating point number, it is converted to a binary value and returned. If the value overflows the range of the type, +/-INF is returned. If it underflows, 0.0 is returned. If the field cannot be converted or contains extraneous characters other than leading or trailing whitespace, an exception is thrown.
NULL values

When a field is recognized as a database NULL (e.g. \N with CsvControl::standardEscapes == true), the field access methods return the following values:

std::string
An empty string
char const *
A null pointer
bool
false
char

'\0'

signed integral types
The minimum representable value
unsigned integral types
The maximum representable value
floating point types
A quiet NaN

To distinguish between a NULL field and one which happens to take one of the values above, use the isNull(int) const method.

Exception safety

All methods provide the strong exception safety guarantee, except for nextRecord() - see the documentation of that method for details.

Limitations

Definition at line 122 of file Csv.h.

Member Typedef Documentation

typedef std::tr1::unordered_map<std::string, int> lsst::ap::utils::CsvReader::FieldIndexes
private

Definition at line 177 of file Csv.h.

Member Enumeration Documentation

Enumerator
START_RECORD 
START_FIELD 
IN_FIELD 
IN_QUOTED_FIELD 
INITIAL_ESCAPE 
ESCAPE 
EMBEDDED_QUOTE 

Definition at line 183 of file Csv.h.

Constructor & Destructor Documentation

lsst::ap::utils::CsvReader::CsvReader ( std::string const &  path,
CsvControl const &  control,
bool  namesInFirstRecord = false 
)

Creates a new CsvReader for a file and reads the first record.

Parameters
pathInput file name.
controlCSV format of the input file.
namesInFirstRecordSet field names to the strings in the first record of the input file?

Definition at line 76 of file Csv.cc.

81  :
82  _path(path),
83  _control(control),
84  _names(),
85  _indexes(),
86  _stream(new ifstream(path.c_str(), ios::binary | ios::in)),
87  _in(_stream.get()),
88  _numLines(0),
89  _numRecords(0),
90  _record(new char[DEFAULT_CAPACITY]),
92  _done(false),
93  _fields()
94 {
95  if (!_stream->good()) {
96  throw LSST_EXCEPT(pexExcept::IoError,
97  "failed to open file " + path + " for reading");
98  }
99  // exception mask for _stream is clear
100  _readRecord();
101  if (namesInFirstRecord && !isDone()) {
102  vector<string> names;
103  for (int i = 0; i < getNumFields(); ++i) {
104  names.push_back(get(i));
105  }
106  setFieldNames(names);
107  _readRecord();
108  }
109 }
bool _done
Finished reading file?
Definition: Csv.h:216
boost::scoped_ptr< std::ifstream > _stream
File stream.
Definition: Csv.h:210
static int const DEFAULT_CAPACITY
Definition: Csv.h:181
void setFieldNames(std::vector< std::string > const &names)
Definition: Csv.cc:167
std::istream * _in
Input stream.
Definition: Csv.h:211
boost::scoped_array< char > _record
Data for a single record.
Definition: Csv.h:214
int _capacity
Capacity of _record.
Definition: Csv.h:215
size_t _numLines
1-based index of current line.
Definition: Csv.h:212
CsvControl _control
File format.
Definition: Csv.h:206
std::vector< int > _fields
Definition: Csv.h:217
size_t _numRecords
1-based index of current record.
Definition: Csv.h:213
bool isDone() const
Definition: Csv.cc:90
#define LSST_EXCEPT(type,...)
Definition: Exception.h:46
int getNumFields() const
Definition: Csv.cc:119
FieldIndexes _indexes
Field name to index map.
Definition: Csv.h:208
std::vector< std::string > _names
Field names in order of occurrence.
Definition: Csv.h:207
std::string _path
File name.
Definition: Csv.h:205
lsst::ap::utils::CsvReader::CsvReader ( std::istream &  in,
CsvControl const &  control,
bool  namesInFirstRecord = false 
)

Creates a new CsvReader from an std::istream and reads the first record.

This std::istream must be kept alive for the life-time of the CsvReader. If it is externally modified while the reader is alive (e.g. its exception mask is changed, or external reads are performed from it) then the behaviour of the reader is undefined.

Parameters
inInput stream
controlCSV format of the input stream
namesInFirstRecordSet field names to the strings in the first record read from the input stream?

Definition at line 118 of file Csv.cc.

123  :
124  _path(),
125  _control(control),
126  _names(),
127  _indexes(),
128  _stream(),
129  _in(&in),
130  _numLines(0),
131  _numRecords(0),
132  _record(new char[DEFAULT_CAPACITY]),
134  _done(false),
135  _fields()
136 {
137  if (!in.good()) {
138  throw LSST_EXCEPT(pexExcept::IoError,
139  "std::istream not good() for reading");
140  }
141  // clear exception mask
142  in.exceptions(ios::goodbit);
143  _readRecord();
144  if (namesInFirstRecord && !isDone()) {
145  vector<string> names;
146  for (int i = 0; i < getNumFields(); ++i) {
147  names.push_back(get(i));
148  }
149  setFieldNames(names);
150  _readRecord();
151  }
152 }
bool _done
Finished reading file?
Definition: Csv.h:216
boost::scoped_ptr< std::ifstream > _stream
File stream.
Definition: Csv.h:210
static int const DEFAULT_CAPACITY
Definition: Csv.h:181
void setFieldNames(std::vector< std::string > const &names)
Definition: Csv.cc:167
std::istream * _in
Input stream.
Definition: Csv.h:211
boost::scoped_array< char > _record
Data for a single record.
Definition: Csv.h:214
int _capacity
Capacity of _record.
Definition: Csv.h:215
size_t _numLines
1-based index of current line.
Definition: Csv.h:212
CsvControl _control
File format.
Definition: Csv.h:206
std::vector< int > _fields
Definition: Csv.h:217
size_t _numRecords
1-based index of current record.
Definition: Csv.h:213
bool isDone() const
Definition: Csv.cc:90
#define LSST_EXCEPT(type,...)
Definition: Exception.h:46
int getNumFields() const
Definition: Csv.cc:119
FieldIndexes _indexes
Field name to index map.
Definition: Csv.h:208
std::vector< std::string > _names
Field names in order of occurrence.
Definition: Csv.h:207
std::string _path
File name.
Definition: Csv.h:205
lsst::ap::utils::CsvReader::~CsvReader ( )

Definition at line 154 of file Csv.cc.

154  {
155  _in = 0;
156 }
std::istream * _in
Input stream.
Definition: Csv.h:211
lsst::ap::utils::CsvReader::CsvReader ( CsvReader const &  )
private

Member Function Documentation

void lsst::ap::utils::CsvReader::_checkWhitespace ( char const *  s,
char const *  msg 
) const
private

Throws an exception if the given string contains anything but whitespace.

Definition at line 533 of file Csv.cc.

533  {
534  for (; WHITESPACE.find(*s) != string::npos; ++s) { }
535  if (*s != '\0') {
536  _runtimeError(msg);
537  }
538 }
static std::string const WHITESPACE
Definition: Csv.h:179
void _runtimeError(char const *msg) const
Definition: Csv.cc:249
template<typename T >
T lsst::ap::utils::CsvReader::_get ( char const *  value) const
private
template<>
char const* lsst::ap::utils::CsvReader::_get ( char const *  field) const
inlineprivate

Definition at line 239 of file Csv.cc.

239  {
240  return field;
241 }
template<>
bool lsst::ap::utils::CsvReader::_get ( char const *  field) const
private

Definition at line 540 of file Csv.cc.

540  {
541  for (; WHITESPACE.find(*field) != string::npos; ++field) { }
542  bool value = false;
543  char c = *field++;
544  switch (c) {
545  case '1':
546  value = true;
547  break;
548  case 't':
549  case 'T':
550  value = true;
551  if ((field[0] == 'r' || field[0] == 'R') &&
552  (field[1] == 'u' || field[1] == 'U') &&
553  (field[2] == 'e' || field[2] == 'E')) {
554  field += 3;
555  }
556  break;
557  case 'y':
558  case 'Y':
559  value = true;
560  if ((field[0] == 'e' || field[0] == 'E') &&
561  (field[1] == 's' || field[1] == 'S')) {
562  field += 2;
563  }
564  break;
565  case '0':
566  break;
567  case 'f':
568  case 'F':
569  if ((field[0] == 'a' || field[0] == 'A') &&
570  (field[1] == 'l' || field[1] == 'L') &&
571  (field[2] == 's' || field[2] == 'S') &&
572  (field[3] == 'e' || field[3] == 'E')) {
573  field += 4;
574  }
575  break;
576  case 'n':
577  case 'N':
578  if (field[0] == 'o' || field[0] == 'O') {
579  ++field;
580  }
581  break;
582  default:
583  _runtimeError("failed to convert field value to bool");
584  }
585  _checkWhitespace(field, "failed to convert field value to bool");
586  return value;
587 }
void _checkWhitespace(char const *s, char const *msg) const
Definition: Csv.cc:533
static std::string const WHITESPACE
Definition: Csv.h:179
void _runtimeError(char const *msg) const
Definition: Csv.cc:249
template<>
char lsst::ap::utils::CsvReader::_get ( char const *  field) const
private

Definition at line 589 of file Csv.cc.

589  {
590  if (field[0] == '\0') {
591  _runtimeError("empty field");
592  }
593  if (field[1] != '\0') {
594  _runtimeError("field value contains more than one character");
595  }
596  return field[0];
597 }
void _runtimeError(char const *msg) const
Definition: Csv.cc:249
template<>
signed char lsst::ap::utils::CsvReader::_get ( char const *  field) const
private

Definition at line 615 of file Csv.cc.

template<>
short lsst::ap::utils::CsvReader::_get ( char const *  field) const
private

Definition at line 616 of file Csv.cc.

template<>
int lsst::ap::utils::CsvReader::_get ( char const *  field) const
private

Definition at line 617 of file Csv.cc.

template<>
long lsst::ap::utils::CsvReader::_get ( char const *  field) const
private

Definition at line 618 of file Csv.cc.

template<>
long long lsst::ap::utils::CsvReader::_get ( char const *  field) const
private

Definition at line 619 of file Csv.cc.

template<>
unsigned char lsst::ap::utils::CsvReader::_get ( char const *  field) const
private

Definition at line 620 of file Csv.cc.

template<>
unsigned short lsst::ap::utils::CsvReader::_get ( char const *  field) const
private

Definition at line 621 of file Csv.cc.

template<>
unsigned int lsst::ap::utils::CsvReader::_get ( char const *  field) const
private

Definition at line 622 of file Csv.cc.

template<>
unsigned long lsst::ap::utils::CsvReader::_get ( char const *  field) const
private

Definition at line 623 of file Csv.cc.

template<>
unsigned long long lsst::ap::utils::CsvReader::_get ( char const *  field) const
private

Definition at line 624 of file Csv.cc.

template<>
float lsst::ap::utils::CsvReader::_get ( char const *  field) const
private

Definition at line 639 of file Csv.cc.

template<>
double lsst::ap::utils::CsvReader::_get ( char const *  field) const
private

Definition at line 640 of file Csv.cc.

template<>
long double lsst::ap::utils::CsvReader::_get ( char const *  field) const
private

Definition at line 641 of file Csv.cc.

void lsst::ap::utils::CsvReader::_ioError ( char const *  msg) const
private

Throws an lsst::pex::exceptions::IoError with a file name, line number and record number.

Definition at line 235 of file Csv.cc.

235  {
236  ostringstream s;
237  if (_path.empty()) {
238  s << "CSV stream";
239  } else {
240  s << "CSV file " << _path;
241  }
242  s << " line " << _numLines << " record " << _numRecords << ": " << msg;
243  throw LSST_EXCEPT(pexExcept::IoError, s.str());
244 }
size_t _numLines
1-based index of current line.
Definition: Csv.h:212
size_t _numRecords
1-based index of current record.
Definition: Csv.h:213
#define LSST_EXCEPT(type,...)
Definition: Exception.h:46
std::string _path
File name.
Definition: Csv.h:205
template<>
char const* lsst::ap::utils::CsvReader::_null ( )
inlineprivate

Definition at line 196 of file Csv.cc.

196  {
197  return 0;
198 }
template<>
bool lsst::ap::utils::CsvReader::_null ( )
inlineprivate

Definition at line 199 of file Csv.cc.

199  {
200  return false;
201 }
template<>
char lsst::ap::utils::CsvReader::_null ( )
inlineprivate

Definition at line 202 of file Csv.cc.

202  {
203  return '\0';
204 }
template<typename T >
T lsst::ap::utils::CsvReader::_null ( )
inlinestaticprivate

Definition at line 190 of file Csv.cc.

190  {
191  BOOST_STATIC_ASSERT(sizeof(T) == 0);
192 }
template<>
signed char lsst::ap::utils::CsvReader::_null ( )
inlineprivate

Definition at line 210 of file Csv.cc.

template<>
short lsst::ap::utils::CsvReader::_null ( )
inlineprivate

Definition at line 211 of file Csv.cc.

template<>
int lsst::ap::utils::CsvReader::_null ( )
inlineprivate

Definition at line 212 of file Csv.cc.

template<>
long lsst::ap::utils::CsvReader::_null ( )
inlineprivate

Definition at line 213 of file Csv.cc.

template<>
long long lsst::ap::utils::CsvReader::_null ( )
inlineprivate

Definition at line 214 of file Csv.cc.

template<>
unsigned char lsst::ap::utils::CsvReader::_null ( )
inlineprivate

Definition at line 221 of file Csv.cc.

template<>
unsigned short lsst::ap::utils::CsvReader::_null ( )
inlineprivate

Definition at line 222 of file Csv.cc.

template<>
unsigned int lsst::ap::utils::CsvReader::_null ( )
inlineprivate

Definition at line 223 of file Csv.cc.

template<>
unsigned long lsst::ap::utils::CsvReader::_null ( )
inlineprivate

Definition at line 224 of file Csv.cc.

template<>
unsigned long long lsst::ap::utils::CsvReader::_null ( )
inlineprivate

Definition at line 225 of file Csv.cc.

template<>
float lsst::ap::utils::CsvReader::_null ( )
inlineprivate

Definition at line 232 of file Csv.cc.

template<>
double lsst::ap::utils::CsvReader::_null ( )
inlineprivate

Definition at line 233 of file Csv.cc.

template<>
long double lsst::ap::utils::CsvReader::_null ( )
inlineprivate

Definition at line 234 of file Csv.cc.

bool lsst::ap::utils::CsvReader::_readLine ( int  offset)
private

Reads a single line from the underlying stream.

Definition at line 262 of file Csv.cc.

262  {
263  bool gotData = false;
264  while (true) {
265  _in->getline(_record.get() + offset, _capacity - 1 - offset);
266  if (_in->eof()) {
267  if (_in->gcount() > 0) {
268  gotData = true;
269  ++_numLines;
270  }
271  return gotData;
272  } else if (_in->bad()) {
273  _ioError("std::istream::getline() failed");
274  } else if (_in->fail()) {
275  // not enough space for line in buffer - expand it
276  offset += _in->gcount();
277  if (_capacity == MAX_RECORD_LENGTH) {
278  _ioError("record too long");
279  }
280  int cap = min(_capacity*2, MAX_RECORD_LENGTH);
281  boost::scoped_array<char> rec(new char[cap]);
282  memcpy(rec.get(), _record.get(), static_cast<size_t>(_capacity));
283  swap(rec, _record);
284  _capacity = cap;
285  continue;
286  }
287  offset += _in->gcount();
288  ++_numLines;
289  return true;
290  }
291 }
void swap(Ellipse< DataT > &a, Ellipse< DataT > &b)
Definition: EllipseTypes.h:90
static int const MAX_RECORD_LENGTH
Definition: Csv.h:180
std::istream * _in
Input stream.
Definition: Csv.h:211
void _ioError(char const *msg) const
Definition: Csv.cc:235
boost::scoped_array< char > _record
Data for a single record.
Definition: Csv.h:214
int _capacity
Capacity of _record.
Definition: Csv.h:215
size_t _numLines
1-based index of current line.
Definition: Csv.h:212
double min
Definition: attributes.cc:216
void lsst::ap::utils::CsvReader::_readRecord ( )
private

Reads a single CSV record from the underlying stream.

Definition at line 295 of file Csv.cc.

295  {
296  State state = START_RECORD;
297  State popState = START_RECORD;
298  int writeOffset = 0;
299  int offset = 0;
300  char c = 0, c2 = 0;
301 
302  _fields.clear();
303  if (!_readLine(offset)) {
304  _done = true;
305  return;
306  }
307  ++_numRecords;
308 
309  // Run parsing state machine until we reach the end of a record,
310  // then return.
311  while (true) {
312  c = _record[offset++];
313  switch (state) {
314  case START_RECORD:
315  if (c == '\0') {
317  _runtimeError("expecting trailing delimiter "
318  "at end of record");
319  } else if (_control.hasNull &&
320  _control.null.empty()) {
321  _fields.push_back(-1);
322  }
323  return; // finished record
324  }
325  state = START_FIELD;
326  // Fall-through
327 
328  case START_FIELD:
329  _fields.push_back(writeOffset);
330  if (c == '\0') {
331  // finished empty field
332  _record[writeOffset++] = '\0';
333  if (_control.hasNull && _control.null.empty()) {
334  _fields.back() = -1; // NULL
335  }
337  // a trailing delimiter is expected and does not
338  // yield a trailing empty field
339  _fields.pop_back();
340  }
341  return; // finished record
342  } else if (_control.quoting != "QUOTE_NONE" &&
343  c == _control.getQuoteChar()) {
344  // start quoted field
345  state = IN_QUOTED_FIELD;
346  } else if (c == _control.getEscapeChar() &&
347  _control.getEscapeChar() != '\0') {
348  // escape at beginning of field
349  popState = IN_FIELD;
350  state = INITIAL_ESCAPE;
351  } else if (c == _control.getDelimiter()) {
352  // finished empty field
353  _record[writeOffset++] = '\0';
354  if (_control.hasNull && _control.null.empty()) {
355  _fields.back() = -1; // NULL
356  }
357  state = START_FIELD;
358  } else if (_control.skipInitialSpace &&
359  WHITESPACE.find(c) != string::npos) {
360  // eat initial whitespace
361  state = START_FIELD;
362  } else {
363  // start unquoted field
364  _record[writeOffset++] = c;
365  state = IN_FIELD;
366  }
367  break;
368 
369  case IN_FIELD:
370  if (c == '\0') {
371  // finished field
372  _record[writeOffset++] = '\0';
374  _runtimeError("expecting trailing delimiter "
375  "at end of record");
376  }
377  if (_control.hasNull &&
378  _control.null == _record.get() + _fields.back()) {
379  _fields.back() = -1; // NULL
380  }
381  return; // finished record
382  } else if (c == _control.getEscapeChar()) {
383  popState = IN_FIELD;
384  state = ESCAPE;
385  } else if (c == _control.getDelimiter()) {
386  // finished field
387  _record[writeOffset++] = '\0';
388  if (_control.hasNull &&
389  _control.null == _record.get() + _fields.back()) {
390  _fields.back() = -1; // NULL
391  }
392  state = START_FIELD;
393  } else {
394  _record[writeOffset++] = c;
395  state = IN_FIELD;
396  }
397  break;
398 
399  case IN_QUOTED_FIELD:
400  if (c == '\0') {
401  // newline in quoted field
402  _record[writeOffset++] = '\n';
403  // keep reading field on next line
404  if (!_readLine(offset)) {
405  _record[writeOffset++] = '\0';
406  _runtimeError("expecting quote character "
407  "at end of field");
408  }
409  state = IN_QUOTED_FIELD;
410  } else if (c == '\0') {
411  _record[writeOffset++] = '\0';
412  _runtimeError("expecting quote character at end of field");
413  } else if (c == _control.getEscapeChar()) {
414  popState = IN_QUOTED_FIELD;
415  state = ESCAPE;
416  } else if (c == _control.getQuoteChar()) {
417  state = EMBEDDED_QUOTE;
418  } else {
419  _record[writeOffset++] = c;
420  state = IN_QUOTED_FIELD;
421  }
422  break;
423 
424  case INITIAL_ESCAPE:
425  if (c == 'N' && _control.standardEscapes) {
426  char c2 = _record[offset];
427  if (c2 == '\0') {
428  // finished NULL field
429  _fields.back() = -1;
431  _runtimeError("expecting trailing delimiter "
432  "at end of record");
433  }
434  // finished record
435  return;
436  } else if (c2 == _control.getDelimiter()) {
437  // finished NULL field
438  _fields.back() = -1;
439  ++offset;
440  state = START_FIELD;
441  break;
442  }
443  }
444  // Fall-through
445 
446  case ESCAPE:
447  if (c == '\0') {
448  _record[writeOffset++] = '\n';
449  // keep reading field on next line
450  if (!_readLine(offset)) {
451  _record[writeOffset++] = '\0';
452  }
453  }
455  // handle standard escape sequences
456  switch (c) {
457  case 'Z': c = 0x1a; break;
458  case 'b': c = 0x08; break;
459  case 'n': c = '\n'; break;
460  case 'r': c = '\r'; break;
461  case 't': c = '\t'; break;
462  case 'v': c = 0x0b; break;
463  case 'x':
464  // one or two digit hex-escape sequence
465  c2 = _record[offset++];
466  if (c2 >= '0' && c2 <= '9') {
467  c = c2 - '0';
468  } else if (c2 >= 'A' && c2 <= 'F') {
469  c = 10 + (c2 - 'A');
470  } else if (c2 >= 'a' && c2 <= 'f') {
471  c = 10 + (c2 - 'a');
472  } else {
473  _record[writeOffset++] = '\0';
474  _runtimeError("hex escape sequence must be "
475  "followed by one or two hex "
476  "digits");
477  }
478  c2 = _record[offset];
479  if (c2 >= '0' && c2 <= '9') {
480  c = c*16 + (c2 - '0');
481  ++offset;
482  } else if (c2 >= 'A' && c2 <= 'F') {
483  c = c*16 + 10 + (c2 - 'A');
484  ++offset;
485  } else if (c2 >= 'a' && c2 <= 'f') {
486  c = c*16 + 10 + (c2 - 'a');
487  ++offset;
488  }
489  break;
490  default:
491  break;
492  }
493  }
494  _record[writeOffset++] = c;
495  state = popState;
496  break;
497 
498  case EMBEDDED_QUOTE:
499  if (c == '\0') {
500  // finished field
501  _record[writeOffset++] = '\0';
503  _runtimeError("expecting trailing delimiter "
504  "at end of record");
505  }
506  return; // finished record
507  } else if (c == _control.getQuoteChar() &&
509  // save "" as "
510  _record[writeOffset++] = c;
511  state = IN_QUOTED_FIELD;
512  } else if (c == _control.getDelimiter()) {
513  // finished field
514  _record[writeOffset++] = '\0';
515  state = START_FIELD;
516  } else {
517  _record[writeOffset++] = '\0';
518  _runtimeError("expecting delimiter after ending quote");
519  }
520  break;
521 
522  default:
523  throw LSST_EXCEPT(pexExcept::LogicError,
524  "CSV parser bug - state machine reached an "
525  "illegal state");
526  }
527  }
528 }
bool _done
Finished reading file?
Definition: Csv.h:216
char getDelimiter() const
Definition: CsvControl.h:155
char getEscapeChar() const
Definition: CsvControl.h:158
char getQuoteChar() const
Definition: CsvControl.h:161
boost::scoped_array< char > _record
Data for a single record.
Definition: Csv.h:214
std::string null
String representation of NULL field values. Never quoted on output. If specified, the representation may not contain any delimiter, quote, escape or line terminator characters ('\n'/'\r').
Definition: CsvControl.h:61
CsvControl _control
File format.
Definition: Csv.h:206
std::vector< int > _fields
Definition: Csv.h:217
size_t _numRecords
1-based index of current record.
Definition: Csv.h:213
#define LSST_EXCEPT(type,...)
Definition: Exception.h:46
bool skipInitialSpace
If true, whitespace immediately following the delimiter is ignored.
Definition: CsvControl.h:100
std::string quoting
Field quoting style for CSV input/output. Legal values are: 'QUOTE_MINIMAL': Only quot...
Definition: CsvControl.h:77
bool _readLine(int offset)
Definition: Csv.cc:262
static std::string const WHITESPACE
Definition: Csv.h:179
bool hasNull
Indicates whether the null string is valid. If set to false, the only way NULLs can be recogn...
Definition: CsvControl.h:67
bool standardEscapes
Flag indicating whether standard escape sequences should be handled. If false, then the character sequence '\C', where C is any character, is mapped to C (assuming '\' is the escape character). If true, the following special cases are handled differently: '\b' is mapped to BS - backspace (ASCII 8); '\f' is mapped to FF - form feed (ASCII 12); '\n' is mapped to NL - newline (ASCII 10); '\r' is mapped to CR - carriage return (ASCII 13); '\t' is mapped to TAB - horizontal tab (ASCII 9); '\v' is mapped to VT - vertical tab (ASCII 11); '\xD' and '\xDD', where D is a hexadecimal digit, are mapped to the character with that numeric code; a field value of exactly '\N' (no quotes, whitespace, or other content) is treated as a NULL.
Definition: CsvControl.h:123
bool trailingDelimiter
If true, then a trailing delimiter character is expected and written at end of every record...
Definition: CsvControl.h:127
void _runtimeError(char const *msg) const
Definition: Csv.cc:249
bool doubleQuote
If true, embedded quote characters are escaped with a leading quote character. Otherwise the escape character is used. If escaping and double-quoting are disabled, writing a field with embedded quote character will raise an exception.
Definition: CsvControl.h:106
void lsst::ap::utils::CsvReader::_runtimeError ( char const *  msg) const
private

Throws an lsst::pex::exceptions::RuntimeError with a file name, line number and record number.

Definition at line 249 of file Csv.cc.

249  {
250  ostringstream s;
251  if (_path.empty()) {
252  s << "CSV stream";
253  } else {
254  s << "CSV file " << _path;
255  }
256  s << " line " << _numLines << " record " << _numRecords << ": " << msg;
257  throw LSST_EXCEPT(pexExcept::RuntimeError, s.str());
258 }
size_t _numLines
1-based index of current line.
Definition: Csv.h:212
size_t _numRecords
1-based index of current record.
Definition: Csv.h:213
#define LSST_EXCEPT(type,...)
Definition: Exception.h:46
std::string _path
File name.
Definition: Csv.h:205
std::string const lsst::ap::utils::CsvReader::get ( int  i) const
inline

Returns the value of a field as an instance of type T.

Returns the value of a field as a std::string.

Definition at line 147 of file Csv.cc.

147  {
148  // force a compile time error for unsupported types
149  static int const typeAllowed = boost::mpl::or_<
150  boost::is_same<T, char const *>,
151  boost::is_same<T, std::string>,
152  boost::is_integral<T>,
153  boost::is_floating_point<T>
154  >::value;
155  BOOST_STATIC_ASSERT(typeAllowed);
156  if (isNull(i)) {
157  return _null<T>();
158  }
159  // _get specializations live in the implementation file
160  return _get<T>(_record.get() + _fields[i]);
161 }
boost::scoped_array< char > _record
Data for a single record.
Definition: Csv.h:214
std::vector< int > _fields
Definition: Csv.h:217
bool isNull(int i) const
Definition: Csv.cc:125
std::string const lsst::ap::utils::CsvReader::get ( std::string const &  name) const
inline

Returns the value of the field with the given name as a std::string. The templated overloads, get<T>(), return the value of a field as an instance of type T.

Definition at line 164 of file Csv.cc.

164  {
165  return get<T>(getIndexOf(name));
166 }
int getIndexOf(std::string const &name) const
Definition: Csv.cc:79
std::string const lsst::ap::utils::CsvReader::get ( char const *  name) const
inline

Returns the value of the field with the given name as a std::string. The templated overloads, get<T>(), return the value of a field as an instance of type T.

Definition at line 169 of file Csv.cc.

169  {
170  return get<T>(getIndexOf(name));
171 }
int getIndexOf(std::string const &name) const
Definition: Csv.cc:79
template<typename T >
T const lsst::ap::utils::CsvReader::get ( int  i) const
inline

Returns the value of the field at index i as an instance of type T.

template<typename T >
T const lsst::ap::utils::CsvReader::get ( std::string const &  name) const
inline

Returns the value of the field with the given name as an instance of type T.

template<typename T >
T const lsst::ap::utils::CsvReader::get ( char const *  name) const
inline

Returns the value of the field with the given name as an instance of type T.

CsvControl const & lsst::ap::utils::CsvReader::getControl ( ) const
inline

Returns a description of the format understood by this reader.

Definition at line 49 of file Csv.cc.

49  {
50  return _control;
51 }
CsvControl _control
File format.
Definition: Csv.h:206
std::vector< std::string > const & lsst::ap::utils::CsvReader::getFieldNames ( ) const
inline

Returns the field names defined for this CsvReader. The i-th name identifies the i-th field in a record.

Definition at line 71 of file Csv.cc.

71  {
72  return _names;
73 }
std::vector< std::string > _names
Field names in order of occurrence.
Definition: Csv.h:207
int lsst::ap::utils::CsvReader::getIndexOf ( std::string const &  name) const
inline

Returns the 0-based index (in a record) of the field with the specified name, or -1 if the field name is not recognized.

Definition at line 79 of file Csv.cc.

79  {
80  FieldIndexes::const_iterator i = _indexes.find(name);
81  return i == _indexes.end() ? -1 : static_cast<int>(i->second);
82 }
FieldIndexes _indexes
Field name to index map.
Definition: Csv.h:208
int lsst::ap::utils::CsvReader::getIndexOf ( char const *  name) const
inline

Returns the 0-based index (in a record) of the field with the specified name, or -1 if the field name is not recognized.

Definition at line 83 of file Csv.cc.

83  {
84  return getIndexOf(std::string(name));
85 }
int getIndexOf(std::string const &name) const
Definition: Csv.cc:79
int lsst::ap::utils::CsvReader::getNumFields ( ) const
inline

Returns the number of fields in the current record, or 0 if there is no current record.

Definition at line 119 of file Csv.cc.

119  {
120  return static_cast<int>(_fields.size());
121 }
std::vector< int > _fields
Definition: Csv.h:217
size_t lsst::ap::utils::CsvReader::getNumLines ( ) const
inline

Returns the number of lines read in. This is the 1-based index of the last line in the current record, and is 0 only when the input stream is empty.

Definition at line 57 of file Csv.cc.

57  {
58  return _numLines;
59 }
size_t _numLines
1-based index of current line.
Definition: Csv.h:212
size_t lsst::ap::utils::CsvReader::getNumRecords ( ) const
inline

Returns the number of records read in. This is the 1-based index of the current record, and is 0 only when the input stream is empty.

Definition at line 64 of file Csv.cc.

64  {
65  return _numRecords;
66 }
size_t _numRecords
1-based index of current record.
Definition: Csv.h:213
bool lsst::ap::utils::CsvReader::isDone ( ) const
inline

Returns true if all records have been read (and there is no current record).

Definition at line 90 of file Csv.cc.

90  {
91  return _done;
92 }
bool _done
Finished reading file?
Definition: Csv.h:216
bool lsst::ap::utils::CsvReader::isNull ( int  i) const
inline

Returns true if the value of the given field is a database NULL.

Definition at line 125 of file Csv.cc.

125  {
126  if (_done) {
127  _ioError("no current record (all records have been read)");
128  }
129  if (i < 0 || static_cast<size_t>(i) >= _fields.size()) {
130  _ioError("attempt to access field with invalid field name or index");
131  }
132  return _fields[i] < 0;
133 }
bool _done
Finished reading file?
Definition: Csv.h:216
void _ioError(char const *msg) const
Definition: Csv.cc:235
std::vector< int > _fields
Definition: Csv.h:217
bool lsst::ap::utils::CsvReader::isNull ( std::string const &  name) const
inline

Returns true if the value of the field with the given name is a database NULL.

Definition at line 136 of file Csv.cc.

136  {
137  return isNull(getIndexOf(name));
138 }
bool isNull(int i) const
Definition: Csv.cc:125
int getIndexOf(std::string const &name) const
Definition: Csv.cc:79
bool lsst::ap::utils::CsvReader::isNull ( char const *  name) const
inline

Returns true if the value of the field with the given name is a database NULL.

Definition at line 141 of file Csv.cc.

141  {
142  return isNull(getIndexOf(name));
143 }
bool isNull(int i) const
Definition: Csv.cc:125
int getIndexOf(std::string const &name) const
Definition: Csv.cc:79
void lsst::ap::utils::CsvReader::nextRecord ( )
inline

Advances to the next record in the file. If all records have been read, the function has no effect.

Exceptions
lsst::pex::exceptions::RuntimeError: If this exception is thrown, it is because the input file did not conform to the reader's format. The current record will contain the fields successfully read in, but the last field may be incomplete or otherwise incorrectly decoded. The next call to nextRecord() will resume reading at the beginning of the next line in the file. If fields contain new-lines, this will not necessarily be at the start of a record!
lsst::pex::exceptions::IoError: A system I/O call failed - one cannot recover in any general way.
lsst::pex::exceptions::LogicError: There is a serious bug in the internal CSV parser. File a ticket!

Definition at line 110 of file Csv.cc.

110  {
111  if (!_done) {
112  _readRecord();
113  }
114 }
bool _done
Finished reading file?
Definition: Csv.h:216
CsvReader& lsst::ap::utils::CsvReader::operator= ( CsvReader const &  )
private
void lsst::ap::utils::CsvReader::setFieldNames ( std::vector< std::string > const &  names)

Associates field names with field indexes. The i-th field name in the list corresponds to the i-th field in a record. Once field names have been set, they can be used in lieu of field indexes to look up field values.

Field names are case-sensitive and must all be distinct. The only exception is that empty field names are skipped.

Definition at line 167 of file Csv.cc.

167  {
168  if (names.size() > static_cast<size_t>(numeric_limits<int>::max())) {
169  throw LSST_EXCEPT(pexExcept::InvalidParameterError,
170  "too many field names");
171  }
172  // create temporary name->index map.
173  FieldIndexes indexes;
174  for (vector<string>::size_type i = 0; i < names.size(); ++i) {
175  if (names[i].empty()) {
176  continue; // skip empty field names
177  }
178  pair<FieldIndexes::iterator, bool> p = indexes.insert(make_pair(
179  names[i], static_cast<int>(i)));
180  if (!p.second) {
181  throw LSST_EXCEPT(pexExcept::InvalidParameterError,
182  "Duplicate field name: " + names[i]);
183  }
184  }
185  // copy name list
186  vector<string> copy(names);
187  // commit state
188  swap(copy, _names);
189  swap(indexes, _indexes);
190 }
void swap(Ellipse< DataT > &a, Ellipse< DataT > &b)
Definition: EllipseTypes.h:90
std::tr1::unordered_map< std::string, int > FieldIndexes
Definition: Csv.h:177
SelectEigenView< T >::Type copy(Eigen::EigenBase< T > const &other)
Copy an arbitrary Eigen expression into a new EigenView.
Definition: eigen.h:390
double max
Definition: attributes.cc:218
#define LSST_EXCEPT(type,...)
Definition: Exception.h:46
FieldIndexes _indexes
Field name to index map.
Definition: Csv.h:208
std::vector< std::string > _names
Field names in order of occurrence.
Definition: Csv.h:207
void lsst::ap::utils::CsvReader::setFieldNames ( std::string const &  names,
std::string const &  regex,
bool  stripWhitespace = true 
)

Splits a string containing a delimited list of field names using the given regular expression and associates the resulting field names with field indexes.

Field names are case-sensitive and must all be distinct. The only exception is that empty field names are skipped.
See Also
setFieldNames(std::vector<std::string> const&)
Parameters
names: Delimited list of field names.
regex: Regular expression matching delimiter.
stripWhitespace: Strip whitespace from field names?

Definition at line 202 of file Csv.cc.

206  {
207  vector<string> fieldNames;
208  boost::regex re(regex);
209  boost::sregex_token_iterator i(names.begin(), names.end(), re, -1);
210  boost::sregex_token_iterator e;
211  for (; i != e; ++i) {
212  string name = i->str();
213  if (stripWhitespace) {
214  size_t w = name.find_last_not_of(WHITESPACE);
215  if (w != string::npos) {
216  name.erase(w + 1);
217  } else {
218  name.clear();
219  }
220  w = name.find_first_not_of(WHITESPACE);
221  if (w != string::npos) {
222  name.erase(0, w);
223  } else {
224  name.clear();
225  }
226  }
227  fieldNames.push_back(name);
228  }
229  setFieldNames(fieldNames);
230 }
void setFieldNames(std::vector< std::string > const &names)
Definition: Csv.cc:167
double w
Definition: CoaddPsf.cc:57
static std::string const WHITESPACE
Definition: Csv.h:179

Member Data Documentation

int lsst::ap::utils::CsvReader::_capacity
private

Capacity of _record.

Definition at line 215 of file Csv.h.

CsvControl lsst::ap::utils::CsvReader::_control
private

File format.

Definition at line 206 of file Csv.h.

bool lsst::ap::utils::CsvReader::_done
private

Finished reading file?

Definition at line 216 of file Csv.h.

std::vector<int> lsst::ap::utils::CsvReader::_fields
private

Index of first character for each field in _record, -1 if NULL.

Definition at line 217 of file Csv.h.

std::istream* lsst::ap::utils::CsvReader::_in
private

Input stream.

Definition at line 211 of file Csv.h.

FieldIndexes lsst::ap::utils::CsvReader::_indexes
private

Field name to index map.

Definition at line 208 of file Csv.h.

std::vector<std::string> lsst::ap::utils::CsvReader::_names
private

Field names in order of occurence.

Definition at line 207 of file Csv.h.

size_t lsst::ap::utils::CsvReader::_numLines
private

1-based index of current line.

Definition at line 212 of file Csv.h.

size_t lsst::ap::utils::CsvReader::_numRecords
private

1-based index of current record.

Definition at line 213 of file Csv.h.

std::string lsst::ap::utils::CsvReader::_path
private

File name.

Definition at line 205 of file Csv.h.

boost::scoped_array<char> lsst::ap::utils::CsvReader::_record
private

Data for a single record.

Definition at line 214 of file Csv.h.

boost::scoped_ptr<std::ifstream> lsst::ap::utils::CsvReader::_stream
private

File stream.

Definition at line 210 of file Csv.h.

int const lsst::ap::utils::CsvReader::DEFAULT_CAPACITY = 128*1024
staticprivate

Definition at line 181 of file Csv.h.

int const lsst::ap::utils::CsvReader::MAX_RECORD_LENGTH = 4*1024*1024
staticprivate

Definition at line 180 of file Csv.h.

std::string const lsst::ap::utils::CsvReader::WHITESPACE
staticprivate

Definition at line 179 of file Csv.h.


The documentation for this class was generated from the following files: