LSSTApplications  1.1.2+25,10.0+13,10.0+132,10.0+133,10.0+224,10.0+41,10.0+8,10.0-1-g0f53050+14,10.0-1-g4b7b172+19,10.0-1-g61a5bae+98,10.0-1-g7408a83+3,10.0-1-gc1e0f5a+19,10.0-1-gdb4482e+14,10.0-11-g3947115+2,10.0-12-g8719d8b+2,10.0-15-ga3f480f+1,10.0-2-g4f67435,10.0-2-gcb4bc6c+26,10.0-28-gf7f57a9+1,10.0-3-g1bbe32c+14,10.0-3-g5b46d21,10.0-4-g027f45f+5,10.0-4-g86f66b5+2,10.0-4-gc4fccf3+24,10.0-40-g4349866+2,10.0-5-g766159b,10.0-5-gca2295e+25,10.0-6-g462a451+1
LSSTDataManagementBasePackage
Csv.cc
Go to the documentation of this file.
1 // -*- lsst-c++ -*-
2 
3 /*
4  * LSST Data Management System
5  * Copyright 2008, 2009, 2010 LSST Corporation.
6  *
7  * This product includes software developed by the
8  * LSST Project (http://www.lsst.org/).
9  *
10  * This program is free software: you can redistribute it and/or modify
11  * it under the terms of the GNU General Public License as published by
12  * the Free Software Foundation, either version 3 of the License, or
13  * (at your option) any later version.
14  *
15  * This program is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18  * GNU General Public License for more details.
19  *
20  * You should have received a copy of the LSST License Statement and
21  * the GNU General Public License along with this program. If not,
22  * see <http://www.lsstcorp.org/LegalNotices/>.
23  */
24 
29 #include "lsst/ap/utils/Csv.h"
30 
31 #include <cerrno>
32 #include <cfloat>
33 #include <cstring>
34 #include <sstream>
35 #include <utility>
36 
37 #include "boost/filesystem.hpp"
38 #include "boost/regex.hpp"
39 
40 #include "lsst/utils/ieee.h"
41 #include "lsst/pex/exceptions.h"
42 
43 using std::ifstream;
44 using std::ios;
45 using std::make_pair;
46 using std::memcpy;
47 using std::min;
48 using std::numeric_limits;
49 using std::ofstream;
50 using std::ostringstream;
51 using std::pair;
52 using std::string;
53 using std::strtof;
54 using std::strtod;
55 using std::strtold;
56 using std::strtol;
57 using std::strtoll;
58 using std::strtoul;
59 using std::strtoull;
60 using std::swap;
61 using std::vector;
62 
63 namespace pexExcept = lsst::pex::exceptions;
64 
65 
66 namespace lsst { namespace ap { namespace utils {
67 
68 // -- CsvReader implementation ----
69 
70 std::string const CsvReader::WHITESPACE(" \t\f\v\n\r");
71 int const CsvReader::MAX_RECORD_LENGTH = 4*1024*1024;
72 int const CsvReader::DEFAULT_CAPACITY = 128*1024;
73 
77  std::string const &path,
78  CsvControl const &control,
79  bool namesInFirstRecord
80  ) :
82  _path(path),
83  _control(control),
84  _names(),
85  _indexes(),
86  _stream(new ifstream(path.c_str(), ios::binary | ios::in)),
87  _in(_stream.get()),
88  _numLines(0),
89  _numRecords(0),
90  _record(new char[DEFAULT_CAPACITY]),
91  _capacity(DEFAULT_CAPACITY),
92  _done(false),
93  _fields()
94 {
95  if (!_stream->good()) {
96  throw LSST_EXCEPT(pexExcept::IoError,
97  "failed to open file " + path + " for reading");
98  }
99  // exception mask for _stream is clear
100  _readRecord();
101  if (namesInFirstRecord && !isDone()) {
102  vector<string> names;
103  for (int i = 0; i < getNumFields(); ++i) {
104  names.push_back(get(i));
105  }
106  setFieldNames(names);
107  _readRecord();
108  }
109 }
110 
119  std::istream &in,
120  CsvControl const &control,
121  bool namesInFirstRecord
122  ) :
124  _path(),
125  _control(control),
126  _names(),
127  _indexes(),
128  _stream(),
129  _in(&in),
130  _numLines(0),
131  _numRecords(0),
132  _record(new char[DEFAULT_CAPACITY]),
133  _capacity(DEFAULT_CAPACITY),
134  _done(false),
135  _fields()
136 {
137  if (!in.good()) {
138  throw LSST_EXCEPT(pexExcept::IoError,
139  "std::istream not good() for reading");
140  }
141  // clear exception mask
142  in.exceptions(ios::goodbit);
143  _readRecord();
144  if (namesInFirstRecord && !isDone()) {
145  vector<string> names;
146  for (int i = 0; i < getNumFields(); ++i) {
147  names.push_back(get(i));
148  }
149  setFieldNames(names);
150  _readRecord();
151  }
152 }
153 
155  _in = 0;
156 }
157 
167 void CsvReader::setFieldNames(std::vector<std::string> const &names) {
168  if (names.size() > static_cast<size_t>(numeric_limits<int>::max())) {
169  throw LSST_EXCEPT(pexExcept::InvalidParameterError,
170  "too many field names");
171  }
172  // create temporary name->index map.
173  FieldIndexes indexes;
174  for (vector<string>::size_type i = 0; i < names.size(); ++i) {
175  if (names[i].empty()) {
176  continue; // skip empty field names
177  }
178  pair<FieldIndexes::iterator, bool> p = indexes.insert(make_pair(
179  names[i], static_cast<int>(i)));
180  if (!p.second) {
181  throw LSST_EXCEPT(pexExcept::InvalidParameterError,
182  "Duplicate field name: " + names[i]);
183  }
184  }
185  // copy name list
186  vector<string> copy(names);
187  // commit state
188  swap(copy, _names);
189  swap(indexes, _indexes);
190 }
191 
203  std::string const &names,
204  std::string const &regex,
205  bool stripWhitespace
206 ) {
207  vector<string> fieldNames;
208  boost::regex re(regex);
209  boost::sregex_token_iterator i(names.begin(), names.end(), re, -1);
210  boost::sregex_token_iterator e;
211  for (; i != e; ++i) {
212  string name = i->str();
213  if (stripWhitespace) {
214  size_t w = name.find_last_not_of(WHITESPACE);
215  if (w != string::npos) {
216  name.erase(w + 1);
217  } else {
218  name.clear();
219  }
220  w = name.find_first_not_of(WHITESPACE);
221  if (w != string::npos) {
222  name.erase(0, w);
223  } else {
224  name.clear();
225  }
226  }
227  fieldNames.push_back(name);
228  }
229  setFieldNames(fieldNames);
230 }
231 
235 void CsvReader::_ioError(char const *msg) const {
236  ostringstream s;
237  if (_path.empty()) {
238  s << "CSV stream";
239  } else {
240  s << "CSV file " << _path;
241  }
242  s << " line " << _numLines << " record " << _numRecords << ": " << msg;
243  throw LSST_EXCEPT(pexExcept::IoError, s.str());
244 }
245 
249 void CsvReader::_runtimeError(char const *msg) const {
250  ostringstream s;
251  if (_path.empty()) {
252  s << "CSV stream";
253  } else {
254  s << "CSV file " << _path;
255  }
256  s << " line " << _numLines << " record " << _numRecords << ": " << msg;
257  throw LSST_EXCEPT(pexExcept::RuntimeError, s.str());
258 }
259 
262 bool CsvReader::_readLine(int offset) {
263  bool gotData = false;
264  while (true) {
265  _in->getline(_record.get() + offset, _capacity - 1 - offset);
266  if (_in->eof()) {
267  if (_in->gcount() > 0) {
268  gotData = true;
269  ++_numLines;
270  }
271  return gotData;
272  } else if (_in->bad()) {
273  _ioError("std::istream::getline() failed");
274  } else if (_in->fail()) {
275  // not enough space for line in buffer - expand it
276  offset += _in->gcount();
277  if (_capacity == MAX_RECORD_LENGTH) {
278  _ioError("record too long");
279  }
280  int cap = min(_capacity*2, MAX_RECORD_LENGTH);
281  boost::scoped_array<char> rec(new char[cap]);
282  memcpy(rec.get(), _record.get(), static_cast<size_t>(_capacity));
283  swap(rec, _record);
284  _capacity = cap;
285  continue;
286  }
287  offset += _in->gcount();
288  ++_numLines;
289  return true;
290  }
291 }
292 
296  State state = START_RECORD;
297  State popState = START_RECORD;
298  int writeOffset = 0;
299  int offset = 0;
300  char c = 0, c2 = 0;
301 
302  _fields.clear();
303  if (!_readLine(offset)) {
304  _done = true;
305  return;
306  }
307  ++_numRecords;
308 
309  // Run parsing state machine until we reach the end of a record,
310  // then return.
311  while (true) {
312  c = _record[offset++];
313  switch (state) {
314  case START_RECORD:
315  if (c == '\0') {
317  _runtimeError("expecting trailing delimiter "
318  "at end of record");
319  } else if (_control.hasNull &&
320  _control.null.empty()) {
321  _fields.push_back(-1);
322  }
323  return; // finished record
324  }
325  state = START_FIELD;
326  // Fall-through
327 
328  case START_FIELD:
329  _fields.push_back(writeOffset);
330  if (c == '\0') {
331  // finished empty field
332  _record[writeOffset++] = '\0';
333  if (_control.hasNull && _control.null.empty()) {
334  _fields.back() = -1; // NULL
335  }
337  // a trailing delimiter is expected and does not
338  // yield a trailing empty field
339  _fields.pop_back();
340  }
341  return; // finished record
342  } else if (_control.quoting != "QUOTE_NONE" &&
343  c == _control.getQuoteChar()) {
344  // start quoted field
345  state = IN_QUOTED_FIELD;
346  } else if (c == _control.getEscapeChar() &&
347  _control.getEscapeChar() != '\0') {
348  // escape at beginning of field
349  popState = IN_FIELD;
350  state = INITIAL_ESCAPE;
351  } else if (c == _control.getDelimiter()) {
352  // finished empty field
353  _record[writeOffset++] = '\0';
354  if (_control.hasNull && _control.null.empty()) {
355  _fields.back() = -1; // NULL
356  }
357  state = START_FIELD;
358  } else if (_control.skipInitialSpace &&
359  WHITESPACE.find(c) != string::npos) {
360  // eat initial whitespace
361  state = START_FIELD;
362  } else {
363  // start unquoted field
364  _record[writeOffset++] = c;
365  state = IN_FIELD;
366  }
367  break;
368 
369  case IN_FIELD:
370  if (c == '\0') {
371  // finished field
372  _record[writeOffset++] = '\0';
374  _runtimeError("expecting trailing delimiter "
375  "at end of record");
376  }
377  if (_control.hasNull &&
378  _control.null == _record.get() + _fields.back()) {
379  _fields.back() = -1; // NULL
380  }
381  return; // finished record
382  } else if (c == _control.getEscapeChar()) {
383  popState = IN_FIELD;
384  state = ESCAPE;
385  } else if (c == _control.getDelimiter()) {
386  // finished field
387  _record[writeOffset++] = '\0';
388  if (_control.hasNull &&
389  _control.null == _record.get() + _fields.back()) {
390  _fields.back() = -1; // NULL
391  }
392  state = START_FIELD;
393  } else {
394  _record[writeOffset++] = c;
395  state = IN_FIELD;
396  }
397  break;
398 
399  case IN_QUOTED_FIELD:
400  if (c == '\0') {
401  // newline in quoted field
402  _record[writeOffset++] = '\n';
403  // keep reading field on next line
404  if (!_readLine(offset)) {
405  _record[writeOffset++] = '\0';
406  _runtimeError("expecting quote character "
407  "at end of field");
408  }
409  state = IN_QUOTED_FIELD;
410  } else if (c == '\0') {
411  _record[writeOffset++] = '\0';
412  _runtimeError("expecting quote character at end of field");
413  } else if (c == _control.getEscapeChar()) {
414  popState = IN_QUOTED_FIELD;
415  state = ESCAPE;
416  } else if (c == _control.getQuoteChar()) {
417  state = EMBEDDED_QUOTE;
418  } else {
419  _record[writeOffset++] = c;
420  state = IN_QUOTED_FIELD;
421  }
422  break;
423 
424  case INITIAL_ESCAPE:
425  if (c == 'N' && _control.standardEscapes) {
426  char c2 = _record[offset];
427  if (c2 == '\0') {
428  // finished NULL field
429  _fields.back() = -1;
431  _runtimeError("expecting trailing delimiter "
432  "at end of record");
433  }
434  // finished record
435  return;
436  } else if (c2 == _control.getDelimiter()) {
437  // finished NULL field
438  _fields.back() = -1;
439  ++offset;
440  state = START_FIELD;
441  break;
442  }
443  }
444  // Fall-through
445 
446  case ESCAPE:
447  if (c == '\0') {
448  _record[writeOffset++] = '\n';
449  // keep reading field on next line
450  if (!_readLine(offset)) {
451  _record[writeOffset++] = '\0';
452  }
453  }
455  // handle standard escape sequences
456  switch (c) {
457  case 'Z': c = 0x1a; break;
458  case 'b': c = 0x08; break;
459  case 'n': c = '\n'; break;
460  case 'r': c = '\r'; break;
461  case 't': c = '\t'; break;
462  case 'v': c = 0x0b; break;
463  case 'x':
464  // one or two digit hex-escape sequence
465  c2 = _record[offset++];
466  if (c2 >= '0' && c2 <= '9') {
467  c = c2 - '0';
468  } else if (c2 >= 'A' && c2 <= 'F') {
469  c = 10 + (c2 - 'A');
470  } else if (c2 >= 'a' && c2 <= 'f') {
471  c = 10 + (c2 - 'a');
472  } else {
473  _record[writeOffset++] = '\0';
474  _runtimeError("hex escape sequence must be "
475  "followed by one or two hex "
476  "digits");
477  }
478  c2 = _record[offset];
479  if (c2 >= '0' && c2 <= '9') {
480  c = c*16 + (c2 - '0');
481  ++offset;
482  } else if (c2 >= 'A' && c2 <= 'F') {
483  c = c*16 + 10 + (c2 - 'A');
484  ++offset;
485  } else if (c2 >= 'a' && c2 <= 'f') {
486  c = c*16 + 10 + (c2 - 'a');
487  ++offset;
488  }
489  break;
490  default:
491  break;
492  }
493  }
494  _record[writeOffset++] = c;
495  state = popState;
496  break;
497 
498  case EMBEDDED_QUOTE:
499  if (c == '\0') {
500  // finished field
501  _record[writeOffset++] = '\0';
503  _runtimeError("expecting trailing delimiter "
504  "at end of record");
505  }
506  return; // finished record
507  } else if (c == _control.getQuoteChar() &&
509  // save "" as "
510  _record[writeOffset++] = c;
511  state = IN_QUOTED_FIELD;
512  } else if (c == _control.getDelimiter()) {
513  // finished field
514  _record[writeOffset++] = '\0';
515  state = START_FIELD;
516  } else {
517  _record[writeOffset++] = '\0';
518  _runtimeError("expecting delimiter after ending quote");
519  }
520  break;
521 
522  default:
523  throw LSST_EXCEPT(pexExcept::LogicError,
524  "CSV parser bug - state machine reached an "
525  "illegal state");
526  }
527  }
528 }
529 
533 void CsvReader::_checkWhitespace(char const *s, char const *msg) const {
534  for (; WHITESPACE.find(*s) != string::npos; ++s) { }
535  if (*s != '\0') {
536  _runtimeError(msg);
537  }
538 }
539 
540 template <> bool CsvReader::_get<bool>(char const *field) const {
541  for (; WHITESPACE.find(*field) != string::npos; ++field) { }
542  bool value = false;
543  char c = *field++;
544  switch (c) {
545  case '1':
546  value = true;
547  break;
548  case 't':
549  case 'T':
550  value = true;
551  if ((field[0] == 'r' || field[0] == 'R') &&
552  (field[1] == 'u' || field[1] == 'U') &&
553  (field[2] == 'e' || field[2] == 'E')) {
554  field += 3;
555  }
556  break;
557  case 'y':
558  case 'Y':
559  value = true;
560  if ((field[0] == 'e' || field[0] == 'E') &&
561  (field[1] == 's' || field[1] == 'S')) {
562  field += 2;
563  }
564  break;
565  case '0':
566  break;
567  case 'f':
568  case 'F':
569  if ((field[0] == 'a' || field[0] == 'A') &&
570  (field[1] == 'l' || field[1] == 'L') &&
571  (field[2] == 's' || field[2] == 'S') &&
572  (field[3] == 'e' || field[3] == 'E')) {
573  field += 4;
574  }
575  break;
576  case 'n':
577  case 'N':
578  if (field[0] == 'o' || field[0] == 'O') {
579  ++field;
580  }
581  break;
582  default:
583  _runtimeError("failed to convert field value to bool");
584  }
585  _checkWhitespace(field, "failed to convert field value to bool");
586  return value;
587 }
588 
589 template <> char CsvReader::_get<char>(char const *field) const {
590  if (field[0] == '\0') {
591  _runtimeError("empty field");
592  }
593  if (field[1] != '\0') {
594  _runtimeError("field value contains more than one character");
595  }
596  return field[0];
597 }
598 
599 #define LSST_SPECIALIZE_GET(T, C, fun) \
600  template <> T CsvReader::_get<T>(char const *field) const { \
601  char *e; \
602  errno = 0; \
603  C v = fun(field, &e, 10); \
604  if (e == field) { \
605  _runtimeError("failed to convert field value to " #T); \
606  } else if ((errno == ERANGE) || \
607  v > numeric_limits<T>::max() || \
608  v < numeric_limits<T>::min()) { \
609  _runtimeError("field value overflow during conversion to " #T); \
610  } \
611  _checkWhitespace(e, "failed to convert field value to " #T); \
612  return v; \
613  }
614 
615  LSST_SPECIALIZE_GET(signed char, long, strtol)
616  LSST_SPECIALIZE_GET(short, long, strtol)
617  LSST_SPECIALIZE_GET(int, long, strtol)
618  LSST_SPECIALIZE_GET(long, long, strtol)
619  LSST_SPECIALIZE_GET(long long, long long, strtoll)
620  LSST_SPECIALIZE_GET(unsigned char, unsigned long, strtoul)
621  LSST_SPECIALIZE_GET(unsigned short, unsigned long, strtoul)
622  LSST_SPECIALIZE_GET(unsigned int, unsigned long, strtoul)
623  LSST_SPECIALIZE_GET(unsigned long, unsigned long, strtoul)
624  LSST_SPECIALIZE_GET(unsigned long long, unsigned long long, strtoull)
625 #undef LSST_SPECIALIZE_GET
626 
627 #define LSST_SPECIALIZE_GET(T, fun) \
628  template <> T CsvReader::_get<T>(char const *field) const { \
629  char *e; \
630  errno = 0; \
631  T v = fun(field, &e); \
632  if (e == field) { \
633  _runtimeError("failed to convert field value to " #T); \
634  } \
635  _checkWhitespace(e, "failed to convert field value to " #T); \
636  return v; \
637  }
638 
639  LSST_SPECIALIZE_GET(float, strtof)
640  LSST_SPECIALIZE_GET(double, strtod)
641  LSST_SPECIALIZE_GET(long double, strtold)
642 #undef LSST_SPECIALIZE_GET
643 
644 
645 // -- CsvWriter implementation ----
646 
650  std::string const &path,
651  CsvControl const &control,
652  bool truncate,
653  bool append
657 ) :
659  _stream(),
660  _out(0),
661  _control(control),
662  _numRecords(0),
663  _numLines(0),
664  _numFields(0)
665 {
666  ios::openmode mode = ios::out | ios::binary;
667  if (!truncate && boost::filesystem::exists(path)) {
668  if (append) {
669  mode |= ios::app;
670  } else {
671  throw LSST_EXCEPT(pexExcept::IoError,
672  "file " + path + " already exists");
673  }
674  }
675  if (truncate) {
676  mode |= ios::trunc;
677  }
678  _stream.reset(new ofstream(path.c_str(), mode));
679  if (!_stream->good()) {
680  throw LSST_EXCEPT(pexExcept::IoError,
681  "failed to open file " + path + " for writing");
682  }
683  _out = _stream.get();
684  // throw on any kind of output error
685  _stream->exceptions(ios::eofbit | ios::failbit | ios::badbit);
686 }
687 
695 CsvWriter::CsvWriter(std::ostream &out, CsvControl const &control) :
696  _stream(),
697  _out(&out),
698  _control(control),
699  _numRecords(0),
700  _numLines(0),
701  _numFields(0)
702 {
703  if (!out.good()) {
704  throw LSST_EXCEPT(pexExcept::IoError,
705  "std::ostream not good() for writing");
706  }
707  out.exceptions(ios::eofbit | ios::failbit | ios::badbit);
708 }
709 
711  _out = 0;
712 }
713 
718  _out->put(_control.getDelimiter());
719  }
720  _out->put('\n');
721  _numFields = 0;
722  ++_numLines;
723  ++_numRecords;
724 }
725 
728 void CsvWriter::appendFields(std::vector<std::string> const &fields) {
729  for (vector<string>::const_iterator i = fields.begin(), e = fields.end();
730  i != e; ++i) {
731  appendField(*i);
732  }
733 }
734 
// Defines CsvWriter::appendField(T) for an integer type T. The value is
// cast to C, formatted with the printf-style format `fmt` into a local
// 64 byte buffer, and handed to _write(). A formatting failure raises
// RuntimeError; a result that does not fit the buffer raises LogicError.
#define LSST_IMPLEMENT_APPEND_FIELD(T, C, fmt) \
    void CsvWriter::appendField(T v) { \
        char text[64]; \
        int const written = snprintf(text, sizeof(text), fmt, static_cast<C>(v)); \
        if (written <= 0) { \
            throw LSST_EXCEPT(pexExcept::RuntimeError, \
                              "failed to convert " #T " to a string"); \
        } \
        if (written >= static_cast<int>(sizeof(text))) { \
            throw LSST_EXCEPT(pexExcept::LogicError, \
                              "internal buffer for string conversion too small"); \
        } \
        _write(text); \
    }
748 
749  LSST_IMPLEMENT_APPEND_FIELD(short, short, "%hd")
751  LSST_IMPLEMENT_APPEND_FIELD(long, long, "%ld")
752  LSST_IMPLEMENT_APPEND_FIELD(long long, long long, "%lld")
753  LSST_IMPLEMENT_APPEND_FIELD(unsigned short, unsigned short, "%hu")
754  LSST_IMPLEMENT_APPEND_FIELD(unsigned int, unsigned int, "%u")
755  LSST_IMPLEMENT_APPEND_FIELD(unsigned long, unsigned long, "%lu")
756  LSST_IMPLEMENT_APPEND_FIELD(unsigned long long, unsigned long long, "%llu")
757  // Some platforms don't support %hhd/%hhu in snprintf
758  LSST_IMPLEMENT_APPEND_FIELD(signed char, int, "%d")
759  LSST_IMPLEMENT_APPEND_FIELD(unsigned char, unsigned int, "%u")
760 #undef LSST_IMPLEMENT_APPEND_FIELD
761 
762 
763 // For a floating point type F, gymnastics are required to determine the
764 // number of decimal digits to print such that F -> decimal -> F conversions
765 // are guaranteed lossless. Note that this guarantee can only be made if the
766 // platform's conversion functions (snprintf, strtof, strtod, strtold) are
767 // correctly rounded, and the same rounding mode is used on input and output.
768 
769 void CsvWriter::appendField(float v) {
770  char buf[64];
771  char fmt[32];
772  int n = 0;
774  appendNull();
775  return;
776  }
777 #if FLT_RADIX == 2
778  if (FLT_MANT_DIG == 24) {
779  // 32 bit IEEE fast path
780  n = snprintf(buf, sizeof(buf), "%.9g", static_cast<double>(v));
781  } else {
782  // ceil(1 + FLT_MANT_DIG*log10(2))
783  static unsigned long const ndig = 2 + (FLT_MANT_DIG*30103UL)/100000UL;
784 #elif defined(FLT_MAXDIG10)
785  static unsigned long const ndig = FLT_MAXDIG10;
786 #elif defined(DECIMAL_DIG)
787  static unsigned long const ndig = DECIMAL_DIG;
788 #else
789 # error Unable to determine number of digits for lossless float->decimal->float conversion
790 #endif
791  n = snprintf(fmt, sizeof(fmt), "%%.%lug", ndig);
792  if (n <= 0 || n >= static_cast<int>(sizeof(fmt))) {
793  throw LSST_EXCEPT(pexExcept::LogicError, \
794  "internal buffer for string conversion too small"); \
795  }
796  n = snprintf(buf, sizeof(buf), fmt, static_cast<double>(v));
797 #if FLT_RADIX == 2
798  }
799 #endif
800  if (n <= 0 || n >= static_cast<int>(sizeof(buf))) {
801  throw LSST_EXCEPT(pexExcept::LogicError,
802  "snprintf() failed to convert float to string");
803  }
804  _write(buf);
805 }
806 
807 void CsvWriter::appendField(double v) {
808  char buf[64];
809  char fmt[32];
810  int n = 0;
812  appendNull();
813  return;
814  }
815 #if FLT_RADIX == 2
816  if (DBL_MANT_DIG == 53) {
817  // 64 bit IEEE fast path
818  n = snprintf(buf, sizeof(buf), "%.17g", v);
819  } else {
820  // ceil(1 + DBL_MANT_DIG*log10(2))
821  static unsigned long const ndig = 2 + (DBL_MANT_DIG*30103UL)/100000UL;
822 #elif defined(DBL_MAXDIG10)
823  static unsigned long const ndig = DBL_MAXDIG10;
824 #elif defined(DECIMAL_DIG)
825  static unsigned long const ndig = DECIMAL_DIG;
826 #else
827 # error Unable to determine number of digits for lossless double->decimal->double conversion
828 #endif
829  n = snprintf(fmt, sizeof(fmt), "%%.%lug", ndig);
830  if (n <= 0 || n >= static_cast<int>(sizeof(fmt))) {
831  throw LSST_EXCEPT(pexExcept::LogicError,
832  "snprintf() failed to produce format string");
833  }
834  n = snprintf(buf, sizeof(buf), fmt, v);
835 #if FLT_RADIX == 2
836  }
837 #endif
838  if (n <= 0 || n >= static_cast<int>(sizeof(buf))) {
839  throw LSST_EXCEPT(pexExcept::LogicError,
840  "snprintf() failed to convert double to string");
841  }
842  _write(buf);
843 }
844 
845 void CsvWriter::appendField(long double v) {
846  char buf[64];
847  char fmt[32];
848  int n = 0;
850  appendNull();
851  return;
852  }
853 #if FLT_RADIX == 2
854  if (LDBL_MANT_DIG == 64) {
855  // 80bit IEEE long double fast path
856  n = snprintf(buf, sizeof(buf), "%.21Lg", v);
857  } else if (LDBL_MANT_DIG == 113) {
858  // 128bit IEEE long double fast path
859  n = snprintf(buf, sizeof(buf), "%.36Lg", v);
860  } else {
861  // ceil(1 + LDBL_MANT_DIG*log10(2))
862  static unsigned long const ndig = 2 + (LDBL_MANT_DIG*30103UL)/100000UL;
863 #elif defined(DBL_MAXDIG10)
864  static unsigned long const ndig = LDBL_MAXDIG10;
865 #elif defined(DECIMAL_DIG)
866  static unsigned long const ndig = DECIMAL_DIG;
867 #else
868 # error Unable to determine number of digits for lossless long double->decimal->long double conversion
869 #endif
870  n = snprintf(fmt, sizeof(fmt), "%%.%luLg", ndig);
871  if (n <= 0 || n >= static_cast<int>(sizeof(fmt))) {
872  throw LSST_EXCEPT(pexExcept::LogicError,
873  "snprintf() failed to produce format string");
874  }
875  n = snprintf(buf, sizeof(buf), fmt, v);
876 #if FLT_RADIX == 2
877  }
878 #endif
879  if (n <= 0 || n >= static_cast<int>(sizeof(buf))) {
880  throw LSST_EXCEPT(pexExcept::LogicError,
881  "snprintf() failed to convert long double to string");
882  }
883  _write(buf);
884 }
885 
890  if (_control.hasNull) {
891  // output leading delimiter except for the first field in a record
892  if (_numFields > 0) {
893  _out->put(_control.getDelimiter());
894  }
895  ++_numFields;
896  // NULL is never quoted, and guaranteed not to require escaping
897  _out->write(_control.null.c_str(), _control.null.size());
898  } else if (_control.standardEscapes) {
899  // output leading delimiter except for the first field in a record
900  if (_numFields > 0) {
901  _out->put(_control.getDelimiter());
902  }
903  ++_numFields;
904  // write \N
905  _out->put(_control.getEscapeChar());
906  _out->put('N');
907  } else {
908  // write an empty field
909  _write("");
910  }
911 }
912 
915 void CsvWriter::_write(char const *s) {
916  // output leading delimiter except for the first field in a record
917  if (_numFields > 0) {
918  _out->put(_control.getDelimiter());
919  }
920  ++_numFields;
921  if (_control.quoting == "QUOTE_NONE") {
922  if (_control.hasNull && _control.null == s) {
923  throw LSST_EXCEPT(pexExcept::RuntimeError,
924  "Field value coincides with NULL string "
925  "and quoting is disabled");
926  }
927  _writeUnquoted(s);
928  return;
929  } else if (_control.quoting == "QUOTE_ALL" ||
930  (_control.hasNull && _control.null == s)) {
931  _writeQuoted(s);
932  return;
933  }
934 
935  // minimal quoting mode - first determine whether to quote/escape
936  size_t n = 0;
937  bool wantEscape = false;
938  bool wantQuote = false;
939  for (char c = *s; c != '\0'; c = s[++n]) {
940  if (c == '\n' || c == '\r') {
942  wantEscape = true;
943  } else {
944  wantQuote = true;
945  }
946  } else if (c == _control.getDelimiter()) {
947  wantQuote = true;
948  } else if (c == _control.getEscapeChar()) {
949  wantQuote = true;
950  wantEscape = true;
951  } else if (c == _control.getQuoteChar()) {
952  if (_control.doubleQuote) {
953  wantQuote = true;
954  }
955  wantEscape = true;
956  }
957  }
958  // if no escapes are necessary, blast chars to output stream, otherwise
959  // delegate to _writeQuoted() and _writeUnquoted()
960  if (wantQuote) {
961  if (wantEscape) {
962  _writeQuoted(s);
963  } else {
964  _out->put(_control.getQuoteChar());
965  _out->write(s, n);
966  _out->put(_control.getQuoteChar());
967  }
968  } else if (wantEscape) {
969  _writeUnquoted(s);
970  } else {
971  _out->write(s, n);
972  }
973 }
974 
977 void CsvWriter::_writeQuoted(char const *s) {
978  _out->put(_control.getQuoteChar());
979  size_t n = 0;
980  while (true) {
981  char const c = s[n];
982  if (c == '\0') {
983  _out->write(s, n);
984  break;
985  }
986  if (c == _control.getQuoteChar()) {
987  _out->write(s, n);
988  s += n + 1;
989  n = 0;
990  if (_control.doubleQuote) {
991  _out->put(c);
992  _out->put(c);
993  } else if (_control.getEscapeChar() == '\0') {
994  _out->put(c);
995  throw LSST_EXCEPT(pexExcept::InvalidParameterError,
996  "Field value requires escaping, but "
997  "no escape character is set");
998  } else {
999  _out->put(_control.getEscapeChar());
1000  _out->put(c);
1001  }
1002  } else if (c == _control.getEscapeChar()) {
1003  _out->write(s, n);
1004  s += n + 1;
1005  n = 0;
1006  _out->put(c);
1007  _out->put(c);
1008  } else if (_control.standardEscapes && (c == '\n' || c == '\r')) {
1009  _out->write(s, n);
1010  s += n + 1;
1011  n = 0;
1012  if (_control.getEscapeChar() == '\0') {
1013  _out->put(_control.getQuoteChar());
1014  throw LSST_EXCEPT(pexExcept::InvalidParameterError,
1015  "Field value requires escaping, but "
1016  "no escape character is set");
1017  }
1018  _out->put(_control.getEscapeChar());
1019  _out->put(c == '\n' ? 'n' : 'r');
1020  } else {
1021  ++n;
1022  }
1023  }
1024  _out->put(_control.getQuoteChar());
1025 }
1026 
1029 void CsvWriter::_writeUnquoted(char const *s) {
1030  size_t n = 0;
1031  while (true) {
1032  char const c = s[n];
1033  if (c == '\0') {
1034  _out->write(s, n);
1035  break;
1036  }
1037  if (c == '\n' ||
1038  c == '\r' ||
1039  c == _control.getDelimiter() ||
1040  c == _control.getEscapeChar()) {
1041 
1042  _out->write(s, n);
1043  s += n + 1;
1044  n = 0;
1045  if (_control.getEscapeChar() == '\0') {
1046  throw LSST_EXCEPT(pexExcept::InvalidParameterError,
1047  "Field value requires escaping, but "
1048  "no escape character is set");
1049  }
1050  _out->put(_control.getEscapeChar());
1051  if (c == '\n') {
1052  _out->put(_control.standardEscapes ? 'n' : c);
1053  } else if (c == '\r') {
1054  _out->put(_control.standardEscapes ? 'r' : c);
1055  } else {
1056  _out->put(c);
1057  }
1058  continue;
1059  } else if (c == _control.getQuoteChar() &&
1060  _control.quoting != "QUOTE_NONE") {
1061 
1062  _out->write(s, n);
1063  s += n + 1;
1064  n = 0;
1065  if (_control.doubleQuote) {
1066  _out->put(c);
1067  _out->put(c);
1068  } else if (_control.getEscapeChar() == '\0') {
1069  throw LSST_EXCEPT(pexExcept::InvalidParameterError,
1070  "Field value requires escaping, but "
1071  "no escape character is set");
1072  } else {
1073  _out->put(_control.getEscapeChar());
1074  _out->put(c);
1075  }
1076  } else {
1077  ++n;
1078  }
1079  }
1080 }
1081 
1082 }}} // namespace lsst::ap::utils
void _write(char const *s)
Definition: Csv.cc:915
Extent< int, N > truncate(Extent< double, N > const &input)
bool _done
Finished reading file?
Definition: Csv.h:216
void swap(Ellipse< DataT > &a, Ellipse< DataT > &b)
Definition: EllipseTypes.h:90
char getDelimiter() const
Definition: CsvControl.h:155
boost::scoped_ptr< std::ifstream > _stream
File stream.
Definition: Csv.h:210
void appendField(std::string const &v)
Definition: Csv.cc:281
CsvConversionControl const & _control
Definition: csvUtils.cc:120
char getEscapeChar() const
Definition: CsvControl.h:158
char getQuoteChar() const
Definition: CsvControl.h:161
static int const DEFAULT_CAPACITY
Definition: Csv.h:181
void swap(ImageBase< PixelT > &a, ImageBase< PixelT > &b)
Definition: Image.cc:291
Parameters that define a Character-Separated-Value dialect.
Definition: CsvControl.h:48
std::tr1::unordered_map< std::string, int > FieldIndexes
Definition: Csv.h:177
static int const MAX_RECORD_LENGTH
Definition: Csv.h:180
std::ostream * _out
Output stream.
Definition: Csv.h:310
SelectEigenView< T >::Type copy(Eigen::EigenBase< T > const &other)
Copy an arbitrary Eigen expression into a new EigenView.
Definition: eigen.h:390
void setFieldNames(std::vector< std::string > const &names)
Definition: Csv.cc:167
std::istream * _in
Input stream.
Definition: Csv.h:211
void _ioError(char const *msg) const
Definition: Csv.cc:235
bool nonfiniteAsNull
If true, then non-finite (NaN, Inf, -Inf) floating point values are written out as NULL field va...
Definition: CsvControl.h:131
void _writeUnquoted(char const *s)
Definition: Csv.cc:1029
boost::scoped_array< char > _record
Data for a single record.
Definition: Csv.h:214
std::string null
String representation of NULL field values. Never quoted on output. If specified, the representation may not contain any delimiter, quote, escape or line terminator characters ('\n'/'\r').
Definition: CsvControl.h:61
int _capacity
Capacity of _record.
Definition: Csv.h:215
CsvWriter(std::string const &path, CsvControl const &control, bool truncate=false, bool append=false)
Definition: Csv.cc:649
size_t _numLines
1-based index of current line.
Definition: Csv.h:212
size_t _numFields
Number of fields written.
Definition: Csv.h:314
CsvControl _control
File format.
Definition: Csv.h:206
double min
Definition: attributes.cc:216
int d
Definition: KDTree.cc:89
std::vector< int > _fields
Definition: Csv.h:217
size_t _numRecords
1-based index of current record.
Definition: Csv.h:213
size_t _numRecords
Number of records written.
Definition: Csv.h:312
double max
Definition: attributes.cc:218
#define LSST_IMPLEMENT_APPEND_FIELD(T, C, fmt)
Definition: Csv.cc:735
size_t _numLines
Number of lines written.
Definition: Csv.h:313
double w
Definition: CoaddPsf.cc:57
void appendFields(std::vector< std::string > const &fields)
Definition: Csv.cc:728
void _writeQuoted(char const *s)
Definition: Csv.cc:977
bool isDone() const
Definition: Csv.cc:90
void _checkWhitespace(char const *s, char const *msg) const
Definition: Csv.cc:533
int isfinite(T t)
Definition: ieee.h:100
#define LSST_EXCEPT(type,...)
Definition: Exception.h:46
boost::scoped_ptr< std::ofstream > _stream
Output file stream.
Definition: Csv.h:309
CsvControl _control
File format.
Definition: Csv.h:311
bool skipInitialSpace
If true, whitespace immediately following the delimiter is ignored.
Definition: CsvControl.h:100
std::string quoting
Field quoting style for CSV input/output. Legal values are: 'QUOTE_MINIMAL': Only quot...
Definition: CsvControl.h:77
bool _readLine(int offset)
Definition: Csv.cc:262
static std::string const WHITESPACE
Definition: Csv.h:179
def exists
Definition: cuda.py:53
bool hasNull
Indicates whether the null string is valid. If set to false, the only way NULLs can be recogn...
Definition: CsvControl.h:67
int getNumFields() const
Definition: Csv.cc:119
bool standardEscapes
Flag indicating whether standard escape sequences should be handled. If false, then the character sequence '\C', where C is any character, is mapped to C (assuming '\' is the escape character). If true, the following special cases are handled differently: - '\b' is mapped to BS - backspace (ASCII 8); - '\f' is mapped to FF - form feed (ASCII 12); - '\n' is mapped to NL - newline (ASCII 10); - '\r' is mapped to CR - carriage return (ASCII 13); - '\t' is mapped to TAB - horizontal tab (ASCII 9); - '\v' is mapped to VT - vertical tab (ASCII 11); - '\xD' and '\xDD', where D is a hexadecimal digit, is mapped to the character with that numeric code; - A field value of exactly '\N' (no quotes, whitespace, or other content) is treated as a NULL.
Definition: CsvControl.h:123
FieldIndexes _indexes
Field name to index map.
Definition: Csv.h:208
std::vector< std::string > _names
Field names in order of occurrence.
Definition: Csv.h:207
bool trailingDelimiter
If true, then a trailing delimiter character is expected and written at end of every record...
Definition: CsvControl.h:127
void _runtimeError(char const *msg) const
Definition: Csv.cc:249
Classes for CSV I/O.
Include files required for standard LSST Exception handling.
std::string _record
CsvReader(std::string const &path, CsvControl const &control, bool namesInFirstRecord=false)
Definition: Csv.cc:76
bool doubleQuote
If true, embedded quote characters are escaped with a leading quote character. Otherwise the escape character is used. If escaping and double-quoting are disabled, writing a field with embedded quote character will raise an exception.
Definition: CsvControl.h:106
#define LSST_SPECIALIZE_GET(T, C, fun)
Definition: Csv.cc:627
std::string _path
File name.
Definition: Csv.h:205