37 #include "boost/filesystem.hpp"
38 #include "boost/regex.hpp"
48 using std::numeric_limits;
50 using std::ostringstream;
63 namespace pexExcept = lsst::pex::exceptions;
66 namespace lsst {
namespace ap {
namespace utils {
77 std::string
const &path,
79 bool namesInFirstRecord
86 _stream(new ifstream(path.c_str(), ios::binary | ios::in)),
90 _record(new char[DEFAULT_CAPACITY]),
91 _capacity(DEFAULT_CAPACITY),
97 "failed to open file " + path +
" for reading");
101 if (namesInFirstRecord && !
isDone()) {
102 vector<string> names;
104 names.push_back(
get(i));
121 bool namesInFirstRecord
132 _record(new char[DEFAULT_CAPACITY]),
133 _capacity(DEFAULT_CAPACITY),
139 "std::istream not good() for reading");
142 in.exceptions(ios::goodbit);
144 if (namesInFirstRecord && !
isDone()) {
145 vector<string> names;
147 names.push_back(
get(i));
169 throw LSST_EXCEPT(pexExcept::InvalidParameterError,
170 "too many field names");
174 for (vector<string>::size_type i = 0; i < names.size(); ++i) {
175 if (names[i].empty()) {
178 pair<FieldIndexes::iterator, bool> p = indexes.insert(make_pair(
179 names[i], static_cast<int>(i)));
181 throw LSST_EXCEPT(pexExcept::InvalidParameterError,
182 "Duplicate field name: " + names[i]);
186 vector<string>
copy(names);
203 std::string
const &names,
204 std::string
const &regex,
207 vector<string> fieldNames;
208 boost::regex re(regex);
209 boost::sregex_token_iterator i(names.begin(), names.end(), re, -1);
210 boost::sregex_token_iterator e;
211 for (; i != e; ++i) {
212 string name = i->str();
213 if (stripWhitespace) {
215 if (w != string::npos) {
221 if (w != string::npos) {
227 fieldNames.push_back(name);
240 s <<
"CSV file " <<
_path;
254 s <<
"CSV file " <<
_path;
257 throw LSST_EXCEPT(pexExcept::RuntimeError, s.str());
263 bool gotData =
false;
267 if (
_in->gcount() > 0) {
272 }
else if (
_in->bad()) {
273 _ioError(
"std::istream::getline() failed");
274 }
else if (
_in->fail()) {
276 offset +=
_in->gcount();
281 boost::scoped_array<char> rec(
new char[cap]);
287 offset +=
_in->gcount();
329 _fields.push_back(writeOffset);
410 }
else if (c ==
'\0') {
457 case 'Z': c = 0x1a;
break;
458 case 'b': c = 0x08;
break;
459 case 'n': c =
'\n';
break;
460 case 'r': c =
'\r';
break;
461 case 't': c =
'\t';
break;
462 case 'v': c = 0x0b;
break;
466 if (c2 >=
'0' && c2 <=
'9') {
468 }
else if (c2 >=
'A' && c2 <=
'F') {
470 }
else if (c2 >=
'a' && c2 <=
'f') {
475 "followed by one or two hex "
479 if (c2 >=
'0' && c2 <=
'9') {
480 c = c*16 + (c2 -
'0');
482 }
else if (c2 >=
'A' && c2 <=
'F') {
483 c = c*16 + 10 + (c2 -
'A');
485 }
else if (c2 >=
'a' && c2 <=
'f') {
486 c = c*16 + 10 + (c2 -
'a');
524 "CSV parser bug - state machine reached an "
534 for (;
WHITESPACE.find(*s) != string::npos; ++s) { }
540 template <>
bool CsvReader::_get<bool>(
char const *field)
const {
541 for (; WHITESPACE.find(*field) != string::npos; ++field) { }
551 if ((field[0] ==
'r' || field[0] ==
'R') &&
552 (field[1] ==
'u' || field[1] ==
'U') &&
553 (field[2] ==
'e' || field[2] ==
'E')) {
560 if ((field[0] ==
'e' || field[0] ==
'E') &&
561 (field[1] ==
's' || field[1] ==
'S')) {
569 if ((field[0] ==
'a' || field[0] ==
'A') &&
570 (field[1] ==
'l' || field[1] ==
'L') &&
571 (field[2] ==
's' || field[2] ==
'S') &&
572 (field[3] ==
'e' || field[3] ==
'E')) {
578 if (field[0] ==
'o' || field[0] ==
'O') {
583 _runtimeError(
"failed to convert field value to bool");
585 _checkWhitespace(field,
"failed to convert field value to bool");
589 template <>
char CsvReader::_get<char>(
char const *field)
const {
590 if (field[0] ==
'\0') {
591 _runtimeError(
"empty field");
593 if (field[1] !=
'\0') {
594 _runtimeError(
"field value contains more than one character");
599 #define LSST_SPECIALIZE_GET(T, C, fun) \
600 template <> T CsvReader::_get<T>(char const *field) const { \
603 C v = fun(field, &e, 10); \
605 _runtimeError("failed to convert field value to " #T); \
606 } else if ((errno == ERANGE) || \
607 v > numeric_limits<T>::max() || \
608 v < numeric_limits<T>::min()) { \
609 _runtimeError("field value overflow during conversion to " #T); \
611 _checkWhitespace(e, "failed to convert field value to " #T); \
625 #undef LSST_SPECIALIZE_GET
627 #define LSST_SPECIALIZE_GET(T, fun) \
628 template <> T CsvReader::_get<T>(char const *field) const { \
631 T v = fun(field, &e); \
633 _runtimeError("failed to convert field value to " #T); \
635 _checkWhitespace(e, "failed to convert field value to " #T); \
642 #undef LSST_SPECIALIZE_GET
650 std::string
const &path,
666 ios::openmode mode = ios::out | ios::binary;
672 "file " + path +
" already exists");
678 _stream.reset(
new ofstream(path.c_str(), mode));
681 "failed to open file " + path +
" for writing");
685 _stream->exceptions(ios::eofbit | ios::failbit | ios::badbit);
705 "std::ostream not good() for writing");
707 out.exceptions(ios::eofbit | ios::failbit | ios::badbit);
729 for (vector<string>::const_iterator i = fields.begin(), e = fields.end();
735 #define LSST_IMPLEMENT_APPEND_FIELD(T, C, fmt) \
736 void CsvWriter::appendField(T v) { \
738 int n = snprintf(buf, sizeof(buf), fmt, static_cast<C>(v)); \
740 throw LSST_EXCEPT(pexExcept::RuntimeError, \
741 "failed to convert " #T " to a string"); \
742 } else if (n >= static_cast<int>(sizeof(buf))) { \
743 throw LSST_EXCEPT(pexExcept::LogicError, \
744 "internal buffer for string conversion too small"); \
760 #undef LSST_IMPLEMENT_APPEND_FIELD
778 if (FLT_MANT_DIG == 24) {
780 n = snprintf(buf,
sizeof(buf),
"%.9g", static_cast<double>(v));
783 static unsigned long const ndig = 2 + (FLT_MANT_DIG*30103
UL)/100000
UL;
784 #elif defined(FLT_MAXDIG10)
785 static unsigned long const ndig = FLT_MAXDIG10;
786 #elif defined(DECIMAL_DIG)
787 static unsigned long const ndig = DECIMAL_DIG;
789 # error Unable to determine number of digits for lossless float->decimal->float conversion
791 n = snprintf(fmt,
sizeof(fmt),
"%%.%lug", ndig);
792 if (n <= 0 || n >= static_cast<int>(
sizeof(fmt))) {
794 "internal buffer for string conversion too small"); \
796 n = snprintf(buf,
sizeof(buf), fmt, static_cast<double>(v));
800 if (n <= 0 || n >= static_cast<int>(
sizeof(buf))) {
802 "snprintf() failed to convert float to string");
816 if (DBL_MANT_DIG == 53) {
818 n = snprintf(buf,
sizeof(buf),
"%.17g", v);
821 static unsigned long const ndig = 2 + (DBL_MANT_DIG*30103
UL)/100000
UL;
822 #elif defined(DBL_MAXDIG10)
823 static unsigned long const ndig = DBL_MAXDIG10;
824 #elif defined(DECIMAL_DIG)
825 static unsigned long const ndig = DECIMAL_DIG;
827 # error Unable to determine number of digits for lossless double->decimal->double conversion
829 n = snprintf(fmt,
sizeof(fmt),
"%%.%lug", ndig);
830 if (n <= 0 || n >= static_cast<int>(
sizeof(fmt))) {
832 "snprintf() failed to produce format string");
834 n = snprintf(buf,
sizeof(buf), fmt, v);
838 if (n <= 0 || n >= static_cast<int>(
sizeof(buf))) {
840 "snprintf() failed to convert double to string");
854 if (LDBL_MANT_DIG == 64) {
856 n = snprintf(buf,
sizeof(buf),
"%.21Lg", v);
857 }
else if (LDBL_MANT_DIG == 113) {
859 n = snprintf(buf,
sizeof(buf),
"%.36Lg", v);
862 static unsigned long const ndig = 2 + (LDBL_MANT_DIG*30103
UL)/100000
UL;
863 #elif defined(LDBL_MAXDIG10)  // guard the long double macro that is actually used below
864 static unsigned long const ndig = LDBL_MAXDIG10;
865 #elif defined(DECIMAL_DIG)
866 static unsigned long const ndig = DECIMAL_DIG;
868 # error Unable to determine number of digits for lossless long double->decimal->long double conversion
870 n = snprintf(fmt,
sizeof(fmt),
"%%.%luLg", ndig);
871 if (n <= 0 || n >= static_cast<int>(
sizeof(fmt))) {
873 "snprintf() failed to produce format string");
875 n = snprintf(buf,
sizeof(buf), fmt, v);
879 if (n <= 0 || n >= static_cast<int>(
sizeof(buf))) {
881 "snprintf() failed to convert long double to string");
924 "Field value coincides with NULL string "
925 "and quoting is disabled");
937 bool wantEscape =
false;
938 bool wantQuote =
false;
939 for (
char c = *s; c !=
'\0'; c = s[++n]) {
940 if (c ==
'\n' || c ==
'\r') {
968 }
else if (wantEscape) {
995 throw LSST_EXCEPT(pexExcept::InvalidParameterError,
996 "Field value requires escaping, but "
997 "no escape character is set");
1014 throw LSST_EXCEPT(pexExcept::InvalidParameterError,
1015 "Field value requires escaping, but "
1016 "no escape character is set");
1019 _out->put(c ==
'\n' ?
'n' :
'r');
1032 char const c = s[n];
1046 throw LSST_EXCEPT(pexExcept::InvalidParameterError,
1047 "Field value requires escaping, but "
1048 "no escape character is set");
1053 }
else if (c ==
'\r') {
1069 throw LSST_EXCEPT(pexExcept::InvalidParameterError,
1070 "Field value requires escaping, but "
1071 "no escape character is set");
void _write(char const *s)
Extent< int, N > truncate(Extent< double, N > const &input)
bool _done
Finished reading file?
void swap(Ellipse< DataT > &a, Ellipse< DataT > &b)
char getDelimiter() const
boost::scoped_ptr< std::ifstream > _stream
File stream.
void appendField(std::string const &v)
CsvConversionControl const & _control
char getEscapeChar() const
char getQuoteChar() const
static int const DEFAULT_CAPACITY
void swap(ImageBase< PixelT > &a, ImageBase< PixelT > &b)
Parameters that define a Character-Separated-Value dialect.
std::tr1::unordered_map< std::string, int > FieldIndexes
static int const MAX_RECORD_LENGTH
std::ostream * _out
Output stream.
SelectEigenView< T >::Type copy(Eigen::EigenBase< T > const &other)
Copy an arbitrary Eigen expression into a new EigenView.
void setFieldNames(std::vector< std::string > const &names)
std::istream * _in
Input stream.
void _ioError(char const *msg) const
bool nonfiniteAsNull
"If true, then non-finite (NaN, Inf, -Inf) floating point values are\n" "written out as NULL field va...
void _writeUnquoted(char const *s)
boost::scoped_array< char > _record
Data for a single record.
std::string null
"String representation of NULL field values. Never quoted on output.\n" "If specified, the representation may not contain any delimiter,\n" "quote, escape or line terminator characters ('\\n'/'\\r').\n" ;
int _capacity
Capacity of _record.
CsvWriter(std::string const &path, CsvControl const &control, bool truncate=false, bool append=false)
size_t _numLines
1-based index of current line.
size_t _numFields
Number of fields written.
CsvControl _control
File format.
std::vector< int > _fields
size_t _numRecords
1-based index of current record.
size_t _numRecords
Number of records written.
#define LSST_IMPLEMENT_APPEND_FIELD(T, C, fmt)
size_t _numLines
Number of lines written.
void appendFields(std::vector< std::string > const &fields)
void _writeQuoted(char const *s)
void _checkWhitespace(char const *s, char const *msg) const
#define LSST_EXCEPT(type,...)
boost::scoped_ptr< std::ofstream > _stream
Output file stream.
CsvControl _control
File format.
bool skipInitialSpace
"If true, whitespace immediately following the delimiter is ignored." ;
std::string quoting
"Field quoting style for CSV input/output. Legal values are:\n" "\n" "'QUOTE_MINIMAL': Only quot...
bool _readLine(int offset)
static std::string const WHITESPACE
bool hasNull
"Indicates whether the null string is valid. If set to false, the only\n" "way NULLs can be recogn...
bool standardEscapes
"Flag indicating whether standard escape sequences should be handled.\n" "If false, then the character sequence '\\C', where C is any character,\n" "is mapped to C (assuming '\\' is the escape character). If true,\n" "the following special cases are handled differently:\n" "\n" "- '\\b' is mapped to BS - backspace (ASCII 8)\n" "- '\\f' is mapped to FF - form feed (ASCII 12)\n" "- '\\n' is mapped to NL - newline (ASCII 10)\n" "- '\\r' is mapped to CR - carriage return (ASCII 13)\n" "- '\\t' is mapped to TAB - horizontal tab (ASCII 9)\n" "- '\\v' is mapped to VT - vertical tab (ASCII 11)\n" "- '\\xD' and '\\xDD', where D is a hexadecimal digit, is mapped to\n" " the character with that numeric code.\n" "- A field value of exactly '\\N' (no quotes, whitespace, or other\n" " content) is treated as a NULL.\n" ;
FieldIndexes _indexes
Field name to index map.
std::vector< std::string > _names
Field names in order of occurrence.
bool trailingDelimiter
"If true, then a trailing delimiter character is expected and written\n" "at end of every record...
void _runtimeError(char const *msg) const
Include files required for standard LSST Exception handling.
CsvReader(std::string const &path, CsvControl const &control, bool namesInFirstRecord=false)
bool doubleQuote
"If true, embedded quote characters are escaped with a leading quote\n" "character. Otherwise the escape character is used. If escaping and\n" "double-quoting are disabled, writing a field with embedded quote\n" "character will raise an exception.\n" ;
#define LSST_SPECIALIZE_GET(T, C, fun)
std::string _path
File name.