20 #include "../config.h" 27 #include "ParserEventGeneratorKit.h" 51 #define LIBOFX_DEFAULT_INPUT_ENCODING "CP1252" 52 #define LIBOFX_DEFAULT_OUTPUT_ENCODING "UTF-8" 57 #ifdef MAKEFILE_DTD_PATH 68 #ifdef MAKEFILE_DTD_PATH 71 "/usr/local/share/libofx/dtd",
72 "/usr/share/libofx/dtd",
// Body of ofx_proc_file(): pre-processes an OFX/OFC input file line by line into a
// temporary file and then hands that file, together with two DTD files, to the parser.
// NOTE(review): this is an extracted listing. The leading numbers are the original line
// numbers, and skipped numbers mark lines (braces, else/do lines, some statements) that
// are not visible here, so the exact nesting of the statements below cannot be confirmed.
// State flags, all initially false.
83 bool ofx_start =
false;
85 bool file_is_xml =
false;
86 bool used_iconv =
false;
// input_file is the source being read; tmp_file receives the processed lines.
87 std::ifstream input_file;
88 std::ofstream tmp_file;
90 char tmp_filename[256];
93 iconv_t conversion_descriptor;
// Only proceed when a non-empty filename was supplied.
97 if (p_filename != NULL && strcmp(p_filename,
"") != 0)
99 message_out(
DEBUG, std::string(
"ofx_proc_file():Opening file: ") + p_filename);
101 input_file.open(p_filename);
104 message_out(
ERROR,
"ofx_proc_file():Unable to open the input file " + std::string(p_filename));
// Build the temp file name from the "libofxtmpXXXXXX" template, then create the file with
// mkstemp_win32() or mkstemp() (the condition selecting between them is not visible here).
107 mkTempFileName(
"libofxtmpXXXXXX", tmp_filename,
sizeof(tmp_filename));
109 message_out(
DEBUG,
"ofx_proc_file(): Creating temp file: " + std::string(tmp_filename));
111 tmp_file_fd = mkstemp_win32(tmp_filename);
113 tmp_file_fd = mkstemp(tmp_filename);
117 tmp_file.open(tmp_filename);
120 message_out(
ERROR,
"ofx_proc_file():Unable to open the created temp file " + std::string(tmp_filename));
126 message_out(
ERROR,
"ofx_proc_file():Unable to create a temp file at " + std::string(tmp_filename));
// Continue only if both streams are usable.
130 if (input_file && tmp_file)
132 std::size_t header_separator_idx;
133 std::string header_name;
134 std::string header_value;
135 std::string ofx_encoding;
136 std::string ofx_charset;
139 std::stringbuf buffer;
140 std::string s_buffer;
// Read characters up to, but not including, the next '\n' into buffer.
141 input_file.get(buffer,
'\n');
143 s_buffer = buffer.str();
147 if (!input_file.eof())
150 if (input_file.fail())
// get() leaves the delimiter in the stream, so the next character is inspected here;
// what is done with it is on a line not visible in this listing.
160 if (input_file.peek() ==
'\n')
// Before the root element has been seen, an "<?xml" marker identifies a real XML file.
167 if (ofx_start ==
false && (s_buffer.find(
"<?xml") != std::string::npos))
169 message_out(
DEBUG,
"ofx_proc_file(): File is an actual XML file, iconv conversion will be skipped.");
// Look for the opening root tag: <OFX>/<ofx> for OFX files, <OFC>/<ofc> for OFC files.
// ofx_start_idx is assigned as a side effect of the search expressions.
173 std::size_t ofx_start_idx;
174 if (ofx_start ==
false)
177 (libofx_context->currentFileType() == OFX &&
178 ((ofx_start_idx = s_buffer.find(
"<OFX>")) != std::string::npos ||
179 (ofx_start_idx = s_buffer.find(
"<ofx>")) != std::string::npos))
181 (libofx_context->currentFileType() == OFC &&
182 ((ofx_start_idx = s_buffer.find(
"<OFC>")) != std::string::npos ||
183 (ofx_start_idx = s_buffer.find(
"<ofc>")) != std::string::npos))
// For non-XML input, drop everything on the line that precedes the root tag.
187 if (file_is_xml ==
false)
189 s_buffer.erase(0, ofx_start_idx);
// Export SP_CHARSET_FIXED=1. The buffer is static because putenv() keeps the pointer it
// is given instead of copying the string.
193 static char sp_charset_fixed[] =
"SP_CHARSET_FIXED=1";
194 if (putenv(sp_charset_fixed) != 0)
// The warning text below explains why a single-byte encoding is forced for OpenSP.
198 #define OPENSP_UTF8_WARNING_TEXT "ofx_proc_file(): OpenSP cannot process an UTF-8 XML file without garbling it. Furthermore, on windows the support for UTF-8 encode SGML files is broken. This is worked around by forcing a single byte encoding. If the file is indeed UTF-8, it should pass through unmolested, but you will likely get 'non SGML character number' errors, even though the output is correct." 199 if (file_is_xml ==
true)
// SP_ENCODING=ms-dos is exported in two places; presumably one per branch of the XML
// check above (the else line is not visible) - confirm against the full source.
210 static char sp_encoding[] =
"SP_ENCODING=ms-dos";
211 if (putenv(sp_encoding) != 0)
218 static char sp_encoding[] =
"SP_ENCODING=ms-dos";
219 if (putenv(sp_encoding) != 0)
// Choose the iconv source encoding from the ENCODING and CHARSET header values collected
// further down (headers precede the root tag in the input).
224 std::string fromcode;
226 if (ofx_encoding.compare(
"USASCII") == 0)
228 if (ofx_charset.compare(
"ISO-8859-1") == 0 || ofx_charset.compare(
"8859-1") == 0)
231 fromcode =
"ISO-8859-1";
233 else if (ofx_charset.compare(
"1252") == 0 || ofx_charset.compare(
"CP1252") == 0)
238 else if (ofx_charset.compare(
"NONE") == 0)
240 fromcode = LIBOFX_DEFAULT_INPUT_ENCODING;
244 fromcode = LIBOFX_DEFAULT_INPUT_ENCODING;
247 else if (ofx_encoding.compare(
"UTF-8") == 0 || ofx_encoding.compare(
"UNICODE") == 0)
255 fromcode = LIBOFX_DEFAULT_INPUT_ENCODING;
// The conversion target is LIBOFX_DEFAULT_OUTPUT_ENCODING.
257 tocode = LIBOFX_DEFAULT_OUTPUT_ENCODING;
258 message_out(
DEBUG,
"ofx_proc_file(): Setting up iconv for fromcode: " + fromcode +
", tocode: " + tocode);
259 conversion_descriptor = iconv_open (tocode.c_str(), fromcode.c_str());
// A ':' splits the line into a header name (before it) and a header value (after it).
267 if ((header_separator_idx = s_buffer.find(
':')) != std::string::npos)
270 header_name.assign(s_buffer.substr(0, header_separator_idx));
271 header_value.assign(s_buffer.substr(header_separator_idx + 1));
// Strip trailing '\n' and '\r' characters from the value.
272 while ( header_value.length() > 0 &&
273 ( header_value[header_value.length() - 1 ] ==
'\n' ||
274 header_value[header_value.length() - 1 ] ==
'\r' ))
275 header_value.erase(header_value.length() - 1);
276 message_out(
DEBUG,
"ofx_proc_file():Header: " + header_name +
" with value: " + header_value +
" has been found");
// Remember the two headers that drive the encoding selection above.
277 if (header_name.compare(
"ENCODING") == 0)
279 ofx_encoding.assign(header_value);
281 if (header_name.compare(
"CHARSET") == 0)
283 ofx_charset.assign(header_value);
// Content is handled when the file is XML, or while inside the root element
// (opening tag seen, closing tag not yet seen).
289 if (file_is_xml ==
true || (ofx_start ==
true && ofx_end ==
false))
291 if (ofx_start ==
true)
298 if (s_buffer.empty())
// Non-XML content is converted with iconv. The output buffer is twice the input size and
// zero-filled; iconv is offered one byte less than that, so a trailing NUL always remains.
302 if (file_is_xml ==
false)
305 size_t inbytesleft = s_buffer.size();
306 size_t outbytesleft = inbytesleft * 2 - 1;
307 char * iconv_buffer = (
char*) malloc (inbytesleft * 2);
308 memset(iconv_buffer, 0, inbytesleft * 2);
309 const char* inchar = s_buffer.c_str();
310 char * outchar = iconv_buffer;
311 int iconv_retval = iconv (conversion_descriptor,
312 #ifdef HAVE_ICONV_CONST
315 const_cast<char**>(&inchar),
317 &inbytesleft, &outchar, &outbytesleft);
318 if (iconv_retval == -1)
// iconv advances outchar, so (outchar - iconv_buffer) is the number of bytes produced.
324 s_buffer = std::string(iconv_buffer, outchar - iconv_buffer);
// Append the line to the temp file; std::endl writes '\n' and flushes.
329 tmp_file << s_buffer << std::endl;
// Detect the closing root tag (</OFX>, </ofx>, </OFC>, </ofc>) once the root is open.
332 if (ofx_start ==
true &&
334 (libofx_context->currentFileType() == OFX &&
335 ((ofx_start_idx = s_buffer.find(
"</OFX>")) != std::string::npos ||
336 (ofx_start_idx = s_buffer.find(
"</ofx>")) != std::string::npos))
337 || (libofx_context->currentFileType() == OFC &&
338 ((ofx_start_idx = s_buffer.find(
"</OFC>")) != std::string::npos ||
339 (ofx_start_idx = s_buffer.find(
"</ofc>")) != std::string::npos))
// Loop condition: keep reading until end of file or an unrecoverable stream error.
348 while (!input_file.eof() && !input_file.bad());
// Release the iconv descriptor only if conversion was actually set up.
353 if (used_iconv ==
true)
355 iconv_close(conversion_descriptor);
// Fixed-size path buffers for the three files passed to the parser via filenames[].
358 char filename_openspdtd[255];
359 char filename_dtd[255];
360 char filename_ofx[255];
362 if (libofx_context->currentFileType() == OFX)
366 else if (libofx_context->currentFileType() == OFC)
372 message_out(
ERROR, std::string(
"ofx_proc_file(): Error unknown file format for the OFX parser"));
// Continue only if both DTD paths are non-empty.
375 if ((std::string)filename_dtd !=
"" && (std::string)filename_openspdtd !=
"")
// NOTE(review): strncpy() does not NUL-terminate when the source is 255 bytes or longer;
// tmp_filename is a 256-byte buffer while filename_ofx holds 255 - confirm this cannot overflow.
377 strncpy(filename_ofx, tmp_filename, 255);
378 filenames[0] = filename_openspdtd;
379 filenames[1] = filename_dtd;
380 filenames[2] = filename_ofx;
382 if (libofx_context->currentFileType() == OFX)
386 else if (libofx_context->currentFileType() == OFC)
392 message_out(
ERROR, std::string(
"ofx_proc_file(): Error unknown file format for the OFX parser"));
// Delete the temp file and report an error if that fails.
395 if (
remove(tmp_filename) != 0)
397 message_out(
ERROR,
"ofx_proc_file(): Error deleting temporary file " + std::string(tmp_filename));
/**
 * @brief Locate the next "<...>" tag in a string and return the text between the brackets.
 *
 * The string is only read, so it is taken by const reference; existing callers that pass
 * a non-const std::string are unaffected.
 *
 * @param input_string  Text to scan.
 * @param pos_start     In: offset at which to start looking for '<'.
 *                      Out: index of the '<' found, or std::string::npos if there is none.
 * @param pos_end       Out: index one past the matching '>', or std::string::npos when
 *                      no '<' was found or the tag is never closed.
 * @return The characters between '<' and '>' (exclusive). Empty when no '<' exists
 *         (and also for a literal "<>"). For an unterminated tag, everything after
 *         the '<' up to the end of the string.
 */
static std::string find_tag_open (const std::string& input_string, size_t& pos_start, size_t& pos_end)
{
  pos_start = input_string.find('<', pos_start);
  if (pos_start == std::string::npos)
  {
    pos_end = std::string::npos;
    return std::string();
  }

  const size_t name_begin = pos_start + 1;
  const size_t close_idx = input_string.find('>', name_begin);
  if (close_idx == std::string::npos)
  {
    // Unterminated tag: return the remainder of the string. This is the same result the
    // length arithmetic used to reach through unsigned wraparound, now stated explicitly.
    pos_end = std::string::npos;
    return input_string.substr(name_begin);
  }

  pos_end = close_idx + 1;
  return input_string.substr(name_begin, close_idx - name_begin);
}
// Advances pos past the closing tag "</tag_name>" in input_string, searching from pos.
// NOTE(review): extracted listing - skipped line numbers mark lines (braces, else lines,
// some assignments) that are not visible here, so branch nesting is inferred, not shown.
442 static void find_tag_close (std::string& input_string, std::string& tag_name,
size_t& pos)
// Search for the literal closing tag at or after pos.
444 size_t start_idx = input_string.find (
"</" + tag_name +
">", pos);
// No closing tag exists.
446 if (start_idx == std::string::npos)
// Look for an opening tag instead; the debug message calls this falling back to the
// "next open tag". The values given to start_idx/end_idx beforehand are not visible here.
450 std::string new_tag_name = find_tag_open (input_string, start_idx, end_idx);
451 if (!new_tag_name.empty())
453 message_out(
DEBUG,
"find_tag_close() fell back to next open tag: " + new_tag_name);
// pos moves to the end of the string - presumably when no further tag was found
// (the enclosing else is not visible; confirm).
461 pos = input_string.length();
// Start of "</name>" plus the name length plus 3 (for "</" and ">") is the index just
// past the closing tag - presumably the branch where the closing tag was found.
466 pos = start_idx + tag_name.length() + 3;
// Body of sanitize_proprietary_tags(std::string input_string): walks the tags of
// input_string and erases those treated as proprietary, together with their content.
// NOTE(review): extracted listing - the signature line and other lines whose numbers are
// skipped (braces, else, possibly further statements) are not visible here.
485 size_t last_known_good_pos = 0;
486 size_t open_tag_start_pos = last_known_good_pos;
487 size_t open_tag_end_pos;
488 size_t close_tag_end_pos;
// Fetch the first tag; an empty name ends the loop.
490 std::string tag_name = find_tag_open(input_string, open_tag_start_pos, open_tag_end_pos);
491 while (!tag_name.empty())
// A tag is treated as proprietary when its name contains '.' or is exactly "CATEGORY".
494 if ((tag_name.find(
'.') != std::string::npos) ||
495 (tag_name ==
"CATEGORY"))
// Find where the element ends, searching from just after the opening tag.
497 close_tag_end_pos = open_tag_end_pos;
498 find_tag_close (input_string, tag_name, close_tag_end_pos);
// The span runs from the '<' of the opening tag to the position find_tag_close reported.
499 size_t tag_size = close_tag_end_pos - open_tag_start_pos;
500 std::string prop_tag = input_string.substr(open_tag_start_pos, tag_size);
// Remove the span; the text that followed it now starts at the same offset, so
// scanning resumes there.
502 input_string.erase(open_tag_start_pos, tag_size);
503 last_known_good_pos = open_tag_start_pos;
// Presumably the non-proprietary branch (the else line is not visible): resume after the tag.
507 last_known_good_pos = open_tag_end_pos;
// Search again only from a valid position; open_tag_end_pos is npos when a tag had no '>'.
510 open_tag_start_pos = last_known_good_pos;
511 if (last_known_good_pos != std::string::npos)
512 tag_name = find_tag_open(input_string, open_tag_start_pos, open_tag_end_pos);
// Derives a DTD directory from the location of the running module and returns it as a
// string; returns "" when the module path cannot be obtained.
// NOTE(review): extracted listing - skipped line numbers mark lines that are not visible
// here (including any platform guards and the statements that truncate the path).
519 static std::string get_dtd_installation_directory()
523 char ch_fn[MAX_PATH], *p;
// With a NULL module handle, GetModuleFileName reports the path of the current
// process's executable; a zero return means failure.
526 if (!GetModuleFileName(NULL, ch_fn, MAX_PATH))
return "";
// Locate the last backslash, i.e. the separator before the file name.
528 if ((p = strrchr(ch_fn,
'\\')) != NULL)
// Locate the last backslash again - presumably after the file name was cut off on a line
// not visible here, so p + 1 is then the name of the containing directory (confirm).
531 p = strrchr(ch_fn,
'\\');
// Case-insensitive check for a final path component of "bin" or "lib".
532 if (p && (_stricmp(p + 1,
"bin") == 0 ||
533 _stricmp(p + 1,
"lib") == 0))
// Append the DTD sub-directory to the path.
537 str_fn +=
"\\share\\libofx\\dtd";
// Finds a readable copy of the DTD file dtd_filename and returns its path.
// Candidate locations are tried in the order they appear below; each candidate is
// probed by opening it with std::ifstream.
// NOTE(review): extracted listing - skipped line numbers mark lines (braces, stream
// checks, a loop header, the final return) that are not visible here.
557 std::string
find_dtd(LibofxContextPtr ctx,
const std::string& dtd_filename)
559 std::string dtd_path_filename;
// 1) The directory configured on the context. The file name is appended with no
//    separator, so dtdDir() presumably ends with one already (confirm).
562 dtd_path_filename =
reinterpret_cast<const LibofxContext*
>(ctx)->dtdDir();
563 if (!dtd_path_filename.empty())
565 dtd_path_filename.append(dtd_filename);
566 std::ifstream dtd_file(dtd_path_filename.c_str());
570 return dtd_path_filename;
// 2) The directory reported by get_dtd_installation_directory().
575 dtd_path_filename = get_dtd_installation_directory();
576 if (!dtd_path_filename.empty())
578 dtd_path_filename.append(DIRSEP);
579 dtd_path_filename.append(dtd_filename);
580 std::ifstream dtd_file(dtd_path_filename.c_str());
584 return dtd_path_filename;
// 3) The directory named by the OFX_DTD_PATH environment variable.
589 env_dtd_path = getenv(
"OFX_DTD_PATH");
592 dtd_path_filename = env_dtd_path;
593 dtd_path_filename.append(DIRSEP);
594 dtd_path_filename.append(dtd_filename);
595 std::ifstream dtd_file(dtd_path_filename.c_str());
// NOTE(review): the message below contains a duplicated word ("was was").
598 message_out(
STATUS,
"find_dtd():OFX_DTD_PATH env variable was was present, but unable to open the file " + dtd_path_filename);
603 return dtd_path_filename;
// 4) Another directory whose assignment is not visible here - presumably each entry of
//    DTD_SEARCH_PATH in turn (confirm).
610 dtd_path_filename.append(DIRSEP);
611 dtd_path_filename.append(dtd_filename);
612 std::ifstream dtd_file(dtd_path_filename.c_str());
615 message_out(
DEBUG,
"find_dtd():Unable to open the file " + dtd_path_filename);
620 return dtd_path_filename;
// 5) "../dtd" relative to the current directory; per the message below this is the
//    location used when running from the source tree.
625 dtd_path_filename =
"";
626 dtd_path_filename.append(
"..");
627 dtd_path_filename.append(DIRSEP);
628 dtd_path_filename.append(
"dtd");
629 dtd_path_filename.append(DIRSEP);
630 dtd_path_filename.append(dtd_filename);
631 std::ifstream dtd_file(dtd_path_filename.c_str());
634 message_out(
DEBUG,
"find_dtd(): Unable to open the file " + dtd_path_filename +
", most likely we are not in the source tree.");
639 return dtd_path_filename;
// Nothing matched: report the failure (the value returned afterwards is not visible here).
643 message_out(
ERROR,
"find_dtd():Unable to find the DTD named " + dtd_filename);
int ofx_proc_file(LibofxContextPtr ctx, const char *p_filename)
File pre-processing of OFX and OFC files.
int message_out(OfxMsgType error_type, const std::string message)
Message output function.
const int DTD_SEARCH_PATH_NUM
The number of different paths to search for DTDs.
OFX/SGML parsing functionality.
const char * DTD_SEARCH_PATH[DTD_SEARCH_PATH_NUM]
The list of paths to search for the DTDs.
int ofc_proc_sgml(LibofxContext *libofx_context, int argc, char *const *argv)
Parses a DTD and OFX file(s)
Various simple functions for type conversion & al.
int ofx_proc_sgml(LibofxContext *libofx_context, int argc, char *const *argv)
Parses a DTD and OFX file(s)
void STRNCPY(T &dest, const std::string &src)
OFX/SGML parsing functionality.
Message IO functionality.
Preprocessing of the OFX files before parsing.
std::string find_dtd(LibofxContextPtr ctx, const std::string &dtd_filename)
Find the appropriate DTD for the file version.
std::string sanitize_proprietary_tags(std::string input_string)
Removes proprietary tags and comments.