LibOFX
ofx_sgml.cpp
Go to the documentation of this file.
1/***************************************************************************
2 ofx_sgml.cpp
3 -------------------
4 copyright : (C) 2002 by Benoit Grégoire
5 email : benoitg@coeus.ca
6***************************************************************************/
12/***************************************************************************
13 * *
14 * This program is free software; you can redistribute it and/or modify *
15 * it under the terms of the GNU General Public License as published by *
16 * the Free Software Foundation; either version 2 of the License, or *
17 * (at your option) any later version. *
18 * *
19 ***************************************************************************/
20
21#ifdef HAVE_CONFIG_H
22#include <config.h>
23#endif
24
25#include <iostream>
26#include <stdlib.h>
27#include <string>
28#include <cassert>
29#include "ParserEventGeneratorKit.h"
30#include "libofx.h"
31#include "ofx_utilities.hh"
32#include "messages.hh"
33#include "ofx_containers.hh"
34#include "ofx_sgml.hh"
35
36
37OfxMainContainer * MainContainer = NULL;
38extern SGMLApplication::OpenEntityPtr entity_ptr;
39extern SGMLApplication::Position position;
40static const std::string MESSAGE_NON_SGML_CHAR = "non SGML character";
41
42
45class OFXApplication : public SGMLApplication
46{
47private:
48 OfxGenericContainer *curr_container_element;
49 OfxGenericContainer *tmp_container_element;
50 bool is_data_element;
51 std::string incoming_data;
52 LibofxContext * libofx_context;
53 unsigned errorCountToIgnore = 0;
54
55public:
56
57 OFXApplication (LibofxContext * p_libofx_context)
58 {
59 MainContainer = NULL;
60 curr_container_element = NULL;
61 is_data_element = false;
62 libofx_context = p_libofx_context;
63 }
64 ~OFXApplication()
65 {
66 message_out(DEBUG, "Entering the OFXApplication's destructor");
67 }
68
69 unsigned getErrorCountToIgnore() const { return errorCountToIgnore; }
70
75 void startElement (const StartElementEvent & event)
76 {
77 std::string identifier = CharStringtostring (event.gi);
78 message_out(PARSER, "startElement event received from OpenSP for element " + identifier);
79
80 position = event.pos;
81
82 switch (event.contentType)
83 {
84 case StartElementEvent::empty:
85 message_out(ERROR, "StartElementEvent::empty\n");
86 break;
87 case StartElementEvent::cdata:
88 message_out(ERROR, "StartElementEvent::cdata\n");
89 break;
90 case StartElementEvent::rcdata:
91 message_out(ERROR, "StartElementEvent::rcdata\n");
92 break;
93 case StartElementEvent::mixed:
94 message_out(PARSER, "StartElementEvent::mixed");
95 is_data_element = true;
96 break;
97 case StartElementEvent::element:
98 message_out(PARSER, "StartElementEvent::element");
99 is_data_element = false;
100 break;
101 default:
102 message_out(ERROR, "Unknown SGML content type?!?!?!? OpenSP interface changed?");
103 }
104
105 if (is_data_element == false)
106 {
107 /*------- The following are OFX entities ---------------*/
108
109 if (identifier == "OFX")
110 {
111 message_out (PARSER, "Element " + identifier + " found");
112 MainContainer = new OfxMainContainer (libofx_context, curr_container_element, identifier);
113 curr_container_element = MainContainer;
114 }
115 else if (identifier == "STATUS")
116 {
117 message_out (PARSER, "Element " + identifier + " found");
118 curr_container_element = new OfxStatusContainer (libofx_context, curr_container_element, identifier);
119 }
120 else if (identifier == "STMTRS" ||
121 identifier == "CCSTMTRS" ||
122 identifier == "INVSTMTRS")
123 {
124 message_out (PARSER, "Element " + identifier + " found");
125 curr_container_element = new OfxStatementContainer (libofx_context, curr_container_element, identifier);
126 }
127 else if (identifier == "BANKTRANLIST" || identifier == "INVTRANLIST")
128 {
129 message_out (PARSER, "Element " + identifier + " found");
130 //BANKTRANLIST ignored, we will process it's attributes directly inside the STATEMENT,
131 if (curr_container_element && curr_container_element->type != "STATEMENT")
132 {
133 message_out(ERROR, "Element " + identifier + " found while not inside a STATEMENT container");
134 }
135 else
136 {
137 curr_container_element = new OfxPushUpContainer (libofx_context, curr_container_element, identifier);
138 }
139 }
140 else if (identifier == "STMTTRN")
141 {
142 message_out (PARSER, "Element " + identifier + " found");
143 if (curr_container_element && curr_container_element->type == "INVESTMENT")
144 {
145 //push up to the INVBANKTRAN OfxInvestmentTransactionContainer
146 curr_container_element = new OfxPushUpContainer (libofx_context, curr_container_element, identifier);
147 }
148 else
149 {
150 curr_container_element = new OfxBankTransactionContainer (libofx_context, curr_container_element, identifier);
151 }
152 }
153 else if (identifier == "BUYDEBT" ||
154 identifier == "BUYMF" ||
155 identifier == "BUYOPT" ||
156 identifier == "BUYOTHER" ||
157 identifier == "BUYSTOCK" ||
158 identifier == "CLOSUREOPT" ||
159 identifier == "INCOME" ||
160 identifier == "INVEXPENSE" ||
161 identifier == "JRNLFUND" ||
162 identifier == "JRNLSEC" ||
163 identifier == "MARGININTEREST" ||
164 identifier == "REINVEST" ||
165 identifier == "RETOFCAP" ||
166 identifier == "SELLDEBT" ||
167 identifier == "SELLMF" ||
168 identifier == "SELLOPT" ||
169 identifier == "SELLOTHER" ||
170 identifier == "SELLSTOCK" ||
171 identifier == "SPLIT" ||
172 identifier == "TRANSFER" ||
173 identifier == "INVBANKTRAN" )
174 {
175 message_out (PARSER, "Element " + identifier + " found");
176 curr_container_element = new OfxInvestmentTransactionContainer (libofx_context, curr_container_element, identifier);
177 }
178 /*The following is a list of OFX elements whose attributes will be processed by the parent container*/
179 else if (identifier == "INVBUY" ||
180 identifier == "INVSELL" ||
181 identifier == "INVTRAN" ||
182 identifier == "SECINFO" ||
183 identifier == "SECID" ||
184 identifier == "CURRENCY" ||
185 identifier == "ORIGCURRENCY")
186 {
187 message_out (PARSER, "Element " + identifier + " found");
188 curr_container_element = new OfxPushUpContainer (libofx_context, curr_container_element, identifier);
189 }
190
191 /* provide a parent for the account list response so its ACCTFROM can be recognized */
192 else if (identifier == "BANKACCTINFO" || identifier == "CCACCTINFO" || identifier == "INVACCTINFO")
193 {
194 message_out (PARSER, "Element " + identifier + " found");
195 curr_container_element = new OfxPushUpContainer (libofx_context, curr_container_element, identifier);
196 }
197
198 /* The different types of accounts */
199 else if (identifier == "BANKACCTFROM" || identifier == "CCACCTFROM" || identifier == "INVACCTFROM")
200 {
201 message_out (PARSER, "Element " + identifier + " found");
202 /* check the container to avoid creating multiple statements for TRANSFERs */
203 if (curr_container_element &&
204 ( curr_container_element->type == "STATEMENT"
205 || curr_container_element->tag_identifier == "BANKACCTINFO"
206 || curr_container_element->tag_identifier == "CCACCTINFO"
207 || curr_container_element->tag_identifier == "INVACCTINFO"))
208 curr_container_element = new OfxAccountContainer (libofx_context, curr_container_element, identifier);
209 else
210 // no new account or statement for a <TRANSFER>
211 curr_container_element = new OfxDummyContainer (libofx_context, curr_container_element, identifier);
212 }
213 else if (identifier == "STOCKINFO" || identifier == "OPTINFO" ||
214 identifier == "DEBTINFO" || identifier == "MFINFO" || identifier == "OTHERINFO")
215 {
216 message_out (PARSER, "Element " + identifier + " found");
217 curr_container_element = new OfxSecurityContainer (libofx_context, curr_container_element, identifier);
218 }
219 /* The different types of balances */
220 else if (identifier == "LEDGERBAL" ||
221 identifier == "AVAILBAL" ||
222 identifier == "INVBAL")
223 {
224 message_out (PARSER, "Element " + identifier + " found");
225 curr_container_element = new OfxBalanceContainer (libofx_context, curr_container_element, identifier);
226 }
227 else if (identifier == "INVPOS")
228 {
229 message_out (PARSER, "Element " + identifier + " found");
230 curr_container_element = new OfxPositionContainer (libofx_context, curr_container_element, identifier);
231 }
232 else
233 {
234 /* We dont know this OFX element, so we create a dummy container */
235 curr_container_element = new OfxDummyContainer(libofx_context, curr_container_element, identifier);
236 }
237 }
238 else
239 {
242 if (identifier == "INV401K")
243 {
244 /* Minimal handler for this section to discard <DTASOF>, <DTSTART> and <DTEND> that need to be ignored */
245 message_out (PARSER, "Element " + identifier + " found");
246 curr_container_element = new OfxInv401kContainer (libofx_context, curr_container_element, identifier);
247 }
248 if (identifier == "INV401KBAL")
249 {
250 message_out (PARSER, "Element " + identifier + " found");
251 curr_container_element = new OfxBalanceContainer (libofx_context, curr_container_element, identifier);
252 }
253 else
254 {
255 /* The element was a data element. OpenSP will call one or several data() callback with the data */
256 message_out (PARSER, "Data element " + identifier + " found");
257 /* There is a bug in OpenSP 1.3.4, which won't send endElement Event for some elements, and will instead send an error like "document type does not allow element "MESSAGE" here". Incoming_data should be empty in such a case, but it will not be if the endElement event was skipped. So we empty it, so at least the last element has a chance of having valid data */
258 if (incoming_data != "")
259 {
260 message_out (ERROR, "startElement: incoming_data should be empty! You are probably using OpenSP <= 1.3.4. The following data was lost: " + incoming_data );
261 incoming_data.assign ("");
262 }
263 }
264 }
265 }
266
271 void endElement (const EndElementEvent & event)
272 {
273 std::string identifier = CharStringtostring (event.gi);
274 bool end_element_for_data_element = is_data_element;
275 message_out(PARSER, "endElement event received from OpenSP for element " + identifier);
276
277 position = event.pos;
278 if (curr_container_element == NULL)
279 {
280 message_out (ERROR, "Tried to close a " + identifier + " without a open element (NULL pointer)");
281 incoming_data.assign ("");
282 }
283 else //curr_container_element != NULL
284 {
285 if (end_element_for_data_element == true)
286 {
287 incoming_data = strip_whitespace(incoming_data);
288
289 curr_container_element->add_attribute (identifier, incoming_data);
290 message_out (PARSER, "endElement: Added data '" + incoming_data + "' from " + identifier + " to " + curr_container_element->type + " container_element");
291 incoming_data.assign ("");
292 is_data_element = false;
293 }
294 else
295 {
296 if (identifier == curr_container_element->tag_identifier)
297 {
298 if (incoming_data != "")
299 {
300 message_out(ERROR, "End tag for non data element " + identifier + ", incoming data should be empty but contains: " + incoming_data + " DATA HAS BEEN LOST SOMEWHERE!");
301 }
302
303 if (identifier == "OFX")
304 {
305 /* The main container is a special case */
306 tmp_container_element = curr_container_element;
307 curr_container_element = curr_container_element->getparent ();
308 if (curr_container_element == NULL)
309 {
310 //Defensive coding, this isn't supposed to happen
311 curr_container_element = tmp_container_element;
312 }
313 if (MainContainer != NULL)
314 {
315 MainContainer->gen_event();
316 delete MainContainer;
317 MainContainer = NULL;
318 curr_container_element = NULL;
319 message_out (DEBUG, "Element " + identifier + " closed, MainContainer destroyed");
320 }
321 else
322 {
323 message_out (DEBUG, "Element " + identifier + " closed, but there was no MainContainer to destroy (probably a malformed file)!");
324 }
325 }
326 else
327 {
328 tmp_container_element = curr_container_element;
329 curr_container_element = curr_container_element->getparent ();
330 if (MainContainer != NULL)
331 {
335 if (identifier == "CURRENCY" || identifier == "ORIGCURRENCY")
336 {
337 tmp_container_element->add_attribute (identifier, incoming_data);
338 message_out (DEBUG, "Element " + identifier + " closed, container " + tmp_container_element->type + " updated");
339 }
340 else
341 {
342 tmp_container_element->add_to_main_tree();
343 message_out (PARSER, "Element " + identifier + " closed, object added to MainContainer");
344 }
345 }
346 else
347 {
348 message_out (ERROR, "MainContainer is NULL trying to add element " + identifier);
349 }
350 }
351 }
352 else
353 {
354 message_out (ERROR, "Tried to close a " + identifier + " but a " + curr_container_element->type + " is currently open.");
355 }
356 }
357 }
358 }
359
364 void data (const DataEvent & event)
365 {
366 std::string tmp;
367 position = event.pos;
368 AppendCharStringtostring (event.data, incoming_data);
369 message_out(PARSER, "data event received from OpenSP, incoming_data is now: " + incoming_data);
370 }
371
376 void error (const ErrorEvent & event)
377 {
378 std::string message;
379 OfxMsgType error_type = ERROR;
380 const std::string eventMessage = CharStringtostring (event.message);
381
382 position = event.pos;
383 message = message + "OpenSP parser: ";
384 switch (event.type)
385 {
386 case SGMLApplication::ErrorEvent::quantity:
387 message = message + "quantity (Exceeding a quantity limit):";
388 error_type = ERROR;
389 break;
390 case SGMLApplication::ErrorEvent::idref:
391 message = message + "idref (An IDREF to a non-existent ID):";
392 error_type = ERROR;
393 break;
394 case SGMLApplication::ErrorEvent::capacity:
395 message = message + "capacity (Exceeding a capacity limit):";
396 error_type = ERROR;
397 break;
398 case SGMLApplication::ErrorEvent::otherError:
399 // #60: If the SGML parser encounters a non-ascii char, it sends an error
400 // message, even though those characters are being forwarded just fine.
401 // Hence we count the occurrence of those errors and subtract it from the
402 // final number of errors.
403 if (eventMessage.find(MESSAGE_NON_SGML_CHAR) != std::string::npos) {
404 ++errorCountToIgnore;
405 message = message + "ignored character error:";
406 error_type = INFO;
407 } else {
408 message = message + "otherError (misc parse error):";
409 error_type = ERROR;
410 }
411 break;
412 case SGMLApplication::ErrorEvent::warning:
413 message = message + "warning (Not actually an error.):";
414 error_type = WARNING;
415 break;
416 case SGMLApplication::ErrorEvent::info:
417 message = message + "info (An informationnal message. Not actually an error):";
418 error_type = INFO;
419 break;
420 default:
421 message = message + "OpenSP sent an unknown error to LibOFX (You probably have a newer version of OpenSP):";
422 }
423 message = message + "\n" + eventMessage;
424 message_out (error_type, message);
425 }
426
431 void openEntityChange (const OpenEntityPtr & para_entity_ptr)
432 {
433 message_out(DEBUG, "openEntityChange()\n");
434 entity_ptr = para_entity_ptr;
435
436 };
437
438private:
439};
440
444int ofx_proc_sgml(LibofxContext * libofx_context, int argc, char * const* argv)
445{
446 message_out(DEBUG, "Begin ofx_proc_sgml()");
447 assert(argc >= 3);
448 message_out(DEBUG, argv[0]);
449 message_out(DEBUG, argv[1]);
450 message_out(DEBUG, argv[2]);
451
452 ParserEventGeneratorKit parserKit;
453 parserKit.setOption (ParserEventGeneratorKit::showOpenEntities);
454 EventGenerator *egp = parserKit.makeEventGenerator (argc, argv);
455 egp->inhibitMessages (true); /* Error output is handled by libofx not OpenSP */
456 OFXApplication app(libofx_context);
457 unsigned originalErrorCount = egp->run (app); /* Begin parsing */
458 unsigned nErrors = originalErrorCount - app.getErrorCountToIgnore(); // but ignore certain known errors that we want to ignore
459 delete egp; //Note that this is where bug is triggered
460 return nErrors > 0;
461}
This object is driven by OpenSP as it parses the SGML from the ofx file(s)
Definition ofx_sgml.cpp:46
void data(const DataEvent &event)
Callback: Data from an OFX element.
Definition ofx_sgml.cpp:364
void error(const ErrorEvent &event)
Callback: SGML parse error.
Definition ofx_sgml.cpp:376
void openEntityChange(const OpenEntityPtr &para_entity_ptr)
Callback: Receive internal OpenSP state.
Definition ofx_sgml.cpp:431
void startElement(const StartElementEvent &event)
Callback: Start of an OFX element.
Definition ofx_sgml.cpp:75
void endElement(const EndElementEvent &event)
Callback: End of an OFX element.
Definition ofx_sgml.cpp:271
Represents a bank account or a credit card account.
Represents the <BALANCE>, <INVBAL> or <INV401KBAL> OFX SGML entity.
Represents a bank or credit card transaction.
A container to hold OFX SGML elements that LibOFX knows nothing about.
A generic container for an OFX SGML element. Every container inherits from OfxGenericContainer.
A container to hold OFX SGML elements for <INV401K>
Represents a bank or credit card transaction.
The root container. Created by the <OFX> OFX element or by the export functions.
Represents an investment position, such as a stock or bond.
A container to hold an OFX SGML element for which you want the parent to process its data elements.
Represents a security, such as a stock or bond.
Represents a statement for either a bank account or a credit card account.
Represents the <STATUS> OFX SGML entity.
SGMLApplication::Position position
Definition messages.cpp:34
SGMLApplication::OpenEntityPtr entity_ptr
Definition messages.cpp:33
int message_out(OfxMsgType error_type, const std::string message)
Message output function.
Definition messages.cpp:67
Message IO functionality.
OfxMsgType
Definition messages.hh:24
@ DEBUG
Definition messages.hh:25
@ PARSER
Definition messages.hh:35
@ ERROR
Definition messages.hh:34
@ INFO
Definition messages.hh:32
@ WARNING
Definition messages.hh:33
LibOFX internal object code.
int ofx_proc_sgml(LibofxContext *libofx_context, int argc, char *const *argv)
Parses a DTD and OFX file(s)
Definition ofx_sgml.cpp:444
SGMLApplication::Position position
Definition messages.cpp:34
SGMLApplication::OpenEntityPtr entity_ptr
Definition messages.cpp:33
OFX/SGML parsing functionality.
std::string CharStringtostring(const SGMLApplication::CharString source)
Convert OpenSP CharString to a C++ STL string.
std::string strip_whitespace(const std::string para_string)
Sanitize a string coming from OpenSP.
void AppendCharStringtostring(const SGMLApplication::CharString source, std::string &dest)
Append an OpenSP CharString to an existing C++ STL string.
Various simple functions for type conversion & al.