alaCarte Maps
Renderer for OpenStreetMap tiles
importer.cpp
Go to the documentation of this file.
1 
23 #include <boost/filesystem/path.hpp>
24 #include <boost/unordered_map.hpp>
25 #include <boost/filesystem/operations.hpp>
26 #include <boost/type_traits/is_same.hpp>
27 #include <unordered_set>
28 
29 #include "../../extras/eaglexml/eaglexml.hpp"
30 #include "../../extras/eaglexml/eaglexml_iterators.hpp"
31 
32 #include "importer/importer.hpp"
33 
35 #include "general/geodata.hpp"
36 
37 #include "general/geo_object.hpp"
38 #include "general/node.hpp"
39 #include "general/way.hpp"
40 #include "general/relation.hpp"
41 
42 
43 using boost::filesystem::path;
44 using nl = std::numeric_limits<double>;
45 
46 
53  : public eaglexml::stream_cache<>::stream_cache_observer
54 {
55 public:
57  static const int parser_flags = eaglexml::parse_no_data_nodes
58  | eaglexml::parse_normalize_whitespace
59  | eaglexml::parse_trim_whitespace
60  | eaglexml::parse_validate_closing_tags
61  | eaglexml::parse_default;
62 
64  typedef uint64_t OsmIdType;
65 
66 public:
71  OsmXmlParser(bool ignoreUnknownEntities, const FloatRect& bounds = { -nl::max(), -nl::max(), nl::max(), nl::max() })
72  : ignoreUnknownEntities(ignoreUnknownEntities)
73  , clippingBounds(bounds)
74  , alreadyRead(0)
75  , fileSize(0)
76  , segmentSize(1024 * 1024)
77  , outputIgnoreRelation(false)
78  , outputIgnoreBounds(false)
79  {
80  }
81 
93  void parse(const path& xml_file)
94  {
95  assert(!nodes);
96  assert(!ways);
97  assert(!relations);
98 
99  std::ifstream xml_stream(xml_file.string());
100 
101  if(!xml_stream)
102  BOOST_THROW_EXCEPTION(excp::FileNotFoundException() << excp::InfoFileName(xml_file.string()));
103 
104  LOG_SEV(importer_log, info) << "Load xml-file \"" << xml_file.string() << "\"";
105 
106  fileSize = boost::filesystem::file_size(xml_file);
107 
108  LOG_SEV(importer_log, info) << "File size is " << fileSize / (1024) << "kb";
109 
110  nodes = boost::make_shared<std::vector<Node> >();
111  ways = boost::make_shared<std::vector<Way> >();
112  relations = boost::make_shared<std::vector<Relation> >();
113 
114  // Use a cache with an 8 mb buffer
115  eaglexml::stream_cache<> cache(xml_stream, 8 * 1024 * 1024);
116  eaglexml::xml_document<> document;
117 
118  cache.segment_size(segmentSize);
119  cache.max_segments_to_read(1);
120  cache.observer(this);
121 
122 
123  try {
124  // Init the xml parser
125  document.parse<parser_flags>(&cache);
126 
127  eaglexml::xml_node<>* osm = document.first_node("osm");
128 
129  if(!osm)
130  BOOST_THROW_EXCEPTION(excp::InputFormatException() << excp::InfoXmlEntityName("<osm>") << excp::InfoWhat("Missing \"<osm>\" xml node!"));
131 
132  parseEntities(osm);
133 
134  }catch(excp::InputFormatException& e) {
135  e << excp::InfoFileName(xml_file.string());
136  throw;
137  }catch(eaglexml::parse_error& e)
138  {
139  BOOST_THROW_EXCEPTION(excp::InputFormatException() << excp::InfoFileName(xml_file.string()) << excp::InfoWhat(e.what()));
140  }
141  }
142 
148  shared_ptr< std::vector<Node> > getParsedNodes() const
149  {
150  assert(nodes);
151  return nodes;
152  }
153 
159  shared_ptr< std::vector<Way> > getParsedWays() const
160  {
161  assert(ways);
162  return ways;
163  }
164 
170  shared_ptr< std::vector<Relation> > getParsedRelations() const
171  {
172  assert(relations);
173  return relations;
174  }
175 
181  std::size_t getNumberOfClippedNodes() const
182  {
183  return clippedNodes.size();
184  }
185 
186 private:
192  void parseEntities(eaglexml::xml_node<>* osmRoot)
193  {
194  assert(osmRoot);
195 
196  std::map<std::string, void (OsmXmlParser::*)(eaglexml::xml_node<>*)> entities;
197 
198  // Entities that can be parsed
199  entities["bounds"] = &OsmXmlParser::parseBounds;
200  entities["node"] = &OsmXmlParser::parseNode;
201  entities["way"] = &OsmXmlParser::parseWay;
202  entities["relation"] = &OsmXmlParser::parseRelation;
203 
204  for(eaglexml::node_iterator<> it(osmRoot);
205  it != eaglexml::node_iterator<>();
206  ++it)
207  {
208  auto entityIt = entities.find(it->name());
209 
210  if(!ignoreUnknownEntities && entityIt == entities.end())
211  BOOST_THROW_EXCEPTION(excp::InputFormatException() << excp::InfoXmlEntityName(it->name()) << excp::InfoWhat("Unknown entity in xml file!"));
212 
213  try{
214  (this->*(entityIt->second))(&*it);
215  }catch(excp::BadOsmIdException& e) {
216  const auto id = *boost::get_error_info<excp::InfoUnresolvableId>(e);
217  if (!clippedNodes.count(id)) {
218  LOG_SEV(importer_log, warning) << "Bad osm id[" << id << "]. Entity is skipped!";
219  }
220  }
221  }
222  }
223 
229  void parseBounds(eaglexml::xml_node<>* node)
230  {
231  if(!outputIgnoreBounds)
232  {
233  LOG_SEV(importer_log, info) << "Bounds tag in osm data is ignored by this software!";
234  outputIgnoreBounds = true;
235  }
236  }
237 
243  void parseNode(eaglexml::xml_node<>* node)
244  {
245  assert(node);
246 
247  OsmIdType id;
248  double lon, lat;
249  extractAttributeFromNode("id", node, &id);
250  extractAttributeFromNode("lon", node, &lon);
251  extractAttributeFromNode("lat", node, &lat);
252 
253  FloatPoint loc = {lon, lat};
254  if (clippingBounds.contains(loc)) {
256  parseProperties<Node>(node->first_node(), &tags, nullptr, nullptr, nullptr, nullptr);
257 
258  nodeIdMapping.insert(std::make_pair(id, NodeId(nodes->size())));
259  nodes->push_back(Node(loc, tags));
260  } else {
261  clippedNodes.insert(id);
262  }
263  }
264 
270  void parseWay(eaglexml::xml_node<>* way)
271  {
272  assert(way);
273 
274  OsmIdType id;
275  extractAttributeFromNode("id", way, &id);
276 
277 
279  std::vector<NodeId> nodeIds;
280 
281  parseProperties<Way>(way->first_node(), &tags, &nodeIds, nullptr, nullptr, nullptr);
282 
283  if (nodeIds.size() == 0)
284  return;
285 
286  wayIdMapping.insert(std::make_pair(id, WayId(ways->size())));
287  ways->push_back(Way(nodeIds, tags));
288  }
289 
295  void parseRelation(eaglexml::xml_node<>* relation)
296  {
297  assert(relation);
298 
299  OsmIdType id;
300  extractAttributeFromNode("id", relation, &id);
301 
303  std::vector<NodeId> nodeIds;
304  std::vector<WayId> wayIds;
307 
308 
309  parseProperties<Relation>(relation->first_node(), &tags, &nodeIds, &nodeRoles, &wayIds, &wayRoles);
310 
311  if (nodeIds.size() == 0 && wayIds.size() == 0)
312  return;
313 
314  relations->push_back(Relation(nodeIds, nodeRoles, wayIds, wayRoles, tags));
315  }
316 
317 
333  template<typename Target>
334  inline void parseProperties(eaglexml::xml_node<>* firstProp,
336  std::vector<NodeId>* nodeRefIds,
337  DataMap<NodeId, CachedString>* nodeRoleMap,
338  std::vector<WayId>* wayRefIds,
339  DataMap<WayId, CachedString>* wayRoleMap)
340  {
341  BOOST_STATIC_ASSERT((boost::is_same<Target, Node>::value || boost::is_same<Target, Way>::value || boost::is_same<Target, Relation>::value));
342  assert(tagMap);
343 
344  // Go through all properties and parse them
345  eaglexml::xml_node<>* prop = firstProp;
346  for(;prop; prop = prop->next_sibling())
347  {
348  const char* propName = firstProp->name();
349 
350  if(std::strcmp(propName, "tag") == 0)
351  {
352  // Parse tag
353  CachedString key;
354  CachedString value;
355  extractAttributeFromNode("k", prop, &key);
356  extractAttributeFromNode("v", prop, &value);
357 
358  (*tagMap)[key] = value;
359 
360  }else if(boost::is_same<Target, Way>::value && std::strcmp(propName, "nd") == 0)
361  {
362  assert(nodeRefIds);
363 
364  // Parse a node reference
365  OsmIdType osmId;
366  extractAttributeFromNode("ref", prop, &osmId);
367  nodeRefIds->push_back(resolveOsmId(osmId, nodeIdMapping));
368  }else if(boost::is_same<Target, Relation>::value && std::strcmp(propName, "member") == 0)
369  {
370  // Parse a relation member
371  assert(nodeRefIds);
372  assert(nodeRoleMap);
373  assert(wayRefIds);
374  assert(wayRoleMap);
375 
376 
377  eaglexml::xml_attribute<>* attr = prop->first_attribute("type");
378 
379  if(!attr || attr->value_size() == 0)
380  BOOST_THROW_EXCEPTION(excp::InputFormatException() << excp::InfoXmlEntityName("relation::member::type"));
381 
383  OsmIdType osmRefId;
384  extractAttributeFromNode("ref", prop, &osmRefId);
385 
386  // Evaluate type attribute
387  const char* type = attr->value();
388  if(std::strcmp(type, "node") == 0)
389  {
390  NodeId nodeId = resolveOsmId(osmRefId, nodeIdMapping);
391  nodeRefIds->push_back(nodeId);
392  extractAttributeFromNode("role", prop, &((*nodeRoleMap)[nodeId]));
393  }else if(std::strcmp(type, "way") == 0)
394  {
395  WayId wayId = resolveOsmId(osmRefId, wayIdMapping);
396  wayRefIds->push_back(wayId);
397  extractAttributeFromNode("role", prop, &((*wayRoleMap)[wayId]));
398  }else if(std::strcmp(type, "relation") == 0) {
399 
401  {
402  LOG_SEV(importer_log, warning) << "This software does not support relation member in relations!";
403  LOG_SEV(importer_log, warning) << "Reference is ignored!";
404  outputIgnoreRelation = true;
405  }
406  continue;
407  }else{
408 
409  BOOST_THROW_EXCEPTION(excp::InputFormatException() << excp::InfoXmlEntityName("relation::member::type::value"));
410  }
411 
412  }
413  }
414  }
415 
425  template<typename T>
426  void extractAttributeFromNode(const string& attrname, eaglexml::xml_node<>* node, T* dest)
427  {
428  assert(node);
429  assert(dest);
430 
431  eaglexml::xml_attribute<>* attr = node->first_attribute(attrname.c_str(), attrname.size());
432 
433  if(!attr)
434  BOOST_THROW_EXCEPTION(excp::InputFormatException() << excp::InfoXmlEntityName(attrname));
435 
436  const char* value = attr->value();
437  try {
438  *dest = boost::lexical_cast<T>(value);
439  }catch(boost::bad_lexical_cast& e)
440  {
441  (void)e;
442  BOOST_THROW_EXCEPTION(excp::InputFormatException() << excp::InfoXmlEntityName(attrname) << excp::InfoBadSourceValue(value));
443  }
444  }
445 
456  template<typename IdType>
457  IdType resolveOsmId(OsmIdType osmId, const boost::unordered_map<OsmIdType, IdType>& table)
458  {
459  auto it = table.find(osmId);
460 
461  if(it == table.end())
462  BOOST_THROW_EXCEPTION(excp::BadOsmIdException() << excp::InfoUnresolvableId(osmId));
463 
464  return it->second;
465  }
466 
467 
468  virtual void on_fetch( unsigned int chars_left, unsigned int need, node_type* active_node ) {}
469  virtual void on_buffer_resize() {}
470  virtual void on_segment_read() {}
471 
472  virtual void on_read_begin( unsigned int segments )
473  {
474  int before = int(100 * (double)alreadyRead / (double)fileSize);
475  alreadyRead += segments * segmentSize;
476  int after = int(100 * (double)alreadyRead / (double)fileSize);
477 
478  if(after != before)
479  LOG_SEV(importer_log, info) << "Loading [" << std::min(after, 99) << "%]";
480  }
481 
482 
483 
484 private:
485 
489 
493 
495  boost::unordered_map<OsmIdType, NodeId> nodeIdMapping;
496 
498  boost::unordered_map<OsmIdType, WayId> wayIdMapping;
499 
501  std::unordered_set<OsmIdType> clippedNodes;
502 
504  shared_ptr< std::vector<Node> > nodes;
505 
507  shared_ptr< std::vector<Way> > ways;
508 
510  shared_ptr< std::vector<Relation> > relations;
511 
513  std::uintmax_t fileSize;
514 
516  std::uintmax_t alreadyRead;
517 
519  unsigned int segmentSize;
520 
523 };
524 
525 
526 
527 
528 
529 
530 
531 
532 
538 Importer::Importer(const shared_ptr<Configuration>& config)
539  : config(config)
540 {
541 }
542 
548 shared_ptr<Geodata> Importer::importXML()
549 {
550  FloatRect bounds = {
551  FloatPoint(config->get(opt::importer::min_lon, -nl::max()), config->get(opt::importer::min_lat, -nl::max())),
552  FloatPoint(config->get(opt::importer::max_lon, nl::max()), config->get(opt::importer::max_lat, nl::max()))
553  };
554  LOG_SEV(importer_log, info) << "Clipping nodes with [lon: " << bounds.minX << " to " << bounds.maxX << ", lat: " << bounds.minY << " to " << bounds.maxY << "]";
555  OsmXmlParser parser(!config->get<bool>(opt::importer::check_xml_entities), bounds);
556  shared_ptr<Geodata> geodata = boost::make_shared<Geodata>();
557 
558  path xml_file = config->get<string>(opt::importer::path_to_osmdata);
559  LOG_SEV(importer_log, info) << "Start parsing...";
560  parser.parse(xml_file);
561 
562  LOG_SEV(importer_log, info) << "Insert into geodata...";
563  geodata->insertNodes(parser.getParsedNodes());
564  geodata->insertWays(parser.getParsedWays());
565  geodata->insertRelations(parser.getParsedRelations());
566 
567  const auto node_count = parser.getParsedNodes()->size();
568  LOG_SEV(importer_log, info) << "Clipped " << parser.getNumberOfClippedNodes() << " / " << (parser.getNumberOfClippedNodes() + node_count) << " nodes. " << node_count << " nodes remaining.";
569 
570  return geodata;
571 }
shared_ptr< std::vector< Relation > > getParsedRelations() const
Returns a vector with parsed relations.
Definition: importer.cpp:170
std::numeric_limits< double > nl
Definition: importer.cpp:44
void parseNode(eaglexml::xml_node<> *node)
parses a node entity
Definition: importer.cpp:243
IdType resolveOsmId(OsmIdType osmId, const boost::unordered_map< OsmIdType, IdType > &table)
Resolves an osm id into an internal id.
Definition: importer.cpp:457
bool ignoreUnknownEntities
Specifies whether the parser should ignore unknown entities.
Definition: importer.cpp:488
shared_ptr< std::vector< Node > > nodes
List to be filled with nodes.
Definition: importer.cpp:504
void parseEntities(eaglexml::xml_node<> *osmRoot)
parses the osm-root element and creates given objects
Definition: importer.cpp:192
static const char * check_xml_entities
Check all xml entities.
boost::unordered_map< OsmIdType, WayId > wayIdMapping
Mapping from osm ids to internal ids for ways.
Definition: importer.cpp:498
std::uintmax_t alreadyRead
Bytes already read from the xml file.
Definition: importer.cpp:516
shared_ptr< std::vector< Way > > ways
List to be filled with ways.
Definition: importer.cpp:507
boost::error_info< struct TagXmlEntityName, string > InfoXmlEntityName
Use this to specify the xml entity related to the exception.
Definition: exceptions.hpp:45
TESTABLE shared_ptr< Geodata > importXML()
Parses an osm xml file specified in configuration containing osm data.
Definition: importer.cpp:548
static const char * max_lat
maximum node latitude to include into imported data
Thrown if an osm id was not specified before resolving.
Definition: exceptions.hpp:79
shared_ptr< std::vector< Node > > getParsedNodes() const
Returns a vector with parsed nodes.
Definition: importer.cpp:148
Selects nodes.
Definition: mapcss_def.hpp:51
void parseProperties(eaglexml::xml_node<> *firstProp, DataMap< CachedString, CachedString > *tagMap, std::vector< NodeId > *nodeRefIds, DataMap< NodeId, CachedString > *nodeRoleMap, std::vector< WayId > *wayRefIds, DataMap< WayId, CachedString > *wayRoleMap)
parses properties of an osm-object
Definition: importer.cpp:334
boost::error_info< struct TagBadSourceValue, string > InfoBadSourceValue
String representation of a bad source value.
Definition: exceptions.hpp:47
Thrown if input was not in the right format.
Definition: exceptions.hpp:77
Selects relations.
Definition: mapcss_def.hpp:53
bool contains(const basic_vector2< T > &p) const
Definition: rect.hpp:130
static const char * min_lat
minimum node latitude to include into imported data
T minY
Definition: rect.hpp:52
TypedId< 0 > NodeId
Definition: settings.hpp:135
virtual void on_read_begin(unsigned int segments)
Definition: importer.cpp:472
Selects ways.
Definition: mapcss_def.hpp:52
Importer(const shared_ptr< Configuration > &config)
Initializes the importer.
Definition: importer.cpp:538
OsmXmlParser(bool ignoreUnknownEntities, const FloatRect &bounds={-nl::max(),-nl::max(), nl::max(), nl::max()})
Creates a new parser and sets default settings.
Definition: importer.cpp:71
TypedId< 1 > WayId
Definition: settings.hpp:136
static const char * path_to_osmdata
Option to get the path to osm xml file (type: string)
std::uintmax_t fileSize
Size of the xml file in bytes.
Definition: importer.cpp:513
virtual void on_segment_read()
Definition: importer.cpp:470
const FloatRect clippingBounds
Specifies the rectangular area in which the nodes are kept.
Definition: importer.cpp:492
boost::unordered_map< OsmIdType, NodeId > nodeIdMapping
Mapping from osm ids to internal ids for nodes.
Definition: importer.cpp:495
#define LOG_SEV(log, lvl)
Definition: settings.hpp:78
boost::error_info< struct TagWhatInfo, string > InfoWhat
Use this info to attach a "what" message to the exception.
Definition: exceptions.hpp:41
unsigned int segmentSize
Number of bytes read by one read operation.
Definition: importer.cpp:519
uint64_t OsmIdType
Type in which osm ids are stored.
Definition: importer.cpp:64
bool outputIgnoreRelation
Booleans for some output, which should only appear once.
Definition: importer.cpp:522
boost::error_info< struct TagUnresolvableId, long > InfoUnresolvableId
Specifies the id, which was not resolvable.
Definition: exceptions.hpp:49
Thrown if a file was not found.
Definition: exceptions.hpp:73
static const char * min_lon
minimum node longitude to include into imported data
shared_ptr< Configuration > config
Definition: importer.hpp:41
void parseWay(eaglexml::xml_node<> *way)
parses the way entity
Definition: importer.cpp:270
void parse(const path &xml_file)
Will parse a given xml file.
Definition: importer.cpp:93
basic_vector2< double > FloatPoint
Definition: point.hpp:145
void extractAttributeFromNode(const string &attrname, eaglexml::xml_node<> *node, T *dest)
Extracts the value of a specified xml-attribute from a given node.
Definition: importer.cpp:426
static const char * max_lon
maximum node longitude to include into imported data
void parseBounds(eaglexml::xml_node<> *node)
parses the bound entity
Definition: importer.cpp:229
std::unordered_set< OsmIdType > clippedNodes
Id of clipped nodes.
Definition: importer.cpp:501
static const int parser_flags
flags for parsing the xml file
Definition: importer.cpp:57
shared_ptr< std::vector< Way > > getParsedWays() const
Returns a vector with parsed ways.
Definition: importer.cpp:159
Represents a string which is cached into an internal cache.
shared_ptr< std::vector< Relation > > relations
List to be filled with relations.
Definition: importer.cpp:510
std::size_t getNumberOfClippedNodes() const
Returns the number of clipped nodes.
Definition: importer.cpp:181
T maxX
Definition: rect.hpp:53
void parseRelation(eaglexml::xml_node<> *relation)
parses the relation entity
Definition: importer.cpp:295
Parser for osm-xml-data.
Definition: importer.cpp:52
virtual void on_fetch(unsigned int chars_left, unsigned int need, node_type *active_node)
Definition: importer.cpp:468
boost::error_info< struct TagFileName, string > InfoFileName
Use this to inform about a file name.
Definition: exceptions.hpp:43
T maxY
Definition: rect.hpp:54
virtual void on_buffer_resize()
Definition: importer.cpp:469
T minX
Definition: rect.hpp:51