1   /* Copyright 2002-2021 CS GROUP
2    * Licensed to CS GROUP (CS) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * CS licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *   http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.orekit.files.ccsds.utils.lexical;
18  
import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
import java.util.Map;

import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;

import org.hipparchus.exception.DummyLocalizable;
import org.orekit.data.DataSource;
import org.orekit.errors.OrekitException;
import org.orekit.errors.OrekitMessages;
import org.orekit.files.ccsds.utils.FileFormat;
import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.Locator;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.AttributesImpl;
import org.xml.sax.helpers.DefaultHandler;
38  
39  /** Lexical analyzer for XML CCSDS messages.
40   * @author Maxime Journot
41   * @author Luc Maisonobe
42   * @since 11.0
43   */
44  public class XmlLexicalAnalyzer implements LexicalAnalyzer {
45  
46      /** Source providing the data to analyze. */
47      private final DataSource source;
48  
49      /** Simple constructor.
50       * @param source source providing the data to parse
51       */
52      public XmlLexicalAnalyzer(final DataSource source) {
53          this.source = source;
54      }
55  
56      /** {@inheritDoc} */
57      @Override
58      public <T> T accept(final MessageParser<T> messageParser) {
59          try {
60              // Create the handler
61              final DefaultHandler handler = new XMLHandler(messageParser);
62  
63              // Create the XML SAX parser factory
64              final SAXParserFactory factory = SAXParserFactory.newInstance();
65  
66              // Build the parser
67              final SAXParser saxParser = factory.newSAXParser();
68  
69              // Read the xml file
70              messageParser.reset(FileFormat.XML);
71              final DataSource.Opener opener = source.getOpener();
72              if (opener.rawDataIsBinary()) {
73                  try (InputStream is = opener.openStreamOnce()) {
74                      if (is == null) {
75                          throw new OrekitException(OrekitMessages.UNABLE_TO_FIND_FILE, source.getName());
76                      }
77                      saxParser.parse(new InputSource(is), handler);
78                  }
79              } else {
80                  try (Reader reader = opener.openReaderOnce()) {
81                      if (reader == null) {
82                          throw new OrekitException(OrekitMessages.UNABLE_TO_FIND_FILE, source.getName());
83                      }
84                      saxParser.parse(new InputSource(reader), handler);
85                  }
86              }
87  
88              // Get the content of the file
89              return messageParser.build();
90  
91          } catch (SAXException | ParserConfigurationException | IOException e) {
92              // throw caught exception as an OrekitException
93              throw new OrekitException(e, new DummyLocalizable(e.getMessage()));
94          }
95      }
96  
97      /** Handler for parsing XML file formats.
98       */
99      private class XMLHandler extends DefaultHandler {
100 
101         /** CCSDS Message parser to use. */
102         private final MessageParser<?> messageParser;
103 
104         /** Builder for regular elements. */
105         private final XmlTokenBuilder regularBuilder;
106 
107         /** Builders for special elements. */
108         private Map<String, XmlTokenBuilder> specialElements;
109 
110         /** Locator used to get current line number. */
111         private Locator locator;
112 
113         /** Name of the current element. */
114         private String currentElementName;
115 
116         /** Line number of the current entry. */
117         private int currentLineNumber;
118 
119         /** Content of the current entry. */
120         private String currentContent;
121 
122         /** Attributes of the current element. */
123         private Attributes currentAttributes;
124 
125         /** Simple constructor.
126          * @param messageParser CCSDS Message parser to use
127          */
128         XMLHandler(final MessageParser<?> messageParser) {
129             this.messageParser   = messageParser;
130             this.regularBuilder  = new RegularXmlTokenBuilder();
131             this.specialElements = messageParser.getSpecialXmlElementsBuilders();
132         }
133 
134         /** Get a builder for the current element.
135          * @param qName XML element ualified name
136          * @return builder for this element
137          */
138         private XmlTokenBuilder getBuilder(final String qName) {
139             final XmlTokenBuilder specialBuilder = specialElements.get(qName);
140             return (specialBuilder != null) ? specialBuilder : regularBuilder;
141         }
142 
143         /** {@inheritDoc} */
144         @Override
145         public void setDocumentLocator(final Locator documentLocator) {
146             this.locator = documentLocator;
147         }
148 
149         /** {@inheritDoc} */
150         @Override
151         public void characters(final char[] ch, final int start, final int length) throws SAXException {
152             // we are only interested in leaf elements between one start and one end tag
153             // when nested elements occur, this method is called with the spurious whitespace
154             // characters (space, tab, end of line) that occur between two successive start
155             // tags, two successive end tags, or one end tag and the following start tag of
156             // next element at same level.
157             // We need to identify the characters we want and the characters we drop.
158 
159             // check if we are after a start tag (thus already dropping the characters
160             // between and end tag and a following start or end tag)
161             if (currentElementName != null) {
162                 // we are after a start tag, we don't know yet if the next tag will be
163                 // another start tag (in which case we ignore the characters) or if
164                 // it is the end tag of a leaf element, so we just store the characters
165                 // and will either use them or drop them when this next tag is seen
166                 currentLineNumber = locator.getLineNumber();
167                 currentContent    = new String(ch, start, length);
168             }
169         }
170 
171         /** {@inheritDoc} */
172         @Override
173         public void startElement(final String uri, final String localName, final String qName, final Attributes attributes) {
174 
175             currentElementName = qName;
176             currentAttributes  = attributes;
177             currentLineNumber  = locator.getLineNumber();
178             currentContent     = null;
179 
180             for (final ParseToken token : getBuilder(qName).
181                                           buildTokens(true, qName, currentContent, currentAttributes,
182                                           currentLineNumber, source.getName())) {
183                 messageParser.process(token);
184             }
185 
186         }
187 
188         /** {@inheritDoc} */
189         @Override
190         public void endElement(final String uri, final String localName, final String qName) {
191 
192             if (currentContent == null) {
193                 // for an end tag without content, we keep the line number of the end tag itself
194                 currentLineNumber = locator.getLineNumber();
195             }
196 
197             for (final ParseToken token : getBuilder(qName).
198                                           buildTokens(false, qName, currentContent, currentAttributes,
199                                                       currentLineNumber, source.getName())) {
200                 messageParser.process(token);
201             }
202 
203             currentElementName = null;
204             currentLineNumber  = -1;
205             currentContent     = null;
206 
207         }
208 
209         /** {@inheritDoc} */
210         @Override
211         public InputSource resolveEntity(final String publicId, final String systemId) {
212             // disable external entities
213             return new InputSource();
214         }
215 
216     }
217 
218 }