1   /* Copyright 2002-2021 CS GROUP
2    * Licensed to CS GROUP (CS) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * CS licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *   http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.orekit.files.ccsds.utils.lexical;
18  
19  import java.io.BufferedReader;
20  import java.io.IOException;
21  import java.io.Reader;
22  import java.util.regex.Matcher;
23  import java.util.regex.Pattern;
24  
25  import org.hipparchus.exception.DummyLocalizable;
26  import org.orekit.data.DataSource;
27  import org.orekit.errors.OrekitException;
28  import org.orekit.errors.OrekitMessages;
29  import org.orekit.files.ccsds.utils.FileFormat;
30  import org.orekit.utils.units.Unit;
31  import org.orekit.utils.units.UnitsCache;
32  
33  /** Lexical analyzer for Key-Value Notation CCSDS messages.
34   * @author Luc Maisonobe
35   * @since 11.0
36   */
37  public class KvnLexicalAnalyzer implements LexicalAnalyzer {
38  
39      /** Regular expression matching blanks at start of line. */
40      private static final String LINE_START         = "^\\p{Blank}*";
41  
42      /** Regular expression matching the special COMMENT key that must be stored in the matcher. */
43      private static final String COMMENT_KEY        = "(COMMENT)\\p{Blank}*";
44  
45      /** Regular expression matching a non-comment key that must be stored in the matcher. */
46      private static final String NON_COMMENT_KEY    = "([A-Z][A-Z_0-9]*)\\p{Blank}*=\\p{Blank}*";
47  
48      /** Regular expression matching a no-value key starting a block that must be stored in the matcher. */
49      private static final String START_KEY          = "([A-Z][A-Z_0-9]*)_START";
50  
51      /** Regular expression matching a no-value key ending a block that must be stored in the matcher. */
52      private static final String STOP_KEY           = "([A-Z][A-Z_0-9]*)_STOP";
53  
54      /** Regular expression matching a value that must be stored in the matcher. */
55      private static final String VALUE              = "(\\p{Graph}.*?)";
56  
57      /** Operators allowed in units specifications. */
58      private static final String UNITS_OPERATORS    = "-+*×.·/⁄^√⁺⁻";
59  
60      /** Letters allowed in units specifications. */
61      private static final String UNITS_LETTERS      = "A-Za-zµμ"; // beware µ (U+00B5) and μ (U+03BC) look similar but are different
62  
63      /** Digits allowed in units specifications. */
64      private static final String UNITS_DIGITS       = "0-9⁰¹²³⁴⁵⁶⁷⁸⁹";
65  
66      /** Fractions allowed in units specifications. */
67      private static final String UNITS_FRACTIONS    = "¼½¾⅐⅑⅒⅓⅔⅕⅖⅗⅘⅙⅚⅛⅜⅝⅞";
68  
69      /** Symbols allowed in units specifications. */
70      private static final String UNITS_SYMBOLS      = "%°◦′'″\\\"#";
71  
72      /** Parentheses allowed in units specifications. */
73      private static final String UNITS_PARENTHESES  = "()";
74  
75      /** Regular expression matching units that must be stored in the matcher. */
76      private static final String UNITS              = "(?:\\p{Blank}+\\[([" +
77                                                       UNITS_OPERATORS + UNITS_LETTERS + UNITS_DIGITS +
78                                                       UNITS_FRACTIONS + UNITS_SYMBOLS + UNITS_PARENTHESES +
79                                                      "]*)\\])?";
80  
81      /** Regular expression matching blanks at end of line. */
82      private static final String LINE_END           = "\\p{Blank}*$";
83  
84      /** Regular expression matching comment entry. */
85      private static final Pattern COMMENT_ENTRY     = Pattern.compile(LINE_START + COMMENT_KEY + VALUE + LINE_END);
86  
87      /** Regular expression matching non-comment entry with optional units. */
88      private static final Pattern NON_COMMENT_ENTRY = Pattern.compile(LINE_START + NON_COMMENT_KEY + VALUE + UNITS + LINE_END);
89  
90      /** Regular expression matching no-value entry starting a block. */
91      private static final Pattern START_ENTRY       = Pattern.compile(LINE_START + START_KEY + LINE_END);
92  
93      /** Regular expression matching no-value entry ending a block. */
94      private static final Pattern STOP_ENTRY        = Pattern.compile(LINE_START + STOP_KEY + LINE_END);
95  
96      /** Source providing the data to analyze. */
97      private final DataSource source;
98  
99      /** Parsed units cache. */
100     private final UnitsCache cache;
101 
102     /** Simple constructor.
103      * @param source source providing the data to parse
104      */
105     public KvnLexicalAnalyzer(final DataSource source) {
106         this.source = source;
107         this.cache  = new UnitsCache();
108     }
109 
110     /** {@inheritDoc} */
111     @Override
112     public <T> T accept(final MessageParser<T> messageParser) {
113 
114         messageParser.reset(FileFormat.KVN);
115 
116         try (Reader         reader = source.getOpener().openReaderOnce();
117              BufferedReader br     = (reader == null) ? null : new BufferedReader(reader)) {
118 
119             if (br == null) {
120                 throw new OrekitException(OrekitMessages.UNABLE_TO_FIND_FILE, source.getName());
121             }
122 
123             int lineNumber = 0;
124             for (String line = br.readLine(); line != null; line = br.readLine()) {
125                 ++lineNumber;
126                 if (line.trim().length() == 0) {
127                     continue;
128                 }
129 
130                 final Matcher nonComment = NON_COMMENT_ENTRY.matcher(line);
131                 if (nonComment.matches()) {
132                     // regular key=value line
133                     final Unit units = cache.getUnits(nonComment.groupCount() > 2 ? nonComment.group(3) : null);
134                     messageParser.process(new ParseToken(TokenType.ENTRY,
135                                                          nonComment.group(1), nonComment.group(2),
136                                                          units, lineNumber, source.getName()));
137                 } else {
138                     final Matcher comment = COMMENT_ENTRY.matcher(line);
139                     if (comment.matches()) {
140                         // comment line
141                         messageParser.process(new ParseToken(TokenType.ENTRY,
142                                                              comment.group(1), comment.group(2), null,
143                                                              lineNumber, source.getName()));
144                     } else {
145                         final Matcher start = START_ENTRY.matcher(line);
146                         if (start.matches()) {
147                             // block start
148                             messageParser.process(new ParseToken(TokenType.START,
149                                                                  start.group(1), null, null,
150                                                                  lineNumber, source.getName()));
151                         } else {
152                             final Matcher stop = STOP_ENTRY.matcher(line);
153                             if (stop.matches()) {
154                                 // block end
155                                 messageParser.process(new ParseToken(TokenType.STOP,
156                                                                      stop.group(1), null, null,
157                                                                      lineNumber, source.getName()));
158                             } else {
159                                 // raw data line
160                                 messageParser.process(new ParseToken(TokenType.RAW_LINE,
161                                                                      null, line, null,
162                                                                      lineNumber, source.getName()));
163                             }
164                         }
165                     }
166                 }
167 
168             }
169 
170             return messageParser.build();
171 
172         } catch (IOException ioe) {
173             throw new OrekitException(ioe, new DummyLocalizable(ioe.getMessage()));
174         }
175     }
176 
177 }