1 /* Copyright 2002-2024 CS GROUP
2 * Licensed to CS GROUP (CS) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * CS licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17 package org.orekit.files.ccsds.utils.lexical;
18
19 import java.io.BufferedReader;
20 import java.io.IOException;
21 import java.io.Reader;
22 import java.util.regex.Matcher;
23 import java.util.regex.Pattern;
24
25 import org.hipparchus.exception.DummyLocalizable;
26 import org.orekit.data.DataSource;
27 import org.orekit.errors.OrekitException;
28 import org.orekit.errors.OrekitMessages;
29 import org.orekit.files.ccsds.utils.FileFormat;
30 import org.orekit.utils.units.Unit;
31 import org.orekit.utils.units.UnitsCache;
32
33 /** Lexical analyzer for Key-Value Notation CCSDS messages.
34 * @author Luc Maisonobe
35 * @since 11.0
36 */
37 public class KvnLexicalAnalyzer implements LexicalAnalyzer {
38
39 /** Regular expression matching blanks at start of line. */
40 private static final String LINE_START = "^\\p{Blank}*";
41
42 /** Regular expression matching the special COMMENT key that must be stored in the matcher. */
43 private static final String COMMENT_KEY = "(COMMENT)\\p{Blank}*";
44
45 /** Regular expression matching a non-comment key that must be stored in the matcher. */
46 private static final String NON_COMMENT_KEY = "([A-Z][A-Z_0-9]*)\\p{Blank}*=\\p{Blank}*";
47
48 /** Regular expression matching a no-value key starting a block that must be stored in the matcher. */
49 private static final String START_KEY = "([A-Z][A-Z_0-9]*)_START";
50
51 /** Regular expression matching a no-value key ending a block that must be stored in the matcher. */
52 private static final String STOP_KEY = "([A-Z][A-Z_0-9]*)_STOP";
53
54 /** Regular expression matching a value that must be stored in the matcher. */
55 private static final String OPTIONAL_VALUE = "((?:(?:\\p{Graph}.*?)?))";
56
57 /** Operators allowed in units specifications. */
58 private static final String UNITS_OPERATORS = "-+*×.·/⁄^√⁺⁻";
59
60 /** Letters allowed in units specifications. */
61 private static final String UNITS_LETTERS = "A-Za-zµμ"; // beware µ (U+00B5) and μ (U+03BC) look similar but are different
62
63 /** Digits allowed in units specifications. */
64 private static final String UNITS_DIGITS = "0-9⁰¹²³⁴⁵⁶⁷⁸⁹";
65
66 /** Fractions allowed in units specifications. */
67 private static final String UNITS_FRACTIONS = "¼½¾⅐⅑⅒⅓⅔⅕⅖⅗⅘⅙⅚⅛⅜⅝⅞";
68
69 /** Symbols allowed in units specifications. */
70 private static final String UNITS_SYMBOLS = "%°◦′'″\\\"#";
71
72 /** Parentheses allowed in units specifications. */
73 private static final String UNITS_PARENTHESES = "()";
74
75 /** Regular expression matching units that must be stored in the matcher. */
76 private static final String UNITS = "(?:\\p{Blank}+\\[([" +
77 UNITS_OPERATORS + UNITS_LETTERS + UNITS_DIGITS +
78 UNITS_FRACTIONS + UNITS_SYMBOLS + UNITS_PARENTHESES +
79 "]*)\\])?";
80
81 /** Regular expression matching blanks at end of line. */
82 private static final String LINE_END = "\\p{Blank}*$";
83
84 /** Regular expression matching comment entry. */
85 private static final Pattern COMMENT_ENTRY = Pattern.compile(LINE_START + COMMENT_KEY + OPTIONAL_VALUE + LINE_END);
86
87 /** Regular expression matching non-comment entry with optional units.
88 * <p>
89 * Note than since 12.0, we allow empty values at lexical analysis level and detect them at parsing level
90 * </p>
91 */
92 private static final Pattern NON_COMMENT_ENTRY = Pattern.compile(LINE_START + NON_COMMENT_KEY + OPTIONAL_VALUE + UNITS + LINE_END);
93
94 /** Regular expression matching no-value entry starting a block. */
95 private static final Pattern START_ENTRY = Pattern.compile(LINE_START + START_KEY + LINE_END);
96
97 /** Regular expression matching no-value entry ending a block. */
98 private static final Pattern STOP_ENTRY = Pattern.compile(LINE_START + STOP_KEY + LINE_END);
99
100 /** Source providing the data to analyze. */
101 private final DataSource source;
102
103 /** Parsed units cache. */
104 private final UnitsCache cache;
105
106 /** Simple constructor.
107 * @param source source providing the data to parse
108 */
109 public KvnLexicalAnalyzer(final DataSource source) {
110 this.source = source;
111 this.cache = new UnitsCache();
112 }
113
114 /** {@inheritDoc} */
115 @Override
116 public <T> T accept(final MessageParser<T> messageParser) {
117
118 messageParser.reset(FileFormat.KVN);
119
120 try (Reader reader = source.getOpener().openReaderOnce();
121 BufferedReader br = (reader == null) ? null : new BufferedReader(reader)) {
122
123 if (br == null) {
124 throw new OrekitException(OrekitMessages.UNABLE_TO_FIND_FILE, source.getName());
125 }
126
127 int lineNumber = 0;
128 for (String line = br.readLine(); line != null; line = br.readLine()) {
129 ++lineNumber;
130 if (line.trim().length() == 0) {
131 continue;
132 }
133
134 final Matcher nonComment = NON_COMMENT_ENTRY.matcher(line);
135 if (nonComment.matches()) {
136 // regular key=value line
137 final Unit units = cache.getUnits(nonComment.groupCount() > 2 ? nonComment.group(3) : null);
138 messageParser.process(new ParseToken(TokenType.ENTRY,
139 nonComment.group(1), nonComment.group(2),
140 units, lineNumber, source.getName()));
141 } else {
142 final Matcher comment = COMMENT_ENTRY.matcher(line);
143 if (comment.matches()) {
144 // comment line
145 messageParser.process(new ParseToken(TokenType.ENTRY,
146 comment.group(1), comment.group(2), null,
147 lineNumber, source.getName()));
148 } else {
149 final Matcher start = START_ENTRY.matcher(line);
150 if (start.matches()) {
151 // block start
152 messageParser.process(new ParseToken(TokenType.START,
153 start.group(1), null, null,
154 lineNumber, source.getName()));
155 } else {
156 final Matcher stop = STOP_ENTRY.matcher(line);
157 if (stop.matches()) {
158 // block end
159 messageParser.process(new ParseToken(TokenType.STOP,
160 stop.group(1), null, null,
161 lineNumber, source.getName()));
162 } else {
163 // raw data line
164 messageParser.process(new ParseToken(TokenType.RAW_LINE,
165 null, line, null,
166 lineNumber, source.getName()));
167 }
168 }
169 }
170 }
171
172 }
173
174 return messageParser.build();
175
176 } catch (IOException ioe) {
177 throw new OrekitException(ioe, new DummyLocalizable(ioe.getMessage()));
178 }
179 }
180
181 }