KvnLexicalAnalyzer.java

  1. /* Copyright 2002-2025 CS GROUP
  2.  * Licensed to CS GROUP (CS) under one or more
  3.  * contributor license agreements.  See the NOTICE file distributed with
  4.  * this work for additional information regarding copyright ownership.
  5.  * CS licenses this file to You under the Apache License, Version 2.0
  6.  * (the "License"); you may not use this file except in compliance with
  7.  * the License.  You may obtain a copy of the License at
  8.  *
  9.  *   http://www.apache.org/licenses/LICENSE-2.0
  10.  *
  11.  * Unless required by applicable law or agreed to in writing, software
  12.  * distributed under the License is distributed on an "AS IS" BASIS,
  13.  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14.  * See the License for the specific language governing permissions and
  15.  * limitations under the License.
  16.  */
  17. package org.orekit.files.ccsds.utils.lexical;

  18. import java.io.BufferedReader;
  19. import java.io.IOException;
  20. import java.io.Reader;
  21. import java.util.regex.Matcher;
  22. import java.util.regex.Pattern;

  23. import org.hipparchus.exception.DummyLocalizable;
  24. import org.orekit.data.DataSource;
  25. import org.orekit.errors.OrekitException;
  26. import org.orekit.errors.OrekitMessages;
  27. import org.orekit.files.ccsds.utils.FileFormat;
  28. import org.orekit.utils.units.Unit;
  29. import org.orekit.utils.units.UnitsCache;

  30. /** Lexical analyzer for Key-Value Notation CCSDS messages.
  31.  * @author Luc Maisonobe
  32.  * @since 11.0
  33.  */
  34. public class KvnLexicalAnalyzer implements LexicalAnalyzer {

  35.     /** Regular expression matching blanks at start of line. */
  36.     private static final String LINE_START         = "^\\p{Blank}*";

  37.     /** Regular expression matching the special COMMENT key that must be stored in the matcher. */
  38.     private static final String COMMENT_KEY        = "(COMMENT)\\p{Blank}*";

  39.     /** Regular expression matching a non-comment key that must be stored in the matcher. */
  40.     private static final String NON_COMMENT_KEY    = "([A-Z][A-Z_0-9]*)\\p{Blank}*=\\p{Blank}*";

  41.     /** Regular expression matching a no-value key starting a block that must be stored in the matcher. */
  42.     private static final String START_KEY          = "([A-Z][A-Z_0-9]*)_START";

  43.     /** Regular expression matching a no-value key ending a block that must be stored in the matcher. */
  44.     private static final String STOP_KEY           = "([A-Z][A-Z_0-9]*)_STOP";

  45.     /** Regular expression matching a value that must be stored in the matcher. */
  46.     private static final String OPTIONAL_VALUE     = "((?:(?:\\p{Graph}.*?)?))";

  47.     /** Operators allowed in units specifications. */
  48.     private static final String UNITS_OPERATORS    = "-+*×.·/⁄^√⁺⁻";

  49.     /** Letters allowed in units specifications. */
  50.     private static final String UNITS_LETTERS      = "A-Za-zµμ"; // beware µ (U+00B5) and μ (U+03BC) look similar but are different

  51.     /** Digits allowed in units specifications. */
  52.     private static final String UNITS_DIGITS       = "0-9⁰¹²³⁴⁵⁶⁷⁸⁹";

  53.     /** Fractions allowed in units specifications. */
  54.     private static final String UNITS_FRACTIONS    = "¼½¾⅐⅑⅒⅓⅔⅕⅖⅗⅘⅙⅚⅛⅜⅝⅞";

  55.     /** Symbols allowed in units specifications. */
  56.     private static final String UNITS_SYMBOLS      = "%°◦′'″\\\"#";

  57.     /** Parentheses allowed in units specifications. */
  58.     private static final String UNITS_PARENTHESES  = "()";

  59.     /** Regular expression matching units that must be stored in the matcher. */
  60.     private static final String UNITS              = "(?:\\p{Blank}+\\[([" +
  61.                                                      UNITS_OPERATORS + UNITS_LETTERS + UNITS_DIGITS +
  62.                                                      UNITS_FRACTIONS + UNITS_SYMBOLS + UNITS_PARENTHESES +
  63.                                                     "]*)\\])?";

  64.     /** Regular expression matching blanks at end of line. */
  65.     private static final String LINE_END           = "\\p{Blank}*$";

  66.     /** Regular expression matching comment entry. */
  67.     private static final Pattern COMMENT_ENTRY     = Pattern.compile(LINE_START + COMMENT_KEY + OPTIONAL_VALUE + LINE_END);

  68.     /** Regular expression matching non-comment entry with optional units.
  69.      * <p>
  70.      * Note than since 12.0, we allow empty values at lexical analysis level and detect them at parsing level
  71.      * </p>
  72.      */
  73.     private static final Pattern NON_COMMENT_ENTRY = Pattern.compile(LINE_START + NON_COMMENT_KEY + OPTIONAL_VALUE + UNITS + LINE_END);

  74.     /** Regular expression matching no-value entry starting a block. */
  75.     private static final Pattern START_ENTRY       = Pattern.compile(LINE_START + START_KEY + LINE_END);

  76.     /** Regular expression matching no-value entry ending a block. */
  77.     private static final Pattern STOP_ENTRY        = Pattern.compile(LINE_START + STOP_KEY + LINE_END);

  78.     /** Source providing the data to analyze. */
  79.     private final DataSource source;

  80.     /** Parsed units cache. */
  81.     private final UnitsCache cache;

  82.     /** Simple constructor.
  83.      * @param source source providing the data to parse
  84.      */
  85.     public KvnLexicalAnalyzer(final DataSource source) {
  86.         this.source = source;
  87.         this.cache  = new UnitsCache();
  88.     }

  89.     /** {@inheritDoc} */
  90.     @Override
  91.     public <T> T accept(final MessageParser<T> messageParser) {

  92.         messageParser.reset(FileFormat.KVN);

  93.         try (Reader         reader = source.getOpener().openReaderOnce();
  94.              BufferedReader br     = (reader == null) ? null : new BufferedReader(reader)) {

  95.             if (br == null) {
  96.                 throw new OrekitException(OrekitMessages.UNABLE_TO_FIND_FILE, source.getName());
  97.             }

  98.             int lineNumber = 0;
  99.             for (String line = br.readLine(); line != null; line = br.readLine()) {
  100.                 ++lineNumber;
  101.                 if (line.trim().length() == 0) {
  102.                     continue;
  103.                 }

  104.                 final Matcher nonComment = NON_COMMENT_ENTRY.matcher(line);
  105.                 if (nonComment.matches()) {
  106.                     // regular key=value line
  107.                     final Unit units = cache.getUnits(nonComment.groupCount() > 2 ? nonComment.group(3) : null);
  108.                     messageParser.process(new ParseToken(TokenType.ENTRY,
  109.                                                          nonComment.group(1), nonComment.group(2),
  110.                                                          units, lineNumber, source.getName()));
  111.                 } else {
  112.                     final Matcher comment = COMMENT_ENTRY.matcher(line);
  113.                     if (comment.matches()) {
  114.                         // comment line
  115.                         messageParser.process(new ParseToken(TokenType.ENTRY,
  116.                                                              comment.group(1), comment.group(2), null,
  117.                                                              lineNumber, source.getName()));
  118.                     } else {
  119.                         final Matcher start = START_ENTRY.matcher(line);
  120.                         if (start.matches()) {
  121.                             // block start
  122.                             messageParser.process(new ParseToken(TokenType.START,
  123.                                                                  start.group(1), null, null,
  124.                                                                  lineNumber, source.getName()));
  125.                         } else {
  126.                             final Matcher stop = STOP_ENTRY.matcher(line);
  127.                             if (stop.matches()) {
  128.                                 // block end
  129.                                 messageParser.process(new ParseToken(TokenType.STOP,
  130.                                                                      stop.group(1), null, null,
  131.                                                                      lineNumber, source.getName()));
  132.                             } else {
  133.                                 // raw data line
  134.                                 messageParser.process(new ParseToken(TokenType.RAW_LINE,
  135.                                                                      null, line, null,
  136.                                                                      lineNumber, source.getName()));
  137.                             }
  138.                         }
  139.                     }
  140.                 }

  141.             }

  142.             return messageParser.build();

  143.         } catch (IOException ioe) {
  144.             throw new OrekitException(ioe, new DummyLocalizable(ioe.getMessage()));
  145.         }
  146.     }

  147. }