Source code

001package org.hl7.fhir.r5.formats;
002
/**
 * Serializes numbers the way RFC 8785 (JSON Canonicalization Scheme) requires:
 * the value is interpreted as an IEEE 754 double and printed with the
 * ECMAScript {@code Number::toString} algorithm (shortest digit string that
 * round-trips, plain notation for decimal exponents in (-7, 21), exponential
 * notation otherwise).
 *
 * <p>The digit generation is done with exact {@link java.math.BigDecimal}
 * arithmetic, so the output depends neither on the default locale nor on the
 * behaviour of {@code Double.toString} / {@code String.format} in the running JDK.
 */
public class JsonNumberCanonicalizer {

  /** Seventeen significant decimal digits always identify a double uniquely. */
  private static final int MAX_SIGNIFICANT_DIGITS = 17;

  /** Plain (non-exponential) notation is used while {@code n <= 21}. */
  private static final int MAX_PLAIN_POINT_POSITION = 21;

  /** Plain (non-exponential) notation is used while {@code n > -6}. */
  private static final int MIN_PLAIN_POINT_POSITION_EXCLUSIVE = -6;

  /**
   * Converts a number string to its canonical JSON representation per RFC 8785.
   *
   * @param numberString text accepted by {@link Double#parseDouble(String)}
   * @return the canonical form, e.g. {@code "1e+30"} for {@code "1E30"}
   * @throws IllegalArgumentException if the text is not a number, or denotes
   *         NaN or an infinity (including finite literals that overflow a double)
   * @throws NullPointerException if {@code numberString} is {@code null}
   */
  public static String toCanonicalJson(String numberString) {
    final double value;
    try {
      // RFC 8785 defines canonical numbers in terms of IEEE 754 doubles.
      value = Double.parseDouble(numberString);
    } catch (NumberFormatException e) {
      // Keep the cause so callers can see why parsing failed.
      throw new IllegalArgumentException("Invalid number format: " + numberString, e);
    }
    if (Double.isNaN(value)) {
      throw new IllegalArgumentException("NaN is not valid in JSON");
    }
    if (Double.isInfinite(value)) {
      throw new IllegalArgumentException("Infinity is not valid in JSON");
    }
    return doubleToCanonicalString(value);
  }

  /**
   * Same conversion as {@link #toCanonicalJson(String)}, kept for existing callers.
   * Differs only in its error reporting: an unparsable string surfaces as the
   * {@link NumberFormatException} thrown by {@link Double#parseDouble(String)}.
   *
   * @param numberString text accepted by {@link Double#parseDouble(String)}
   * @return the canonical form of the parsed double
   * @throws NumberFormatException if the text is not a number
   * @throws IllegalArgumentException if the value is NaN or infinite
   */
  public static String toCanonicalJsonPrecise(String numberString) {
    double value = Double.parseDouble(numberString);
    if (value == 0.0) {
      return "0";
    }
    if (Double.isNaN(value)) {
      throw new IllegalArgumentException("NaN not allowed");
    }
    if (Double.isInfinite(value)) {
      throw new IllegalArgumentException("Infinity not allowed");
    }
    return doubleToCanonicalString(value);
  }

  /**
   * Formats a finite double using the ECMAScript {@code Number::toString} rules.
   * Both zeros print as {@code "0"}; negative values are the formatted magnitude
   * prefixed with {@code '-'}.
   */
  private static String doubleToCanonicalString(double value) {
    if (value == 0.0) {
      return "0"; // also covers -0.0, which compares equal to 0.0
    }
    if (value < 0) {
      return "-" + formatWithEcmaScriptRules(-value);
    }
    return formatWithEcmaScriptRules(value);
  }

  /**
   * Formats a positive finite double: first find the shortest decimal that
   * round-trips, then lay its digits out in plain or exponential notation.
   */
  private static String formatWithEcmaScriptRules(double value) {
    return applyNotationRules(findShortestDecimal(value));
  }

  /**
   * Returns the decimal with the fewest significant digits that parses back to
   * exactly {@code value}; when several decimals of that length qualify, the
   * one closest to the exact binary value is chosen.
   *
   * <p>For each digit count the two neighbours of the exact value (rounded
   * towards and away from zero) are tested. Testing both matters because the
   * set of decimals that map to a double is not symmetric around it at powers
   * of two, so the nearest decimal is not always the one that round-trips.
   */
  private static java.math.BigDecimal findShortestDecimal(double value) {
    // new BigDecimal(double) is the exact binary value, not a rounded one.
    java.math.BigDecimal exact = new java.math.BigDecimal(value);
    for (int digits = 1; digits < MAX_SIGNIFICANT_DIGITS; digits++) {
      java.math.BigDecimal lower =
          exact.round(new java.math.MathContext(digits, java.math.RoundingMode.DOWN));
      java.math.BigDecimal upper =
          exact.round(new java.math.MathContext(digits, java.math.RoundingMode.UP));
      boolean lowerRoundTrips = lower.doubleValue() == value;
      boolean upperRoundTrips = upper.doubleValue() == value;
      if (lowerRoundTrips && upperRoundTrips) {
        int cmp = exact.subtract(lower).compareTo(upper.subtract(exact));
        // On an exact tie prefer the candidate whose digit string is even.
        boolean pickLower = cmp < 0 || (cmp == 0 && !lower.unscaledValue().testBit(0));
        return pickLower ? lower : upper;
      }
      if (lowerRoundTrips) {
        return lower;
      }
      if (upperRoundTrips) {
        return upper;
      }
    }
    // The nearest 17-digit decimal always identifies the double.
    return exact.round(
        new java.math.MathContext(MAX_SIGNIFICANT_DIGITS, java.math.RoundingMode.HALF_EVEN));
  }

  /**
   * Lays out a positive decimal according to ECMAScript {@code Number::toString}.
   * With {@code digits} being the significant digits, {@code k} their count and
   * {@code n} the position of the decimal point relative to the first digit
   * (value = 0.digits * 10^n):
   * <ul>
   *   <li>{@code k <= n <= 21}: digits followed by {@code n - k} zeros</li>
   *   <li>{@code 0 < n <= 21}: decimal point inserted after {@code n} digits</li>
   *   <li>{@code -6 < n <= 0}: {@code "0."}, {@code -n} zeros, then the digits</li>
   *   <li>otherwise: {@code d[.ddd]e±x} with {@code x = n - 1}</li>
   * </ul>
   */
  private static String applyNotationRules(java.math.BigDecimal shortest) {
    java.math.BigDecimal normalized = shortest.stripTrailingZeros();
    String digits = normalized.unscaledValue().toString();
    int k = digits.length();
    int n = k - normalized.scale();

    StringBuilder out = new StringBuilder(k + 8);
    if (k <= n && n <= MAX_PLAIN_POINT_POSITION) {
      out.append(digits);
      appendZeros(out, n - k);
    } else if (0 < n && n <= MAX_PLAIN_POINT_POSITION) {
      out.append(digits, 0, n).append('.').append(digits, n, k);
    } else if (MIN_PLAIN_POINT_POSITION_EXCLUSIVE < n && n <= 0) {
      out.append("0.");
      appendZeros(out, -n);
      out.append(digits);
    } else {
      int exponent = n - 1;
      out.append(digits.charAt(0));
      if (k > 1) {
        out.append('.').append(digits, 1, k);
      }
      // ECMAScript always writes the exponent sign and never pads the exponent.
      out.append('e').append(exponent < 0 ? '-' : '+').append(Math.abs(exponent));
    }
    return out.toString();
  }

  /** Appends {@code count} '0' characters to {@code out}. */
  private static void appendZeros(StringBuilder out, int count) {
    for (int i = 0; i < count; i++) {
      out.append('0');
    }
  }

}