-
Notifications
You must be signed in to change notification settings - Fork 163
Add Support for CharSetUtil and PreParser resolves #312 #360
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from all commits
Commits
Show all changes
3 commits
Select commit
Hold shift + click to select a range
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,187 @@ | ||
| /* | ||
| The contents of this file are subject to the Mozilla Public License Version 1.1 | ||
| (the "License"); you may not use this file except in compliance with the License. | ||
| You may obtain a copy of the License at http://www.mozilla.org/MPL/ | ||
| Software distributed under the License is distributed on an "AS IS" basis, | ||
| WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License for the | ||
| specific language governing rights and limitations under the License. | ||
|
|
||
| The Original Code is "CharSetUtil.java". | ||
|
|
||
| The Initial Developer of the Original Code is University Health Network. Copyright (C) | ||
| 2001. All Rights Reserved. | ||
|
|
||
| Contributor(s): Jens Kristian Villadsen from Cetrea A/S; Christian Ohr, Jake Aitchison | ||
|
|
||
| Alternatively, the contents of this file may be used under the terms of the | ||
| GNU General Public License (the "GPL"), in which case the provisions of the GPL are | ||
| applicable instead of those above. If you wish to allow use of your version of this | ||
| file only under the terms of the GPL and not to allow others to use your version | ||
| of this file under the MPL, indicate your decision by deleting the provisions above | ||
| and replace them with the notice and other provisions required by the GPL License. | ||
| If you do not delete the provisions above, a recipient may use your version of | ||
| this file under either the MPL or the GPL. | ||
| */ | ||
|
|
||
| namespace NHapi.Base.Llp | ||
| { | ||
| using System.Collections.Generic; | ||
| using System.Linq; | ||
| using System.Text; | ||
|
|
||
| using NHapi.Base.Log; | ||
| using NHapi.Base.Parser; | ||
| using NHapi.Base.PreParser; | ||
|
|
||
| public class CharSetUtility | ||
| { | ||
| private static readonly IHapiLog Log = HapiLogFactory.GetHapiLog(typeof(CharSetUtility)); | ||
|
|
||
/// <summary>
/// Returns a copy of the message bytes with the leading byte order mark (BOM), if any, removed.
/// </summary>
/// <param name="messageBytes">Raw HL7 message bytes, optionally prefixed with a BOM.</param>
/// <returns>A new array holding every byte that follows the detected BOM.</returns>
public static byte[] WithoutBom(byte[] messageBytes)
{
    // The detected BOM tells us how many leading bytes to drop (zero when no BOM matched).
    var bomLength = Bom.GetBom(messageBytes).BomBytes.Length;
    return messageBytes.Skip(bomLength).ToArray();
}
|
|
||
/// <summary>
/// Attempt to determine the HL7 character set (<see cref="Encoding"/>) of the HL7 message.
/// </summary>
/// <param name="message">HL7 message as bytes.</param>
/// <returns>The detected HL7 character set; ASCII (us-ascii) is used when none is detected.</returns>
public static Encoding CheckCharset(byte[] message)
    => CheckCharset(message, Encoding.ASCII);
|
|
||
/// <summary>
/// Attempt to determine the HL7 character set (<see cref="Encoding"/>) of the HL7 message.
/// </summary>
/// <param name="message">HL7 message as a byte array.</param>
/// <param name="encoding">
/// HL7 character set to use should one not be detected; <c>null</c> is treated as ASCII.
/// </param>
/// <returns>
/// The detected HL7 character set; when none is detected, the one provided by the
/// <paramref name="encoding"/> parameter.
/// </returns>
public static Encoding CheckCharset(byte[] message, Encoding encoding)
{
    // Decode the bytes to text (minus any leading byte order mark) and let the
    // string overload inspect MSH-18.
    return CheckCharset(Bom.SkipBom(message), encoding ?? Encoding.ASCII);
}
|
|
||
/// <summary>
/// Attempt to determine the HL7 character set (<see cref="Encoding"/>) of the HL7 message.
/// </summary>
/// <param name="message">HL7 message as a <see cref="string"/>.</param>
/// <returns>The detected HL7 character set; ASCII (us-ascii) is used when none is detected.</returns>
public static Encoding CheckCharset(string message)
    => CheckCharset(message, Encoding.ASCII);
|
|
||
/// <summary>
/// Attempt to determine the HL7 character set (<see cref="Encoding"/>) of the HL7 message.
/// </summary>
/// <param name="message">HL7 message as a <see cref="string"/>.</param>
/// <param name="encoding">
/// HL7 character set to use should one not be detected; <c>null</c> is treated as ASCII.
/// </param>
/// <returns>
/// The encoding mapped from the first repetition of MSH-18; when MSH-18 is empty, cannot be read,
/// or names a character set that cannot be mapped to an <see cref="Encoding"/>, the one provided
/// by the <paramref name="encoding"/> parameter.
/// </returns>
public static Encoding CheckCharset(string message, Encoding encoding)
{
    encoding ??= Encoding.ASCII;

    try
    {
        var fields = PreParser.GetFields(message, "MSH-18(0)");

        // Only 7-bit characters are kept, so stray bytes from a wrongly decoded
        // message cannot corrupt the character set name.
        var hl7CharsetName = StripNonLowAscii(fields[0]);
        if (hl7CharsetName.Length > 0)
        {
            try
            {
                encoding = Hl7CharSets.FromHl7Encoding(hl7CharsetName);
            }
            catch (System.ArgumentException ex)
            {
                // FromHl7Encoding throws ArgumentException for a white-space only name and when
                // Encoding.GetEncoding does not recognise the mapped name (e.g. "ISO IR6").
                // Either way the value is unusable, so fall back instead of failing the caller.
                Log.Warn($"Invalid or unsupported encoding in MSH-18. Defaulting to {encoding.EncodingName}", ex);
                return encoding;
            }
        }

        Log.Trace($"Detected MSH-18 value {hl7CharsetName} so using encoding {encoding.EncodingName}");
    }
    catch (EncodingNotSupportedException ex)
    {
        Log.Warn($"Invalid or unsupported encoding in MSH-18. Defaulting to {encoding.EncodingName}", ex);
    }
    catch (HL7Exception ex)
    {
        Log.Warn($"Failed to parse MSH segment. Defaulting to {encoding.EncodingName}", ex);
    }

    return encoding;
}
|
|
||
/// <summary>
/// Removes every character outside the range 1..126 from the given string.
/// </summary>
/// <param name="theString">Text to filter; may be <c>null</c>.</param>
/// <returns>
/// The characters of <paramref name="theString"/> whose code is greater than 0 and less than 127,
/// in their original order; an empty string when <paramref name="theString"/> is <c>null</c>.
/// </returns>
private static string StripNonLowAscii(string theString)
{
    if (theString == null)
    {
        return string.Empty;
    }

    // Keep only characters strictly between NUL (0) and DEL (127).
    var lowAsciiChars = theString.Where(ch => ch > 0 && ch < 127).ToArray();
    return new string(lowAsciiChars);
}
|
|
||
/// <summary>
/// Pairs a byte order mark (BOM) with the <see cref="System.Text.Encoding"/> it signals and
/// offers helpers to detect and strip a BOM at the start of raw message bytes.
/// </summary>
private class Bom
{
    // Entries are tested in order and the first match wins, so the 4-byte UTF-32LE mark
    // (FF FE 00 00) has to come before the 2-byte UTF-16LE mark (FF FE) it starts with.
    private static readonly IList<Bom> KnownBoms = new List<Bom>
    {
        new Bom(new byte[] { 0xFF, 0xFE, 0x00, 0x00 }, Encoding.UTF32), // UTF-32LE
        new Bom(new byte[] { 0x00, 0x00, 0xFE, 0xFF }, new UTF32Encoding(true, true)), // UTF-32BE
        new Bom(new byte[] { 0xEF, 0xBB, 0xBF }, Encoding.UTF8), // Unicode (UTF-8)
        new Bom(new byte[] { 0xFE, 0xFF }, Encoding.BigEndianUnicode), // UTF-16BE (mark is exactly FE FF)
        new Bom(new byte[] { 0xFF, 0xFE }, Encoding.Unicode), // UTF-16LE
    };

    // Used when no known mark is present: nothing to skip, bytes are read as ASCII.
    private static readonly Bom DefaultBom = new Bom(new byte[] { }, Encoding.ASCII); // ASCII (us-ascii)

    /// <summary>
    /// Creates a BOM description.
    /// </summary>
    /// <param name="bomBytes">The exact bytes of the mark; empty for "no BOM".</param>
    /// <param name="encoding">The encoding signalled by the mark.</param>
    public Bom(byte[] bomBytes, Encoding encoding)
    {
        BomBytes = bomBytes;
        Encoding = encoding;
    }

    /// <summary>Gets the bytes that make up the mark.</summary>
    public byte[] BomBytes { get; }

    /// <summary>Gets the encoding signalled by the mark.</summary>
    public Encoding Encoding { get; }

    /// <summary>
    /// Decodes the message bytes to a string, dropping any recognised leading BOM and using
    /// the encoding that BOM signals (ASCII when no BOM is recognised).
    /// </summary>
    /// <param name="messageBytes">Raw message bytes; must not be <c>null</c>.</param>
    /// <returns>The decoded message text without the BOM.</returns>
    public static string SkipBom(byte[] messageBytes)
    {
        var bom = GetBom(messageBytes);
        var messageBytesWithoutBom = messageBytes.Skip(bom.BomBytes.Length).ToArray();
        return bom.Encoding.GetString(messageBytesWithoutBom);
    }

    /// <summary>
    /// Finds the known BOM the message bytes start with.
    /// </summary>
    /// <param name="messageBytes">Raw message bytes; may be <c>null</c>.</param>
    /// <returns>
    /// The first known BOM matching the start of <paramref name="messageBytes"/>; the empty
    /// ASCII default when nothing matches or <paramref name="messageBytes"/> is <c>null</c>.
    /// </returns>
    public static Bom GetBom(byte[] messageBytes)
    {
        if (messageBytes == null)
        {
            // No bytes means no mark: report "no BOM" rather than an arbitrary known one.
            return DefaultBom;
        }

        foreach (var bom in KnownBoms)
        {
            // Take yields fewer bytes when the message is shorter than the mark,
            // in which case SequenceEqual is simply false.
            var messageBomBytes = messageBytes.Take(bom.BomBytes.Length);
            if (bom.BomBytes.SequenceEqual(messageBomBytes))
            {
                return bom;
            }
        }

        return DefaultBom;
    }
}
| } | ||
| } | ||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,98 @@ | ||
| /* | ||
| The contents of this file are subject to the Mozilla Public License Version 1.1 | ||
| (the "License"); you may not use this file except in compliance with the License. | ||
| You may obtain a copy of the License at http://www.mozilla.org/MPL/ | ||
| Software distributed under the License is distributed on an "AS IS" basis, | ||
| WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License for the | ||
| specific language governing rights and limitations under the License. | ||
|
|
||
| The Original Code is "HL7Charsets.java". Description: | ||
| "Content of HL7 table 0211 mapped to dotnet Encoding" | ||
|
|
||
| The Initial Developer of the Original Code is University Health Network. Copyright (C) | ||
| 2001. All Rights Reserved. | ||
|
|
||
| Contributor(s): Christian Ohr, Jake Aitchison | ||
|
|
||
| Alternatively, the contents of this file may be used under the terms of the | ||
| GNU General Public License (the "GPL"), in which case the provisions of the GPL are | ||
| applicable instead of those above. If you wish to allow use of your version of this | ||
| file only under the terms of the GPL and not to allow others to use your version | ||
| of this file under the MPL, indicate your decision by deleting the provisions above | ||
| and replace them with the notice and other provisions required by the GPL License. | ||
| If you do not delete the provisions above, a recipient may use your version of | ||
| this file under either the MPL or the GPL. | ||
| */ | ||
|
|
||
| namespace NHapi.Base.Llp | ||
| { | ||
| using System; | ||
| using System.Collections.Generic; | ||
| using System.Text; | ||
|
|
||
| using NHapi.Base.Parser; | ||
|
|
||
/// <summary>
/// Lookup from HL7 table 0211 character set names to dotnet <see cref="Encoding"/> instances.
/// </summary>
internal static class Hl7CharSets
{
    // Key: HL7 table 0211 value. Value: the name handed to Encoding.GetEncoding.
    // Entries without a trailing comment pass the HL7 name through unchanged.
    private static readonly Dictionary<string, string> EncodingMap = new ()
    {
        ["ASCII"] = Encoding.ASCII.BodyName, // ASCII (us-ascii)
        ["8859/1"] = "iso-8859-1", // Western European (ISO)
        ["8859/2"] = "iso-8859-2", // Central European (ISO)
        ["8859/3"] = "iso-8859-3", // Latin 3 (ISO)
        ["8859/4"] = "iso-8859-4", // Baltic (ISO)
        ["8859/5"] = "iso-8859-5", // Cyrillic (ISO)
        ["8859/6"] = "iso-8859-6", // Arabic (ISO)
        ["8859/7"] = "iso-8859-7", // Greek (ISO)
        ["8859/8"] = "iso-8859-8", // Hebrew (ISO-Visual)
        ["8859/9"] = "iso-8859-9", // Turkish (ISO)
        ["8859/15"] = "iso-8859-15", // Latin 9 (ISO)
        ["ISO IR6"] = "ISO IR6",
        ["ISO IR14"] = "ISO IR14",
        ["ISO IR87"] = "ISO IR87",
        ["ISO IR159"] = "ISO IR159",
        ["GB 18030-2000"] = "gb18030", // Chinese Simplified (GB18030)
        ["KS X 1001"] = "euc-kr", // Korean (EUC)
        ["CNS 11643-1992"] = "CNS 11643-1992",
        ["BIG-5"] = "big5", // Chinese Traditional (Big5)
        ["UNICODE"] = Encoding.UTF8.BodyName, // Unicode (UTF-8)
        ["UNICODE UTF-8"] = Encoding.UTF8.BodyName, // Unicode (UTF-8)
        ["UNICODE UTF-16"] = Encoding.Unicode.BodyName, // Unicode (UTF-16LE)
        ["UNICODE UTF-32"] = Encoding.UTF32.BodyName, // Unicode (UTF-32LE)
    };

    /// <summary>
    /// Returns the dotnet <see cref="Encoding"/> for the HL7 charset name.
    /// <a href="https://learn.microsoft.com/en-us/dotnet/api/system.text.encoding#list-of-encodings">list of supported encodings</a>.
    /// </summary>
    /// <param name="hl7EncodingName">HL7 table 0211 character set name; matched exactly against the known names.</param>
    /// <returns>The <see cref="Encoding"/> obtained from <see cref="Encoding.GetEncoding(string)"/> for the mapped name.</returns>
    /// <exception cref="ArgumentException">
    /// <paramref name="hl7EncodingName"/> is null, empty or white-space.
    /// -or-
    /// The name mapped from <paramref name="hl7EncodingName"/> is not a valid code page name.
    /// -or-
    /// The code page indicated by <paramref name="hl7EncodingName"/> is not supported by the underlying platform.
    /// </exception>
    /// <exception cref="EncodingNotSupportedException"><paramref name="hl7EncodingName"/> is unknown.</exception>
    public static Encoding FromHl7Encoding(string hl7EncodingName)
    {
        if (IsBlank(hl7EncodingName))
        {
            throw new ArgumentException("Should not be null empty or white-space.", nameof(hl7EncodingName));
        }

        return EncodingMap.TryGetValue(hl7EncodingName, out var dotnetEncodingName)
            ? Encoding.GetEncoding(dotnetEncodingName)
            : throw new EncodingNotSupportedException(hl7EncodingName);
    }

    /// <summary>
    /// Tells whether the value is null, empty or consists only of white-space.
    /// </summary>
    private static bool IsBlank(string value)
    {
#if NET35
        // string.IsNullOrWhiteSpace is not used on this target; trim and test the length instead.
        return string.IsNullOrEmpty(value) || value.Trim().Length == 0;
#else
        return string.IsNullOrWhiteSpace(value);
#endif
    }
}
| } |
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.