diff --git a/Emby.Server.Implementations/TV/TVSeriesManager.cs b/Emby.Server.Implementations/TV/TVSeriesManager.cs index 6525574667..3a9d99aa16 100644 --- a/Emby.Server.Implementations/TV/TVSeriesManager.cs +++ b/Emby.Server.Implementations/TV/TVSeriesManager.cs @@ -1,14 +1,14 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using MediaBrowser.Controller.Configuration; +using MediaBrowser.Controller.Dto; using MediaBrowser.Controller.Entities; using MediaBrowser.Controller.Entities.TV; using MediaBrowser.Controller.Library; using MediaBrowser.Controller.TV; using MediaBrowser.Model.Entities; using MediaBrowser.Model.Querying; -using System; -using System.Collections.Generic; -using System.Linq; -using MediaBrowser.Controller.Configuration; -using MediaBrowser.Controller.Dto; namespace Emby.Server.Implementations.TV { diff --git a/Emby.Server.Implementations/TextEncoding/NLangDetect/DetectorFactory.cs b/Emby.Server.Implementations/TextEncoding/NLangDetect/DetectorFactory.cs index 9d75b83566..c80757e682 100644 --- a/Emby.Server.Implementations/TextEncoding/NLangDetect/DetectorFactory.cs +++ b/Emby.Server.Implementations/TextEncoding/NLangDetect/DetectorFactory.cs @@ -1,10 +1,8 @@ using System; using System.Collections.Generic; -using System.IO; -using System.IO.Compression; -using NLangDetect.Core.Utils; -using MediaBrowser.Model.Serialization; using System.Linq; +using MediaBrowser.Model.Serialization; +using NLangDetect.Core.Utils; namespace NLangDetect.Core { diff --git a/Emby.Server.Implementations/TextEncoding/NLangDetect/ErrorCode.cs b/Emby.Server.Implementations/TextEncoding/NLangDetect/ErrorCode.cs index 3ffd3b2d91..cba084c8bb 100644 --- a/Emby.Server.Implementations/TextEncoding/NLangDetect/ErrorCode.cs +++ b/Emby.Server.Implementations/TextEncoding/NLangDetect/ErrorCode.cs @@ -1,15 +1,15 @@ namespace NLangDetect.Core { - public enum ErrorCode - { - NoTextError, - FormatError, - FileLoadError, - DuplicateLangError, - NeedLoadProfileError, - CantDetectError, - CantOpenTrainData, - TrainDataFormatError, - InitParamError, - } + public enum ErrorCode + { + NoTextError, + FormatError, + FileLoadError, + DuplicateLangError, + NeedLoadProfileError, + CantDetectError, + CantOpenTrainData, + TrainDataFormatError, + InitParamError, + } } diff --git a/Emby.Server.Implementations/TextEncoding/NLangDetect/Extensions/CharExtensions.cs b/Emby.Server.Implementations/TextEncoding/NLangDetect/Extensions/CharExtensions.cs index cd77a30eb3..24fc6cd87a 100644 --- a/Emby.Server.Implementations/TextEncoding/NLangDetect/Extensions/CharExtensions.cs +++ b/Emby.Server.Implementations/TextEncoding/NLangDetect/Extensions/CharExtensions.cs @@ -2,13 +2,13 @@ namespace NLangDetect.Core.Extensions { - public static class CharExtensions - { - private const int MIN_CODE_POINT = 0x000000; - private const int MAX_CODE_POINT = 0x10ffff; + public static class CharExtensions + { + private const int MIN_CODE_POINT = 0x000000; + private const int MAX_CODE_POINT = 0x10ffff; - private static readonly int[] _unicodeBlockStarts = - { + private static readonly int[] _unicodeBlockStarts = + { #region Unicode block starts 0x0000, // Basic Latin @@ -165,8 +165,8 @@ namespace NLangDetect.Core.Extensions #endregion }; - private static readonly UnicodeBlock?[] _unicodeBlocks = - { + private static readonly UnicodeBlock?[] _unicodeBlocks = + { #region Unicode blocks UnicodeBlock.BasicLatin, UnicodeBlock.Latin1Supplement, @@ -322,53 +322,53 @@ namespace NLangDetect.Core.Extensions #endregion }; - #region Public methods + #region Public methods - /// - /// Taken from JDK source: http://grepcode.com/file/repository.grepcode.com/java/root/jdk/openjdk/6-b14/java/lang/Character.java#Character.UnicodeBlock.0LATIN_EXTENDED_ADDITIONAL - /// - public static UnicodeBlock? GetUnicodeBlock(this char ch) - { - int codePoint = ch; - - if (!IsValidCodePoint(codePoint)) - { - throw new ArgumentException("Argument is not a valid code point.", nameof(ch)); - } - - int top, bottom, current; - - bottom = 0; - top = _unicodeBlockStarts.Length; - current = top / 2; - - // invariant: top > current >= bottom && codePoint >= unicodeBlockStarts[bottom] - while (top - bottom > 1) - { - if (codePoint >= _unicodeBlockStarts[current]) + /// + /// Taken from JDK source: http://grepcode.com/file/repository.grepcode.com/java/root/jdk/openjdk/6-b14/java/lang/Character.java#Character.UnicodeBlock.0LATIN_EXTENDED_ADDITIONAL + /// + public static UnicodeBlock? GetUnicodeBlock(this char ch) { - bottom = current; - } - else - { - top = current; + int codePoint = ch; + + if (!IsValidCodePoint(codePoint)) + { + throw new ArgumentException("Argument is not a valid code point.", nameof(ch)); + } + + int top, bottom, current; + + bottom = 0; + top = _unicodeBlockStarts.Length; + current = top / 2; + + // invariant: top > current >= bottom && codePoint >= unicodeBlockStarts[bottom] + while (top - bottom > 1) + { + if (codePoint >= _unicodeBlockStarts[current]) + { + bottom = current; + } + else + { + top = current; + } + + current = (top + bottom) / 2; + } + + return _unicodeBlocks[current]; } - current = (top + bottom) / 2; - } + #endregion - return _unicodeBlocks[current]; + #region Private helper methods + + private static bool IsValidCodePoint(int codePoint) + { + return codePoint >= MIN_CODE_POINT && codePoint <= MAX_CODE_POINT; + } + + #endregion } - - #endregion - - #region Private helper methods - - private static bool IsValidCodePoint(int codePoint) - { - return codePoint >= MIN_CODE_POINT && codePoint <= MAX_CODE_POINT; - } - - #endregion - } } diff --git a/Emby.Server.Implementations/TextEncoding/NLangDetect/Extensions/RandomExtensions.cs b/Emby.Server.Implementations/TextEncoding/NLangDetect/Extensions/RandomExtensions.cs index d55ca80df6..ec8784df33 100644 --- a/Emby.Server.Implementations/TextEncoding/NLangDetect/Extensions/RandomExtensions.cs +++ b/Emby.Server.Implementations/TextEncoding/NLangDetect/Extensions/RandomExtensions.cs @@ -2,50 +2,50 @@ namespace NLangDetect.Core.Extensions { - public static class RandomExtensions - { - private const double _Epsilon = 2.22044604925031E-15; - - private static readonly object _mutex = new object(); - - private static double _nextNextGaussian; - private static bool _hasNextNextGaussian; - - /// - /// Returns the next pseudorandom, Gaussian ("normally") distributed double value with mean 0.0 and standard deviation 1.0 from this random number generator's sequence. - /// The general contract of nextGaussian is that one double value, chosen from (approximately) the usual normal distribution with mean 0.0 and standard deviation 1.0, is pseudorandomly generated and returned. - /// - /// - /// Taken from: http://download.oracle.com/javase/6/docs/api/java/util/Random.html (nextGaussian()) - /// - public static double NextGaussian(this Random random) + public static class RandomExtensions { - lock (_mutex) - { - if (_hasNextNextGaussian) + private const double _Epsilon = 2.22044604925031E-15; + + private static readonly object _mutex = new object(); + + private static double _nextNextGaussian; + private static bool _hasNextNextGaussian; + + /// + /// Returns the next pseudorandom, Gaussian ("normally") distributed double value with mean 0.0 and standard deviation 1.0 from this random number generator's sequence. + /// The general contract of nextGaussian is that one double value, chosen from (approximately) the usual normal distribution with mean 0.0 and standard deviation 1.0, is pseudorandomly generated and returned. + /// + /// + /// Taken from: http://download.oracle.com/javase/6/docs/api/java/util/Random.html (nextGaussian()) + /// + public static double NextGaussian(this Random random) { - _hasNextNextGaussian = false; + lock (_mutex) + { + if (_hasNextNextGaussian) + { + _hasNextNextGaussian = false; - return _nextNextGaussian; + return _nextNextGaussian; + } + + double v1, v2, s; + + do + { + v1 = 2.0 * random.NextDouble() - 1.0; // between -1.0 and 1.0 + v2 = 2.0 * random.NextDouble() - 1.0; // between -1.0 and 1.0 + s = v1 * v1 + v2 * v2; + } + while (s >= 1.0 || Math.Abs(s - 0.0) < _Epsilon); + + double multiplier = Math.Sqrt(-2.0 * Math.Log(s) / s); + + _nextNextGaussian = v2 * multiplier; + _hasNextNextGaussian = true; + + return v1 * multiplier; + } } - - double v1, v2, s; - - do - { - v1 = 2.0 * random.NextDouble() - 1.0; // between -1.0 and 1.0 - v2 = 2.0 * random.NextDouble() - 1.0; // between -1.0 and 1.0 - s = v1 * v1 + v2 * v2; - } - while (s >= 1.0 || Math.Abs(s - 0.0) < _Epsilon); - - double multiplier = Math.Sqrt(-2.0 * Math.Log(s) / s); - - _nextNextGaussian = v2 * multiplier; - _hasNextNextGaussian = true; - - return v1 * multiplier; - } } - } } diff --git a/Emby.Server.Implementations/TextEncoding/NLangDetect/Extensions/UnicodeBlock.cs b/Emby.Server.Implementations/TextEncoding/NLangDetect/Extensions/UnicodeBlock.cs index 71b5de75e1..bda12fc2f2 100644 --- a/Emby.Server.Implementations/TextEncoding/NLangDetect/Extensions/UnicodeBlock.cs +++ b/Emby.Server.Implementations/TextEncoding/NLangDetect/Extensions/UnicodeBlock.cs @@ -1,131 +1,131 @@ namespace NLangDetect.Core.Extensions { - public enum UnicodeBlock - { - BasicLatin, - Latin1Supplement, - LatinExtendedA, - LatinExtendedB, - IpaExtensions, - SpacingModifierLetters, - CombiningDiacriticalMarks, - Greek, - Cyrillic, - CyrillicSupplementary, - Armenian, - Hebrew, - Arabic, - Syriac, - Thaana, - Devanagari, - Bengali, - Gurmukhi, - Gujarati, - Oriya, - Tamil, - Telugu, - Kannada, - Malayalam, - Sinhala, - Thai, - Lao, - Tibetan, - Myanmar, - Georgian, - HangulJamo, - Ethiopic, - Cherokee, - UnifiedCanadianAboriginalSyllabics, - Ogham, - Runic, - Tagalog, - Hanunoo, - Buhid, - Tagbanwa, - Khmer, - Mongolian, - Limbu, - TaiLe, - KhmerSymbols, - PhoneticExtensions, - LatinExtendedAdditional, - GreekExtended, - GeneralPunctuation, - SuperscriptsAndSubscripts, - CurrencySymbols, - CombiningMarksForSymbols, - LetterlikeSymbols, - NumberForms, - Arrows, - MathematicalOperators, - MiscellaneousTechnical, - ControlPictures, - OpticalCharacterRecognition, - EnclosedAlphanumerics, - BoxDrawing, - BlockElements, - GeometricShapes, - MiscellaneousSymbols, - Dingbats, - MiscellaneousMathematicalSymbolsA, - SupplementalArrowsA, - BraillePatterns, - SupplementalArrowsB, - MiscellaneousMathematicalSymbolsB, - SupplementalMathematicalOperators, - MiscellaneousSymbolsAndArrows, - CjkRadicalsSupplement, - KangxiRadicals, - IdeographicDescriptionCharacters, - CjkSymbolsAndPunctuation, - Hiragana, - Katakana, - Bopomofo, - HangulCompatibilityJamo, - Kanbun, - BopomofoExtended, - KatakanaPhoneticExtensions, - EnclosedCjkLettersAndMonths, - CjkCompatibility, - CjkUnifiedIdeographsExtensionA, - YijingHexagramSymbols, - CjkUnifiedIdeographs, - YiSyllables, - YiRadicals, - HangulSyllables, - HighSurrogates, - HighPrivateUseSurrogates, - LowSurrogates, - PrivateUseArea, - CjkCompatibilityIdeographs, - AlphabeticPresentationForms, - ArabicPresentationFormsA, - VariationSelectors, - CombiningHalfMarks, - CjkCompatibilityForms, - SmallFormVariants, - ArabicPresentationFormsB, - HalfwidthAndFullwidthForms, - Specials, - LinearBSyllabary, - LinearBIdeograms, - AegeanNumbers, - OldItalic, - Gothic, - Ugaritic, - Deseret, - Shavian, - Osmanya, - CypriotSyllabary, - ByzantineMusicalSymbols, - MusicalSymbols, - TaiXuanJingSymbols, - MathematicalAlphanumericSymbols, - CjkUnifiedIdeographsExtensionB, - CjkCompatibilityIdeographsSupplement, - Tags, - VariationSelectorsSupplement, - SupplementaryPrivateUseAreaA, - SupplementaryPrivateUseAreaB, - } + public enum UnicodeBlock + { + BasicLatin, + Latin1Supplement, + LatinExtendedA, + LatinExtendedB, + IpaExtensions, + SpacingModifierLetters, + CombiningDiacriticalMarks, + Greek, + Cyrillic, + CyrillicSupplementary, + Armenian, + Hebrew, + Arabic, + Syriac, + Thaana, + Devanagari, + Bengali, + Gurmukhi, + Gujarati, + Oriya, + Tamil, + Telugu, + Kannada, + Malayalam, + Sinhala, + Thai, + Lao, + Tibetan, + Myanmar, + Georgian, + HangulJamo, + Ethiopic, + Cherokee, + UnifiedCanadianAboriginalSyllabics, + Ogham, + Runic, + Tagalog, + Hanunoo, + Buhid, + Tagbanwa, + Khmer, + Mongolian, + Limbu, + TaiLe, + KhmerSymbols, + PhoneticExtensions, + LatinExtendedAdditional, + GreekExtended, + GeneralPunctuation, + SuperscriptsAndSubscripts, + CurrencySymbols, + CombiningMarksForSymbols, + LetterlikeSymbols, + NumberForms, + Arrows, + MathematicalOperators, + MiscellaneousTechnical, + ControlPictures, + OpticalCharacterRecognition, + EnclosedAlphanumerics, + BoxDrawing, + BlockElements, + GeometricShapes, + MiscellaneousSymbols, + Dingbats, + MiscellaneousMathematicalSymbolsA, + SupplementalArrowsA, + BraillePatterns, + SupplementalArrowsB, + MiscellaneousMathematicalSymbolsB, + SupplementalMathematicalOperators, + MiscellaneousSymbolsAndArrows, + CjkRadicalsSupplement, + KangxiRadicals, + IdeographicDescriptionCharacters, + CjkSymbolsAndPunctuation, + Hiragana, + Katakana, + Bopomofo, + HangulCompatibilityJamo, + Kanbun, + BopomofoExtended, + KatakanaPhoneticExtensions, + EnclosedCjkLettersAndMonths, + CjkCompatibility, + CjkUnifiedIdeographsExtensionA, + YijingHexagramSymbols, + CjkUnifiedIdeographs, + YiSyllables, + YiRadicals, + HangulSyllables, + HighSurrogates, + HighPrivateUseSurrogates, + LowSurrogates, + PrivateUseArea, + CjkCompatibilityIdeographs, + AlphabeticPresentationForms, + ArabicPresentationFormsA, + VariationSelectors, + CombiningHalfMarks, + CjkCompatibilityForms, + SmallFormVariants, + ArabicPresentationFormsB, + HalfwidthAndFullwidthForms, + Specials, + LinearBSyllabary, + LinearBIdeograms, + AegeanNumbers, + OldItalic, + Gothic, + Ugaritic, + Deseret, + Shavian, + Osmanya, + CypriotSyllabary, + ByzantineMusicalSymbols, + MusicalSymbols, + TaiXuanJingSymbols, + MathematicalAlphanumericSymbols, + CjkUnifiedIdeographsExtensionB, + CjkCompatibilityIdeographsSupplement, + Tags, + VariationSelectorsSupplement, + SupplementaryPrivateUseAreaA, + SupplementaryPrivateUseAreaB, + } } diff --git a/Emby.Server.Implementations/TextEncoding/NLangDetect/GenProfile.cs b/Emby.Server.Implementations/TextEncoding/NLangDetect/GenProfile.cs index 5895f68aea..c2b007c052 100644 --- a/Emby.Server.Implementations/TextEncoding/NLangDetect/GenProfile.cs +++ b/Emby.Server.Implementations/TextEncoding/NLangDetect/GenProfile.cs @@ -1,67 +1,67 @@ using System; +using System.IO; using System.IO.Compression; using System.Xml; using NLangDetect.Core.Utils; -using System.IO; namespace NLangDetect.Core { - // TODO IMM HI: xml reader not tested - public static class GenProfile - { - #region Public methods - - public static LangProfile load(string lang, string file) + // TODO IMM HI: xml reader not tested + public static class GenProfile { - LangProfile profile = new LangProfile(lang); - TagExtractor tagextractor = new TagExtractor("abstract", 100); - Stream inputStream = null; + #region Public methods - try - { - inputStream = File.OpenRead(file); - - string extension = Path.GetExtension(file) ?? ""; - - if (extension.ToUpper() == ".GZ") + public static LangProfile load(string lang, string file) { - inputStream = new GZipStream(inputStream, CompressionMode.Decompress); - } + LangProfile profile = new LangProfile(lang); + TagExtractor tagextractor = new TagExtractor("abstract", 100); + Stream inputStream = null; - using (XmlReader xmlReader = XmlReader.Create(inputStream)) - { - while (xmlReader.Read()) - { - switch (xmlReader.NodeType) + try { - case XmlNodeType.Element: - tagextractor.SetTag(xmlReader.Name); - break; + inputStream = File.OpenRead(file); - case XmlNodeType.Text: - tagextractor.Add(xmlReader.Value); - break; + string extension = Path.GetExtension(file) ?? ""; - case XmlNodeType.EndElement: - tagextractor.CloseTag(profile); - break; + if (extension.ToUpper() == ".GZ") + { + inputStream = new GZipStream(inputStream, CompressionMode.Decompress); + } + + using (XmlReader xmlReader = XmlReader.Create(inputStream)) + { + while (xmlReader.Read()) + { + switch (xmlReader.NodeType) + { + case XmlNodeType.Element: + tagextractor.SetTag(xmlReader.Name); + break; + + case XmlNodeType.Text: + tagextractor.Add(xmlReader.Value); + break; + + case XmlNodeType.EndElement: + tagextractor.CloseTag(profile); + break; + } + } + } + } + finally + { + if (inputStream != null) + { + inputStream.Close(); + } } - } - } - } - finally - { - if (inputStream != null) - { - inputStream.Close(); - } - } - Console.WriteLine(lang + ": " + tagextractor.Count); + Console.WriteLine(lang + ": " + tagextractor.Count); - return profile; + return profile; + } + + #endregion } - - #endregion - } } diff --git a/Emby.Server.Implementations/TextEncoding/NLangDetect/InternalException.cs b/Emby.Server.Implementations/TextEncoding/NLangDetect/InternalException.cs index 32e50a219b..11e8f876d8 100644 --- a/Emby.Server.Implementations/TextEncoding/NLangDetect/InternalException.cs +++ b/Emby.Server.Implementations/TextEncoding/NLangDetect/InternalException.cs @@ -2,21 +2,21 @@ namespace NLangDetect.Core { - [Serializable] - public class InternalException : Exception - { - #region Constructor(s) - - public InternalException(string message, Exception innerException) - : base(message, innerException) + [Serializable] + public class InternalException : Exception { - } + #region Constructor(s) - public InternalException(string message) - : this(message, null) - { - } + public InternalException(string message, Exception innerException) + : base(message, innerException) + { + } - #endregion - } + public InternalException(string message) + : this(message, null) + { + } + + #endregion + } } diff --git a/Emby.Server.Implementations/TextEncoding/NLangDetect/Language.cs b/Emby.Server.Implementations/TextEncoding/NLangDetect/Language.cs index f4b4b153ee..e15263c051 100644 --- a/Emby.Server.Implementations/TextEncoding/NLangDetect/Language.cs +++ b/Emby.Server.Implementations/TextEncoding/NLangDetect/Language.cs @@ -2,44 +2,44 @@ using System.Globalization; namespace NLangDetect.Core { - // TODO IMM HI: name?? - public class Language - { - #region Constructor(s) - - public Language(string name, double probability) + // TODO IMM HI: name?? + public class Language { - Name = name; - Probability = probability; + #region Constructor(s) + + public Language(string name, double probability) + { + Name = name; + Probability = probability; + } + + #endregion + + #region Object overrides + + public override string ToString() + { + if (Name == null) + { + return ""; + } + + return + string.Format( + CultureInfo.InvariantCulture.NumberFormat, + "{0}:{1:0.000000}", + Name, + Probability); + } + + #endregion + + #region Properties + + public string Name { get; set; } + + public double Probability { get; set; } + + #endregion } - - #endregion - - #region Object overrides - - public override string ToString() - { - if (Name == null) - { - return ""; - } - - return - string.Format( - CultureInfo.InvariantCulture.NumberFormat, - "{0}:{1:0.000000}", - Name, - Probability); - } - - #endregion - - #region Properties - - public string Name { get; set; } - - public double Probability { get; set; } - - #endregion - } } diff --git a/Emby.Server.Implementations/TextEncoding/NLangDetect/NLangDetectException.cs b/Emby.Server.Implementations/TextEncoding/NLangDetect/NLangDetectException.cs index e0d066020c..99825bcf38 100644 --- a/Emby.Server.Implementations/TextEncoding/NLangDetect/NLangDetectException.cs +++ b/Emby.Server.Implementations/TextEncoding/NLangDetect/NLangDetectException.cs @@ -2,22 +2,22 @@ namespace NLangDetect.Core { - public class NLangDetectException : Exception - { - #region Constructor(s) - - public NLangDetectException(string message, ErrorCode errorCode) - : base(message) + public class NLangDetectException : Exception { - ErrorCode = errorCode; + #region Constructor(s) + + public NLangDetectException(string message, ErrorCode errorCode) + : base(message) + { + ErrorCode = errorCode; + } + + #endregion + + #region Properties + + public ErrorCode ErrorCode { get; private set; } + + #endregion } - - #endregion - - #region Properties - - public ErrorCode ErrorCode { get; private set; } - - #endregion - } } diff --git a/Emby.Server.Implementations/TextEncoding/NLangDetect/ProbVector.cs b/Emby.Server.Implementations/TextEncoding/NLangDetect/ProbVector.cs index c5a20dbf0a..0684d91c31 100644 --- a/Emby.Server.Implementations/TextEncoding/NLangDetect/ProbVector.cs +++ b/Emby.Server.Implementations/TextEncoding/NLangDetect/ProbVector.cs @@ -3,33 +3,33 @@ using System.Collections.Generic; namespace NLangDetect.Core { - public class ProbVector - { - private readonly Dictionary _dict = new Dictionary(); - - public double this[int key] + public class ProbVector { - get - { - double value; + private readonly Dictionary _dict = new Dictionary(); - return _dict.TryGetValue(key, out value) ? value : 0.0; - } - - set - { - if (Math.Abs(value) < double.Epsilon) + public double this[int key] { - if (_dict.ContainsKey(key)) - { - _dict.Remove(key); - } + get + { + double value; - return; + return _dict.TryGetValue(key, out value) ? value : 0.0; + } + + set + { + if (Math.Abs(value) < double.Epsilon) + { + if (_dict.ContainsKey(key)) + { + _dict.Remove(key); + } + + return; + } + + _dict[key] = value; + } } - - _dict[key] = value; - } } - } } diff --git a/Emby.Server.Implementations/TextEncoding/NLangDetect/Utils/Messages.cs b/Emby.Server.Implementations/TextEncoding/NLangDetect/Utils/Messages.cs index 1d605cc47f..058f350b2c 100644 --- a/Emby.Server.Implementations/TextEncoding/NLangDetect/Utils/Messages.cs +++ b/Emby.Server.Implementations/TextEncoding/NLangDetect/Utils/Messages.cs @@ -1,10 +1,9 @@ +using System; using System.Collections.Generic; using System.Globalization; using System.IO; -using System.Reflection; -using System.Text.RegularExpressions; using System.Linq; -using System; +using System.Text.RegularExpressions; namespace NLangDetect.Core.Utils { @@ -29,7 +28,7 @@ namespace NLangDetect.Core.Utils private static Dictionary LoadMessages() { - var manifestName = typeof(Messages).Assembly.GetManifestResourceNames().FirstOrDefault(i => i.IndexOf("messages.properties", StringComparison.Ordinal) != -1) ; + var manifestName = typeof(Messages).Assembly.GetManifestResourceNames().FirstOrDefault(i => i.IndexOf("messages.properties", StringComparison.Ordinal) != -1); Stream messagesStream = typeof(Messages).Assembly diff --git a/Emby.Server.Implementations/TextEncoding/NLangDetect/Utils/NGram.cs b/Emby.Server.Implementations/TextEncoding/NLangDetect/Utils/NGram.cs index b1738f7ca9..2d29ec6973 100644 --- a/Emby.Server.Implementations/TextEncoding/NLangDetect/Utils/NGram.cs +++ b/Emby.Server.Implementations/TextEncoding/NLangDetect/Utils/NGram.cs @@ -6,14 +6,14 @@ using NLangDetect.Core.Extensions; namespace NLangDetect.Core.Utils { - public class NGram - { - public const int GramsCount = 3; + public class NGram + { + public const int GramsCount = 3; - private static readonly string Latin1Excluded = Messages.getString("NGram.LATIN1_EXCLUDE"); + private static readonly string Latin1Excluded = Messages.getString("NGram.LATIN1_EXCLUDE"); - private static readonly string[] CjkClass = - { + private static readonly string[] CjkClass = + { #region CJK classes Messages.getString("NGram.KANJI_1_0"), @@ -146,185 +146,185 @@ namespace NLangDetect.Core.Utils #endregion }; - private static readonly Dictionary _cjkMap; + private static readonly Dictionary _cjkMap; - private StringBuilder _grams; - private bool _capitalword; + private StringBuilder _grams; + private bool _capitalword; - #region Constructor(s) + #region Constructor(s) - static NGram() - { - _cjkMap = new Dictionary(); - - foreach (string cjk_list in CjkClass) - { - char representative = cjk_list[0]; - - for (int i = 0; i < cjk_list.Length; i++) + static NGram() { - _cjkMap.Add(cjk_list[i], representative); - } - } - } + _cjkMap = new Dictionary(); - public NGram() - { - _grams = new StringBuilder(" "); - _capitalword = false; - } - - #endregion - - #region Public methods - - public static char Normalize(char ch) - { - UnicodeBlock? unicodeBlock = ch.GetUnicodeBlock(); - - if (!unicodeBlock.HasValue) - { - return ch; - } - - switch (unicodeBlock.Value) - { - case UnicodeBlock.BasicLatin: - { - if (ch < 'A' || (ch < 'a' && ch > 'Z') || ch > 'z') + foreach (string cjk_list in CjkClass) { - return ' '; + char representative = cjk_list[0]; + + for (int i = 0; i < cjk_list.Length; i++) + { + _cjkMap.Add(cjk_list[i], representative); + } } - - break; - } - - case UnicodeBlock.Latin1Supplement: - { - if (Latin1Excluded.IndexOf(ch) >= 0) - { - return ' '; - } - - break; - } - - case UnicodeBlock.GeneralPunctuation: - { - return ' '; - } - - case UnicodeBlock.Arabic: - { - if (ch == '\u06cc') - { - return '\u064a'; - } - - break; - } - - case UnicodeBlock.LatinExtendedAdditional: - { - if (ch >= '\u1ea0') - { - return '\u1ec3'; - } - - break; - } - - case UnicodeBlock.Hiragana: - { - return '\u3042'; - } - - case UnicodeBlock.Katakana: - { - return '\u30a2'; - } - - case UnicodeBlock.Bopomofo: - case UnicodeBlock.BopomofoExtended: - { - return '\u3105'; - } - - case UnicodeBlock.CjkUnifiedIdeographs: - { - if (_cjkMap.ContainsKey(ch)) - { - return _cjkMap[ch]; - } - - break; - } - - case UnicodeBlock.HangulSyllables: - { - return '\uac00'; - } - } - - return ch; - } - - public void AddChar(char ch) - { - ch = Normalize(ch); - char lastchar = _grams[_grams.Length - 1]; - if (lastchar == ' ') - { - _grams = new StringBuilder(" "); - _capitalword = false; - if (ch == ' ') return; - } - else if (_grams.Length >= GramsCount) - { - _grams.Remove(0, 1); - } - _grams.Append(ch); - - if (char.IsUpper(ch)) - { - if (char.IsUpper(lastchar)) _capitalword = true; - } - else - { - _capitalword = false; - } - } - - public string Get(int n) - { - if (_capitalword) - { - return null; - } - - int len = _grams.Length; - - if (n < 1 || n > 3 || len < n) - { - return null; - } - - if (n == 1) - { - char ch = _grams[len - 1]; - - if (ch == ' ') - { - return null; } - return ch.ToString(); - } + public NGram() + { + _grams = new StringBuilder(" "); + _capitalword = false; + } - // TODO IMM HI: is ToString() here effective? - return _grams.ToString().SubSequence(len - n, len); + #endregion + + #region Public methods + + public static char Normalize(char ch) + { + UnicodeBlock? unicodeBlock = ch.GetUnicodeBlock(); + + if (!unicodeBlock.HasValue) + { + return ch; + } + + switch (unicodeBlock.Value) + { + case UnicodeBlock.BasicLatin: + { + if (ch < 'A' || (ch < 'a' && ch > 'Z') || ch > 'z') + { + return ' '; + } + + break; + } + + case UnicodeBlock.Latin1Supplement: + { + if (Latin1Excluded.IndexOf(ch) >= 0) + { + return ' '; + } + + break; + } + + case UnicodeBlock.GeneralPunctuation: + { + return ' '; + } + + case UnicodeBlock.Arabic: + { + if (ch == '\u06cc') + { + return '\u064a'; + } + + break; + } + + case UnicodeBlock.LatinExtendedAdditional: + { + if (ch >= '\u1ea0') + { + return '\u1ec3'; + } + + break; + } + + case UnicodeBlock.Hiragana: + { + return '\u3042'; + } + + case UnicodeBlock.Katakana: + { + return '\u30a2'; + } + + case UnicodeBlock.Bopomofo: + case UnicodeBlock.BopomofoExtended: + { + return '\u3105'; + } + + case UnicodeBlock.CjkUnifiedIdeographs: + { + if (_cjkMap.ContainsKey(ch)) + { + return _cjkMap[ch]; + } + + break; + } + + case UnicodeBlock.HangulSyllables: + { + return '\uac00'; + } + } + + return ch; + } + + public void AddChar(char ch) + { + ch = Normalize(ch); + char lastchar = _grams[_grams.Length - 1]; + if (lastchar == ' ') + { + _grams = new StringBuilder(" "); + _capitalword = false; + if (ch == ' ') return; + } + else if (_grams.Length >= GramsCount) + { + _grams.Remove(0, 1); + } + _grams.Append(ch); + + if (char.IsUpper(ch)) + { + if (char.IsUpper(lastchar)) _capitalword = true; + } + else + { + _capitalword = false; + } + } + + public string Get(int n) + { + if (_capitalword) + { + return null; + } + + int len = _grams.Length; + + if (n < 1 || n > 3 || len < n) + { + return null; + } + + if (n == 1) + { + char ch = _grams[len - 1]; + + if (ch == ' ') + { + return null; + } + + return ch.ToString(); + } + + // TODO IMM HI: is ToString() here effective? + return _grams.ToString().SubSequence(len - n, len); + } + + #endregion } - - #endregion - } } diff --git a/Emby.Server.Implementations/TextEncoding/NLangDetect/Utils/TagExtractor.cs b/Emby.Server.Implementations/TextEncoding/NLangDetect/Utils/TagExtractor.cs index 896fd0960a..4441ecd0fb 100644 --- a/Emby.Server.Implementations/TextEncoding/NLangDetect/Utils/TagExtractor.cs +++ b/Emby.Server.Implementations/TextEncoding/NLangDetect/Utils/TagExtractor.cs @@ -2,75 +2,75 @@ using System.Text; namespace NLangDetect.Core.Utils { - public class TagExtractor - { - // TODO IMM HI: do the really need to be internal? - internal string Target; - internal int Threshold; - internal StringBuilder StringBuilder; - internal string Tag; - - #region Constructor(s) - - public TagExtractor(string tag, int threshold) + public class TagExtractor { - Target = tag; - Threshold = threshold; - Count = 0; - Clear(); - } + // TODO IMM HI: do the really need to be internal? + internal string Target; + internal int Threshold; + internal StringBuilder StringBuilder; + internal string Tag; - #endregion + #region Constructor(s) - #region Public methods - - public void Clear() - { - StringBuilder = new StringBuilder(); - Tag = null; - } - - public void SetTag(string tag) - { - Tag = tag; - } - - public void Add(string line) - { - if (Tag == Target && line != null) - { - StringBuilder.Append(line); - } - } - - public void CloseTag(LangProfile profile) - { - if (profile != null && Tag == Target && StringBuilder.Length > Threshold) - { - var gram = new NGram(); - - for (int i = 0; i < StringBuilder.Length; i++) + public TagExtractor(string tag, int threshold) { - gram.AddChar(StringBuilder[i]); - - for (int n = 1; n <= NGram.GramsCount; n++) - { - profile.Add(gram.Get(n)); - } + Target = tag; + Threshold = threshold; + Count = 0; + Clear(); } - Count++; - } + #endregion - Clear(); + #region Public methods + + public void Clear() + { + StringBuilder = new StringBuilder(); + Tag = null; + } + + public void SetTag(string tag) + { + Tag = tag; + } + + public void Add(string line) + { + if (Tag == Target && line != null) + { + StringBuilder.Append(line); + } + } + + public void CloseTag(LangProfile profile) + { + if (profile != null && Tag == Target && StringBuilder.Length > Threshold) + { + var gram = new NGram(); + + for (int i = 0; i < StringBuilder.Length; i++) + { + gram.AddChar(StringBuilder[i]); + + for (int n = 1; n <= NGram.GramsCount; n++) + { + profile.Add(gram.Get(n)); + } + } + + Count++; + } + + Clear(); + } + + #endregion + + #region Properties + + public int Count { get; private set; } + + #endregion } - - #endregion - - #region Properties - - public int Count { get; private set; } - - #endregion - } } diff --git a/Emby.Server.Implementations/TextEncoding/TextEncoding.cs b/Emby.Server.Implementations/TextEncoding/TextEncoding.cs index 8f15d5a7b9..09705d3815 100644 --- a/Emby.Server.Implementations/TextEncoding/TextEncoding.cs +++ b/Emby.Server.Implementations/TextEncoding/TextEncoding.cs @@ -1,9 +1,9 @@ using System; using System.Text; using MediaBrowser.Model.IO; -using Microsoft.Extensions.Logging; using MediaBrowser.Model.Serialization; using MediaBrowser.Model.Text; +using Microsoft.Extensions.Logging; using NLangDetect.Core; using UniversalDetector; diff --git a/Emby.Server.Implementations/TextEncoding/UniversalDetector/CharsetDetector.cs b/Emby.Server.Implementations/TextEncoding/UniversalDetector/CharsetDetector.cs index 922239c541..8cd2ae658d 100644 --- a/Emby.Server.Implementations/TextEncoding/UniversalDetector/CharsetDetector.cs +++ b/Emby.Server.Implementations/TextEncoding/UniversalDetector/CharsetDetector.cs @@ -100,7 +100,7 @@ namespace UniversalDetector this.confidence = 0.0f; base.Reset(); } - + public string Charset => charset; public float Confidence => confidence; @@ -109,9 +109,9 @@ namespace UniversalDetector { this.charset = charset; this.confidence = confidence; -// if (Finished != null) { -// Finished(charset, confidence); -// } + // if (Finished != null) { + // Finished(charset, confidence); + // } } } diff --git a/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/Big5Prober.cs b/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/Big5Prober.cs index 19152a7acc..68c7d681bb 100644 --- a/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/Big5Prober.cs +++ b/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/Big5Prober.cs @@ -57,27 +57,34 @@ namespace UniversalDetector.Core int codingState = 0; int max = offset + len; - for (int i = offset; i < max; i++) { + for (int i = offset; i < max; i++) + { codingState = codingSM.NextState(buf[i]); - if (codingState == SMModel.ERROR) { + if (codingState == SMModel.ERROR) + { state = ProbingState.NotMe; break; } - if (codingState == SMModel.ITSME) { + if (codingState == SMModel.ITSME) + { state = ProbingState.FoundIt; break; } - if (codingState == SMModel.START) { + if (codingState == SMModel.START) + { int charLen = codingSM.CurrentCharLen; - if (i == offset) { + if (i == offset) + { lastChar[1] = buf[offset]; distributionAnalyser.HandleOneChar(lastChar, 0, charLen); - } else { - distributionAnalyser.HandleOneChar(buf, i-1, charLen); + } + else + { + distributionAnalyser.HandleOneChar(buf, i - 1, charLen); } } } - lastChar[0] = buf[max-1]; + lastChar[0] = buf[max - 1]; if (state == ProbingState.Detecting) if (distributionAnalyser.GotEnoughData() && GetConfidence() > SHORTCUT_THRESHOLD) diff --git a/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/BitPackage.cs b/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/BitPackage.cs index 19bcdc7792..8554cba66b 100644 --- a/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/BitPackage.cs +++ b/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/BitPackage.cs @@ -40,20 +40,20 @@ namespace UniversalDetector.Core { public class BitPackage { - public static int INDEX_SHIFT_4BITS = 3; - public static int INDEX_SHIFT_8BITS = 2; + public static int INDEX_SHIFT_4BITS = 3; + public static int INDEX_SHIFT_8BITS = 2; public static int INDEX_SHIFT_16BITS = 1; - public static int SHIFT_MASK_4BITS = 7; - public static int SHIFT_MASK_8BITS = 3; + public static int SHIFT_MASK_4BITS = 7; + public static int SHIFT_MASK_8BITS = 3; public static int SHIFT_MASK_16BITS = 1; - public static int BIT_SHIFT_4BITS = 2; - public static int BIT_SHIFT_8BITS = 3; + public static int BIT_SHIFT_4BITS = 2; + public static int BIT_SHIFT_8BITS = 3; public static int BIT_SHIFT_16BITS = 4; - public static int UNIT_MASK_4BITS = 0x0000000F; - public static int UNIT_MASK_8BITS = 0x000000FF; + public static int UNIT_MASK_4BITS = 0x0000000F; + public static int UNIT_MASK_8BITS = 0x000000FF; public static int UNIT_MASK_16BITS = 0x0000FFFF; private int indexShift; @@ -94,5 +94,5 @@ namespace UniversalDetector.Core return (data[i >> indexShift] >> ((i & shiftMask) << bitShift)) & unitMask; } - } + } } diff --git a/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/CharDistributionAnalyser.cs b/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/CharDistributionAnalyser.cs index da59959327..472dfdc51d 100644 --- a/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/CharDistributionAnalyser.cs +++ b/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/CharDistributionAnalyser.cs @@ -97,9 +97,11 @@ namespace UniversalDetector.Core { //we only care about 2-bytes character in our distribution analysis int order = (charLen == 2) ? GetOrder(buf, offset) : -1; - if (order >= 0) { + if (order >= 0) + { totalChars++; - if (order < tableSize) { // order is valid + if (order < tableSize) + { // order is valid if (512 > charToFreqOrder[order]) freqChars++; } @@ -124,7 +126,8 @@ namespace UniversalDetector.Core // negative answer if (totalChars <= 0 || freqChars <= MINIMUM_DATA_THRESHOLD) return SURE_NO; - if (totalChars != freqChars) { + if (totalChars != freqChars) + { float r = freqChars / ((totalChars - freqChars) * typicalDistributionRatio); if (r < SURE_YES) return r; @@ -610,8 +613,8 @@ namespace UniversalDetector.Core /// public override int GetOrder(byte[] buf, int offset) { - if (buf[offset] >= 0xB0 && buf[offset+1] >= 0xA1) - return 94 * (buf[offset] - 0xb0) + buf[offset+1] - 0xA1; + if (buf[offset] >= 0xB0 && buf[offset + 1] >= 0xA1) + return 94 * (buf[offset] - 0xb0) + buf[offset + 1] - 0xA1; else return -1; } @@ -1040,7 +1043,7 @@ namespace UniversalDetector.Core public override int GetOrder(byte[] buf, int offset) { if (buf[offset] >= 0xC4) - return 94 * (buf[offset] - 0xC4) + buf[offset+1] - 0xA1; + return 94 * (buf[offset] - 0xC4) + buf[offset + 1] - 0xA1; else return -1; } @@ -1048,7 +1051,7 @@ namespace UniversalDetector.Core public class EUCKRDistributionAnalyser : CharDistributionAnalyser { - // Sampling from about 20M text materials include literature and computer technology + // Sampling from about 20M text materials include literature and computer technology /* * 128 --> 0.79 * 256 --> 0.92 @@ -1634,7 +1637,7 @@ namespace UniversalDetector.Core public override int GetOrder(byte[] buf, int offset) { if (buf[offset] >= 0xB0) - return 94 * (buf[offset] - 0xB0) + buf[offset+1] - 0xA1; + return 94 * (buf[offset] - 0xB0) + buf[offset + 1] - 0xA1; else return -1; } @@ -2559,12 +2562,15 @@ namespace UniversalDetector.Core /// public override int GetOrder(byte[] buf, int offset) { - if (buf[offset] >= 0xA4) { - if (buf[offset+1] >= 0xA1) - return 157 * (buf[offset] - 0xA4) + buf[offset+1] - 0xA1 + 63; + if (buf[offset] >= 0xA4) + { + if (buf[offset + 1] >= 0xA1) + return 157 * (buf[offset] - 0xA4) + buf[offset + 1] - 0xA1 + 63; else - return 157 * (buf[offset] - 0xA4) + buf[offset+1] - 0x40; - } else { + return 157 * (buf[offset] - 0xA4) + buf[offset + 1] - 0x40; + } + else + { return -1; } } @@ -3140,9 +3146,9 @@ namespace UniversalDetector.Core order = 188 * (buf[offset] - 0xE0 + 31); else return -1; - order += buf[offset+1] - 0x40; + order += buf[offset + 1] - 0x40; - if (buf[offset+1] > 0x7F) + if (buf[offset + 1] > 0x7F) order--; return order; } @@ -3162,7 +3168,7 @@ namespace UniversalDetector.Core public override int GetOrder(byte[] buf, int offset) { if (buf[offset] >= 0xA0) - return 94 * (buf[offset] - 0xA1) + buf[offset+1] - 0xA1; + return 94 * (buf[offset] - 0xA1) + buf[offset + 1] - 0xA1; else return -1; } diff --git a/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/CharsetProber.cs b/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/CharsetProber.cs index cc4539058d..158dc89696 100644 --- a/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/CharsetProber.cs +++ b/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/CharsetProber.cs @@ -40,7 +40,8 @@ using System.IO; namespace UniversalDetector.Core { - public enum ProbingState { + public enum ProbingState + { Detecting = 0, // no sure answer yet, but caller can ask for confidence FoundIt = 1, // positive answer NotMe = 2 // negative answer @@ -107,21 +108,27 @@ namespace UniversalDetector.Core { byte[] result = null; - using (MemoryStream ms = new MemoryStream(buf.Length)) { + using (MemoryStream ms = new MemoryStream(buf.Length)) + { bool meetMSB = false; int max = offset + len; int prev = offset; int cur = offset; - while (cur < max) { + while (cur < max) + { byte b = buf[cur]; - if ((b & 0x80) != 0) { + if ((b & 0x80) != 0) + { meetMSB = true; - } else if (b < CAPITAL_A || (b > CAPITAL_Z && b < SMALL_A) - || b > SMALL_Z) { - if (meetMSB && cur > prev) { + } + else if (b < CAPITAL_A || (b > CAPITAL_Z && b < SMALL_A) + || b > SMALL_Z) + { + if (meetMSB && cur > prev) + { ms.Write(buf, prev, cur - prev); ms.WriteByte(SPACE); meetMSB = false; @@ -149,14 +156,16 @@ namespace UniversalDetector.Core { byte[] result = null; - using (MemoryStream ms = new MemoryStream(buf.Length)) { + using (MemoryStream ms = new MemoryStream(buf.Length)) + { bool inTag = false; int max = offset + len; int prev = offset; int cur = offset; - while (cur < max) { + while (cur < max) + { byte b = buf[cur]; @@ -167,8 +176,10 @@ namespace UniversalDetector.Core // it's ascii, but it's not a letter if ((b & 0x80) == 0 && (b < CAPITAL_A || b > SMALL_Z - || (b > CAPITAL_Z && b < SMALL_A))) { - if (cur > prev && !inTag) { + || (b > CAPITAL_Z && b < SMALL_A))) + { + if (cur > prev && !inTag) + { ms.Write(buf, prev, cur - prev); ms.WriteByte(SPACE); } diff --git a/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/CodingStateMachine.cs b/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/CodingStateMachine.cs index 34f24161df..7ba1f2aa38 100644 --- a/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/CodingStateMachine.cs +++ b/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/CodingStateMachine.cs @@ -60,7 +60,8 @@ namespace UniversalDetector.Core // for each byte we get its class, if it is first byte, // we also get byte length int byteCls = model.GetClass(b); - if (currentState == SMModel.START) { + if (currentState == SMModel.START) + { currentBytePos = 0; currentCharLen = model.charLenTable[byteCls]; } diff --git a/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/EUCJPProber.cs b/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/EUCJPProber.cs index eac67fe956..56bcf22741 100644 --- a/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/EUCJPProber.cs +++ b/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/EUCJPProber.cs @@ -62,29 +62,36 @@ namespace UniversalDetector.Core int codingState; int max = offset + len; - for (int i = offset; i < max; i++) { + for (int i = offset; i < max; i++) + { codingState = codingSM.NextState(buf[i]); - if (codingState == SMModel.ERROR) { + if (codingState == SMModel.ERROR) + { state = ProbingState.NotMe; break; } - if (codingState == SMModel.ITSME) { + if (codingState == SMModel.ITSME) + { state = ProbingState.FoundIt; break; } - if (codingState == SMModel.START) { + if (codingState == SMModel.START) + { int charLen = codingSM.CurrentCharLen; - if (i == offset) { + if (i == offset) + { lastChar[1] = buf[offset]; contextAnalyser.HandleOneChar(lastChar, 0, charLen); distributionAnalyser.HandleOneChar(lastChar, 0, charLen); - } else { - contextAnalyser.HandleOneChar(buf, i-1, charLen); - distributionAnalyser.HandleOneChar(buf, i-1, charLen); + } + else + { + contextAnalyser.HandleOneChar(buf, i - 1, charLen); + distributionAnalyser.HandleOneChar(buf, i - 1, charLen); } } } - lastChar[0] = buf[max-1]; + lastChar[0] = buf[max - 1]; if (state == ProbingState.Detecting) if (contextAnalyser.GotEnoughData() && GetConfidence() > SHORTCUT_THRESHOLD) state = ProbingState.FoundIt; diff --git a/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/EUCKRProber.cs b/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/EUCKRProber.cs index b1543dae16..ac9a0b559b 100644 --- a/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/EUCKRProber.cs +++ b/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/EUCKRProber.cs @@ -60,27 +60,34 @@ namespace UniversalDetector.Core int codingState; int max = offset + len; - for (int i = offset; i < max; i++) { + for (int i = offset; i < max; i++) + { codingState = codingSM.NextState(buf[i]); - if (codingState == SMModel.ERROR) { + if (codingState == SMModel.ERROR) + { state = ProbingState.NotMe; break; } - if (codingState == SMModel.ITSME) { + if (codingState == SMModel.ITSME) + { state = ProbingState.FoundIt; break; } - if (codingState == SMModel.START) { + if (codingState == SMModel.START) + { int charLen = codingSM.CurrentCharLen; - if (i == offset) { + if (i == offset) + { lastChar[1] = buf[offset]; distributionAnalyser.HandleOneChar(lastChar, 0, charLen); - } else { - distributionAnalyser.HandleOneChar(buf, i-1, charLen); + } + else + { + distributionAnalyser.HandleOneChar(buf, i - 1, charLen); } } } - lastChar[0] = buf[max-1]; + lastChar[0] = buf[max - 1]; if (state == ProbingState.Detecting) if (distributionAnalyser.GotEnoughData() && GetConfidence() > SHORTCUT_THRESHOLD) diff --git a/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/EUCTWProber.cs b/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/EUCTWProber.cs index 65a521760a..94a14d1669 100644 --- a/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/EUCTWProber.cs +++ b/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/EUCTWProber.cs @@ -56,27 +56,34 @@ namespace UniversalDetector.Core int codingState; int max = offset + len; - for (int i = 0; i < max; i++) { + for (int i = 0; i < max; i++) + { codingState = codingSM.NextState(buf[i]); - if (codingState == SMModel.ERROR) { + if (codingState == SMModel.ERROR) + { state = ProbingState.NotMe; break; } - if (codingState == SMModel.ITSME) { + if (codingState == SMModel.ITSME) + { state = ProbingState.FoundIt; break; } - if (codingState == SMModel.START) { + if (codingState == SMModel.START) + { int charLen = codingSM.CurrentCharLen; - if (i == offset) { + if (i == offset) + { lastChar[1] = buf[offset]; distributionAnalyser.HandleOneChar(lastChar, 0, charLen); - } else { - distributionAnalyser.HandleOneChar(buf, i-1, charLen); + } + else + { + distributionAnalyser.HandleOneChar(buf, i - 1, charLen); } } } - lastChar[0] = buf[max-1]; + lastChar[0] = buf[max - 1]; if (state == ProbingState.Detecting) if (distributionAnalyser.GotEnoughData() && GetConfidence() > SHORTCUT_THRESHOLD) diff --git a/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/EscCharsetProber.cs b/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/EscCharsetProber.cs index f457bf4902..e8da73c1c7 100644 --- a/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/EscCharsetProber.cs +++ b/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/EscCharsetProber.cs @@ -67,22 +67,30 @@ namespace UniversalDetector.Core { int max = offset + len; - for (int i = offset; i < max && state == ProbingState.Detecting; i++) { - for (int j = activeSM - 1; j >= 0; j--) { + for (int i = offset; i < max && state == ProbingState.Detecting; i++) + { + for (int j = activeSM - 1; j >= 0; j--) + { // byte is feed to all active state machine int codingState = codingSM[j].NextState(buf[i]); - if (codingState == SMModel.ERROR) { + if (codingState == SMModel.ERROR) + { // got negative answer for this state machine, make it inactive activeSM--; - if (activeSM == 0) { + if (activeSM == 0) + { state = ProbingState.NotMe; return state; - } else if (j != activeSM) { + } + else if (j != activeSM) + { CodingStateMachine t = codingSM[activeSM]; codingSM[activeSM] = codingSM[j]; codingSM[j] = t; } - } else if (codingState == SMModel.ITSME) { + } + else if (codingState == SMModel.ITSME) + { state = ProbingState.FoundIt; detectedCharset = codingSM[j].ModelName; return state; diff --git a/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/EscSM.cs b/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/EscSM.cs index 6ebfa8a4ca..38471e28cc 100644 --- a/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/EscSM.cs +++ b/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/EscSM.cs @@ -87,7 +87,7 @@ namespace UniversalDetector.Core BitPackage.Pack4bits( 4, ITSME, START, START, START, START, START, START) //28-2f }; - private readonly static int[] HZCharLenTable = {0, 0, 0, 0, 0, 0}; + private readonly static int[] HZCharLenTable = { 0, 0, 0, 0, 0, 0 }; public HZSMModel() : base( new BitPackage(BitPackage.INDEX_SHIFT_4BITS, @@ -153,7 +153,7 @@ namespace UniversalDetector.Core BitPackage.Pack4bits(ERROR,ERROR,ERROR,ERROR,ERROR,ITSME,ERROR,START) //38-3f }; - private readonly static int[] ISO2022CNCharLenTable = {0, 0, 0, 0, 0, 0, 0, 0, 0}; + private readonly static int[] ISO2022CNCharLenTable = { 0, 0, 0, 0, 0, 0, 0, 0, 0 }; public ISO2022CNSMModel() : base( new BitPackage(BitPackage.INDEX_SHIFT_4BITS, @@ -220,7 +220,7 @@ namespace UniversalDetector.Core BitPackage.Pack4bits(ERROR, ERROR, ERROR,ERROR,ITSME,ERROR,START,START) //40-47 }; - private readonly static int[] ISO2022JPCharLenTable = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + private readonly static int[] ISO2022JPCharLenTable = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; public ISO2022JPSMModel() : base( new BitPackage(BitPackage.INDEX_SHIFT_4BITS, @@ -284,7 +284,7 @@ namespace UniversalDetector.Core BitPackage.Pack4bits(ERROR,ERROR,ERROR,ITSME,START,START,START,START) //20-27 }; - private readonly static int[] ISO2022KRCharLenTable = {0, 0, 0, 0, 0, 0}; + private readonly static int[] ISO2022KRCharLenTable = { 0, 0, 0, 0, 0, 0 }; public ISO2022KRSMModel() : base( new BitPackage(BitPackage.INDEX_SHIFT_4BITS, diff --git a/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/GB18030Prober.cs b/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/GB18030Prober.cs index 0d2ebd8c73..f805524eb2 100644 --- a/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/GB18030Prober.cs +++ b/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/GB18030Prober.cs @@ -64,30 +64,38 @@ namespace UniversalDetector.Core int codingState = SMModel.START; int max = offset + len; - for (int i = offset; i < max; i++) { + for (int i = offset; i < max; i++) + { codingState = codingSM.NextState(buf[i]); - if (codingState == SMModel.ERROR) { + if (codingState == SMModel.ERROR) + { state = ProbingState.NotMe; break; } - if (codingState == SMModel.ITSME) { + if (codingState == SMModel.ITSME) + { state = ProbingState.FoundIt; break; } - if (codingState == SMModel.START) { + if (codingState == SMModel.START) + { int charLen = codingSM.CurrentCharLen; - if (i == offset) { + if (i == offset) + { lastChar[1] = buf[offset]; analyser.HandleOneChar(lastChar, 0, charLen); - } else { - analyser.HandleOneChar(buf, i-1, charLen); + } + else + { + analyser.HandleOneChar(buf, i - 1, charLen); } } } - lastChar[0] = buf[max-1]; + lastChar[0] = buf[max - 1]; - if (state == ProbingState.Detecting) { + if (state == ProbingState.Detecting) + { if (analyser.GotEnoughData() && GetConfidence() > SHORTCUT_THRESHOLD) state = ProbingState.FoundIt; } diff --git a/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/HebrewProber.cs b/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/HebrewProber.cs index 2cbf33075c..bd7490ad76 100644 --- a/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/HebrewProber.cs +++ b/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/HebrewProber.cs @@ -36,7 +36,6 @@ * * ***** END LICENSE BLOCK ***** */ -using System; /** * General ideas of the Hebrew charset recognition @@ -144,11 +143,11 @@ namespace UniversalDetector.Core public class HebrewProber : CharsetProber { // windows-1255 / ISO-8859-8 code points of interest - private const byte FINAL_KAF = 0xEA; + private const byte FINAL_KAF = 0xEA; private const byte NORMAL_KAF = 0xEB; - private const byte FINAL_MEM = 0xED; + private const byte FINAL_MEM = 0xED; private const byte NORMAL_MEM = 0xEE; - private const byte FINAL_NUN = 0xEF; + private const byte FINAL_NUN = 0xEF; private const byte NORMAL_NUN = 0xF0; private const byte FINAL_PE = 0xF3; private const byte NORMAL_PE = 0xF4; @@ -217,14 +216,17 @@ namespace UniversalDetector.Core int max = offset + len; - for (int i = offset; i < max; i++) { + for (int i = offset; i < max; i++) + { byte b = buf[i]; // a word just ended - if (b == 0x20) { + if (b == 0x20) + { // *(curPtr-2) was not a space so prev is not a 1 letter word - if (beforePrev != 0x20) { + if (beforePrev != 0x20) + { // case (1) [-2:not space][-1:final letter][cur:space] if (IsFinal(prev)) finalCharLogicalScore++; @@ -233,7 +235,9 @@ namespace UniversalDetector.Core finalCharVisualScore++; } - } else { + } + else + { // case (3) [-2:space][-1:final letter][cur:not space] if ((beforePrev == 0x20) && (IsFinal(prev)) && (b != ' ')) ++finalCharVisualScore; diff --git a/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/JapaneseContextAnalyser.cs b/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/JapaneseContextAnalyser.cs index 7d28224c55..a2bf04ba09 100644 --- a/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/JapaneseContextAnalyser.cs +++ b/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/JapaneseContextAnalyser.cs @@ -160,7 +160,7 @@ namespace UniversalDetector.Core { // This is just one way to calculate confidence. It works well for me. if (totalRel > MINIMUM_DATA_THRESHOLD) - return ((float)(totalRel - relSample[0]))/totalRel; + return ((float)(totalRel - relSample[0])) / totalRel; else return DONT_KNOW; } @@ -181,22 +181,28 @@ namespace UniversalDetector.Core // to record those bytes as well and analyse the character once it // is complete, but since a character will not make much difference, // skipping it will simplify our logic and improve performance. - for (int i = needToSkipCharNum+offset; i < max; ) { + for (int i = needToSkipCharNum + offset; i < max;) + { int order = GetOrder(buf, i, out charLen); i += charLen; - if (i > max) { + if (i > max) + { needToSkipCharNum = i - max; lastCharOrder = -1; - } else { - if (order != -1 && lastCharOrder != -1) { - totalRel ++; - if (totalRel > MAX_REL_THRESHOLD) { + } + else + { + if (order != -1 && lastCharOrder != -1) + { + totalRel++; + if (totalRel > MAX_REL_THRESHOLD) + { done = true; break; } relSample[jp2CharContext[lastCharOrder, order]]++; - } - lastCharOrder = order; + } + lastCharOrder = order; } } } @@ -210,7 +216,8 @@ namespace UniversalDetector.Core // Only 2-bytes characters are of our interest int order = (charLen == 2) ? GetOrder(buf, offset) : -1; - if (order != -1 && lastCharOrder != -1) { + if (order != -1 && lastCharOrder != -1) + { totalRel++; // count this sequence to its category counter relSample[jp2CharContext[lastCharOrder, order]]++; @@ -221,7 +228,8 @@ namespace UniversalDetector.Core public void Reset() { totalRel = 0; - for (int i = 0; i < CATEGORIES_NUM; i++) { + for (int i = 0; i < CATEGORIES_NUM; i++) + { relSample[i] = 0; needToSkipCharNum = 0; lastCharOrder = -1; @@ -254,8 +262,9 @@ namespace UniversalDetector.Core charLen = 1; // return its order if it is hiragana - if (buf[offset] == HIRAGANA_FIRST_BYTE) { - byte low = buf[offset+1]; + if (buf[offset] == HIRAGANA_FIRST_BYTE) + { + byte low = buf[offset + 1]; if (low >= 0x9F && low <= 0xF1) return low - 0x9F; } @@ -265,8 +274,9 @@ namespace UniversalDetector.Core protected override int GetOrder(byte[] buf, int offset) { // We are only interested in Hiragana - if (buf[offset] == HIRAGANA_FIRST_BYTE) { - byte low = buf[offset+1]; + if (buf[offset] == HIRAGANA_FIRST_BYTE) + { + byte low = buf[offset + 1]; if (low >= 0x9F && low <= 0xF1) return low - 0x9F; } @@ -292,8 +302,9 @@ namespace UniversalDetector.Core charLen = 1; // return its order if it is hiragana - if (high == HIRAGANA_FIRST_BYTE) { - byte low = buf[offset+1]; + if (high == HIRAGANA_FIRST_BYTE) + { + byte low = buf[offset + 1]; if (low >= 0xA1 && low <= 0xF3) return low - 0xA1; } @@ -303,8 +314,9 @@ namespace UniversalDetector.Core protected override int GetOrder(byte[] buf, int offset) { // We are only interested in Hiragana - if (buf[offset] == HIRAGANA_FIRST_BYTE) { - byte low = buf[offset+1]; + if (buf[offset] == HIRAGANA_FIRST_BYTE) + { + byte low = buf[offset + 1]; if (low >= 0xA1 && low <= 0xF3) return low - 0xA1; } diff --git a/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/Latin1Prober.cs b/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/Latin1Prober.cs index 5d57e30e1f..11ce90c874 100644 --- a/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/Latin1Prober.cs +++ b/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/Latin1Prober.cs @@ -36,7 +36,6 @@ * * ***** END LICENSE BLOCK ***** */ -using System; namespace UniversalDetector.Core { @@ -135,12 +134,14 @@ namespace UniversalDetector.Core byte[] newbuf = FilterWithEnglishLetters(buf, offset, len); byte charClass, freq; - for (int i = 0; i < newbuf.Length; i++) { + for (int i = 0; i < newbuf.Length; i++) + { charClass = Latin1_CharToClass[newbuf[i]]; freq = Latin1ClassModel[lastCharClass * CLASS_NUM + charClass]; - if (freq == 0) { - state = ProbingState.NotMe; - break; + if (freq == 0) + { + state = ProbingState.NotMe; + break; } freqCounter[freq]++; lastCharClass = charClass; @@ -155,13 +156,17 @@ namespace UniversalDetector.Core float confidence = 0.0f; int total = 0; - for (int i = 0; i < FREQ_CAT_NUM; i++) { + for (int i = 0; i < FREQ_CAT_NUM; i++) + { total += freqCounter[i]; } - if (total <= 0) { + if (total <= 0) + { confidence = 0.0f; - } else { + } + else + { confidence = freqCounter[3] * 1.0f / total; confidence -= freqCounter[1] * 20.0f / total; } diff --git a/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/MBCSGroupProber.cs b/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/MBCSGroupProber.cs index b4f6928a46..e7fa2d7192 100644 --- a/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/MBCSGroupProber.cs +++ b/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/MBCSGroupProber.cs @@ -36,7 +36,6 @@ * * ***** END LICENSE BLOCK ***** */ -using System; namespace UniversalDetector.Core { @@ -67,7 +66,8 @@ namespace UniversalDetector.Core public override string GetCharsetName() { - if (bestGuess == -1) { + if (bestGuess == -1) + { GetConfidence(); if (bestGuess == -1) bestGuess = 0; @@ -78,13 +78,17 @@ namespace UniversalDetector.Core public override void Reset() { activeNum = 0; - for (int i = 0; i < probers.Length; i++) { - if (probers[i] != null) { - probers[i].Reset(); - isActive[i] = true; - ++activeNum; - } else { - isActive[i] = false; + for (int i = 0; i < probers.Length; i++) + { + if (probers[i] != null) + { + probers[i].Reset(); + isActive[i] = true; + ++activeNum; + } + else + { + isActive[i] = false; } } bestGuess = -1; @@ -100,13 +104,18 @@ namespace UniversalDetector.Core bool keepNext = true; int max = offset + len; - for (int i = offset; i < max; i++) { - if ((buf[i] & 0x80) != 0) { + for (int i = offset; i < max; i++) + { + if ((buf[i] & 0x80) != 0) + { highbyteBuf[hptr++] = buf[i]; keepNext = true; - } else { + } + else + { //if previous is highbyte, keep this even it is a ASCII - if (keepNext) { + if (keepNext) + { highbyteBuf[hptr++] = buf[i]; keepNext = false; } @@ -115,18 +124,23 @@ namespace UniversalDetector.Core ProbingState st = ProbingState.NotMe; - for (int i = 0; i < probers.Length; i++) { + for (int i = 0; i < probers.Length; i++) + { if (!isActive[i]) continue; st = probers[i].HandleData(highbyteBuf, 0, hptr); - if (st == ProbingState.FoundIt) { + if (st == ProbingState.FoundIt) + { bestGuess = i; state = ProbingState.FoundIt; break; - } else if (st == ProbingState.NotMe) { + } + else if (st == ProbingState.NotMe) + { isActive[i] = false; activeNum--; - if (activeNum <= 0) { + if (activeNum <= 0) + { state = ProbingState.NotMe; break; } @@ -140,16 +154,23 @@ namespace UniversalDetector.Core float bestConf = 0.0f; float cf = 0.0f; - if (state == ProbingState.FoundIt) { + if (state == ProbingState.FoundIt) + { return 0.99f; - } else if (state == ProbingState.NotMe) { + } + else if (state == ProbingState.NotMe) + { return 0.01f; - } else { - for (int i = 0; i < PROBERS_NUM; i++) { + } + else + { + for (int i = 0; i < PROBERS_NUM; i++) + { if (!isActive[i]) continue; cf = probers[i].GetConfidence(); - if (bestConf < cf) { + if (bestConf < cf) + { bestConf = cf; bestGuess = i; } @@ -162,10 +183,14 @@ namespace UniversalDetector.Core { float cf; GetConfidence(); - for (int i = 0; i < PROBERS_NUM; i++) { - if (!isActive[i]) { + for (int i = 0; i < PROBERS_NUM; i++) + { + if (!isActive[i]) + { //Console.WriteLine(" MBCS inactive: {0} (confidence is too low).", ProberName[i]); - } else { + } + else + { cf = probers[i].GetConfidence(); //Console.WriteLine(" MBCS {0}: [{1}]", cf, ProberName[i]); } diff --git a/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/MBCSSM.cs b/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/MBCSSM.cs index 65e04292ab..b753401668 100644 --- a/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/MBCSSM.cs +++ b/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/MBCSSM.cs @@ -174,7 +174,7 @@ namespace UniversalDetector.Core // it is used for frequency analysis only, and we are validating // each code range there as well. So it is safe to set it to be // 2 here. - private readonly static int[] GB18030CharLenTable = {0, 1, 1, 1, 1, 1, 2}; + private readonly static int[] GB18030CharLenTable = { 0, 1, 1, 1, 1, 1, 2 }; public GB18030SMModel() : base( new BitPackage(BitPackage.INDEX_SHIFT_4BITS, @@ -235,7 +235,7 @@ namespace UniversalDetector.Core BitPackage.Pack4bits(ERROR,START,START,START,START,START,START,START) //10-17 }; - private readonly static int[] BIG5CharLenTable = {0, 1, 1, 2, 0}; + private readonly static int[] BIG5CharLenTable = { 0, 1, 1, 2, 0 }; public BIG5SMModel() : base( new BitPackage(BitPackage.INDEX_SHIFT_4BITS, diff --git a/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/SBCSGroupProber.cs b/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/SBCSGroupProber.cs index 640b19c4a4..336726aab2 100644 --- a/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/SBCSGroupProber.cs +++ b/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/SBCSGroupProber.cs @@ -36,7 +36,6 @@ * * ***** END LICENSE BLOCK ***** */ -using System; namespace UniversalDetector.Core { @@ -88,19 +87,24 @@ namespace UniversalDetector.Core if (newBuf.Length == 0) return state; // Nothing to see here, move on. - for (int i = 0; i < PROBERS_NUM; i++) { + for (int i = 0; i < PROBERS_NUM; i++) + { if (!isActive[i]) continue; st = probers[i].HandleData(newBuf, 0, newBuf.Length); - if (st == ProbingState.FoundIt) { + if (st == ProbingState.FoundIt) + { bestGuess = i; state = ProbingState.FoundIt; break; - } else if (st == ProbingState.NotMe) { + } + else if (st == ProbingState.NotMe) + { isActive[i] = false; activeNum--; - if (activeNum <= 0) { + if (activeNum <= 0) + { state = ProbingState.NotMe; break; } @@ -112,24 +116,25 @@ namespace UniversalDetector.Core public override float GetConfidence() { float bestConf = 0.0f, cf; - switch (state) { - case ProbingState.FoundIt: - return 0.99f; //sure yes - case ProbingState.NotMe: - return 0.01f; //sure no - default: - for (int i = 0; i < PROBERS_NUM; i++) - { - if (!isActive[i]) - continue; - cf = probers[i].GetConfidence(); - if (bestConf < cf) + switch (state) + { + case ProbingState.FoundIt: + return 0.99f; //sure yes + case ProbingState.NotMe: + return 0.01f; //sure no + default: + for (int i = 0; i < PROBERS_NUM; i++) { - bestConf = cf; - bestGuess = i; + if (!isActive[i]) + continue; + cf = probers[i].GetConfidence(); + if (bestConf < cf) + { + bestConf = cf; + bestGuess = i; + } } - } - break; + break; } return bestConf; } @@ -137,8 +142,9 @@ namespace UniversalDetector.Core public override void DumpStatus() { float cf = GetConfidence(); - // Console.WriteLine(" SBCS Group Prober --------begin status"); - for (int i = 0; i < PROBERS_NUM; i++) { + // Console.WriteLine(" SBCS Group Prober --------begin status"); + for (int i = 0; i < PROBERS_NUM; i++) + { if (isActive[i]) probers[i].DumpStatus(); //else @@ -148,15 +154,19 @@ namespace UniversalDetector.Core //Console.WriteLine(" SBCS Group found best match [{0}] confidence {1}.", probers[bestGuess].GetCharsetName(), cf); } - public override void Reset () + public override void Reset() { int activeNum = 0; - for (int i = 0; i < PROBERS_NUM; i++) { - if (probers[i] != null) { + for (int i = 0; i < PROBERS_NUM; i++) + { + if (probers[i] != null) + { probers[i].Reset(); isActive[i] = true; activeNum++; - } else { + } + else + { isActive[i] = false; } } @@ -167,7 +177,8 @@ namespace UniversalDetector.Core public override string GetCharsetName() { //if we have no answer yet - if (bestGuess == -1) { + if (bestGuess == -1) + { GetConfidence(); //no charset seems positive if (bestGuess == -1) diff --git a/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/SBCharsetProber.cs b/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/SBCharsetProber.cs index 65c0f8ca81..b87ac21703 100644 --- a/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/SBCharsetProber.cs +++ b/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/SBCharsetProber.cs @@ -36,7 +36,6 @@ * * ***** END LICENSE BLOCK ***** */ -using System; namespace UniversalDetector.Core { @@ -49,7 +48,7 @@ namespace UniversalDetector.Core private const float NEGATIVE_SHORTCUT_THRESHOLD = 0.05f; private const int SYMBOL_CAT_ORDER = 250; private const int NUMBER_OF_SEQ_CAT = 4; - private const int POSITIVE_CAT = NUMBER_OF_SEQ_CAT-1; + private const int POSITIVE_CAT = NUMBER_OF_SEQ_CAT - 1; private const int NEGATIVE_CAT = 0; protected SequenceModel model; @@ -89,28 +88,33 @@ namespace UniversalDetector.Core { int max = offset + len; - for (int i = offset; i < max; i++) { + for (int i = offset; i < max; i++) + { byte order = model.GetOrder(buf[i]); if (order < SYMBOL_CAT_ORDER) totalChar++; - if (order < SAMPLE_SIZE) { + if (order < SAMPLE_SIZE) + { freqChar++; - if (lastOrder < SAMPLE_SIZE) { + if (lastOrder < SAMPLE_SIZE) + { totalSeqs++; if (!reversed) - ++(seqCounters[model.GetPrecedence(lastOrder*SAMPLE_SIZE+order)]); + ++(seqCounters[model.GetPrecedence(lastOrder * SAMPLE_SIZE + order)]); else // reverse the order of the letters in the lookup - ++(seqCounters[model.GetPrecedence(order*SAMPLE_SIZE+lastOrder)]); + ++(seqCounters[model.GetPrecedence(order * SAMPLE_SIZE + lastOrder)]); } } lastOrder = order; } - if (state == ProbingState.Detecting) { - if (totalSeqs > SB_ENOUGH_REL_THRESHOLD) { + if (state == ProbingState.Detecting) + { + if (totalSeqs > SB_ENOUGH_REL_THRESHOLD) + { float cf = GetConfidence(); if (cf > POSITIVE_SHORTCUT_THRESHOLD) state = ProbingState.FoundIt; @@ -139,7 +143,8 @@ namespace UniversalDetector.Core // POSITIVE_APPROACH float r = 0.0f; - if (totalSeqs > 0) { + if (totalSeqs > 0) + { r = 1.0f * seqCounters[POSITIVE_CAT] / totalSeqs / model.TypicalPositiveRatio; r = r * freqChar / totalChar; if (r >= 1.0f) diff --git a/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/SJISProber.cs b/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/SJISProber.cs index e1fbb873e4..d01a683bc9 100644 --- a/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/SJISProber.cs +++ b/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/SJISProber.cs @@ -69,29 +69,36 @@ namespace UniversalDetector.Core int codingState; int max = offset + len; - for (int i = offset; i < max; i++) { + for (int i = offset; i < max; i++) + { codingState = codingSM.NextState(buf[i]); - if (codingState == SMModel.ERROR) { + if (codingState == SMModel.ERROR) + { state = ProbingState.NotMe; break; } - if (codingState == SMModel.ITSME) { + if (codingState == SMModel.ITSME) + { state = ProbingState.FoundIt; break; } - if (codingState == SMModel.START) { + if (codingState == SMModel.START) + { int charLen = codingSM.CurrentCharLen; - if (i == offset) { + if (i == offset) + { lastChar[1] = buf[offset]; - contextAnalyser.HandleOneChar(lastChar, 2-charLen, charLen); + contextAnalyser.HandleOneChar(lastChar, 2 - charLen, charLen); distributionAnalyser.HandleOneChar(lastChar, 0, charLen); - } else { - contextAnalyser.HandleOneChar(buf, i+1-charLen, charLen); - distributionAnalyser.HandleOneChar(buf, i-1, charLen); + } + else + { + contextAnalyser.HandleOneChar(buf, i + 1 - charLen, charLen); + distributionAnalyser.HandleOneChar(buf, i - 1, charLen); } } } - lastChar[0] = buf[max-1]; + lastChar[0] = buf[max - 1]; if (state == ProbingState.Detecting) if (contextAnalyser.GotEnoughData() && GetConfidence() > SHORTCUT_THRESHOLD) state = ProbingState.FoundIt; diff --git a/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/SMModel.cs b/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/SMModel.cs index cb2f201aad..f11b01e211 100644 --- a/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/SMModel.cs +++ b/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/SMModel.cs @@ -36,7 +36,6 @@ * * ***** END LICENSE BLOCK ***** */ -using System; namespace UniversalDetector.Core { @@ -54,7 +53,7 @@ namespace UniversalDetector.Core public int[] charLenTable; private string name; - + public string Name => name; private int classFactor; diff --git a/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/SequenceModel.cs b/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/SequenceModel.cs index b813dda768..85afd0ed6c 100644 --- a/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/SequenceModel.cs +++ b/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/SequenceModel.cs @@ -36,7 +36,6 @@ * * ***** END LICENSE BLOCK ***** */ -using System; namespace UniversalDetector.Core { @@ -51,12 +50,12 @@ namespace UniversalDetector.Core // freqSeqs / totalSeqs protected float typicalPositiveRatio; - + public float TypicalPositiveRatio => typicalPositiveRatio; // not used protected bool keepEnglishLetter; - + public bool KeepEnglishLetter => keepEnglishLetter; protected string charsetName; diff --git a/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/UTF8Prober.cs b/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/UTF8Prober.cs index a469e2a0c6..2a625576f2 100644 --- a/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/UTF8Prober.cs +++ b/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/UTF8Prober.cs @@ -51,7 +51,8 @@ namespace UniversalDetector.Core Reset(); } - public override string GetCharsetName() { + public override string GetCharsetName() + { return "UTF-8"; } @@ -67,21 +68,25 @@ namespace UniversalDetector.Core int codingState = SMModel.START; int max = offset + len; - for (int i = offset; i < max; i++) { + for (int i = offset; i < max; i++) + { codingState = codingSM.NextState(buf[i]); - if (codingState == SMModel.ERROR) { + if (codingState == SMModel.ERROR) + { state = ProbingState.NotMe; break; } - if (codingState == SMModel.ITSME) { + if (codingState == SMModel.ITSME) + { state = ProbingState.FoundIt; break; } - if (codingState == SMModel.START) { + if (codingState == SMModel.START) + { if (codingSM.CurrentCharLen >= 2) numOfMBChar++; } @@ -98,11 +103,14 @@ namespace UniversalDetector.Core float unlike = 0.99f; float confidence = 0.0f; - if (numOfMBChar < 6) { + if (numOfMBChar < 6) + { for (int i = 0; i < numOfMBChar; i++) unlike *= ONE_CHAR_PROB; confidence = 1.0f - unlike; - } else { + } + else + { confidence = 0.99f; } return confidence; diff --git a/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/UniversalDetector.cs b/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/UniversalDetector.cs index 4dcb282cc9..28a50ea3ea 100644 --- a/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/UniversalDetector.cs +++ b/Emby.Server.Implementations/TextEncoding/UniversalDetector/Core/UniversalDetector.cs @@ -39,7 +39,7 @@ namespace UniversalDetector.Core { - enum InputState { PureASCII=0, EscASCII=1, Highbyte=2 }; + enum InputState { PureASCII = 0, EscASCII = 1, Highbyte = 2 }; public abstract class UniversalDetector { @@ -70,7 +70,8 @@ namespace UniversalDetector.Core protected CharsetProber escCharsetProber; protected string detectedCharset; - public UniversalDetector(int languageFilter) { + public UniversalDetector(int languageFilter) + { this.start = true; this.inputState = InputState.PureASCII; this.lastChar = 0x00; @@ -80,7 +81,8 @@ namespace UniversalDetector.Core public virtual void Feed(byte[] buf, int offset, int len) { - if (done) { + if (done) + { return; } @@ -88,52 +90,60 @@ namespace UniversalDetector.Core gotData = true; // If the data starts with BOM, we know it is UTF - if (start) { + if (start) + { start = false; - if (len > 3) { - switch (buf[0]) { - case 0xEF: - if (0xBB == buf[1] && 0xBF == buf[2]) - detectedCharset = "UTF-8"; - break; - case 0xFE: - if (0xFF == buf[1] && 0x00 == buf[2] && 0x00 == buf[3]) - // FE FF 00 00 UCS-4, unusual octet order BOM (3412) - detectedCharset = "X-ISO-10646-UCS-4-3412"; - else if (0xFF == buf[1]) - detectedCharset = "UTF-16BE"; - break; - case 0x00: - if (0x00 == buf[1] && 0xFE == buf[2] && 0xFF == buf[3]) - detectedCharset = "UTF-32BE"; - else if (0x00 == buf[1] && 0xFF == buf[2] && 0xFE == buf[3]) - // 00 00 FF FE UCS-4, unusual octet order BOM (2143) - detectedCharset = "X-ISO-10646-UCS-4-2143"; - break; - case 0xFF: - if (0xFE == buf[1] && 0x00 == buf[2] && 0x00 == buf[3]) - detectedCharset = "UTF-32LE"; - else if (0xFE == buf[1]) - detectedCharset = "UTF-16LE"; - break; + if (len > 3) + { + switch (buf[0]) + { + case 0xEF: + if (0xBB == buf[1] && 0xBF == buf[2]) + detectedCharset = "UTF-8"; + break; + case 0xFE: + if (0xFF == buf[1] && 0x00 == buf[2] && 0x00 == buf[3]) + // FE FF 00 00 UCS-4, unusual octet order BOM (3412) + detectedCharset = "X-ISO-10646-UCS-4-3412"; + else if (0xFF == buf[1]) + detectedCharset = "UTF-16BE"; + break; + case 0x00: + if (0x00 == buf[1] && 0xFE == buf[2] && 0xFF == buf[3]) + detectedCharset = "UTF-32BE"; + else if (0x00 == buf[1] && 0xFF == buf[2] && 0xFE == buf[3]) + // 00 00 FF FE UCS-4, unusual octet order BOM (2143) + detectedCharset = "X-ISO-10646-UCS-4-2143"; + break; + case 0xFF: + if (0xFE == buf[1] && 0x00 == buf[2] && 0x00 == buf[3]) + detectedCharset = "UTF-32LE"; + else if (0xFE == buf[1]) + detectedCharset = "UTF-16LE"; + break; } // switch } - if (detectedCharset != null) { + if (detectedCharset != null) + { done = true; return; } } - for (int i = 0; i < len; i++) { + for (int i = 0; i < len; i++) + { // other than 0xa0, if every other character is ascii, the page is ascii - if ((buf[i] & 0x80) != 0 && buf[i] != 0xA0) { + if ((buf[i] & 0x80) != 0 && buf[i] != 0xA0) + { // we got a non-ascii byte (high-byte) - if (inputState != InputState.Highbyte) { + if (inputState != InputState.Highbyte) + { inputState = InputState.Highbyte; // kill EscCharsetProber if it is active - if (escCharsetProber != null) { + if (escCharsetProber != null) + { escCharsetProber = null; } @@ -145,9 +155,12 @@ namespace UniversalDetector.Core if (charsetProbers[2] == null) charsetProbers[2] = new Latin1Prober(); } - } else { + } + else + { if (inputState == InputState.PureASCII && - (buf[i] == 0x33 || (buf[i] == 0x7B && lastChar == 0x7E))) { + (buf[i] == 0x33 || (buf[i] == 0x7B && lastChar == 0x7E))) + { // found escape character or HZ "~{" inputState = InputState.EscASCII; } @@ -157,25 +170,31 @@ namespace UniversalDetector.Core ProbingState st = ProbingState.NotMe; - switch (inputState) { + switch (inputState) + { case InputState.EscASCII: - if (escCharsetProber == null) { + if (escCharsetProber == null) + { escCharsetProber = new EscCharsetProber(); } st = escCharsetProber.HandleData(buf, offset, len); - if (st == ProbingState.FoundIt) { + if (st == ProbingState.FoundIt) + { done = true; detectedCharset = escCharsetProber.GetCharsetName(); } break; case InputState.Highbyte: - for (int i = 0; i < PROBERS_NUM; i++) { - if (charsetProbers[i] != null) { + for (int i = 0; i < PROBERS_NUM; i++) + { + if (charsetProbers[i] != null) + { st = charsetProbers[i].HandleData(buf, offset, len); - #if DEBUG +#if DEBUG charsetProbers[i].DumpStatus(); - #endif - if (st == ProbingState.FoundIt) { +#endif + if (st == ProbingState.FoundIt) + { done = true; detectedCharset = charsetProbers[i].GetCharsetName(); return; @@ -195,38 +214,47 @@ namespace UniversalDetector.Core /// public virtual void DataEnd() { - if (!gotData) { + if (!gotData) + { // we haven't got any data yet, return immediately // caller program sometimes call DataEnd before anything has // been sent to detector return; } - if (detectedCharset != null) { + if (detectedCharset != null) + { done = true; Report(detectedCharset, 1.0f); return; } - if (inputState == InputState.Highbyte) { + if (inputState == InputState.Highbyte) + { float proberConfidence = 0.0f; float maxProberConfidence = 0.0f; int maxProber = 0; - for (int i = 0; i < PROBERS_NUM; i++) { - if (charsetProbers[i] != null) { + for (int i = 0; i < PROBERS_NUM; i++) + { + if (charsetProbers[i] != null) + { proberConfidence = charsetProbers[i].GetConfidence(); - if (proberConfidence > maxProberConfidence) { + if (proberConfidence > maxProberConfidence) + { maxProberConfidence = proberConfidence; maxProber = i; } } } - if (maxProberConfidence > MINIMUM_THRESHOLD) { + if (maxProberConfidence > MINIMUM_THRESHOLD) + { Report(charsetProbers[maxProber].GetCharsetName(), maxProberConfidence); } - } else if (inputState == InputState.PureASCII) { + } + else if (inputState == InputState.PureASCII) + { Report("ASCII", 1.0f); } }