Merge pull request #2591 from MediaBrowser/beta

Beta
This commit is contained in:
Luke 2017-04-20 23:49:50 -04:00 committed by GitHub
commit f525f5a89e
135 changed files with 10654 additions and 1158 deletions

View file

@ -179,6 +179,11 @@ namespace Emby.Common.Implementations
}
}
/// <summary>
/// Gets the system update level. This implementation always reports
/// <see cref="PackageVersionClass.Release"/>; the property is virtual so a
/// derived class can report a different level.
/// </summary>
public virtual PackageVersionClass SystemUpdateLevel
{
    get
    {
        return PackageVersionClass.Release;
    }
}
public virtual string OperatingSystemDisplayName
{
get { return EnvironmentInfo.OperatingSystemName; }

View file

@ -1,16 +1,25 @@
using System.Text;
using System;
using System.Text;
using MediaBrowser.Model.IO;
using MediaBrowser.Model.Text;
using System.IO;
using System.Threading;
using System.Threading.Tasks;
using MediaBrowser.Model.MediaInfo;
using MediaBrowser.Model.Logging;
using UniversalDetector;
namespace Emby.Common.Implementations.TextEncoding
{
public class TextEncoding : ITextEncoding
{
private readonly IFileSystem _fileSystem;
private readonly ILogger _logger;
public TextEncoding(IFileSystem fileSystem)
public TextEncoding(IFileSystem fileSystem, ILogger logger)
{
_fileSystem = fileSystem;
_logger = logger;
}
public Encoding GetASCIIEncoding()
@ -18,16 +27,8 @@ namespace Emby.Common.Implementations.TextEncoding
return Encoding.ASCII;
}
public Encoding GetFileEncoding(string srcFile)
private Encoding GetInitialEncoding(byte[] buffer)
{
// *** Detect byte order mark if any - otherwise assume default
var buffer = new byte[5];
using (var file = _fileSystem.OpenRead(srcFile))
{
file.Read(buffer, 0, 5);
}
if (buffer[0] == 0xef && buffer[1] == 0xbb && buffer[2] == 0xbf)
return Encoding.UTF8;
if (buffer[0] == 0xfe && buffer[1] == 0xff)
@ -37,7 +38,154 @@ namespace Emby.Common.Implementations.TextEncoding
if (buffer[0] == 0x2b && buffer[1] == 0x2f && buffer[2] == 0x76)
return Encoding.UTF7;
var result = new TextEncodingDetect().DetectEncoding(buffer, buffer.Length);
switch (result)
{
case TextEncodingDetect.CharacterEncoding.Ansi:
return Encoding.ASCII;
case TextEncodingDetect.CharacterEncoding.Ascii:
return Encoding.ASCII;
case TextEncodingDetect.CharacterEncoding.Utf16BeBom:
return Encoding.UTF32;
case TextEncodingDetect.CharacterEncoding.Utf16BeNoBom:
return Encoding.UTF32;
case TextEncodingDetect.CharacterEncoding.Utf16LeBom:
return Encoding.UTF32;
case TextEncodingDetect.CharacterEncoding.Utf16LeNoBom:
return Encoding.UTF32;
case TextEncodingDetect.CharacterEncoding.Utf8Bom:
return Encoding.UTF8;
case TextEncodingDetect.CharacterEncoding.Utf8Nobom:
return Encoding.UTF8;
default:
return null;
}
}
/// <summary>
/// Works out the charset name for a block of text bytes.
/// </summary>
/// <param name="bytes">The raw bytes to inspect.</param>
/// <param name="language">Optional language code used as a fallback hint; may be null or blank.</param>
/// <returns>
/// "utf-8" when the initial check or the charset detector says UTF-8; otherwise the
/// detected charset unless it is windows-1252; otherwise the charset mapped from
/// <paramref name="language"/>; or null when none of these produce a name.
/// </returns>
public string GetDetectedEncodingName(byte[] bytes, string language)
{
    const string utf8Name = "utf-8";

    var initialEncoding = GetInitialEncoding(bytes);
    if (initialEncoding != null && initialEncoding.Equals(Encoding.UTF8))
    {
        return utf8Name;
    }

    var detected = DetectCharset(bytes, language);
    var hasDetected = !string.IsNullOrWhiteSpace(detected);

    if (hasDetected && string.Equals(detected, utf8Name, StringComparison.OrdinalIgnoreCase))
    {
        return utf8Name;
    }

    // A windows-1252 result is not returned directly; the language hint below gets a chance first.
    if (hasDetected && !string.Equals(detected, "windows-1252", StringComparison.OrdinalIgnoreCase))
    {
        return detected;
    }

    return string.IsNullOrWhiteSpace(language)
        ? null
        : GetFileCharacterSetFromLanguage(language);
}
/// <summary>
/// Resolves a charset name to an <see cref="Encoding"/> instance.
/// </summary>
/// <param name="charset">The charset name, e.g. "windows-1251".</param>
/// <returns>The matching encoding.</returns>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="charset"/> is null or blank.</exception>
/// <exception cref="ArgumentException">
/// Propagated from the second lookup when the name is unknown both with and without hyphens.
/// </exception>
public Encoding GetEncodingFromCharset(string charset)
{
    if (string.IsNullOrWhiteSpace(charset))
    {
        throw new ArgumentNullException("charset");
    }

    const string logFormat = "Getting encoding object for character set: {0}";
    _logger.Debug(logFormat, charset);

    Encoding resolved;
    try
    {
        resolved = Encoding.GetEncoding(charset);
    }
    catch (ArgumentException)
    {
        // Retry with the hyphens stripped from the name (e.g. "Big-5" becomes "Big5").
        var compactName = charset.Replace("-", string.Empty);
        _logger.Debug(logFormat, compactName);
        resolved = Encoding.GetEncoding(compactName);
    }

    return resolved;
}
/// <summary>
/// Detects the charset of <paramref name="bytes"/> and returns the corresponding encoding.
/// </summary>
/// <param name="bytes">The raw bytes to inspect.</param>
/// <param name="language">Optional language code used as a fallback hint.</param>
/// <returns>The encoding for the detected charset name.</returns>
/// <remarks>
/// When no charset name can be determined, the name is null and
/// <see cref="GetEncodingFromCharset"/> throws <see cref="ArgumentNullException"/>.
/// </remarks>
public Encoding GetDetectedEncoding(byte[] bytes, string language)
{
    return GetEncodingFromCharset(GetDetectedEncodingName(bytes, language));
}
/// <summary>
/// Maps a three-letter language code to the name of the code page to assume for
/// text in that language.
/// </summary>
/// <param name="language">The language code (case-insensitive). Must not be null.</param>
/// <returns>A charset name; "windows-1252" when the language has no specific mapping.</returns>
private string GetFileCharacterSetFromLanguage(string language)
{
    // https://developer.xamarin.com/api/type/System.Text.Encoding/

    // Lower-case with the invariant culture: the codes are identifiers, and a
    // culture-sensitive ToLower() under e.g. a Turkish culture turns "I" into a
    // dotless "ı", so upper-case codes such as "VIE" would miss their case label.
    switch (language.ToLowerInvariant())
    {
        case "hun":
            return "windows-1252";
        case "pol":
        case "cze":
        case "ces":
        case "slo":
        case "slk":
        case "slv":
        case "srp":
        case "hrv":
        case "rum":
        case "ron":
        case "rup":
        case "alb":
        case "sqi":
            return "windows-1250";
        case "ara":
            return "windows-1256";
        case "heb":
            return "windows-1255";
        case "grc":
        case "gre":
            return "windows-1253";
        case "crh":
        case "ota":
        case "tur":
            return "windows-1254";
        case "rus":
            return "windows-1251";
        case "vie":
            return "windows-1258";
        case "kor":
            return "cp949";
        default:
            return "windows-1252";
    }
}
/// <summary>
/// Runs the universal charset detector over the whole buffer.
/// </summary>
/// <param name="bytes">The raw bytes to inspect.</param>
/// <param name="language">Optional language hint; only used to decide whether to discard an unreliable result.</param>
/// <returns>The detected charset name, or null when nothing usable was detected.</returns>
private string DetectCharset(byte[] bytes, string language)
{
    var detector = new CharsetDetector();
    detector.Feed(bytes, 0, bytes.Length);
    detector.DataEnd();

    var detected = detector.Charset;

    // x-mac-cyrillic is often detected incorrectly. When a language hint is available,
    // discard the result so the caller can fall back to other techniques.
    var isMacCyrillic = string.Equals("x-mac-cyrillic", detected, StringComparison.OrdinalIgnoreCase);
    if (isMacCyrillic && !string.IsNullOrWhiteSpace(language))
    {
        return null;
    }

    return detected;
}
}
}

View file

@ -0,0 +1,414 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Threading.Tasks;
namespace Emby.Common.Implementations.TextEncoding
{
// Copyright 2015-2016 Jonathan Bennett <jon@autoitscript.com>
//
// https://www.autoitscript.com
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
/// <summary>
/// Guesses the encoding of a byte buffer from its byte order mark, UTF-8 validity
/// and the distribution of null bytes.
/// Credit: https://github.com/AutoIt/text-encoding-detect
/// </summary>
public class TextEncodingDetect
{
    private readonly byte[] _utf16BeBom =
    {
        0xFE,
        0xFF
    };

    private readonly byte[] _utf16LeBom =
    {
        0xFF,
        0xFE
    };

    private readonly byte[] _utf8Bom =
    {
        0xEF,
        0xBB,
        0xBF
    };

    private bool _nullSuggestsBinary = true;
    private double _utf16ExpectedNullPercent = 70;
    private double _utf16UnexpectedNullPercent = 10;

    public enum CharacterEncoding
    {
        None, // Unknown or binary
        Ansi, // 0-255
        Ascii, // 0-127
        Utf8Bom, // UTF8 with BOM
        Utf8Nobom, // UTF8 without BOM
        Utf16LeBom, // UTF16 LE with BOM
        Utf16LeNoBom, // UTF16 LE without BOM
        Utf16BeBom, // UTF16-BE with BOM
        Utf16BeNoBom // UTF16-BE without BOM
    }

    /// <summary>
    /// Sets if the presence of nulls in a buffer indicate the buffer is binary data rather than text.
    /// </summary>
    public bool NullSuggestsBinary
    {
        set
        {
            _nullSuggestsBinary = value;
        }
    }

    /// <summary>
    /// Sets the percentage of null bytes expected at the "null" positions of UTF-16 text.
    /// Values outside the open range (0, 100) are silently ignored.
    /// </summary>
    public double Utf16ExpectedNullPercent
    {
        set
        {
            if (value > 0 && value < 100)
            {
                _utf16ExpectedNullPercent = value;
            }
        }
    }

    /// <summary>
    /// Sets the percentage of null bytes tolerated at the "non-null" positions of UTF-16 text.
    /// Values outside the open range (0, 100) are silently ignored.
    /// </summary>
    public double Utf16UnexpectedNullPercent
    {
        set
        {
            if (value > 0 && value < 100)
            {
                _utf16UnexpectedNullPercent = value;
            }
        }
    }

    /// <summary>
    /// Gets the BOM length for a given Encoding mode.
    /// </summary>
    /// <param name="encoding">The detected encoding.</param>
    /// <returns>The BOM length in bytes; 0 for encodings without a BOM.</returns>
    public static int GetBomLengthFromEncodingMode(CharacterEncoding encoding)
    {
        int length;

        switch (encoding)
        {
            case CharacterEncoding.Utf16BeBom:
            case CharacterEncoding.Utf16LeBom:
                length = 2;
                break;

            case CharacterEncoding.Utf8Bom:
                length = 3;
                break;

            default:
                length = 0;
                break;
        }

        return length;
    }

    /// <summary>
    /// Checks for a BOM sequence in a byte buffer.
    /// </summary>
    /// <param name="buffer">The byte buffer.</param>
    /// <param name="size">The number of leading bytes of the buffer to consider.</param>
    /// <returns>Encoding type or CharacterEncoding.None if no BOM.</returns>
    public CharacterEncoding CheckBom(byte[] buffer, int size)
    {
        // Check for BOM
        if (size >= 2 && buffer[0] == _utf16LeBom[0] && buffer[1] == _utf16LeBom[1])
        {
            return CharacterEncoding.Utf16LeBom;
        }

        if (size >= 2 && buffer[0] == _utf16BeBom[0] && buffer[1] == _utf16BeBom[1])
        {
            return CharacterEncoding.Utf16BeBom;
        }

        if (size >= 3 && buffer[0] == _utf8Bom[0] && buffer[1] == _utf8Bom[1] && buffer[2] == _utf8Bom[2])
        {
            return CharacterEncoding.Utf8Bom;
        }

        return CharacterEncoding.None;
    }

    /// <summary>
    /// Automatically detects the Encoding type of a given byte buffer.
    /// The checks run in order: BOM, UTF-8 validity, UTF-16 newline pairs,
    /// UTF-16 null distribution, and finally ANSI versus binary.
    /// </summary>
    /// <param name="buffer">The byte buffer.</param>
    /// <param name="size">The size of the byte buffer.</param>
    /// <returns>The Encoding type or CharacterEncoding.None if unknown.</returns>
    public CharacterEncoding DetectEncoding(byte[] buffer, int size)
    {
        // First check if we have a BOM and return that if so
        CharacterEncoding encoding = CheckBom(buffer, size);
        if (encoding != CharacterEncoding.None)
        {
            return encoding;
        }

        // Now check for valid UTF8
        encoding = CheckUtf8(buffer, size);
        if (encoding != CharacterEncoding.None)
        {
            return encoding;
        }

        // Now try UTF16
        encoding = CheckUtf16NewlineChars(buffer, size);
        if (encoding != CharacterEncoding.None)
        {
            return encoding;
        }

        encoding = CheckUtf16Ascii(buffer, size);
        if (encoding != CharacterEncoding.None)
        {
            return encoding;
        }

        // ANSI or None (binary) then
        if (!DoesContainNulls(buffer, size))
        {
            return CharacterEncoding.Ansi;
        }

        // Found a null, return based on the NullSuggestsBinary preference
        return _nullSuggestsBinary ? CharacterEncoding.None : CharacterEncoding.Ansi;
    }

    /// <summary>
    /// Checks if a buffer contains text that looks like utf16 by scanning for
    /// newline chars that would be present even in non-english text.
    /// </summary>
    /// <param name="buffer">The byte buffer.</param>
    /// <param name="size">The size of the byte buffer.</param>
    /// <returns>CharacterEncoding.None, Utf16LeNoBom or Utf16BeNoBom.</returns>
    private static CharacterEncoding CheckUtf16NewlineChars(byte[] buffer, int size)
    {
        if (size < 2)
        {
            return CharacterEncoding.None;
        }

        // Reduce size by 1 so we don't need to worry about bounds checking for pairs of bytes
        size--;

        var leControlChars = 0;
        var beControlChars = 0;

        uint pos = 0;
        while (pos < size)
        {
            byte ch1 = buffer[pos++];
            byte ch2 = buffer[pos++];

            if (ch1 == 0)
            {
                if (ch2 == 0x0a || ch2 == 0x0d)
                {
                    ++beControlChars;
                }
            }
            else if (ch2 == 0)
            {
                if (ch1 == 0x0a || ch1 == 0x0d)
                {
                    ++leControlChars;
                }
            }

            // If we are getting both LE and BE control chars then this file is not utf16
            if (leControlChars > 0 && beControlChars > 0)
            {
                return CharacterEncoding.None;
            }
        }

        if (leControlChars > 0)
        {
            return CharacterEncoding.Utf16LeNoBom;
        }

        return beControlChars > 0 ? CharacterEncoding.Utf16BeNoBom : CharacterEncoding.None;
    }

    /// <summary>
    /// Checks if a buffer contains any nulls. Used to check for binary vs text data.
    /// </summary>
    /// <param name="buffer">The byte buffer.</param>
    /// <param name="size">The size of the byte buffer.</param>
    /// <returns>True when at least one of the first <paramref name="size"/> bytes is zero.</returns>
    private static bool DoesContainNulls(byte[] buffer, int size)
    {
        uint pos = 0;
        while (pos < size)
        {
            if (buffer[pos++] == 0)
            {
                return true;
            }
        }

        return false;
    }

    /// <summary>
    /// Checks if a buffer contains text that looks like utf16. This is done based
    /// on the use of nulls which in ASCII/script like text can be useful to identify.
    /// </summary>
    /// <param name="buffer">The byte buffer.</param>
    /// <param name="size">The size of the byte buffer.</param>
    /// <returns>CharacterEncoding.None, Utf16LeNoBom or Utf16BeNoBom.</returns>
    private CharacterEncoding CheckUtf16Ascii(byte[] buffer, int size)
    {
        var numOddNulls = 0;
        var numEvenNulls = 0;

        // Get even nulls
        uint pos = 0;
        while (pos < size)
        {
            if (buffer[pos] == 0)
            {
                numEvenNulls++;
            }

            pos += 2;
        }

        // Get odd nulls
        pos = 1;
        while (pos < size)
        {
            if (buffer[pos] == 0)
            {
                numOddNulls++;
            }

            pos += 2;
        }

        // Fraction of even/odd positions that hold a null (each parity covers about size / 2 bytes)
        double evenNullThreshold = numEvenNulls * 2.0 / size;
        double oddNullThreshold = numOddNulls * 2.0 / size;
        double expectedNullThreshold = _utf16ExpectedNullPercent / 100.0;
        double unexpectedNullThreshold = _utf16UnexpectedNullPercent / 100.0;

        // Lots of odd nulls, low number of even nulls
        if (evenNullThreshold < unexpectedNullThreshold && oddNullThreshold > expectedNullThreshold)
        {
            return CharacterEncoding.Utf16LeNoBom;
        }

        // Lots of even nulls, low number of odd nulls
        if (oddNullThreshold < unexpectedNullThreshold && evenNullThreshold > expectedNullThreshold)
        {
            return CharacterEncoding.Utf16BeNoBom;
        }

        // Don't know
        return CharacterEncoding.None;
    }

    /// <summary>
    /// Checks if a buffer contains valid utf8.
    /// </summary>
    /// <param name="buffer">The byte buffer.</param>
    /// <param name="size">The size of the byte buffer.</param>
    /// <returns>
    /// CharacterEncoding.None (invalid UTF8, or a null byte while nulls suggest binary),
    /// CharacterEncoding.Utf8Nobom (valid utf8 containing at least one multibyte lead byte) or
    /// CharacterEncoding.Ascii (all data in the 0-127 range).
    /// </returns>
    private CharacterEncoding CheckUtf8(byte[] buffer, int size)
    {
        // UTF8 Valid sequences
        // 0xxxxxxx  ASCII
        // 110xxxxx 10xxxxxx  2-byte
        // 1110xxxx 10xxxxxx 10xxxxxx  3-byte
        // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx  4-byte
        //
        // Width in UTF8
        // Decimal      Width
        // 0-127        1 byte
        // 194-223      2 bytes
        // 224-239      3 bytes
        // 240-244      4 bytes
        //
        // Subsequent chars are in the range 128-191
        var onlySawAsciiRange = true;
        uint pos = 0;

        while (pos < size)
        {
            byte ch = buffer[pos++];

            if (ch == 0 && _nullSuggestsBinary)
            {
                return CharacterEncoding.None;
            }

            int moreChars;
            if (ch <= 127)
            {
                // 1 byte
                moreChars = 0;
            }
            else if (ch >= 194 && ch <= 223)
            {
                // 2 Byte
                moreChars = 1;
            }
            else if (ch >= 224 && ch <= 239)
            {
                // 3 Byte
                moreChars = 2;
            }
            else if (ch >= 240 && ch <= 244)
            {
                // 4 Byte
                moreChars = 3;
            }
            else
            {
                return CharacterEncoding.None; // Not utf8
            }

            // A lead byte is itself non-ASCII. Record that here rather than inside the
            // continuation loop, so a buffer that ends right after a lead byte
            // (a sequence truncated by the buffer boundary) is not reported as Ascii.
            if (moreChars > 0)
            {
                onlySawAsciiRange = false;
            }

            // Check secondary chars are in range if we are expecting any.
            // A sequence cut short by the end of the buffer is tolerated.
            while (moreChars > 0 && pos < size)
            {
                ch = buffer[pos++];
                if (ch < 128 || ch > 191)
                {
                    return CharacterEncoding.None; // Not utf8
                }

                --moreChars;
            }
        }

        // If we get to here then only valid UTF-8 sequences have been processed

        // If we only saw chars in the range 0-127 then we can't assume UTF8 (the caller will need to decide)
        return onlySawAsciiRange ? CharacterEncoding.Ascii : CharacterEncoding.Utf8Nobom;
    }
}
}

View file

@ -0,0 +1,125 @@
/* ***** BEGIN LICENSE BLOCK *****
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
*
* The contents of this file are subject to the Mozilla Public License Version
* 1.1 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
* for the specific language governing rights and limitations under the
* License.
*
* The Original Code is Mozilla Universal charset detector code.
*
* The Initial Developer of the Original Code is
* Netscape Communications Corporation.
* Portions created by the Initial Developer are Copyright (C) 2001
* the Initial Developer. All Rights Reserved.
*
* Contributor(s):
* Shy Shalom <shooshX@gmail.com>
* Rudi Pettazzi <rudi.pettazzi@gmail.com> (C# port)
*
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
* in which case the provisions of the GPL or the LGPL are applicable instead
* of those above. If you wish to allow use of your version of this file only
* under the terms of either the GPL or the LGPL, and not to allow others to
* use your version of this file under the terms of the MPL, indicate your
* decision by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL or the LGPL. If you do not delete
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the MPL, the GPL or the LGPL.
*
* ***** END LICENSE BLOCK ***** */
using System.IO;
namespace UniversalDetector
{
/// <summary>
/// Default implementation of charset detection interface.
/// The detector can be fed by a System.IO.Stream:
/// <example>
/// <code>
/// using (FileStream fs = File.OpenRead(filename)) {
///     CharsetDetector cdet = new CharsetDetector();
///     cdet.Feed(fs);
///     cdet.DataEnd();
///     Console.WriteLine("{0}, {1}", cdet.Charset, cdet.Confidence);
/// }
/// </code>
/// </example>
///
/// or by a byte array:
///
/// <example>
/// <code>
/// byte[] buff = new byte[1024];
/// int read;
/// while ((read = stream.Read(buff, 0, buff.Length)) > 0 &amp;&amp; !done)
///     Feed(buff, 0, read);
/// cdet.DataEnd();
/// Console.WriteLine("{0}, {1}", cdet.Charset, cdet.Confidence);
/// </code>
/// </example>
/// </summary>
public class CharsetDetector : Core.UniversalDetector, ICharsetDetector
{
    // Last result handed to Report; null / 0 until a result is reported or after Reset.
    private string detectedCharset;
    private float detectedConfidence;

    public CharsetDetector() : base(FILTER_ALL)
    {
    }

    /// <summary>
    /// Reads the stream in 1024-byte chunks and feeds each chunk to the detector,
    /// stopping at end of stream or once the detector is done.
    /// </summary>
    /// <param name="stream">The stream to read from.</param>
    public void Feed(Stream stream)
    {
        var chunk = new byte[1024];

        while (true)
        {
            // The read happens before the done check, matching a
            // "(read = Read(...)) > 0 && !done" loop condition.
            int count = stream.Read(chunk, 0, chunk.Length);
            if (count <= 0 || done)
            {
                break;
            }

            Feed(chunk, 0, count);
        }
    }

    /// <summary>
    /// Returns the detector's done flag.
    /// </summary>
    public bool IsDone()
    {
        return done;
    }

    /// <summary>
    /// Clears the stored result and resets the base detector.
    /// </summary>
    public override void Reset()
    {
        detectedCharset = null;
        detectedConfidence = 0.0f;
        base.Reset();
    }

    /// <summary>
    /// Gets the charset name most recently reported, or null if none.
    /// </summary>
    public string Charset
    {
        get
        {
            return detectedCharset;
        }
    }

    /// <summary>
    /// Gets the confidence most recently reported alongside <see cref="Charset"/>.
    /// </summary>
    public float Confidence
    {
        get
        {
            return detectedConfidence;
        }
    }

    /// <summary>
    /// Stores the result reported by the base detector.
    /// </summary>
    protected override void Report(string charset, float confidence)
    {
        detectedCharset = charset;
        detectedConfidence = confidence;
    }
}
//public delegate void DetectorFinished(string charset, float confidence);
}

View file

@ -0,0 +1,106 @@
/* ***** BEGIN LICENSE BLOCK *****
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
*
* The contents of this file are subject to the Mozilla Public License Version
* 1.1 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
* for the specific language governing rights and limitations under the
* License.
*
* The Original Code is Mozilla Universal charset detector code.
*
* The Initial Developer of the Original Code is
* Netscape Communications Corporation.
* Portions created by the Initial Developer are Copyright (C) 2001
* the Initial Developer. All Rights Reserved.
*
* Contributor(s):
* Shy Shalom <shooshX@gmail.com>
* Rudi Pettazzi <rudi.pettazzi@gmail.com> (C# port)
*
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
* in which case the provisions of the GPL or the LGPL are applicable instead
* of those above. If you wish to allow use of your version of this file only
* under the terms of either the GPL or the LGPL, and not to allow others to
* use your version of this file under the terms of the MPL, indicate your
* decision by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL or the LGPL. If you do not delete
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the MPL, the GPL or the LGPL.
*
* ***** END LICENSE BLOCK ***** */
namespace UniversalDetector.Core
{
/// <summary>
/// Prober for the "Big-5" charset. Each byte is run through a coding state machine;
/// every completed character is passed to a character-distribution analyser, whose
/// confidence is this prober's confidence.
/// </summary>
public class Big5Prober : CharsetProber
{
    private CodingStateMachine codingSM;
    private BIG5DistributionAnalyser distributionAnalyser;

    // lastChar[0] keeps the final byte of the previous buffer, so a character that
    // straddles two HandleData calls can be handed to the analyser in one piece.
    private byte[] lastChar = new byte[2];

    public Big5Prober()
    {
        this.codingSM = new CodingStateMachine(new BIG5SMModel());
        this.distributionAnalyser = new BIG5DistributionAnalyser();
        this.Reset();
    }

    /// <summary>
    /// Feeds <paramref name="len"/> bytes of <paramref name="buf"/>, starting at
    /// <paramref name="offset"/>, to the prober.
    /// </summary>
    /// <returns>The prober state after processing the data.</returns>
    public override ProbingState HandleData(byte[] buf, int offset, int len)
    {
        int max = offset + len;

        for (int i = offset; i < max; i++) {
            int codingState = codingSM.NextState(buf[i]);
            if (codingState == SMModel.ERROR) {
                state = ProbingState.NotMe;
                break;
            }
            if (codingState == SMModel.ITSME) {
                state = ProbingState.FoundIt;
                break;
            }
            if (codingState == SMModel.START) {
                int charLen = codingSM.CurrentCharLen;
                if (i == offset) {
                    // The character started in the previous buffer: combine its saved byte with this one.
                    lastChar[1] = buf[offset];
                    distributionAnalyser.HandleOneChar(lastChar, 0, charLen);
                } else {
                    distributionAnalyser.HandleOneChar(buf, i - 1, charLen);
                }
            }
        }

        // Only remember the trailing byte when data was actually supplied. With an
        // empty window, buf[max - 1] is the byte before the window, or an
        // out-of-range index when offset is 0.
        if (len > 0) {
            lastChar[0] = buf[max - 1];
        }

        if (state == ProbingState.Detecting) {
            if (distributionAnalyser.GotEnoughData() && GetConfidence() > SHORTCUT_THRESHOLD) {
                state = ProbingState.FoundIt;
            }
        }

        return state;
    }

    /// <summary>
    /// Resets the state machine and the analyser and returns to the Detecting state.
    /// </summary>
    public override void Reset()
    {
        codingSM.Reset();
        state = ProbingState.Detecting;
        distributionAnalyser.Reset();
    }

    public override string GetCharsetName()
    {
        return "Big-5";
    }

    public override float GetConfidence()
    {
        return distributionAnalyser.GetConfidence();
    }
}
}

View file

@ -0,0 +1,98 @@
/* ***** BEGIN LICENSE BLOCK *****
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
*
* The contents of this file are subject to the Mozilla Public License Version
* 1.1 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
* for the specific language governing rights and limitations under the
* License.
*
* The Original Code is Mozilla Universal charset detector code.
*
* The Initial Developer of the Original Code is
* Netscape Communications Corporation.
* Portions created by the Initial Developer are Copyright (C) 2001
* the Initial Developer. All Rights Reserved.
*
* Contributor(s):
* Kohei TAKETA <k-tak@void.in> (Java port)
* Rudi Pettazzi <rudi.pettazzi@gmail.com> (C# port)
*
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
* in which case the provisions of the GPL or the LGPL are applicable instead
* of those above. If you wish to allow use of your version of this file only
* under the terms of either the GPL or the LGPL, and not to allow others to
* use your version of this file under the terms of the MPL, indicate your
* decision by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL or the LGPL. If you do not delete
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the MPL, the GPL or the LGPL.
*
* ***** END LICENSE BLOCK ***** */
namespace UniversalDetector.Core
{
/// <summary>
/// Read-only view over an int array in which several small fixed-width values
/// (4, 8 or 16 bits) are packed into each 32-bit element, plus static helpers
/// that build such packed ints.
/// </summary>
public class BitPackage
{
    public static int INDEX_SHIFT_4BITS = 3;
    public static int INDEX_SHIFT_8BITS = 2;
    public static int INDEX_SHIFT_16BITS = 1;

    public static int SHIFT_MASK_4BITS = 7;
    public static int SHIFT_MASK_8BITS = 3;
    public static int SHIFT_MASK_16BITS = 1;

    public static int BIT_SHIFT_4BITS = 2;
    public static int BIT_SHIFT_8BITS = 3;
    public static int BIT_SHIFT_16BITS = 4;

    public static int UNIT_MASK_4BITS = 0x0000000F;
    public static int UNIT_MASK_8BITS = 0x000000FF;
    public static int UNIT_MASK_16BITS = 0x0000FFFF;

    private int _indexShift;
    private int _shiftMask;
    private int _bitShift;
    private int _unitMask;
    private int[] _packed;

    /// <summary>
    /// Creates a view over <paramref name="data"/> using the given layout parameters
    /// (normally one matching set of the *_4BITS, *_8BITS or *_16BITS constants).
    /// </summary>
    public BitPackage(int indexShift, int shiftMask,
                      int bitShift, int unitMask, int[] data)
    {
        _indexShift = indexShift;
        _shiftMask = shiftMask;
        _bitShift = bitShift;
        _unitMask = unitMask;
        _packed = data;
    }

    /// <summary>
    /// Packs two values into one int: <paramref name="a"/> in the low 16 bits,
    /// <paramref name="b"/> shifted into the high 16 bits.
    /// </summary>
    public static int Pack16bits(int a, int b)
    {
        int high = b << 16;
        return high | a;
    }

    /// <summary>
    /// Packs four values into one int, lowest byte first (a, b, c, d).
    /// </summary>
    public static int Pack8bits(int a, int b, int c, int d)
    {
        int lowHalf = (b << 8) | a;
        int highHalf = (d << 8) | c;
        return Pack16bits(lowHalf, highHalf);
    }

    /// <summary>
    /// Packs eight values into one int, lowest nibble first (a through h).
    /// </summary>
    public static int Pack4bits(int a, int b, int c, int d,
                                int e, int f, int g, int h)
    {
        int byte0 = (b << 4) | a;
        int byte1 = (d << 4) | c;
        int byte2 = (f << 4) | e;
        int byte3 = (h << 4) | g;
        return Pack8bits(byte0, byte1, byte2, byte3);
    }

    /// <summary>
    /// Extracts the value at logical position <paramref name="i"/>.
    /// </summary>
    public int Unpack(int i)
    {
        int word = _packed[i >> _indexShift];         // array element holding the value
        int bitOffset = (i & _shiftMask) << _bitShift; // position of the value inside that element
        return (word >> bitOffset) & _unitMask;
    }
}
}

View file

@ -0,0 +1,191 @@
/* ***** BEGIN LICENSE BLOCK *****
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
*
* The contents of this file are subject to the Mozilla Public License Version
* 1.1 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
* for the specific language governing rights and limitations under the
* License.
*
* The Original Code is Mozilla Universal charset detector code.
*
* The Initial Developer of the Original Code is
* Netscape Communications Corporation.
* Portions created by the Initial Developer are Copyright (C) 2001
* the Initial Developer. All Rights Reserved.
*
* Contributor(s):
* Shy Shalom <shooshX@gmail.com>
* Rudi Pettazzi <rudi.pettazzi@gmail.com> (C# port)
*
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
* in which case the provisions of the GPL or the LGPL are applicable instead
* of those above. If you wish to allow use of your version of this file only
* under the terms of either the GPL or the LGPL, and not to allow others to
* use your version of this file under the terms of the MPL, indicate your
* decision by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL or the LGPL. If you do not delete
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the MPL, the GPL or the LGPL.
*
* ***** END LICENSE BLOCK ***** */
using System.IO;
namespace UniversalDetector.Core
{
/// <summary>
/// State a charset prober can be in while it is being fed data.
/// </summary>
public enum ProbingState
{
    /// <summary>No sure answer yet, but caller can ask for confidence.</summary>
    Detecting = 0,

    /// <summary>Positive answer.</summary>
    FoundIt = 1,

    /// <summary>Negative answer.</summary>
    NotMe = 2
}
/// <summary>
/// Base class for charset probers. Declares the probing contract and provides two
/// buffer filters for use by derived probers.
/// </summary>
public abstract class CharsetProber
{
    protected const float SHORTCUT_THRESHOLD = 0.95F;

    protected ProbingState state;

    // ASCII codes
    private const byte SPACE = 0x20;
    private const byte CAPITAL_A = 0x41;
    private const byte CAPITAL_Z = 0x5A;
    private const byte SMALL_A = 0x61;
    private const byte SMALL_Z = 0x7A;
    private const byte LESS_THAN = 0x3C;
    private const byte GREATER_THAN = 0x3E;

    /// <summary>
    /// Feed data to the prober
    /// </summary>
    /// <param name="buf">a buffer</param>
    /// <param name="offset">offset into buffer</param>
    /// <param name="len">number of bytes available into buffer</param>
    /// <returns>
    /// A <see cref="ProbingState"/>
    /// </returns>
    public abstract ProbingState HandleData(byte[] buf, int offset, int len);

    /// <summary>
    /// Reset prober state
    /// </summary>
    public abstract void Reset();

    public abstract string GetCharsetName();

    public abstract float GetConfidence();

    public virtual ProbingState GetState()
    {
        return state;
    }

    public virtual void SetOption()
    {
    }

    public virtual void DumpStatus()
    {
    }

    //
    // Helper functions used in the Latin1 and Group probers
    //

    /// <summary>
    /// Keeps only the segments that contain at least one byte with the high bit set.
    /// Segments are delimited by ASCII bytes that are not English letters; each kept
    /// segment that ends at a delimiter is followed by a single space.
    /// </summary>
    /// <returns>filtered buffer</returns>
    protected static byte[] FilterWithoutEnglishLetters(byte[] buf, int offset, int len)
    {
        int end = offset + len;

        using (var output = new MemoryStream(buf.Length))
        {
            bool sawHighByte = false;
            int segmentStart = offset;
            int index = offset;

            for (; index < end; index++)
            {
                byte current = buf[index];

                if ((current & 0x80) != 0)
                {
                    sawHighByte = true;
                    continue;
                }

                if (IsEnglishLetter(current))
                {
                    continue;
                }

                // ASCII non-letter: closes the current segment.
                if (sawHighByte && index > segmentStart)
                {
                    output.Write(buf, segmentStart, index - segmentStart);
                    output.WriteByte(SPACE);
                    sawHighByte = false;
                }

                segmentStart = index + 1;
            }

            // Trailing segment that was not closed by a delimiter.
            if (sawHighByte && index > segmentStart)
            {
                output.Write(buf, segmentStart, index - segmentStart);
            }

            return output.ToArray();
        }
    }

    /// <summary>
    /// Do filtering to reduce load to probers (Remove ASCII symbols,
    /// collapse spaces). This filter applies to all scripts which contain
    /// both English characters and upper ASCII characters.
    /// Segments that end while inside a '&lt;' ... '&gt;' tag are dropped.
    /// </summary>
    /// <returns>a filtered copy of the input buffer</returns>
    protected static byte[] FilterWithEnglishLetters(byte[] buf, int offset, int len)
    {
        int end = offset + len;

        using (var output = new MemoryStream(buf.Length))
        {
            bool insideTag = false;
            int segmentStart = offset;
            int index = offset;

            for (; index < end; index++)
            {
                byte current = buf[index];

                switch (current)
                {
                    case GREATER_THAN:
                        insideTag = false;
                        break;
                    case LESS_THAN:
                        insideTag = true;
                        break;
                }

                bool isHighByte = (current & 0x80) != 0;
                if (isHighByte || IsEnglishLetter(current))
                {
                    continue;
                }

                // it's ascii, but it's not a letter
                if (index > segmentStart && !insideTag)
                {
                    output.Write(buf, segmentStart, index - segmentStart);
                    output.WriteByte(SPACE);
                }

                segmentStart = index + 1;
            }

            // If the current segment contains more than just a symbol
            // and it is not inside a tag then keep it.
            if (!insideTag && index > segmentStart)
            {
                output.Write(buf, segmentStart, index - segmentStart);
            }

            return output.ToArray();
        }
    }

    /// <summary>
    /// True when the byte is an ASCII letter A-Z or a-z.
    /// </summary>
    private static bool IsEnglishLetter(byte value)
    {
        return (value >= CAPITAL_A && value <= CAPITAL_Z)
            || (value >= SMALL_A && value <= SMALL_Z);
    }
}
}

View file

@ -0,0 +1,149 @@
/* ***** BEGIN LICENSE BLOCK *****
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
*
* The contents of this file are subject to the Mozilla Public License Version
* 1.1 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
* for the specific language governing rights and limitations under the
* License.
*
* The Original Code is mozilla.org code.
*
* The Initial Developer of the Original Code is
* Netscape Communications Corporation.
* Portions created by the Initial Developer are Copyright (C) 1998
* the Initial Developer. All Rights Reserved.
*
* Contributor(s):
* Rudi Pettazzi <rudi.pettazzi@gmail.com> (C# port)
*
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
* in which case the provisions of the GPL or the LGPL are applicable instead
* of those above. If you wish to allow use of your version of this file only
* under the terms of either the GPL or the LGPL, and not to allow others to
* use your version of this file under the terms of the MPL, indicate your
* decision by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL or the LGPL. If you do not delete
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the MPL, the GPL or the LGPL.
*
* ***** END LICENSE BLOCK ***** */
namespace UniversalDetector.Core
{
/// <summary>
/// String constants naming the character sets used by the universal charset
/// detector port in this namespace.
/// </summary>
public static class Charsets
{
// ASCII and Unicode encodings.
public const string ASCII = "ASCII";
public const string UTF8 = "UTF-8";
public const string UTF16_LE = "UTF-16LE";
public const string UTF16_BE = "UTF-16BE";
public const string UTF32_BE = "UTF-32BE";
public const string UTF32_LE = "UTF-32LE";
/// <summary>
/// Unusual BOM (3412 order)
/// </summary>
public const string UCS4_3412 = "X-ISO-10646-UCS-4-3412";
/// <summary>
/// Unusual BOM (2413 order)
/// </summary>
public const string UCS4_2413 = "X-ISO-10646-UCS-4-2413";
/// <summary>
/// Cyrillic (based on bulgarian and russian data)
/// </summary>
public const string WIN1251 = "windows-1251";
/// <summary>
/// Latin-1, almost identical to ISO-8859-1
/// </summary>
public const string WIN1252 = "windows-1252";
/// <summary>
/// Greek
/// </summary>
public const string WIN1253 = "windows-1253";
/// <summary>
/// Logical hebrew (includes ISO-8859-8-I and most of x-mac-hebrew)
/// </summary>
public const string WIN1255 = "windows-1255";
/// <summary>
/// Traditional chinese
/// </summary>
public const string BIG5 = "Big-5";
// EUC family names.
public const string EUCKR = "EUC-KR";
public const string EUCJP = "EUC-JP";
public const string EUCTW = "EUC-TW";
/// <summary>
/// Note: gb2312 is a subset of gb18030
/// </summary>
public const string GB18030 = "gb18030";
// ISO-2022 family names.
public const string ISO2022_JP = "ISO-2022-JP";
public const string ISO2022_CN = "ISO-2022-CN";
public const string ISO2022_KR = "ISO-2022-KR";
/// <summary>
/// Simplified chinese
/// </summary>
public const string HZ_GB_2312 = "HZ-GB-2312";
public const string SHIFT_JIS = "Shift-JIS";
// NOTE(review): TextEncoding.DetectCharset discards an "x-mac-cyrillic" result when a
// language hint is available, because it is often detected incorrectly.
public const string MAC_CYRILLIC = "x-mac-cyrillic";
public const string KOI8R = "KOI8-R";
public const string IBM855 = "IBM855";
public const string IBM866 = "IBM866";
/// <summary>
/// East-Europe. Disabled because too similar to windows-1252
/// (latin-1). Should use tri-grams models to discriminate between
/// these two charsets.
/// </summary>
public const string ISO8859_2 = "ISO-8859-2";
/// <summary>
/// Cyrillic
/// </summary>
public const string ISO8859_5 = "ISO-8859-5";
/// <summary>
/// Greek
/// </summary>
public const string ISO_8859_7 = "ISO-8859-7";
/// <summary>
/// Visual Hebrew
/// </summary>
public const string ISO8859_8 = "ISO-8859-8";
/// <summary>
/// Thai. This recognizer is not enabled yet.
/// </summary>
public const string TIS620 = "TIS620";
}
}

View file

@ -0,0 +1,90 @@
/* ***** BEGIN LICENSE BLOCK *****
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
*
* The contents of this file are subject to the Mozilla Public License Version
* 1.1 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
* for the specific language governing rights and limitations under the
* License.
*
* The Original Code is mozilla.org code.
*
* The Initial Developer of the Original Code is
* Netscape Communications Corporation.
* Portions created by the Initial Developer are Copyright (C) 1998
* the Initial Developer. All Rights Reserved.
*
* Contributor(s):
* Shy Shalom <shooshX@gmail.com>
* Kohei TAKETA <k-tak@void.in> (Java port)
* Rudi Pettazzi <rudi.pettazzi@gmail.com> (C# port)
*
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
* in which case the provisions of the GPL or the LGPL are applicable instead
* of those above. If you wish to allow use of your version of this file only
* under the terms of either the GPL or the LGPL, and not to allow others to
* use your version of this file under the terms of the MPL, indicate your
* decision by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL or the LGPL. If you do not delete
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the MPL, the GPL or the LGPL.
*
* ***** END LICENSE BLOCK ***** */
namespace UniversalDetector.Core
{
/// <summary>
/// Parallel state machine for the Coding Scheme Method
/// </summary>
public class CodingStateMachine
{
    // Tables (byte classes, transitions, character lengths) driving this machine.
    private readonly SMModel model;
    // State reached after the most recently consumed byte.
    private int currentState;
    // Length looked up for the character currently being consumed.
    private int currentCharLen;
    // Number of bytes consumed since the machine was last in the START state.
    private int currentBytePos;

    /// <summary>
    /// Creates a state machine over the given model, positioned at the
    /// START state.
    /// </summary>
    /// <param name="model">Model supplying the class, state and length tables.</param>
    public CodingStateMachine(SMModel model)
    {
        this.model = model;
        this.currentState = SMModel.START;
    }

    /// <summary>
    /// Length recorded for the current character, taken from the model's
    /// character-length table when its first byte was consumed.
    /// </summary>
    public int CurrentCharLen
    {
        get { return currentCharLen; }
    }

    /// <summary>
    /// Name of the model this machine runs on.
    /// </summary>
    public string ModelName
    {
        get { return model.Name; }
    }

    /// <summary>
    /// Puts the machine back into the START state.
    /// </summary>
    public void Reset()
    {
        currentState = SMModel.START;
    }

    /// <summary>
    /// Consumes one byte and advances the machine.
    /// </summary>
    /// <param name="b">The next input byte.</param>
    /// <returns>The state the machine is in after consuming the byte.</returns>
    public int NextState(byte b)
    {
        // Every byte is first reduced to its class.
        int byteClass = model.GetClass(b);

        // A byte seen in the START state begins a new character, so the
        // character length is looked up from its class at this point.
        bool startsNewChar = (currentState == SMModel.START);
        if (startsNewChar) {
            currentBytePos = 0;
            currentCharLen = model.charLenTable[byteClass];
        }

        // The transition table is indexed by (state, class) flattened
        // with the model's class factor.
        int transitionIndex = currentState * model.ClassFactor + byteClass;
        currentState = model.stateTable.Unpack(transitionIndex);
        currentBytePos += 1;

        return currentState;
    }
}
}

View file

@ -0,0 +1,110 @@
/* ***** BEGIN LICENSE BLOCK *****
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
*
* The contents of this file are subject to the Mozilla Public License Version
* 1.1 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
* for the specific language governing rights and limitations under the
* License.
*
* The Original Code is Mozilla Universal charset detector code.
*
* The Initial Developer of the Original Code is
* Netscape Communications Corporation.
* Portions created by the Initial Developer are Copyright (C) 2001
* the Initial Developer. All Rights Reserved.
*
* Contributor(s):
* Shy Shalom <shooshX@gmail.com>
* Rudi Pettazzi <rudi.pettazzi@gmail.com> (C# port)
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
* in which case the provisions of the GPL or the LGPL are applicable instead
* of those above. If you wish to allow use of your version of this file only
* under the terms of either the GPL or the LGPL, and not to allow others to
* use your version of this file under the terms of the MPL, indicate your
* decision by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL or the LGPL. If you do not delete
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the MPL, the GPL or the LGPL.
*
* ***** END LICENSE BLOCK ***** */
namespace UniversalDetector.Core
{
/// <summary>
/// Prober reporting "EUC-JP". It combines a coding state machine with a
/// context analyser and a character-distribution analyser.
/// </summary>
public class EUCJPProber : CharsetProber
{
    private CodingStateMachine codingSM;
    private EUCJPContextAnalyser contextAnalyser;
    private EUCJPDistributionAnalyser distributionAnalyser;
    // lastChar[0] keeps the final byte of the previously handled buffer;
    // lastChar[1] receives the first byte of the current buffer when a
    // character is completed right at the buffer start.
    private byte[] lastChar = new byte[2];

    public EUCJPProber()
    {
        this.codingSM = new CodingStateMachine(new EUCJPSMModel());
        this.distributionAnalyser = new EUCJPDistributionAnalyser();
        this.contextAnalyser = new EUCJPContextAnalyser();
        this.Reset();
    }

    /// <summary>
    /// Returns the name of the charset this prober detects.
    /// </summary>
    public override string GetCharsetName()
    {
        return "EUC-JP";
    }

    /// <summary>
    /// Returns the larger of the context and distribution confidences.
    /// </summary>
    public override float GetConfidence()
    {
        float fromContext = contextAnalyser.GetConfidence();
        float fromDistribution = distributionAnalyser.GetConfidence();
        if (fromContext > fromDistribution) {
            return fromContext;
        }
        return fromDistribution;
    }

    /// <summary>
    /// Restores the state machine, both analysers and the probing state
    /// to their initial condition.
    /// </summary>
    public override void Reset()
    {
        codingSM.Reset();
        state = ProbingState.Detecting;
        contextAnalyser.Reset();
        distributionAnalyser.Reset();
    }

    /// <summary>
    /// Feeds <paramref name="len"/> bytes of <paramref name="buf"/>,
    /// starting at <paramref name="offset"/>, to the prober.
    /// </summary>
    /// <param name="buf">Buffer holding the input bytes.</param>
    /// <param name="offset">Index of the first byte to examine.</param>
    /// <param name="len">Number of bytes to examine.</param>
    /// <returns>The probing state after the bytes have been handled.</returns>
    public override ProbingState HandleData(byte[] buf, int offset, int len)
    {
        int end = offset + len;

        for (int pos = offset; pos < end; pos++) {
            int smState = codingSM.NextState(buf[pos]);

            if (smState == SMModel.ERROR) {
                state = ProbingState.NotMe;
                break;
            }
            if (smState == SMModel.ITSME) {
                state = ProbingState.FoundIt;
                break;
            }
            if (smState != SMModel.START) {
                continue;
            }

            // Back in START: a complete character has just been consumed.
            int charLen = codingSM.CurrentCharLen;
            if (pos != offset) {
                // The character starts one byte before the current position.
                contextAnalyser.HandleOneChar(buf, pos - 1, charLen);
                distributionAnalyser.HandleOneChar(buf, pos - 1, charLen);
            } else {
                // The character began in the previous buffer; rebuild it
                // from the remembered byte plus the first byte of this one.
                lastChar[1] = buf[offset];
                contextAnalyser.HandleOneChar(lastChar, 0, charLen);
                distributionAnalyser.HandleOneChar(lastChar, 0, charLen);
            }
        }

        // Remember the last byte for a character straddling two buffers.
        lastChar[0] = buf[end - 1];

        if (state == ProbingState.Detecting
            && contextAnalyser.GotEnoughData()
            && GetConfidence() > SHORTCUT_THRESHOLD) {
            state = ProbingState.FoundIt;
        }
        return state;
    }
}
}

View file

@ -0,0 +1,107 @@
/* ***** BEGIN LICENSE BLOCK *****
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
*
* The contents of this file are subject to the Mozilla Public License Version
* 1.1 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
* for the specific language governing rights and limitations under the
* License.
*
* The Original Code is Mozilla Universal charset detector code.
*
* The Initial Developer of the Original Code is
* Netscape Communications Corporation.
* Portions created by the Initial Developer are Copyright (C) 2001
* the Initial Developer. All Rights Reserved.
*
* Contributor(s):
* Shy Shalom <shooshX@gmail.com>
* Rudi Pettazzi <rudi.pettazzi@gmail.com> (C# port)
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
* in which case the provisions of the GPL or the LGPL are applicable instead
* of those above. If you wish to allow use of your version of this file only
* under the terms of either the GPL or the LGPL, and not to allow others to
* use your version of this file under the terms of the MPL, indicate your
* decision by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL or the LGPL. If you do not delete
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the MPL, the GPL or the LGPL.
*
* ***** END LICENSE BLOCK ***** */
namespace UniversalDetector.Core
{
/// <summary>
/// Prober reporting "EUC-KR". It combines a coding state machine with a
/// character-distribution analyser.
/// </summary>
public class EUCKRProber : CharsetProber
{
    private CodingStateMachine codingSM;
    private EUCKRDistributionAnalyser distributionAnalyser;
    // lastChar[0] keeps the final byte of the previously handled buffer;
    // lastChar[1] receives the first byte of the current buffer when a
    // character is completed right at the buffer start.
    private byte[] lastChar = new byte[2];

    public EUCKRProber()
    {
        this.codingSM = new CodingStateMachine(new EUCKRSMModel());
        this.distributionAnalyser = new EUCKRDistributionAnalyser();
        this.Reset();
    }

    /// <summary>
    /// Returns the name of the charset this prober detects.
    /// </summary>
    public override string GetCharsetName()
    {
        return "EUC-KR";
    }

    /// <summary>
    /// Returns the confidence reported by the distribution analyser.
    /// </summary>
    public override float GetConfidence()
    {
        return distributionAnalyser.GetConfidence();
    }

    /// <summary>
    /// Restores the state machine, the analyser and the probing state to
    /// their initial condition.
    /// </summary>
    public override void Reset()
    {
        codingSM.Reset();
        state = ProbingState.Detecting;
        distributionAnalyser.Reset();
    }

    /// <summary>
    /// Feeds <paramref name="len"/> bytes of <paramref name="buf"/>,
    /// starting at <paramref name="offset"/>, to the prober.
    /// </summary>
    /// <param name="buf">Buffer holding the input bytes.</param>
    /// <param name="offset">Index of the first byte to examine.</param>
    /// <param name="len">Number of bytes to examine.</param>
    /// <returns>The probing state after the bytes have been handled.</returns>
    public override ProbingState HandleData(byte[] buf, int offset, int len)
    {
        int end = offset + len;

        for (int pos = offset; pos < end; pos++) {
            int smState = codingSM.NextState(buf[pos]);

            if (smState == SMModel.ERROR) {
                state = ProbingState.NotMe;
                break;
            }
            if (smState == SMModel.ITSME) {
                state = ProbingState.FoundIt;
                break;
            }
            if (smState != SMModel.START) {
                continue;
            }

            // Back in START: a complete character has just been consumed.
            int charLen = codingSM.CurrentCharLen;
            if (pos != offset) {
                // The character starts one byte before the current position.
                distributionAnalyser.HandleOneChar(buf, pos - 1, charLen);
            } else {
                // The character began in the previous buffer; rebuild it
                // from the remembered byte plus the first byte of this one.
                lastChar[1] = buf[offset];
                distributionAnalyser.HandleOneChar(lastChar, 0, charLen);
            }
        }

        // Remember the last byte for a character straddling two buffers.
        lastChar[0] = buf[end - 1];

        if (state == ProbingState.Detecting
            && distributionAnalyser.GotEnoughData()
            && GetConfidence() > SHORTCUT_THRESHOLD) {
            state = ProbingState.FoundIt;
        }
        return state;
    }
}
}

View file

@ -0,0 +1,106 @@
/* ***** BEGIN LICENSE BLOCK *****
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
*
* The contents of this file are subject to the Mozilla Public License Version
* 1.1 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
* for the specific language governing rights and limitations under the
* License.
*
* The Original Code is Mozilla Universal charset detector code.
*
* The Initial Developer of the Original Code is
* Netscape Communications Corporation.
* Portions created by the Initial Developer are Copyright (C) 2001
* the Initial Developer. All Rights Reserved.
*
* Contributor(s):
* Shy Shalom <shooshX@gmail.com>
* Rudi Pettazzi <rudi.pettazzi@gmail.com> (C# port)
*
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
* in which case the provisions of the GPL or the LGPL are applicable instead
* of those above. If you wish to allow use of your version of this file only
* under the terms of either the GPL or the LGPL, and not to allow others to
* use your version of this file under the terms of the MPL, indicate your
* decision by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL or the LGPL. If you do not delete
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the MPL, the GPL or the LGPL.
*
* ***** END LICENSE BLOCK ***** */
namespace UniversalDetector.Core
{
/// <summary>
/// Prober reporting "x-euc-tw". It combines a coding state machine with a
/// character-distribution analyser.
/// </summary>
public class EUCTWProber : CharsetProber
{
    private CodingStateMachine codingSM;
    private EUCTWDistributionAnalyser distributionAnalyser;
    // lastChar[0] keeps the final byte of the previously handled buffer;
    // lastChar[1] receives the first byte of the current buffer when a
    // character is completed right at the buffer start.
    private byte[] lastChar = new byte[2];

    public EUCTWProber()
    {
        this.codingSM = new CodingStateMachine(new EUCTWSMModel());
        this.distributionAnalyser = new EUCTWDistributionAnalyser();
        this.Reset();
    }

    /// <summary>
    /// Feeds <paramref name="len"/> bytes of <paramref name="buf"/>,
    /// starting at <paramref name="offset"/>, to the prober.
    /// </summary>
    /// <param name="buf">Buffer holding the input bytes.</param>
    /// <param name="offset">Index of the first byte to examine.</param>
    /// <param name="len">Number of bytes to examine.</param>
    /// <returns>The probing state after the bytes have been handled.</returns>
    public override ProbingState HandleData(byte[] buf, int offset, int len)
    {
        int codingState;
        int max = offset + len;

        // Start at 'offset', not 0: only the window [offset, offset + len)
        // belongs to this call. Starting at 0 fed bytes in front of the
        // window to the state machine and triggered the buffer-boundary
        // branch below at the wrong position whenever offset was non-zero.
        for (int i = offset; i < max; i++) {
            codingState = codingSM.NextState(buf[i]);
            if (codingState == SMModel.ERROR) {
                state = ProbingState.NotMe;
                break;
            }
            if (codingState == SMModel.ITSME) {
                state = ProbingState.FoundIt;
                break;
            }
            if (codingState == SMModel.START) {
                // Back in START: a complete character has just been consumed.
                int charLen = codingSM.CurrentCharLen;
                if (i == offset) {
                    // The character began in the previous buffer; rebuild
                    // it from the remembered byte plus the first byte of
                    // this buffer.
                    lastChar[1] = buf[offset];
                    distributionAnalyser.HandleOneChar(lastChar, 0, charLen);
                } else {
                    // The character starts one byte before the current position.
                    distributionAnalyser.HandleOneChar(buf, i-1, charLen);
                }
            }
        }

        // Remember the last byte for a character straddling two buffers.
        lastChar[0] = buf[max-1];

        if (state == ProbingState.Detecting) {
            if (distributionAnalyser.GotEnoughData() && GetConfidence() > SHORTCUT_THRESHOLD) {
                state = ProbingState.FoundIt;
            }
        }
        return state;
    }

    /// <summary>
    /// Returns the name of the charset this prober detects.
    /// </summary>
    public override string GetCharsetName()
    {
        return "x-euc-tw";
    }

    /// <summary>
    /// Restores the state machine, the analyser and the probing state to
    /// their initial condition.
    /// </summary>
    public override void Reset()
    {
        codingSM.Reset();
        state = ProbingState.Detecting;
        distributionAnalyser.Reset();
    }

    /// <summary>
    /// Returns the confidence reported by the distribution analyser.
    /// </summary>
    public override float GetConfidence()
    {
        return distributionAnalyser.GetConfidence();
    }
}
}

View file

@ -0,0 +1,105 @@
/* ***** BEGIN LICENSE BLOCK *****
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
*
* The contents of this file are subject to the Mozilla Public License Version
* 1.1 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
* for the specific language governing rights and limitations under the
* License.
*
* The Original Code is Mozilla Universal charset detector code.
*
* The Initial Developer of the Original Code is
* Netscape Communications Corporation.
* Portions created by the Initial Developer are Copyright (C) 2001
* the Initial Developer. All Rights Reserved.
*
* Contributor(s):
* Shy Shalom <shooshX@gmail.com>
* Rudi Pettazzi <rudi.pettazzi@gmail.com> (C# port)
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
* in which case the provisions of the GPL or the LGPL are applicable instead
* of those above. If you wish to allow use of your version of this file only
* under the terms of either the GPL or the LGPL, and not to allow others to
* use your version of this file under the terms of the MPL, indicate your
* decision by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL or the LGPL. If you do not delete
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the MPL, the GPL or the LGPL.
*
* ***** END LICENSE BLOCK ***** */
namespace UniversalDetector.Core
{
/// <summary>
/// Prober that runs several coding state machines (HZ, ISO-2022-CN,
/// ISO-2022-JP, ISO-2022-KR) side by side and reports the model name of
/// the first machine that reaches its ITSME state.
/// </summary>
public class EscCharsetProber : CharsetProber
{
    private const int CHARSETS_NUM = 4;

    // Model name of the machine that matched; null until one does.
    private string detectedCharset;

    // Slots [0, activeSM) hold the machines still in the running; machines
    // that hit ERROR are swapped out behind that boundary.
    private CodingStateMachine[] codingSM;
    private int activeSM;

    public EscCharsetProber()
    {
        codingSM = new CodingStateMachine[CHARSETS_NUM] {
            new CodingStateMachine(new HZSMModel()),
            new CodingStateMachine(new ISO2022CNSMModel()),
            new CodingStateMachine(new ISO2022JPSMModel()),
            new CodingStateMachine(new ISO2022KRSMModel())
        };
        Reset();
    }

    /// <summary>
    /// Returns the name of the detected charset, or null when no state
    /// machine has matched yet.
    /// </summary>
    public override string GetCharsetName()
    {
        return detectedCharset;
    }

    /// <summary>
    /// Returns a fixed confidence value.
    /// </summary>
    public override float GetConfidence()
    {
        return 0.99f;
    }

    /// <summary>
    /// Resets every state machine, marks all of them active again and
    /// clears the detected charset.
    /// </summary>
    public override void Reset()
    {
        state = ProbingState.Detecting;
        foreach (CodingStateMachine machine in codingSM) {
            machine.Reset();
        }
        activeSM = CHARSETS_NUM;
        detectedCharset = null;
    }

    /// <summary>
    /// Feeds <paramref name="len"/> bytes of <paramref name="buf"/>,
    /// starting at <paramref name="offset"/>, to every active machine.
    /// </summary>
    /// <param name="buf">Buffer holding the input bytes.</param>
    /// <param name="offset">Index of the first byte to examine.</param>
    /// <param name="len">Number of bytes to examine.</param>
    /// <returns>The probing state after the bytes have been handled.</returns>
    public override ProbingState HandleData(byte[] buf, int offset, int len)
    {
        int end = offset + len;
        int pos = offset;

        while (pos < end && state == ProbingState.Detecting) {
            // Each byte goes to all active machines, highest slot first.
            for (int slot = activeSM - 1; slot >= 0; slot--) {
                int verdict = codingSM[slot].NextState(buf[pos]);

                if (verdict == SMModel.ITSME) {
                    state = ProbingState.FoundIt;
                    detectedCharset = codingSM[slot].ModelName;
                    return state;
                }
                if (verdict != SMModel.ERROR) {
                    continue;
                }

                // This machine rejected the input: retire it.
                activeSM--;
                if (activeSM == 0) {
                    state = ProbingState.NotMe;
                    return state;
                }
                if (slot != activeSM) {
                    // Move the retired machine behind the active boundary
                    // and pull the last active one into its slot.
                    CodingStateMachine retired = codingSM[slot];
                    codingSM[slot] = codingSM[activeSM];
                    codingSM[activeSM] = retired;
                }
            }
            pos++;
        }
        return state;
    }
}
}

View file

@ -0,0 +1,304 @@
/* ***** BEGIN LICENSE BLOCK *****
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
*
* The contents of this file are subject to the Mozilla Public License Version
* 1.1 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
* for the specific language governing rights and limitations under the
* License.
*
* The Original Code is mozilla.org code.
*
* The Initial Developer of the Original Code is
* Netscape Communications Corporation.
* Portions created by the Initial Developer are Copyright (C) 1998
* the Initial Developer. All Rights Reserved.
*
* Contributor(s):
* Kohei TAKETA <k-tak@void.in> (Java port)
* Rudi Pettazzi <rudi.pettazzi@gmail.com> (C# port)
*
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
* in which case the provisions of the GPL or the LGPL are applicable instead
* of those above. If you wish to allow use of your version of this file only
* under the terms of either the GPL or the LGPL, and not to allow others to
* use your version of this file under the terms of the MPL, indicate your
* decision by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL or the LGPL. If you do not delete
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the MPL, the GPL or the LGPL.
*
* ***** END LICENSE BLOCK ***** */
/// <summary>
/// Escaped charsets state machines
/// </summary>
namespace UniversalDetector.Core
{
/// <summary>
/// State machine model registered under the name "HZ-GB-2312".
/// It only supplies data: a byte-class table, a state-transition table and
/// a character-length table, all handed to the SMModel base constructor.
/// </summary>
public class HZSMModel : SMModel
{
// Byte-value -> class table. Each Pack4bits call packs the classes of
// eight consecutive byte values; the trailing comment gives that range
// (32 entries x 8 = all 256 byte values).
private readonly static int[] HZ_cls = {
BitPackage.Pack4bits(1,0,0,0,0,0,0,0), // 00 - 07
BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 08 - 0f
BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 10 - 17
BitPackage.Pack4bits(0,0,0,1,0,0,0,0), // 18 - 1f
BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 20 - 27
BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 28 - 2f
BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 30 - 37
BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 38 - 3f
BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 40 - 47
BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 48 - 4f
BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 50 - 57
BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 58 - 5f
BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 60 - 67
BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 68 - 6f
BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 70 - 77
BitPackage.Pack4bits(0,0,0,4,0,5,2,0), // 78 - 7f
BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 80 - 87
BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 88 - 8f
BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 90 - 97
BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 98 - 9f
BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // a0 - a7
BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // a8 - af
BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // b0 - b7
BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // b8 - bf
BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // c0 - c7
BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // c8 - cf
BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // d0 - d7
BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // d8 - df
BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // e0 - e7
BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // e8 - ef
BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // f0 - f7
BitPackage.Pack4bits(1,1,1,1,1,1,1,1) // f8 - ff
};
// Packed state-transition table, eight entries per Pack4bits call; the
// trailing comment gives the range of flat table indices in that row.
// Entries are either the inherited START / ERROR / ITSME values or the
// number of an intermediate state.
private readonly static int[] HZ_st = {
BitPackage.Pack4bits(START, ERROR, 3, START, START, START, ERROR, ERROR),//00-07
BitPackage.Pack4bits(ERROR, ERROR, ERROR, ERROR, ITSME, ITSME, ITSME, ITSME),//08-0f
BitPackage.Pack4bits(ITSME, ITSME, ERROR, ERROR, START, START, 4, ERROR),//10-17
BitPackage.Pack4bits( 5, ERROR, 6, ERROR, 5, 5, 4, ERROR),//18-1f
BitPackage.Pack4bits( 4, ERROR, 4, 4, 4, ERROR, 4, ERROR),//20-27
BitPackage.Pack4bits( 4, ITSME, START, START, START, START, START, START) //28-2f
};
// Character-length table; every entry is zero for this model.
private readonly static int[] HZCharLenTable = {0, 0, 0, 0, 0, 0};
// NOTE(review): the base constructor signature is not visible here. The
// arguments appear to be (class table, class factor, state table,
// char-length table, model name) -- confirm against SMModel.
public HZSMModel() : base(
new BitPackage(BitPackage.INDEX_SHIFT_4BITS,
BitPackage.SHIFT_MASK_4BITS,
BitPackage.BIT_SHIFT_4BITS,
BitPackage.UNIT_MASK_4BITS, HZ_cls),
6,
new BitPackage(BitPackage.INDEX_SHIFT_4BITS,
BitPackage.SHIFT_MASK_4BITS,
BitPackage.BIT_SHIFT_4BITS,
BitPackage.UNIT_MASK_4BITS, HZ_st),
HZCharLenTable, "HZ-GB-2312")
{
}
}
/// <summary>
/// State machine model registered under the name "ISO-2022-CN".
/// It only supplies data: a byte-class table, a state-transition table and
/// a character-length table, all handed to the SMModel base constructor.
/// </summary>
public class ISO2022CNSMModel : SMModel
{
// Byte-value -> class table. Each Pack4bits call packs the classes of
// eight consecutive byte values; the trailing comment gives that range
// (32 entries x 8 = all 256 byte values).
private readonly static int[] ISO2022CN_cls = {
BitPackage.Pack4bits(2,0,0,0,0,0,0,0), // 00 - 07
BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 08 - 0f
BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 10 - 17
BitPackage.Pack4bits(0,0,0,1,0,0,0,0), // 18 - 1f
BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 20 - 27
BitPackage.Pack4bits(0,3,0,0,0,0,0,0), // 28 - 2f
BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 30 - 37
BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 38 - 3f
BitPackage.Pack4bits(0,0,0,4,0,0,0,0), // 40 - 47
BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 48 - 4f
BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 50 - 57
BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 58 - 5f
BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 60 - 67
BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 68 - 6f
BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 70 - 77
BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 78 - 7f
BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 80 - 87
BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 88 - 8f
BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 90 - 97
BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 98 - 9f
BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // a0 - a7
BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // a8 - af
BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // b0 - b7
BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // b8 - bf
BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // c0 - c7
BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // c8 - cf
BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // d0 - d7
BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // d8 - df
BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // e0 - e7
BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // e8 - ef
BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // f0 - f7
BitPackage.Pack4bits(2,2,2,2,2,2,2,2) // f8 - ff
};
// Packed state-transition table, eight entries per Pack4bits call; the
// trailing comment gives the range of flat table indices in that row.
// Entries are either the inherited START / ERROR / ITSME values or the
// number of an intermediate state.
private readonly static int[] ISO2022CN_st = {
BitPackage.Pack4bits(START, 3,ERROR,START,START,START,START,START),//00-07
BitPackage.Pack4bits(START,ERROR,ERROR,ERROR,ERROR,ERROR,ERROR,ERROR),//08-0f
BitPackage.Pack4bits(ERROR,ERROR,ITSME,ITSME,ITSME,ITSME,ITSME,ITSME),//10-17
BitPackage.Pack4bits(ITSME,ITSME,ITSME,ERROR,ERROR,ERROR, 4,ERROR),//18-1f
BitPackage.Pack4bits(ERROR,ERROR,ERROR,ITSME,ERROR,ERROR,ERROR,ERROR),//20-27
BitPackage.Pack4bits( 5, 6,ERROR,ERROR,ERROR,ERROR,ERROR,ERROR),//28-2f
BitPackage.Pack4bits(ERROR,ERROR,ERROR,ITSME,ERROR,ERROR,ERROR,ERROR),//30-37
BitPackage.Pack4bits(ERROR,ERROR,ERROR,ERROR,ERROR,ITSME,ERROR,START) //38-3f
};
// Character-length table; every entry is zero for this model.
private readonly static int[] ISO2022CNCharLenTable = {0, 0, 0, 0, 0, 0, 0, 0, 0};
// NOTE(review): the base constructor signature is not visible here. The
// arguments appear to be (class table, class factor, state table,
// char-length table, model name) -- confirm against SMModel.
public ISO2022CNSMModel() : base(
new BitPackage(BitPackage.INDEX_SHIFT_4BITS,
BitPackage.SHIFT_MASK_4BITS,
BitPackage.BIT_SHIFT_4BITS,
BitPackage.UNIT_MASK_4BITS, ISO2022CN_cls),
9,
new BitPackage(BitPackage.INDEX_SHIFT_4BITS,
BitPackage.SHIFT_MASK_4BITS,
BitPackage.BIT_SHIFT_4BITS,
BitPackage.UNIT_MASK_4BITS, ISO2022CN_st),
ISO2022CNCharLenTable, "ISO-2022-CN")
{
}
}
/// <summary>
/// State machine model registered under the name "ISO-2022-JP".
/// It only supplies data: a byte-class table, a state-transition table and
/// a character-length table, all handed to the SMModel base constructor.
/// </summary>
public class ISO2022JPSMModel : SMModel
{
// Byte-value -> class table. Each Pack4bits call packs the classes of
// eight consecutive byte values; the trailing comment gives that range
// (32 entries x 8 = all 256 byte values).
private readonly static int[] ISO2022JP_cls = {
BitPackage.Pack4bits(2,0,0,0,0,0,0,0), // 00 - 07
BitPackage.Pack4bits(0,0,0,0,0,0,2,2), // 08 - 0f
BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 10 - 17
BitPackage.Pack4bits(0,0,0,1,0,0,0,0), // 18 - 1f
BitPackage.Pack4bits(0,0,0,0,7,0,0,0), // 20 - 27
BitPackage.Pack4bits(3,0,0,0,0,0,0,0), // 28 - 2f
BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 30 - 37
BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 38 - 3f
BitPackage.Pack4bits(6,0,4,0,8,0,0,0), // 40 - 47
BitPackage.Pack4bits(0,9,5,0,0,0,0,0), // 48 - 4f
BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 50 - 57
BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 58 - 5f
BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 60 - 67
BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 68 - 6f
BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 70 - 77
BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 78 - 7f
BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 80 - 87
BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 88 - 8f
BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 90 - 97
BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 98 - 9f
BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // a0 - a7
BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // a8 - af
BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // b0 - b7
BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // b8 - bf
BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // c0 - c7
BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // c8 - cf
BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // d0 - d7
BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // d8 - df
BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // e0 - e7
BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // e8 - ef
BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // f0 - f7
BitPackage.Pack4bits(2,2,2,2,2,2,2,2) // f8 - ff
};
// Packed state-transition table, eight entries per Pack4bits call; the
// trailing comment gives the range of flat table indices in that row.
// Entries are either the inherited START / ERROR / ITSME values or the
// number of an intermediate state.
private readonly static int[] ISO2022JP_st = {
BitPackage.Pack4bits(START, 3, ERROR,START,START,START,START,START),//00-07
BitPackage.Pack4bits(START, START, ERROR,ERROR,ERROR,ERROR,ERROR,ERROR),//08-0f
BitPackage.Pack4bits(ERROR, ERROR, ERROR,ERROR,ITSME,ITSME,ITSME,ITSME),//10-17
BitPackage.Pack4bits(ITSME, ITSME, ITSME,ITSME,ITSME,ITSME,ERROR,ERROR),//18-1f
BitPackage.Pack4bits(ERROR, 5, ERROR,ERROR,ERROR, 4,ERROR,ERROR),//20-27
BitPackage.Pack4bits(ERROR, ERROR, ERROR, 6,ITSME,ERROR,ITSME,ERROR),//28-2f
BitPackage.Pack4bits(ERROR, ERROR, ERROR,ERROR,ERROR,ERROR,ITSME,ITSME),//30-37
BitPackage.Pack4bits(ERROR, ERROR, ERROR,ITSME,ERROR,ERROR,ERROR,ERROR),//38-3f
BitPackage.Pack4bits(ERROR, ERROR, ERROR,ERROR,ITSME,ERROR,START,START) //40-47
};
// Character-length table; every entry is zero for this model.
private readonly static int[] ISO2022JPCharLenTable = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
// NOTE(review): the base constructor signature is not visible here. The
// arguments appear to be (class table, class factor, state table,
// char-length table, model name) -- confirm against SMModel.
public ISO2022JPSMModel() : base(
new BitPackage(BitPackage.INDEX_SHIFT_4BITS,
BitPackage.SHIFT_MASK_4BITS,
BitPackage.BIT_SHIFT_4BITS,
BitPackage.UNIT_MASK_4BITS, ISO2022JP_cls),
10,
new BitPackage(BitPackage.INDEX_SHIFT_4BITS,
BitPackage.SHIFT_MASK_4BITS,
BitPackage.BIT_SHIFT_4BITS,
BitPackage.UNIT_MASK_4BITS, ISO2022JP_st),
ISO2022JPCharLenTable, "ISO-2022-JP")
{
}
}
/// <summary>
/// State machine model registered under the name "ISO-2022-KR".
/// It only supplies data: a byte-class table, a state-transition table and
/// a character-length table, all handed to the SMModel base constructor.
/// </summary>
public class ISO2022KRSMModel : SMModel
{
// Byte-value -> class table. Each Pack4bits call packs the classes of
// eight consecutive byte values; the trailing comment gives that range
// (32 entries x 8 = all 256 byte values).
private readonly static int[] ISO2022KR_cls = {
BitPackage.Pack4bits(2,0,0,0,0,0,0,0), // 00 - 07
BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 08 - 0f
BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 10 - 17
BitPackage.Pack4bits(0,0,0,1,0,0,0,0), // 18 - 1f
BitPackage.Pack4bits(0,0,0,0,3,0,0,0), // 20 - 27
BitPackage.Pack4bits(0,4,0,0,0,0,0,0), // 28 - 2f
BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 30 - 37
BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 38 - 3f
BitPackage.Pack4bits(0,0,0,5,0,0,0,0), // 40 - 47
BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 48 - 4f
BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 50 - 57
BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 58 - 5f
BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 60 - 67
BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 68 - 6f
BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 70 - 77
BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 78 - 7f
BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 80 - 87
BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 88 - 8f
BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 90 - 97
BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 98 - 9f
BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // a0 - a7
BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // a8 - af
BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // b0 - b7
BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // b8 - bf
BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // c0 - c7
BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // c8 - cf
BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // d0 - d7
BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // d8 - df
BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // e0 - e7
BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // e8 - ef
BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // f0 - f7
BitPackage.Pack4bits(2,2,2,2,2,2,2,2) // f8 - ff
};
// Packed state-transition table, eight entries per Pack4bits call; the
// trailing comment gives the range of flat table indices in that row.
// Entries are either the inherited START / ERROR / ITSME values or the
// number of an intermediate state.
private readonly static int[] ISO2022KR_st = {
BitPackage.Pack4bits(START, 3,ERROR,START,START,START,ERROR,ERROR),//00-07
BitPackage.Pack4bits(ERROR,ERROR,ERROR,ERROR,ITSME,ITSME,ITSME,ITSME),//08-0f
BitPackage.Pack4bits(ITSME,ITSME,ERROR,ERROR,ERROR, 4,ERROR,ERROR),//10-17
BitPackage.Pack4bits(ERROR,ERROR,ERROR,ERROR, 5,ERROR,ERROR,ERROR),//18-1f
BitPackage.Pack4bits(ERROR,ERROR,ERROR,ITSME,START,START,START,START) //20-27
};
// Character-length table; every entry is zero for this model.
private readonly static int[] ISO2022KRCharLenTable = {0, 0, 0, 0, 0, 0};
// NOTE(review): the base constructor signature is not visible here. The
// arguments appear to be (class table, class factor, state table,
// char-length table, model name) -- confirm against SMModel.
public ISO2022KRSMModel() : base(
new BitPackage(BitPackage.INDEX_SHIFT_4BITS,
BitPackage.SHIFT_MASK_4BITS,
BitPackage.BIT_SHIFT_4BITS,
BitPackage.UNIT_MASK_4BITS, ISO2022KR_cls),
6,
new BitPackage(BitPackage.INDEX_SHIFT_4BITS,
BitPackage.SHIFT_MASK_4BITS,
BitPackage.BIT_SHIFT_4BITS,
BitPackage.UNIT_MASK_4BITS, ISO2022KR_st),
ISO2022KRCharLenTable, "ISO-2022-KR")
{
}
}
}

View file

@ -0,0 +1,111 @@
/* ***** BEGIN LICENSE BLOCK *****
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
*
* The contents of this file are subject to the Mozilla Public License Version
* 1.1 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
* for the specific language governing rights and limitations under the
* License.
*
* The Original Code is Mozilla Universal charset detector code.
*
* The Initial Developer of the Original Code is
* Netscape Communications Corporation.
* Portions created by the Initial Developer are Copyright (C) 2001
* the Initial Developer. All Rights Reserved.
*
* Contributor(s):
* Shy Shalom <shooshX@gmail.com>
* Rudi Pettazzi <rudi.pettazzi@gmail.com> (C# port)
*
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
* in which case the provisions of the GPL or the LGPL are applicable instead
* of those above. If you wish to allow use of your version of this file only
* under the terms of either the GPL or the LGPL, and not to allow others to
* use your version of this file under the terms of the MPL, indicate your
* decision by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL or the LGPL. If you do not delete
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the MPL, the GPL or the LGPL.
*
* ***** END LICENSE BLOCK ***** */
namespace UniversalDetector.Core
{
// We use gb18030 to replace gb2312, because 18030 is a superset.
/// <summary>
/// Prober reporting "gb18030". It combines a coding state machine with a
/// character-distribution analyser.
/// </summary>
public class GB18030Prober : CharsetProber
{
    private CodingStateMachine codingSM;
    private GB18030DistributionAnalyser analyser;
    // lastChar[0] keeps the final byte of the previously handled buffer;
    // lastChar[1] receives the first byte of the current buffer when a
    // character is completed right at the buffer start.
    private byte[] lastChar;

    public GB18030Prober()
    {
        this.lastChar = new byte[2];
        this.codingSM = new CodingStateMachine(new GB18030SMModel());
        this.analyser = new GB18030DistributionAnalyser();
        this.Reset();
    }

    /// <summary>
    /// Returns the name of the charset this prober detects.
    /// </summary>
    public override string GetCharsetName()
    {
        return "gb18030";
    }

    /// <summary>
    /// Returns the confidence reported by the distribution analyser.
    /// </summary>
    public override float GetConfidence()
    {
        return analyser.GetConfidence();
    }

    /// <summary>
    /// Restores the state machine, the analyser and the probing state to
    /// their initial condition.
    /// </summary>
    public override void Reset()
    {
        codingSM.Reset();
        state = ProbingState.Detecting;
        analyser.Reset();
    }

    /// <summary>
    /// Feeds <paramref name="len"/> bytes of <paramref name="buf"/>,
    /// starting at <paramref name="offset"/>, to the prober.
    /// </summary>
    /// <param name="buf">Buffer holding the input bytes.</param>
    /// <param name="offset">Index of the first byte to examine.</param>
    /// <param name="len">Number of bytes to examine.</param>
    /// <returns>The probing state after the bytes have been handled.</returns>
    public override ProbingState HandleData(byte[] buf, int offset, int len)
    {
        int end = offset + len;

        for (int pos = offset; pos < end; pos++) {
            int smState = codingSM.NextState(buf[pos]);

            if (smState == SMModel.ERROR) {
                state = ProbingState.NotMe;
                break;
            }
            if (smState == SMModel.ITSME) {
                state = ProbingState.FoundIt;
                break;
            }
            if (smState != SMModel.START) {
                continue;
            }

            // Back in START: a complete character has just been consumed.
            int charLen = codingSM.CurrentCharLen;
            if (pos != offset) {
                // The character starts one byte before the current position.
                analyser.HandleOneChar(buf, pos - 1, charLen);
            } else {
                // The character began in the previous buffer; rebuild it
                // from the remembered byte plus the first byte of this one.
                lastChar[1] = buf[offset];
                analyser.HandleOneChar(lastChar, 0, charLen);
            }
        }

        // Remember the last byte for a character straddling two buffers.
        lastChar[0] = buf[end - 1];

        if (state == ProbingState.Detecting
            && analyser.GotEnoughData()
            && GetConfidence() > SHORTCUT_THRESHOLD) {
            state = ProbingState.FoundIt;
        }
        return state;
    }
}
}

View file

@ -0,0 +1,324 @@
/* ***** BEGIN LICENSE BLOCK *****
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
*
* The contents of this file are subject to the Mozilla Public License Version
* 1.1 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
* for the specific language governing rights and limitations under the
* License.
*
* The Original Code is Mozilla Universal charset detector code.
*
* The Initial Developer of the Original Code is
* Netscape Communications Corporation.
* Portions created by the Initial Developer are Copyright (C) 2001
* the Initial Developer. All Rights Reserved.
*
* Contributor(s):
* Shy Shalom <shooshX@gmail.com>
* Rudi Pettazzi <rudi.pettazzi@gmail.com> (C# port)
*
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
* in which case the provisions of the GPL or the LGPL are applicable instead
* of those above. If you wish to allow use of your version of this file only
* under the terms of either the GPL or the LGPL, and not to allow others to
* use your version of this file under the terms of the MPL, indicate your
* decision by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL or the LGPL. If you do not delete
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the MPL, the GPL or the LGPL.
*
* ***** END LICENSE BLOCK ***** */
using System;
/**
* General ideas of the Hebrew charset recognition
*
* Four main charsets exist in Hebrew:
* "ISO-8859-8" - Visual Hebrew
* "windows-1255" - Logical Hebrew
* "ISO-8859-8-I" - Logical Hebrew
* "x-mac-hebrew" - ?? Logical Hebrew ??
*
* Both "ISO" charsets use a completely identical set of code points, whereas
* "windows-1255" and "x-mac-hebrew" are two different proper supersets of
* these code points. windows-1255 defines additional characters in the range
* 0x80-0x9F as some misc punctuation marks as well as some Hebrew-specific
* diacritics and additional 'Yiddish' ligature letters in the range 0xc0-0xd6.
* x-mac-hebrew defines similar additional code points but with a different
* mapping.
*
* As far as an average Hebrew text with no diacritics is concerned, all four
* charsets are identical with respect to code points. Meaning that for the
* main Hebrew alphabet, all four map the same values to all 27 Hebrew letters
* (including final letters).
*
* The dominant difference between these charsets is their directionality.
* "Visual" directionality means that the text is ordered as if the renderer is
* not aware of a BIDI rendering algorithm. The renderer sees the text and
* draws it from left to right. The text itself when ordered naturally is read
* backwards. A buffer of Visual Hebrew generally looks like so:
* "[last word of first line spelled backwards] [whole line ordered backwards
* and spelled backwards] [first word of first line spelled backwards]
* [end of line] [last word of second line] ... etc' "
* adding punctuation marks, numbers and English text to visual text is
* naturally also "visual" and from left to right.
*
* "Logical" directionality means the text is ordered "naturally" according to
* the order it is read. It is the responsibility of the renderer to display
* the text from right to left. A BIDI algorithm is used to place general
* punctuation marks, numbers and English text in the text.
*
* Texts in x-mac-hebrew are almost impossible to find on the Internet. From
* what little evidence I could find, it seems that its general directionality
* is Logical.
*
* To sum up all of the above, the Hebrew probing mechanism knows about two
* charsets:
* Visual Hebrew - "ISO-8859-8" - backwards text - Words and sentences are
* backwards while line order is natural. For charset recognition purposes
* the line order is unimportant (In fact, for this implementation, even
* word order is unimportant).
* Logical Hebrew - "windows-1255" - normal, naturally ordered text.
*
* "ISO-8859-8-I" is a subset of windows-1255 and doesn't need to be
* specifically identified.
* "x-mac-hebrew" is also identified as windows-1255. A text in x-mac-hebrew
* that contains special punctuation marks or diacritics is displayed with
* some unconverted characters showing as question marks. This problem might
* be corrected using another model prober for x-mac-hebrew. Due to the fact
* that x-mac-hebrew texts are so rare, writing another model prober isn't
* worth the effort and performance hit.
*
* *** The Prober ***
*
* The prober is divided between two nsSBCharSetProbers and an nsHebrewProber,
* all of which are managed, created, fed data, inquired and deleted by the
* nsSBCSGroupProber. The two nsSBCharSetProbers identify that the text is in
* fact some kind of Hebrew, Logical or Visual. The final decision about which
* one it is is made by the nsHebrewProber by combining final-letter scores
* with the scores of the two nsSBCharSetProbers to produce a final answer.
*
* The nsSBCSGroupProber is responsible for stripping the original text of HTML
* tags, English characters, numbers, low-ASCII punctuation characters, spaces
* and new lines. It reduces any sequence of such characters to a single space.
* The buffer fed to each prober in the SBCS group prober is pure text in
* high-ASCII.
* The two nsSBCharSetProbers (model probers) share the same language model:
* Win1255Model.
* The first nsSBCharSetProber uses the model normally as any other
* nsSBCharSetProber does, to recognize windows-1255, upon which this model was
* built. The second nsSBCharSetProber is told to make the pair-of-letter
* lookup in the language model backwards. This in practice exactly simulates
* a visual Hebrew model using the windows-1255 logical Hebrew model.
*
* The nsHebrewProber is not using any language model. All it does is look for
* final-letter evidence suggesting the text is either logical Hebrew or visual
* Hebrew. Disjointed from the model probers, the results of the nsHebrewProber
* alone are meaningless. nsHebrewProber always returns 0.00 as confidence
* since it never identifies a charset by itself. Instead, the pointer to the
* nsHebrewProber is passed to the model probers as a helper "Name Prober".
* When the Group prober receives a positive identification from any prober,
* it asks for the name of the charset identified. If the prober queried is a
* Hebrew model prober, the model prober forwards the call to the
* nsHebrewProber to make the final decision. In the nsHebrewProber, the
* decision is made according to the final-letter scores maintained and both
* model probers' scores. The answer is returned in the form of the name of the
* charset identified, either "windows-1255" or "ISO-8859-8".
*
*/
namespace UniversalDetector.Core
{
    /// <summary>
    /// Helper for the two Hebrew model probers. On its own it recognises
    /// neither a language nor a charset (its confidence is always zero); it
    /// only gathers final-letter evidence and, combined with the confidences
    /// of the model probers, names the charset as logical or visual Hebrew.
    /// </summary>
    public class HebrewProber : CharsetProber
    {
        // Code points (windows-1255 / ISO-8859-8) of the letters that have a
        // distinct final form, together with their normal forms.
        private const byte FINAL_KAF = 0xEA;
        private const byte NORMAL_KAF = 0xEB;
        private const byte FINAL_MEM = 0xED;
        private const byte NORMAL_MEM = 0xEE;
        private const byte FINAL_NUN = 0xEF;
        private const byte NORMAL_NUN = 0xF0;
        private const byte FINAL_PE = 0xF3;
        private const byte NORMAL_PE = 0xF4;
        private const byte FINAL_TSADI = 0xF5;
        private const byte NORMAL_TSADI = 0xF6;

        // The only word separator expected in the filtered input.
        private const byte WORD_BREAK = 0x20;

        // Smallest logical-vs-visual final letter score gap that is trusted
        // without consulting the model probers.
        private const int MIN_FINAL_CHAR_DISTANCE = 5;

        // Smallest logical-vs-visual model confidence gap that is trusted at all.
        private const float MIN_MODEL_DISTANCE = 0.01f;

        protected const string VISUAL_HEBREW_NAME = "ISO-8859-8";
        protected const string LOGICAL_HEBREW_NAME = "windows-1255";

        // Both model probers are owned by the group prober.
        protected CharsetProber logicalProber;
        protected CharsetProber visualProber;

        protected int finalCharLogicalScore;
        protected int finalCharVisualScore;

        // The last two bytes seen; they survive across HandleData calls.
        protected byte prev;
        protected byte beforePrev;

        public HebrewProber()
        {
            Reset();
        }

        /// <summary>Registers the model probers whose results this helper arbitrates.</summary>
        public void SetModelProbers(CharsetProber logical, CharsetProber visual)
        {
            logicalProber = logical;
            visualProber = visual;
        }

        /// <summary>
        /// Accumulates final-letter evidence from the given bytes.
        /// Three situations are scored:
        /// 1) a word of more than one letter ending with a final letter:
        ///    the text reads naturally, +1 logical;
        /// 2) a word of more than one letter ending with a non-final form of
        ///    Kaf, Mem, Nun or Pe (see IsNonFinal): the text reads backwards,
        ///    +1 visual;
        /// 3) a word of more than one letter starting with a final letter:
        ///    the text reads backwards, +1 visual.
        /// Final letters in the middle of a word are ignored because they
        /// favour neither layout. The input is expected to contain no white
        /// space other than ' ' and no low-ASCII punctuation.
        /// </summary>
        /// <returns>
        /// NotMe once both model probers have given up, otherwise Detecting.
        /// </returns>
        public override ProbingState HandleData(byte[] buf, int offset, int len)
        {
            // Nothing to decide once both model probers have rejected the text.
            if (GetState() == ProbingState.NotMe)
                return ProbingState.NotMe;

            int end = offset + len;
            int pos = offset;
            while (pos < end) {
                byte current = buf[pos++];
                bool prevIsWordStart = (beforePrev == WORD_BREAK);

                if (current == WORD_BREAK) {
                    // A word just ended; skip one-letter words.
                    if (!prevIsWordStart) {
                        if (IsFinal(prev)) {
                            // case (1)
                            finalCharLogicalScore++;
                        } else if (IsNonFinal(prev)) {
                            // case (2)
                            finalCharVisualScore++;
                        }
                    }
                } else if (prevIsWordStart && IsFinal(prev)) {
                    // case (3): [space][final letter][letter]
                    finalCharVisualScore++;
                }

                beforePrev = prev;
                prev = current;
            }

            // Keep detecting until the data ends or both model probers give up.
            return ProbingState.Detecting;
        }

        /// <summary>
        /// Decides between logical and visual Hebrew and returns the matching
        /// charset name.
        /// </summary>
        public override string GetCharsetName()
        {
            int finalLetterLead = finalCharLogicalScore - finalCharVisualScore;

            // A dominant final letter score settles the question.
            if (finalLetterLead >= MIN_FINAL_CHAR_DISTANCE)
                return LOGICAL_HEBREW_NAME;
            if (finalLetterLead <= -MIN_FINAL_CHAR_DISTANCE)
                return VISUAL_HEBREW_NAME;

            // Otherwise fall back on the gap between the model confidences.
            float modelLead = logicalProber.GetConfidence() - visualProber.GetConfidence();
            if (modelLead > MIN_MODEL_DISTANCE)
                return LOGICAL_HEBREW_NAME;
            if (modelLead < -MIN_MODEL_DISTANCE)
                return VISUAL_HEBREW_NAME;

            // Last resort: any visual lead in the final letter score wins,
            // everything else (including a tie) defaults to logical.
            return (finalLetterLead < 0) ? VISUAL_HEBREW_NAME : LOGICAL_HEBREW_NAME;
        }

        /// <summary>Clears both scores and treats the preceding two bytes as spaces.</summary>
        public override void Reset()
        {
            finalCharLogicalScore = 0;
            finalCharVisualScore = 0;
            prev = WORD_BREAK;
            beforePrev = WORD_BREAK;
        }

        /// <summary>
        /// Reports Detecting while at least one model prober is still active
        /// and NotMe once both have rejected the text.
        /// </summary>
        public override ProbingState GetState()
        {
            bool bothRejected = logicalProber.GetState() == ProbingState.NotMe
                && visualProber.GetState() == ProbingState.NotMe;
            return bothRejected ? ProbingState.NotMe : ProbingState.Detecting;
        }

        public override void DumpStatus()
        {
            //Console.WriteLine(" HEB: {0} - {1} [Logical-Visual score]", finalCharLogicalScore, finalCharVisualScore);
        }

        /// <summary>Always zero: this prober never identifies a charset by itself.</summary>
        public override float GetConfidence()
        {
            return 0.0f;
        }

        /// <summary>Tells whether the byte is the final form of Kaf, Mem, Nun, Pe or Tsadi.</summary>
        protected static bool IsFinal(byte b)
        {
            switch (b) {
                case FINAL_KAF:
                case FINAL_MEM:
                case FINAL_NUN:
                case FINAL_PE:
                case FINAL_TSADI:
                    return true;
                default:
                    return false;
            }
        }

        /// <summary>Tells whether the byte is the normal form of Kaf, Mem, Nun or Pe.</summary>
        protected static bool IsNonFinal(byte b)
        {
            // Normal Tsadi is deliberately left out: in words like 'lechotet'
            // (to chat) it is followed by an apostrophe, which
            // FilterWithoutEnglishLetters turns into a space, so the tsadi
            // would look like a word ending although it is not one in the
            // original text.
            // Pe and Kaf occasionally misbehave too ('Pop', 'Winamp' and
            // 'Mubarak' legally end with a non-final Pe or Kaf), but such
            // words are rare enough that keeping these letters pays off.
            switch (b) {
                case NORMAL_KAF:
                case NORMAL_MEM:
                case NORMAL_NUN:
                case NORMAL_PE:
                    return true;
                default:
                    return false;
            }
        }
    }
}

View file

@ -0,0 +1,315 @@
/* ***** BEGIN LICENSE BLOCK *****
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
*
* The contents of this file are subject to the Mozilla Public License Version
* 1.1 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
* for the specific language governing rights and limitations under the
* License.
*
* The Original Code is Mozilla Universal charset detector code.
*
* The Initial Developer of the Original Code is
* Netscape Communications Corporation.
* Portions created by the Initial Developer are Copyright (C) 2001
* the Initial Developer. All Rights Reserved.
*
* Contributor(s):
* Shy Shalom <shooshX@gmail.com>
* Rudi Pettazzi <rudi.pettazzi@gmail.com> (C# port)
*
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
* in which case the provisions of the GPL or the LGPL are applicable instead
* of those above. If you wish to allow use of your version of this file only
* under the terms of either the GPL or the LGPL, and not to allow others to
* use your version of this file under the terms of the MPL, indicate your
* decision by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL or the LGPL. If you do not delete
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the MPL, the GPL or the LGPL.
*
* ***** END LICENSE BLOCK ***** */
namespace UniversalDetector.Core
{
public abstract class JapaneseContextAnalyser
{
protected const int CATEGORIES_NUM = 6;
protected const int ENOUGH_REL_THRESHOLD = 100;
protected const int MAX_REL_THRESHOLD = 1000;
protected const int MINIMUM_DATA_THRESHOLD = 4;
protected const float DONT_KNOW = -1.0f;
// hiragana frequency category table
// This is hiragana 2-char sequence table, the number in each cell represents its frequency category
protected static byte[,] jp2CharContext = {
{ 0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,},
{ 2,4,0,4,0,3,0,4,0,3,4,4,4,2,4,3,3,4,3,2,3,3,4,2,3,3,3,2,4,1,4,3,3,1,5,4,3,4,3,4,3,5,3,0,3,5,4,2,0,3,1,0,3,3,0,3,3,0,1,1,0,4,3,0,3,3,0,4,0,2,0,3,5,5,5,5,4,0,4,1,0,3,4,},
{ 0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,},
{ 0,4,0,5,0,5,0,4,0,4,5,4,4,3,5,3,5,1,5,3,4,3,4,4,3,4,3,3,4,3,5,4,4,3,5,5,3,5,5,5,3,5,5,3,4,5,5,3,1,3,2,0,3,4,0,4,2,0,4,2,1,5,3,2,3,5,0,4,0,2,0,5,4,4,5,4,5,0,4,0,0,4,4,},
{ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,},
{ 0,3,0,4,0,3,0,3,0,4,5,4,3,3,3,3,4,3,5,4,4,3,5,4,4,3,4,3,4,4,4,4,5,3,4,4,3,4,5,5,4,5,5,1,4,5,4,3,0,3,3,1,3,3,0,4,4,0,3,3,1,5,3,3,3,5,0,4,0,3,0,4,4,3,4,3,3,0,4,1,1,3,4,},
{ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,},
{ 0,4,0,3,0,3,0,4,0,3,4,4,3,2,2,1,2,1,3,1,3,3,3,3,3,4,3,1,3,3,5,3,3,0,4,3,0,5,4,3,3,5,4,4,3,4,4,5,0,1,2,0,1,2,0,2,2,0,1,0,0,5,2,2,1,4,0,3,0,1,0,4,4,3,5,4,3,0,2,1,0,4,3,},
{ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,},
{ 0,3,0,5,0,4,0,2,1,4,4,2,4,1,4,2,4,2,4,3,3,3,4,3,3,3,3,1,4,2,3,3,3,1,4,4,1,1,1,4,3,3,2,0,2,4,3,2,0,3,3,0,3,1,1,0,0,0,3,3,0,4,2,2,3,4,0,4,0,3,0,4,4,5,3,4,4,0,3,0,0,1,4,},
{ 1,4,0,4,0,4,0,4,0,3,5,4,4,3,4,3,5,4,3,3,4,3,5,4,4,4,4,3,4,2,4,3,3,1,5,4,3,2,4,5,4,5,5,4,4,5,4,4,0,3,2,2,3,3,0,4,3,1,3,2,1,4,3,3,4,5,0,3,0,2,0,4,5,5,4,5,4,0,4,0,0,5,4,},
{ 0,5,0,5,0,4,0,3,0,4,4,3,4,3,3,3,4,0,4,4,4,3,4,3,4,3,3,1,4,2,4,3,4,0,5,4,1,4,5,4,4,5,3,2,4,3,4,3,2,4,1,3,3,3,2,3,2,0,4,3,3,4,3,3,3,4,0,4,0,3,0,4,5,4,4,4,3,0,4,1,0,1,3,},
{ 0,3,1,4,0,3,0,2,0,3,4,4,3,1,4,2,3,3,4,3,4,3,4,3,4,4,3,2,3,1,5,4,4,1,4,4,3,5,4,4,3,5,5,4,3,4,4,3,1,2,3,1,2,2,0,3,2,0,3,1,0,5,3,3,3,4,3,3,3,3,4,4,4,4,5,4,2,0,3,3,2,4,3,},
{ 0,2,0,3,0,1,0,1,0,0,3,2,0,0,2,0,1,0,2,1,3,3,3,1,2,3,1,0,1,0,4,2,1,1,3,3,0,4,3,3,1,4,3,3,0,3,3,2,0,0,0,0,1,0,0,2,0,0,0,0,0,4,1,0,2,3,2,2,2,1,3,3,3,4,4,3,2,0,3,1,0,3,3,},
{ 0,4,0,4,0,3,0,3,0,4,4,4,3,3,3,3,3,3,4,3,4,2,4,3,4,3,3,2,4,3,4,5,4,1,4,5,3,5,4,5,3,5,4,0,3,5,5,3,1,3,3,2,2,3,0,3,4,1,3,3,2,4,3,3,3,4,0,4,0,3,0,4,5,4,4,5,3,0,4,1,0,3,4,},
{ 0,2,0,3,0,3,0,0,0,2,2,2,1,0,1,0,0,0,3,0,3,0,3,0,1,3,1,0,3,1,3,3,3,1,3,3,3,0,1,3,1,3,4,0,0,3,1,1,0,3,2,0,0,0,0,1,3,0,1,0,0,3,3,2,0,3,0,0,0,0,0,3,4,3,4,3,3,0,3,0,0,2,3,},
{ 2,3,0,3,0,2,0,1,0,3,3,4,3,1,3,1,1,1,3,1,4,3,4,3,3,3,0,0,3,1,5,4,3,1,4,3,2,5,5,4,4,4,4,3,3,4,4,4,0,2,1,1,3,2,0,1,2,0,0,1,0,4,1,3,3,3,0,3,0,1,0,4,4,4,5,5,3,0,2,0,0,4,4,},
{ 0,2,0,1,0,3,1,3,0,2,3,3,3,0,3,1,0,0,3,0,3,2,3,1,3,2,1,1,0,0,4,2,1,0,2,3,1,4,3,2,0,4,4,3,1,3,1,3,0,1,0,0,1,0,0,0,1,0,0,0,0,4,1,1,1,2,0,3,0,0,0,3,4,2,4,3,2,0,1,0,0,3,3,},
{ 0,1,0,4,0,5,0,4,0,2,4,4,2,3,3,2,3,3,5,3,3,3,4,3,4,2,3,0,4,3,3,3,4,1,4,3,2,1,5,5,3,4,5,1,3,5,4,2,0,3,3,0,1,3,0,4,2,0,1,3,1,4,3,3,3,3,0,3,0,1,0,3,4,4,4,5,5,0,3,0,1,4,5,},
{ 0,2,0,3,0,3,0,0,0,2,3,1,3,0,4,0,1,1,3,0,3,4,3,2,3,1,0,3,3,2,3,1,3,0,2,3,0,2,1,4,1,2,2,0,0,3,3,0,0,2,0,0,0,1,0,0,0,0,2,2,0,3,2,1,3,3,0,2,0,2,0,0,3,3,1,2,4,0,3,0,2,2,3,},
{ 2,4,0,5,0,4,0,4,0,2,4,4,4,3,4,3,3,3,1,2,4,3,4,3,4,4,5,0,3,3,3,3,2,0,4,3,1,4,3,4,1,4,4,3,3,4,4,3,1,2,3,0,4,2,0,4,1,0,3,3,0,4,3,3,3,4,0,4,0,2,0,3,5,3,4,5,2,0,3,0,0,4,5,},
{ 0,3,0,4,0,1,0,1,0,1,3,2,2,1,3,0,3,0,2,0,2,0,3,0,2,0,0,0,1,0,1,1,0,0,3,1,0,0,0,4,0,3,1,0,2,1,3,0,0,0,0,0,0,3,0,0,0,0,0,0,0,4,2,2,3,1,0,3,0,0,0,1,4,4,4,3,0,0,4,0,0,1,4,},
{ 1,4,1,5,0,3,0,3,0,4,5,4,4,3,5,3,3,4,4,3,4,1,3,3,3,3,2,1,4,1,5,4,3,1,4,4,3,5,4,4,3,5,4,3,3,4,4,4,0,3,3,1,2,3,0,3,1,0,3,3,0,5,4,4,4,4,4,4,3,3,5,4,4,3,3,5,4,0,3,2,0,4,4,},
{ 0,2,0,3,0,1,0,0,0,1,3,3,3,2,4,1,3,0,3,1,3,0,2,2,1,1,0,0,2,0,4,3,1,0,4,3,0,4,4,4,1,4,3,1,1,3,3,1,0,2,0,0,1,3,0,0,0,0,2,0,0,4,3,2,4,3,5,4,3,3,3,4,3,3,4,3,3,0,2,1,0,3,3,},
{ 0,2,0,4,0,3,0,2,0,2,5,5,3,4,4,4,4,1,4,3,3,0,4,3,4,3,1,3,3,2,4,3,0,3,4,3,0,3,4,4,2,4,4,0,4,5,3,3,2,2,1,1,1,2,0,1,5,0,3,3,2,4,3,3,3,4,0,3,0,2,0,4,4,3,5,5,0,0,3,0,2,3,3,},
{ 0,3,0,4,0,3,0,1,0,3,4,3,3,1,3,3,3,0,3,1,3,0,4,3,3,1,1,0,3,0,3,3,0,0,4,4,0,1,5,4,3,3,5,0,3,3,4,3,0,2,0,1,1,1,0,1,3,0,1,2,1,3,3,2,3,3,0,3,0,1,0,1,3,3,4,4,1,0,1,2,2,1,3,},
{ 0,1,0,4,0,4,0,3,0,1,3,3,3,2,3,1,1,0,3,0,3,3,4,3,2,4,2,0,1,0,4,3,2,0,4,3,0,5,3,3,2,4,4,4,3,3,3,4,0,1,3,0,0,1,0,0,1,0,0,0,0,4,2,3,3,3,0,3,0,0,0,4,4,4,5,3,2,0,3,3,0,3,5,},
{ 0,2,0,3,0,0,0,3,0,1,3,0,2,0,0,0,1,0,3,1,1,3,3,0,0,3,0,0,3,0,2,3,1,0,3,1,0,3,3,2,0,4,2,2,0,2,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,2,1,2,0,1,0,1,0,0,0,1,3,1,2,0,0,0,1,0,0,1,4,},
{ 0,3,0,3,0,5,0,1,0,2,4,3,1,3,3,2,1,1,5,2,1,0,5,1,2,0,0,0,3,3,2,2,3,2,4,3,0,0,3,3,1,3,3,0,2,5,3,4,0,3,3,0,1,2,0,2,2,0,3,2,0,2,2,3,3,3,0,2,0,1,0,3,4,4,2,5,4,0,3,0,0,3,5,},
{ 0,3,0,3,0,3,0,1,0,3,3,3,3,0,3,0,2,0,2,1,1,0,2,0,1,0,0,0,2,1,0,0,1,0,3,2,0,0,3,3,1,2,3,1,0,3,3,0,0,1,0,0,0,0,0,2,0,0,0,0,0,2,3,1,2,3,0,3,0,1,0,3,2,1,0,4,3,0,1,1,0,3,3,},
{ 0,4,0,5,0,3,0,3,0,4,5,5,4,3,5,3,4,3,5,3,3,2,5,3,4,4,4,3,4,3,4,5,5,3,4,4,3,4,4,5,4,4,4,3,4,5,5,4,2,3,4,2,3,4,0,3,3,1,4,3,2,4,3,3,5,5,0,3,0,3,0,5,5,5,5,4,4,0,4,0,1,4,4,},
{ 0,4,0,4,0,3,0,3,0,3,5,4,4,2,3,2,5,1,3,2,5,1,4,2,3,2,3,3,4,3,3,3,3,2,5,4,1,3,3,5,3,4,4,0,4,4,3,1,1,3,1,0,2,3,0,2,3,0,3,0,0,4,3,1,3,4,0,3,0,2,0,4,4,4,3,4,5,0,4,0,0,3,4,},
{ 0,3,0,3,0,3,1,2,0,3,4,4,3,3,3,0,2,2,4,3,3,1,3,3,3,1,1,0,3,1,4,3,2,3,4,4,2,4,4,4,3,4,4,3,2,4,4,3,1,3,3,1,3,3,0,4,1,0,2,2,1,4,3,2,3,3,5,4,3,3,5,4,4,3,3,0,4,0,3,2,2,4,4,},
{ 0,2,0,1,0,0,0,0,0,1,2,1,3,0,0,0,0,0,2,0,1,2,1,0,0,1,0,0,0,0,3,0,0,1,0,1,1,3,1,0,0,0,1,1,0,1,1,0,0,0,0,0,2,0,0,0,0,0,0,0,0,1,1,2,2,0,3,4,0,0,0,1,1,0,0,1,0,0,0,0,0,1,1,},
{ 0,1,0,0,0,1,0,0,0,0,4,0,4,1,4,0,3,0,4,0,3,0,4,0,3,0,3,0,4,1,5,1,4,0,0,3,0,5,0,5,2,0,1,0,0,0,2,1,4,0,1,3,0,0,3,0,0,3,1,1,4,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,},
{ 1,4,0,5,0,3,0,2,0,3,5,4,4,3,4,3,5,3,4,3,3,0,4,3,3,3,3,3,3,2,4,4,3,1,3,4,4,5,4,4,3,4,4,1,3,5,4,3,3,3,1,2,2,3,3,1,3,1,3,3,3,5,3,3,4,5,0,3,0,3,0,3,4,3,4,4,3,0,3,0,2,4,3,},
{ 0,1,0,4,0,0,0,0,0,1,4,0,4,1,4,2,4,0,3,0,1,0,1,0,0,0,0,0,2,0,3,1,1,1,0,3,0,0,0,1,2,1,0,0,1,1,1,1,0,1,0,0,0,1,0,0,3,0,0,0,0,3,2,0,2,2,0,1,0,0,0,2,3,2,3,3,0,0,0,0,2,1,0,},
{ 0,5,1,5,0,3,0,3,0,5,4,4,5,1,5,3,3,0,4,3,4,3,5,3,4,3,3,2,4,3,4,3,3,0,3,3,1,4,4,3,4,4,4,3,4,5,5,3,2,3,1,1,3,3,1,3,1,1,3,3,2,4,5,3,3,5,0,4,0,3,0,4,4,3,5,3,3,0,3,4,0,4,3,},
{ 0,5,0,5,0,3,0,2,0,4,4,3,5,2,4,3,3,3,4,4,4,3,5,3,5,3,3,1,4,0,4,3,3,0,3,3,0,4,4,4,4,5,4,3,3,5,5,3,2,3,1,2,3,2,0,1,0,0,3,2,2,4,4,3,1,5,0,4,0,3,0,4,3,1,3,2,1,0,3,3,0,3,3,},
{ 0,4,0,5,0,5,0,4,0,4,5,5,5,3,4,3,3,2,5,4,4,3,5,3,5,3,4,0,4,3,4,4,3,2,4,4,3,4,5,4,4,5,5,0,3,5,5,4,1,3,3,2,3,3,1,3,1,0,4,3,1,4,4,3,4,5,0,4,0,2,0,4,3,4,4,3,3,0,4,0,0,5,5,},
{ 0,4,0,4,0,5,0,1,1,3,3,4,4,3,4,1,3,0,5,1,3,0,3,1,3,1,1,0,3,0,3,3,4,0,4,3,0,4,4,4,3,4,4,0,3,5,4,1,0,3,0,0,2,3,0,3,1,0,3,1,0,3,2,1,3,5,0,3,0,1,0,3,2,3,3,4,4,0,2,2,0,4,4,},
{ 2,4,0,5,0,4,0,3,0,4,5,5,4,3,5,3,5,3,5,3,5,2,5,3,4,3,3,4,3,4,5,3,2,1,5,4,3,2,3,4,5,3,4,1,2,5,4,3,0,3,3,0,3,2,0,2,3,0,4,1,0,3,4,3,3,5,0,3,0,1,0,4,5,5,5,4,3,0,4,2,0,3,5,},
{ 0,5,0,4,0,4,0,2,0,5,4,3,4,3,4,3,3,3,4,3,4,2,5,3,5,3,4,1,4,3,4,4,4,0,3,5,0,4,4,4,4,5,3,1,3,4,5,3,3,3,3,3,3,3,0,2,2,0,3,3,2,4,3,3,3,5,3,4,1,3,3,5,3,2,0,0,0,0,4,3,1,3,3,},
{ 0,1,0,3,0,3,0,1,0,1,3,3,3,2,3,3,3,0,3,0,0,0,3,1,3,0,0,0,2,2,2,3,0,0,3,2,0,1,2,4,1,3,3,0,0,3,3,3,0,1,0,0,2,1,0,0,3,0,3,1,0,3,0,0,1,3,0,2,0,1,0,3,3,1,3,3,0,0,1,1,0,3,3,},
{ 0,2,0,3,0,2,1,4,0,2,2,3,1,1,3,1,1,0,2,0,3,1,2,3,1,3,0,0,1,0,4,3,2,3,3,3,1,4,2,3,3,3,3,1,0,3,1,4,0,1,1,0,1,2,0,1,1,0,1,1,0,3,1,3,2,2,0,1,0,0,0,2,3,3,3,1,0,0,0,0,0,2,3,},
{ 0,5,0,4,0,5,0,2,0,4,5,5,3,3,4,3,3,1,5,4,4,2,4,4,4,3,4,2,4,3,5,5,4,3,3,4,3,3,5,5,4,5,5,1,3,4,5,3,1,4,3,1,3,3,0,3,3,1,4,3,1,4,5,3,3,5,0,4,0,3,0,5,3,3,1,4,3,0,4,0,1,5,3,},
{ 0,5,0,5,0,4,0,2,0,4,4,3,4,3,3,3,3,3,5,4,4,4,4,4,4,5,3,3,5,2,4,4,4,3,4,4,3,3,4,4,5,5,3,3,4,3,4,3,3,4,3,3,3,3,1,2,2,1,4,3,3,5,4,4,3,4,0,4,0,3,0,4,4,4,4,4,1,0,4,2,0,2,4,},
{ 0,4,0,4,0,3,0,1,0,3,5,2,3,0,3,0,2,1,4,2,3,3,4,1,4,3,3,2,4,1,3,3,3,0,3,3,0,0,3,3,3,5,3,3,3,3,3,2,0,2,0,0,2,0,0,2,0,0,1,0,0,3,1,2,2,3,0,3,0,2,0,4,4,3,3,4,1,0,3,0,0,2,4,},
{ 0,0,0,4,0,0,0,0,0,0,1,0,1,0,2,0,0,0,0,0,1,0,2,0,1,0,0,0,0,0,3,1,3,0,3,2,0,0,0,1,0,3,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,4,0,2,0,0,0,0,0,0,2,},
{ 0,2,1,3,0,2,0,2,0,3,3,3,3,1,3,1,3,3,3,3,3,3,4,2,2,1,2,1,4,0,4,3,1,3,3,3,2,4,3,5,4,3,3,3,3,3,3,3,0,1,3,0,2,0,0,1,0,0,1,0,0,4,2,0,2,3,0,3,3,0,3,3,4,2,3,1,4,0,1,2,0,2,3,},
{ 0,3,0,3,0,1,0,3,0,2,3,3,3,0,3,1,2,0,3,3,2,3,3,2,3,2,3,1,3,0,4,3,2,0,3,3,1,4,3,3,2,3,4,3,1,3,3,1,1,0,1,1,0,1,0,1,0,1,0,0,0,4,1,1,0,3,0,3,1,0,2,3,3,3,3,3,1,0,0,2,0,3,3,},
{ 0,0,0,0,0,0,0,0,0,0,3,0,2,0,3,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,3,0,3,0,3,1,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,2,0,2,3,0,0,0,0,0,0,0,0,3,},
{ 0,2,0,3,1,3,0,3,0,2,3,3,3,1,3,1,3,1,3,1,3,3,3,1,3,0,2,3,1,1,4,3,3,2,3,3,1,2,2,4,1,3,3,0,1,4,2,3,0,1,3,0,3,0,0,1,3,0,2,0,0,3,3,2,1,3,0,3,0,2,0,3,4,4,4,3,1,0,3,0,0,3,3,},
{ 0,2,0,1,0,2,0,0,0,1,3,2,2,1,3,0,1,1,3,0,3,2,3,1,2,0,2,0,1,1,3,3,3,0,3,3,1,1,2,3,2,3,3,1,2,3,2,0,0,1,0,0,0,0,0,0,3,0,1,0,0,2,1,2,1,3,0,3,0,0,0,3,4,4,4,3,2,0,2,0,0,2,4,},
{ 0,0,0,1,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,2,2,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,3,1,0,0,0,0,0,0,0,3,},
{ 0,3,0,3,0,2,0,3,0,3,3,3,2,3,2,2,2,0,3,1,3,3,3,2,3,3,0,0,3,0,3,2,2,0,2,3,1,4,3,4,3,3,2,3,1,5,4,4,0,3,1,2,1,3,0,3,1,1,2,0,2,3,1,3,1,3,0,3,0,1,0,3,3,4,4,2,1,0,2,1,0,2,4,},
{ 0,1,0,3,0,1,0,2,0,1,4,2,5,1,4,0,2,0,2,1,3,1,4,0,2,1,0,0,2,1,4,1,1,0,3,3,0,5,1,3,2,3,3,1,0,3,2,3,0,1,0,0,0,0,0,0,1,0,0,0,0,4,0,1,0,3,0,2,0,1,0,3,3,3,4,3,3,0,0,0,0,2,3,},
{ 0,0,0,1,0,0,0,0,0,0,2,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,1,0,0,1,0,0,0,0,0,3,},
{ 0,1,0,3,0,4,0,3,0,2,4,3,1,0,3,2,2,1,3,1,2,2,3,1,1,1,2,1,3,0,1,2,0,1,3,2,1,3,0,5,5,1,0,0,1,3,2,1,0,3,0,0,1,0,0,0,0,0,3,4,0,1,1,1,3,2,0,2,0,1,0,2,3,3,1,2,3,0,1,0,1,0,4,},
{ 0,0,0,1,0,3,0,3,0,2,2,1,0,0,4,0,3,0,3,1,3,0,3,0,3,0,1,0,3,0,3,1,3,0,3,3,0,0,1,2,1,1,1,0,1,2,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,2,2,1,2,0,0,2,0,0,0,0,2,3,3,3,3,0,0,0,0,1,4,},
{ 0,0,0,3,0,3,0,0,0,0,3,1,1,0,3,0,1,0,2,0,1,0,0,0,0,0,0,0,1,0,3,0,2,0,2,3,0,0,2,2,3,1,2,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,2,0,0,0,0,2,3,},
{ 2,4,0,5,0,5,0,4,0,3,4,3,3,3,4,3,3,3,4,3,4,4,5,4,5,5,5,2,3,0,5,5,4,1,5,4,3,1,5,4,3,4,4,3,3,4,3,3,0,3,2,0,2,3,0,3,0,0,3,3,0,5,3,2,3,3,0,3,0,3,0,3,4,5,4,5,3,0,4,3,0,3,4,},
{ 0,3,0,3,0,3,0,3,0,3,3,4,3,2,3,2,3,0,4,3,3,3,3,3,3,3,3,0,3,2,4,3,3,1,3,4,3,4,4,4,3,4,4,3,2,4,4,1,0,2,0,0,1,1,0,2,0,0,3,1,0,5,3,2,1,3,0,3,0,1,2,4,3,2,4,3,3,0,3,2,0,4,4,},
{ 0,3,0,3,0,1,0,0,0,1,4,3,3,2,3,1,3,1,4,2,3,2,4,2,3,4,3,0,2,2,3,3,3,0,3,3,3,0,3,4,1,3,3,0,3,4,3,3,0,1,1,0,1,0,0,0,4,0,3,0,0,3,1,2,1,3,0,4,0,1,0,4,3,3,4,3,3,0,2,0,0,3,3,},
{ 0,3,0,4,0,1,0,3,0,3,4,3,3,0,3,3,3,1,3,1,3,3,4,3,3,3,0,0,3,1,5,3,3,1,3,3,2,5,4,3,3,4,5,3,2,5,3,4,0,1,0,0,0,0,0,2,0,0,1,1,0,4,2,2,1,3,0,3,0,2,0,4,4,3,5,3,2,0,1,1,0,3,4,},
{ 0,5,0,4,0,5,0,2,0,4,4,3,3,2,3,3,3,1,4,3,4,1,5,3,4,3,4,0,4,2,4,3,4,1,5,4,0,4,4,4,4,5,4,1,3,5,4,2,1,4,1,1,3,2,0,3,1,0,3,2,1,4,3,3,3,4,0,4,0,3,0,4,4,4,3,3,3,0,4,2,0,3,4,},
{ 1,4,0,4,0,3,0,1,0,3,3,3,1,1,3,3,2,2,3,3,1,0,3,2,2,1,2,0,3,1,2,1,2,0,3,2,0,2,2,3,3,4,3,0,3,3,1,2,0,1,1,3,1,2,0,0,3,0,1,1,0,3,2,2,3,3,0,3,0,0,0,2,3,3,4,3,3,0,1,0,0,1,4,},
{ 0,4,0,4,0,4,0,0,0,3,4,4,3,1,4,2,3,2,3,3,3,1,4,3,4,0,3,0,4,2,3,3,2,2,5,4,2,1,3,4,3,4,3,1,3,3,4,2,0,2,1,0,3,3,0,0,2,0,3,1,0,4,4,3,4,3,0,4,0,1,0,2,4,4,4,4,4,0,3,2,0,3,3,},
{ 0,0,0,1,0,4,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,3,2,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,2,},
{ 0,2,0,3,0,4,0,4,0,1,3,3,3,0,4,0,2,1,2,1,1,1,2,0,3,1,1,0,1,0,3,1,0,0,3,3,2,0,1,1,0,0,0,0,0,1,0,2,0,2,2,0,3,1,0,0,1,0,1,1,0,1,2,0,3,0,0,0,0,1,0,0,3,3,4,3,1,0,1,0,3,0,2,},
{ 0,0,0,3,0,5,0,0,0,0,1,0,2,0,3,1,0,1,3,0,0,0,2,0,0,0,1,0,0,0,1,1,0,0,4,0,0,0,2,3,0,1,4,1,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,1,0,0,0,0,0,0,0,2,0,0,3,0,0,0,0,0,3,},
{ 0,2,0,5,0,5,0,1,0,2,4,3,3,2,5,1,3,2,3,3,3,0,4,1,2,0,3,0,4,0,2,2,1,1,5,3,0,0,1,4,2,3,2,0,3,3,3,2,0,2,4,1,1,2,0,1,1,0,3,1,0,1,3,1,2,3,0,2,0,0,0,1,3,5,4,4,4,0,3,0,0,1,3,},
{ 0,4,0,5,0,4,0,4,0,4,5,4,3,3,4,3,3,3,4,3,4,4,5,3,4,5,4,2,4,2,3,4,3,1,4,4,1,3,5,4,4,5,5,4,4,5,5,5,2,3,3,1,4,3,1,3,3,0,3,3,1,4,3,4,4,4,0,3,0,4,0,3,3,4,4,5,0,0,4,3,0,4,5,},
{ 0,4,0,4,0,3,0,3,0,3,4,4,4,3,3,2,4,3,4,3,4,3,5,3,4,3,2,1,4,2,4,4,3,1,3,4,2,4,5,5,3,4,5,4,1,5,4,3,0,3,2,2,3,2,1,3,1,0,3,3,3,5,3,3,3,5,4,4,2,3,3,4,3,3,3,2,1,0,3,2,1,4,3,},
{ 0,4,0,5,0,4,0,3,0,3,5,5,3,2,4,3,4,0,5,4,4,1,4,4,4,3,3,3,4,3,5,5,2,3,3,4,1,2,5,5,3,5,5,2,3,5,5,4,0,3,2,0,3,3,1,1,5,1,4,1,0,4,3,2,3,5,0,4,0,3,0,5,4,3,4,3,0,0,4,1,0,4,4,},
{ 1,3,0,4,0,2,0,2,0,2,5,5,3,3,3,3,3,0,4,2,3,4,4,4,3,4,0,0,3,4,5,4,3,3,3,3,2,5,5,4,5,5,5,4,3,5,5,5,1,3,1,0,1,0,0,3,2,0,4,2,0,5,2,3,2,4,1,3,0,3,0,4,5,4,5,4,3,0,4,2,0,5,4,},
{ 0,3,0,4,0,5,0,3,0,3,4,4,3,2,3,2,3,3,3,3,3,2,4,3,3,2,2,0,3,3,3,3,3,1,3,3,3,0,4,4,3,4,4,1,1,4,4,2,0,3,1,0,1,1,0,4,1,0,2,3,1,3,3,1,3,4,0,3,0,1,0,3,1,3,0,0,1,0,2,0,0,4,4,},
{ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,},
{ 0,3,0,3,0,2,0,3,0,1,5,4,3,3,3,1,4,2,1,2,3,4,4,2,4,4,5,0,3,1,4,3,4,0,4,3,3,3,2,3,2,5,3,4,3,2,2,3,0,0,3,0,2,1,0,1,2,0,0,0,0,2,1,1,3,1,0,2,0,4,0,3,4,4,4,5,2,0,2,0,0,1,3,},
{ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,1,0,0,1,1,0,0,0,4,2,1,1,0,1,0,3,2,0,0,3,1,1,1,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,1,0,0,0,2,0,0,0,1,4,0,4,2,1,0,0,0,0,0,1,},
{ 0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,1,0,1,0,0,0,0,3,1,0,0,0,2,0,2,1,0,0,1,2,1,0,1,1,0,0,3,0,0,0,0,0,0,0,0,0,0,0,1,3,1,0,0,0,0,0,1,0,0,2,1,0,0,0,0,0,0,0,0,2,},
{ 0,4,0,4,0,4,0,3,0,4,4,3,4,2,4,3,2,0,4,4,4,3,5,3,5,3,3,2,4,2,4,3,4,3,1,4,0,2,3,4,4,4,3,3,3,4,4,4,3,4,1,3,4,3,2,1,2,1,3,3,3,4,4,3,3,5,0,4,0,3,0,4,3,3,3,2,1,0,3,0,0,3,3,},
{ 0,4,0,3,0,3,0,3,0,3,5,5,3,3,3,3,4,3,4,3,3,3,4,4,4,3,3,3,3,4,3,5,3,3,1,3,2,4,5,5,5,5,4,3,4,5,5,3,2,2,3,3,3,3,2,3,3,1,2,3,2,4,3,3,3,4,0,4,0,2,0,4,3,2,2,1,2,0,3,0,0,4,1,},
};
// category counters, each integer counts sequence in its category
int[] relSample = new int[CATEGORIES_NUM];
// total sequence received
int totalRel;
// The order of previous char
int lastCharOrder;
// if last byte in current buffer is not the last byte of a character,
// we need to know how many byte to skip in next buffer.
int needToSkipCharNum;
// If this flag is set to true, detection is done and conclusion has
// been made
bool done;
/// <summary>Creates an analyser with all counters cleared.</summary>
public JapaneseContextAnalyser()
{
    this.Reset();
}
/// <summary>
/// Returns the fraction of counted sequences that fell outside category 0,
/// or DONT_KNOW while no more than MINIMUM_DATA_THRESHOLD sequences have
/// been counted.
/// </summary>
public float GetConfidence()
{
    // Too little data to say anything meaningful.
    if (totalRel <= MINIMUM_DATA_THRESHOLD)
        return DONT_KNOW;

    // This is just one way to calculate confidence.
    int outsideCategoryZero = totalRel - relSample[0];
    return ((float)outsideCategoryZero) / totalRel;
}
/// <summary>
/// Analyses a raw byte buffer, counting the frequency category of every
/// pair of consecutive characters for which GetOrder reports an order.
/// </summary>
/// <param name="buf">Buffer containing the bytes to analyse.</param>
/// <param name="offset">Index of the first byte to analyse.</param>
/// <param name="len">Number of bytes to analyse.</param>
public void HandleData(byte[] buf, int offset, int len)
{
    if (done)
        return;

    int max = offset + len;

    // The buffer is byte oriented and a character may span two buffers.
    // When the previous buffer ended in the middle of a character we
    // recorded how many bytes were missing and skip them here; dropping
    // that single character keeps the logic simple and costs little.
    int i = offset + needToSkipCharNum;
    if (i > max) {
        // This buffer is shorter than the pending skip: carry the rest over.
        needToSkipCharNum = i - max;
        return;
    }

    // The pending skip is consumed exactly once. Leaving it set would make
    // every later buffer drop its leading bytes as well.
    needToSkipCharNum = 0;

    while (i < max) {
        int charLen;
        int order = GetOrder(buf, i, out charLen);
        i += charLen;
        if (i > max) {
            // The character continues in the next buffer.
            needToSkipCharNum = i - max;
            lastCharOrder = -1;
        } else {
            if (order != -1 && lastCharOrder != -1) {
                totalRel++;
                if (totalRel > MAX_REL_THRESHOLD) {
                    done = true;
                    break;
                }
                // count this sequence to its category counter
                relSample[jp2CharContext[lastCharOrder, order]]++;
            }
            lastCharOrder = order;
        }
    }
}
// Feeds a single character of charLen bytes starting at buf[offset]. When this
// character and the previous one both have an order, the pair is counted in
// its category. Does nothing once the analyser is done.
public void HandleOneChar(byte[] buf, int offset, int charLen)
{
    // Latch the done flag as soon as enough sequences have been counted.
    if (done || totalRel > MAX_REL_THRESHOLD) {
        done = true;
        return;
    }

    // Only 2-byte characters can have an order; anything else breaks the chain.
    int order = -1;
    if (charLen == 2) {
        order = GetOrder(buf, offset);
    }

    bool havePair = lastCharOrder != -1 && order != -1;
    if (havePair) {
        totalRel++;
        // Count this sequence in its category counter.
        relSample[jp2CharContext[lastCharOrder, order]]++;
    }

    lastCharOrder = order;
}
// Returns the analyser to its initial state so it can be used on new input:
// clears the total, every category counter, the pending skip count, the
// previous-character order and the done flag.
public void Reset()
{
    totalRel = 0;
    for (int i = 0; i < CATEGORIES_NUM; i++) {
        relSample[i] = 0;
    }

    // Scalar state, not per-category: reset once, outside the loop, so these
    // are cleared regardless of how many categories there are.
    needToSkipCharNum = 0;
    lastCharOrder = -1;
    done = false;
}
// Inspects the character that starts at buf[offset], reports its length in
// bytes through charLen, and returns its order, or -1 when it has none.
// HandleData uses charLen to step through the buffer.
protected abstract int GetOrder(byte[] buf, int offset, out int charLen);
// Returns the order of the character at buf[offset], or -1 when it has none.
// HandleOneChar only calls this overload for 2-byte characters.
protected abstract int GetOrder(byte[] buf, int offset);
// Reports whether more than ENOUGH_REL_THRESHOLD sequences have been counted.
public bool GotEnoughData()
{
    bool enough = totalRel > ENOUGH_REL_THRESHOLD;
    return enough;
}
}
// Context analyser for Shift_JIS input. Only hiragana characters (lead byte
// 0x82, trail byte 0x9F..0xF1) are given an order; everything else is -1.
public class SJISContextAnalyser : JapaneseContextAnalyser
{
    // Lead byte of the hiragana characters this analyser looks for.
    private const byte HIRAGANA_FIRST_BYTE = 0x82;

    // Reports the byte length of the character at buf[offset] in charLen and
    // returns its hiragana order, or -1 when it is not hiragana.
    protected override int GetOrder(byte[] buf, int offset, out int charLen)
    {
        // Find out the current character's byte length from its lead byte.
        byte high = buf[offset];
        bool isLeadByte = (high >= 0x81 && high <= 0x9F)
            || (high >= 0xE0 && high <= 0xFC);
        charLen = isLeadByte ? 2 : 1;

        return GetHiraganaOrder(buf, offset);
    }

    // Returns the hiragana order of the character at buf[offset], or -1 when
    // it is not hiragana.
    protected override int GetOrder(byte[] buf, int offset)
    {
        return GetHiraganaOrder(buf, offset);
    }

    // Shared lookup for both overloads. Returns -1 instead of throwing when
    // the lead byte is the last byte of the array and no trail byte exists.
    private static int GetHiraganaOrder(byte[] buf, int offset)
    {
        if (buf[offset] != HIRAGANA_FIRST_BYTE || offset + 1 >= buf.Length)
            return -1;

        byte low = buf[offset + 1];
        if (low >= 0x9F && low <= 0xF1)
            return low - 0x9F;
        return -1;
    }
}
// Context analyser for EUC-JP input. Only hiragana characters (lead byte
// 0xA4, trail byte 0xA1..0xF3) are given an order; everything else is -1.
public class EUCJPContextAnalyser : JapaneseContextAnalyser
{
    // Lead byte of the hiragana characters this analyser looks for.
    private const byte HIRAGANA_FIRST_BYTE = 0xA4;

    // Reports the byte length of the character at buf[offset] in charLen and
    // returns its hiragana order, or -1 when it is not hiragana.
    protected override int GetOrder(byte[] buf, int offset, out int charLen)
    {
        byte high = buf[offset];

        // Find out the current character's byte length from its lead byte.
        if (high == 0x8E || (high >= 0xA1 && high <= 0xFE))
            charLen = 2;
        else if (high == 0x8F)
            // 0x8F (SS3) introduces a three-byte character. The previous test
            // against 0xBF could never match because 0xBF already falls in the
            // 0xA1..0xFE two-byte range above.
            charLen = 3;
        else
            charLen = 1;

        return GetHiraganaOrder(buf, offset);
    }

    // Returns the hiragana order of the character at buf[offset], or -1 when
    // it is not hiragana.
    protected override int GetOrder(byte[] buf, int offset)
    {
        return GetHiraganaOrder(buf, offset);
    }

    // Shared lookup for both overloads. Returns -1 instead of throwing when
    // the lead byte is the last byte of the array and no trail byte exists.
    private static int GetHiraganaOrder(byte[] buf, int offset)
    {
        if (buf[offset] != HIRAGANA_FIRST_BYTE || offset + 1 >= buf.Length)
            return -1;

        byte low = buf[offset + 1];
        if (low >= 0xA1 && low <= 0xF3)
            return low - 0xA1;
        return -1;
    }
}
}

View file

@ -0,0 +1,246 @@
/* ***** BEGIN LICENSE BLOCK *****
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
*
* The contents of this file are subject to the Mozilla Public License Version
* 1.1 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
* for the specific language governing rights and limitations under the
* License.
*
* The Original Code is Mozilla Universal charset detector code.
*
* The Initial Developer of the Original Code is
* Netscape Communications Corporation.
* Portions created by the Initial Developer are Copyright (C) 2001
* the Initial Developer. All Rights Reserved.
*
* Contributor(s):
* Shy Shalom <shooshX@gmail.com>
* Rudi Pettazzi <rudi.pettazzi@gmail.com> (C# port)
*
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
* in which case the provisions of the GPL or the LGPL are applicable instead
* of those above. If you wish to allow use of your version of this file only
* under the terms of either the GPL or the LGPL, and not to allow others to
* use your version of this file under the terms of the MPL, indicate your
* decision by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL or the LGPL. If you do not delete
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the MPL, the GPL or the LGPL.
*
* ***** END LICENSE BLOCK ***** */
namespace UniversalDetector.Core
{
public abstract class BulgarianModel : SequenceModel
{
//Model Table:
//total sequences: 100%
//first 512 sequences: 96.9392%
//first 1024 sequences: 3.0618%
//rest sequences: 0.2992%
//negative sequences: 0.0020%
private static byte[] BULGARIAN_LANG_MODEL = {
0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,2,3,3,3,3,3,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,2,2,3,2,2,1,2,2,
3,1,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,0,3,3,3,3,3,3,3,3,3,3,0,3,0,1,
0,0,0,0,0,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,3,3,3,3,3,3,3,0,3,1,0,
0,1,0,0,0,0,0,0,0,0,1,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
3,2,2,2,3,3,3,3,3,3,3,3,3,3,3,3,3,1,3,2,3,3,3,3,3,3,3,3,0,3,0,0,
0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,2,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,1,3,2,3,3,3,3,3,3,3,3,0,3,0,0,
0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,3,3,3,3,3,3,3,3,3,3,2,3,2,2,1,3,3,3,3,2,2,2,1,1,2,0,1,0,1,0,0,
0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1,
3,3,3,3,3,3,3,2,3,2,2,3,3,1,1,2,3,3,2,3,3,3,3,2,1,2,0,2,0,3,0,0,
0,0,0,0,0,0,0,1,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1,
3,3,3,3,3,3,3,1,3,3,3,3,3,2,3,2,3,3,3,3,3,2,3,3,1,3,0,3,0,2,0,0,
0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,
3,3,3,3,3,3,3,3,1,3,3,2,3,3,3,1,3,3,2,3,2,2,2,0,0,2,0,2,0,2,0,0,
0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1,
3,3,3,3,3,3,3,3,3,0,3,3,3,2,2,3,3,3,1,2,2,3,2,1,1,2,0,2,0,0,0,0,
1,0,0,0,0,0,0,0,0,0,2,0,0,1,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,
3,3,3,3,3,3,3,2,3,3,1,2,3,2,2,2,3,3,3,3,3,2,2,3,1,2,0,2,1,2,0,0,
0,0,0,0,0,0,0,0,0,0,3,0,0,1,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1,
3,3,3,3,3,1,3,3,3,3,3,2,3,3,3,2,3,3,2,3,2,2,2,3,1,2,0,1,0,1,0,0,
0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,
3,3,3,3,3,3,3,3,3,3,3,1,1,1,2,2,1,3,1,3,2,2,3,0,0,1,0,1,0,1,0,0,
0,0,0,1,0,0,0,0,1,0,2,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,
3,3,3,3,3,2,2,3,2,2,3,1,2,1,1,1,2,3,1,3,1,2,2,0,1,1,1,1,0,1,0,0,
0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,
3,3,3,3,3,1,3,2,2,3,3,1,2,3,1,1,3,3,3,3,1,2,2,1,1,1,0,2,0,2,0,1,
0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,2,2,3,3,3,2,2,1,1,2,0,2,0,1,0,0,
0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,
3,0,1,2,1,3,3,2,3,3,3,3,3,2,3,2,1,0,3,1,2,1,2,1,2,3,2,1,0,1,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1,1,1,2,3,3,3,3,3,3,3,3,3,3,3,3,0,0,3,1,3,3,2,3,3,2,2,2,0,1,0,0,
0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
2,3,3,3,3,0,3,3,3,3,3,2,1,1,2,1,3,3,0,3,1,1,1,1,3,2,0,1,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,
3,3,2,2,2,3,3,3,3,3,3,3,3,3,3,3,1,1,3,1,3,3,2,3,2,2,2,3,0,2,0,0,
0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,3,3,3,3,2,3,3,2,2,3,2,1,1,1,1,1,3,1,3,1,1,0,0,0,1,0,0,0,1,0,0,
0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,
3,3,3,3,3,2,3,2,0,3,2,0,3,0,2,0,0,2,1,3,1,0,0,1,0,0,0,1,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
3,3,3,3,2,1,1,1,1,2,1,1,2,1,1,1,2,2,1,2,1,1,1,0,1,1,0,1,0,1,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,
3,3,3,3,2,1,3,1,1,2,1,3,2,1,1,0,1,2,3,2,1,1,1,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
2,3,3,3,3,2,2,1,0,1,0,0,1,0,0,0,2,1,0,3,0,0,1,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
3,3,3,2,3,2,3,3,1,3,2,1,1,1,2,1,1,2,1,3,0,1,0,0,0,1,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,1,1,2,2,3,3,2,3,2,2,2,3,1,2,2,1,1,2,1,1,2,2,0,1,1,0,1,0,2,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,3,3,3,2,1,3,1,0,2,2,1,3,2,1,0,0,2,0,2,0,1,0,0,0,0,0,0,0,1,0,0,
0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,
3,3,3,3,3,3,1,2,0,2,3,1,2,3,2,0,1,3,1,2,1,1,1,0,0,1,0,0,2,2,2,3,
2,2,2,2,1,2,1,1,2,2,1,1,2,0,1,1,1,0,0,1,1,0,0,1,1,0,0,0,1,1,0,1,
3,3,3,3,3,2,1,2,2,1,2,0,2,0,1,0,1,2,1,2,1,1,0,0,0,1,0,1,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1,
3,3,2,3,3,1,1,3,1,0,3,2,1,0,0,0,1,2,0,2,0,1,0,0,0,1,0,1,2,1,2,2,
1,1,1,1,1,1,1,2,2,2,1,1,1,1,1,1,1,0,1,2,1,1,1,0,0,0,0,0,1,1,0,0,
3,1,0,1,0,2,3,2,2,2,3,2,2,2,2,2,1,0,2,1,2,1,1,1,0,1,2,1,2,2,2,1,
1,1,2,2,2,2,1,2,1,1,0,1,2,1,2,2,2,1,1,1,0,1,1,1,1,2,0,1,0,0,0,0,
2,3,2,3,3,0,0,2,1,0,2,1,0,0,0,0,2,3,0,2,0,0,0,0,0,1,0,0,2,0,1,2,
2,1,2,1,2,2,1,1,1,2,1,1,1,0,1,2,2,1,1,1,1,1,0,1,1,1,0,0,1,2,0,0,
3,3,2,2,3,0,2,3,1,1,2,0,0,0,1,0,0,2,0,2,0,0,0,1,0,1,0,1,2,0,2,2,
1,1,1,1,2,1,0,1,2,2,2,1,1,1,1,1,1,1,0,1,1,1,0,0,0,0,0,0,1,1,0,0,
2,3,2,3,3,0,0,3,0,1,1,0,1,0,0,0,2,2,1,2,0,0,0,0,0,0,0,0,2,0,1,2,
2,2,1,1,1,1,1,2,2,2,1,0,2,0,1,0,1,0,0,1,0,1,0,0,1,0,0,0,0,1,0,0,
3,3,3,3,2,2,2,2,2,0,2,1,1,1,1,2,1,2,1,1,0,2,0,1,0,1,0,0,2,0,1,2,
1,1,1,1,1,1,1,2,2,1,1,0,2,0,1,0,2,0,0,1,1,1,0,0,2,0,0,0,1,1,0,0,
2,3,3,3,3,1,0,0,0,0,0,0,0,0,0,0,2,0,0,1,1,0,0,0,0,0,0,1,2,0,1,2,
2,2,2,1,1,2,1,1,2,2,2,1,2,0,1,1,1,1,1,1,0,1,1,1,1,0,0,1,1,1,0,0,
2,3,3,3,3,0,2,2,0,2,1,0,0,0,1,1,1,2,0,2,0,0,0,3,0,0,0,0,2,0,2,2,
1,1,1,2,1,2,1,1,2,2,2,1,2,0,1,1,1,0,1,1,1,1,0,2,1,0,0,0,1,1,0,0,
2,3,3,3,3,0,2,1,0,0,2,0,0,0,0,0,1,2,0,2,0,0,0,0,0,0,0,0,2,0,1,2,
1,1,1,2,1,1,1,1,2,2,2,0,1,0,1,1,1,0,0,1,1,1,0,0,1,0,0,0,0,1,0,0,
3,3,2,2,3,0,1,0,1,0,0,0,0,0,0,0,1,1,0,3,0,0,0,0,0,0,0,0,1,0,2,2,
1,1,1,1,1,2,1,1,2,2,1,2,2,1,0,1,1,1,1,1,0,1,0,0,1,0,0,0,1,1,0,0,
3,1,0,1,0,2,2,2,2,3,2,1,1,1,2,3,0,0,1,0,2,1,1,0,1,1,1,1,2,1,1,1,
1,2,2,1,2,1,2,2,1,1,0,1,2,1,2,2,1,1,1,0,0,1,1,1,2,1,0,1,0,0,0,0,
2,1,0,1,0,3,1,2,2,2,2,1,2,2,1,1,1,0,2,1,2,2,1,1,2,1,1,0,2,1,1,1,
1,2,2,2,2,2,2,2,1,2,0,1,1,0,2,1,1,1,1,1,0,0,1,1,1,1,0,1,0,0,0,0,
2,1,1,1,1,2,2,2,2,1,2,2,2,1,2,2,1,1,2,1,2,3,2,2,1,1,1,1,0,1,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
2,2,2,3,2,0,1,2,0,1,2,1,1,0,1,0,1,2,1,2,0,0,0,1,1,0,0,0,1,0,0,2,
1,1,0,0,1,1,0,1,1,1,1,0,2,0,1,1,1,0,0,1,1,0,0,0,0,1,0,0,0,1,0,0,
2,0,0,0,0,1,2,2,2,2,2,2,2,1,2,1,1,1,1,1,1,1,0,1,1,1,1,1,2,1,1,1,
1,2,2,2,2,1,1,2,1,2,1,1,1,0,2,1,2,1,1,1,0,2,1,1,1,1,0,1,0,0,0,0,
3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,
1,1,0,1,0,1,1,1,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
2,2,2,3,2,0,0,0,0,1,0,0,0,0,0,0,1,1,0,2,0,0,0,0,0,0,0,0,1,0,1,2,
1,1,1,1,1,1,0,0,2,2,2,2,2,0,1,1,0,1,1,1,1,1,0,0,1,0,0,0,1,1,0,1,
2,3,1,2,1,0,1,1,0,2,2,2,0,0,1,0,0,1,1,1,1,0,0,0,0,0,0,0,1,0,1,2,
1,1,1,1,2,1,1,1,1,1,1,1,1,0,1,1,0,1,0,1,0,1,0,0,1,0,0,0,0,1,0,0,
2,2,2,2,2,0,0,2,0,0,2,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,2,0,2,2,
1,1,1,1,1,0,0,1,2,1,1,0,1,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,
1,2,2,2,2,0,0,2,0,1,1,0,0,0,1,0,0,2,0,2,0,0,0,0,0,0,0,0,0,0,1,1,
0,0,0,1,1,1,1,1,1,1,1,1,1,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,
1,2,2,3,2,0,0,1,0,0,1,0,0,0,0,0,0,1,0,2,0,0,0,1,0,0,0,0,0,0,0,2,
1,1,0,0,1,0,0,0,1,1,0,0,1,0,1,1,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,
2,1,2,2,2,1,2,1,2,2,1,1,2,1,1,1,0,1,1,1,1,2,0,1,0,1,1,1,1,0,1,1,
1,1,2,1,1,1,1,1,1,0,0,1,2,1,1,1,1,1,1,0,0,1,1,1,0,0,0,0,0,0,0,0,
1,0,0,1,3,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
2,2,2,2,1,0,0,1,0,2,0,0,0,0,0,1,1,1,0,1,0,0,0,0,0,0,0,0,2,0,0,1,
0,2,0,1,0,0,1,1,2,0,1,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,
1,2,2,2,2,0,1,1,0,2,1,0,1,1,1,0,0,1,0,2,0,1,0,0,0,0,0,0,0,0,0,1,
0,1,0,0,1,0,0,0,1,1,0,0,1,0,0,1,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,
2,2,2,2,2,0,0,1,0,0,0,1,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,1,
0,1,0,1,1,1,0,0,1,1,1,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,
2,0,1,0,0,1,2,1,1,1,1,1,1,2,2,1,0,0,1,0,1,0,0,0,0,1,1,1,1,0,0,0,
1,1,2,1,1,1,1,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
2,2,1,2,1,0,0,1,0,0,0,0,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0,1,
0,0,0,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1,0,0,1,2,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,
0,1,1,0,1,1,1,0,0,1,0,0,1,0,1,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,
1,0,1,0,0,1,1,1,1,1,1,1,1,1,1,1,0,0,1,0,2,0,0,2,0,1,0,0,1,0,0,1,
1,1,0,0,1,1,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,
1,1,1,1,1,1,1,2,0,0,0,0,0,0,2,1,0,1,1,0,0,1,1,1,0,1,0,0,0,0,0,0,
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,1,1,0,1,1,1,1,1,0,1,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
};
// Builds a Bulgarian sequence model on top of the shared BULGARIAN_LANG_MODEL
// table. charToOrderMap and name are supplied by the encoding-specific
// subclass and passed through to SequenceModel unchanged. The 0.969392f
// argument equals the "first 512 sequences" ratio listed above the table.
// NOTE(review): the meaning of the `false` argument is defined by
// SequenceModel and is not visible here — confirm before changing it.
public BulgarianModel(byte[] charToOrderMap, string name)
: base(charToOrderMap, BULGARIAN_LANG_MODEL, 0.969392f, false, name)
{
}
}
// Bulgarian sequence model for text encoded as ISO-8859-5.
public class Latin5BulgarianModel : BulgarianModel
{
    // Character Mapping Table: 256 entries, one per byte value, 16 per row;
    // the trailing comment on each row is the value of the row's first byte.
    // Special values:
    //   255: control characters that usually do not exist in any text
    //   254: carriage return / line feed
    //   253: symbol (punctuation) that does not belong to a word
    //   252: 0 - 9
    // This table is modified based on win1251BulgarianCharToOrderMap, so
    // only numbers < 64 are sure to be valid.
    // The table is never reassigned, hence readonly.
    private static readonly byte[] LATIN5_CHAR_TO_ORDER_MAP = {
        255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, //00
        255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, //10
        253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, //20
        252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, //30
        253, 77, 90, 99,100, 72,109,107,101, 79,185, 81,102, 76, 94, 82, //40
        110,186,108, 91, 74,119, 84, 96,111,187,115,253,253,253,253,253, //50
        253, 65, 69, 70, 66, 63, 68,112,103, 92,194,104, 95, 86, 87, 71, //60
        116,195, 85, 93, 97,113,196,197,198,199,200,253,253,253,253,253, //70
        194,195,196,197,198,199,200,201,202,203,204,205,206,207,208,209, //80
        210,211,212,213,214,215,216,217,218,219,220,221,222,223,224,225, //90
        81,226,227,228,229,230,105,231,232,233,234,235,236, 45,237,238, //a0
        31, 32, 35, 43, 37, 44, 55, 47, 40, 59, 33, 46, 38, 36, 41, 30, //b0
        39, 28, 34, 51, 48, 49, 53, 50, 54, 57, 61,239, 67,240, 60, 56, //c0
        1, 18, 9, 20, 11, 3, 23, 15, 2, 26, 12, 10, 14, 6, 4, 13, //d0
        7, 8, 5, 19, 29, 25, 22, 21, 27, 24, 17, 75, 52,241, 42, 16, //e0
        62,242,243,244, 58,245, 98,246,247,248,249,250,251, 91,252,253, //f0
    };

    // Passes the ISO-8859-5 table and charset name to BulgarianModel.
    public Latin5BulgarianModel() : base(LATIN5_CHAR_TO_ORDER_MAP, "ISO-8859-5")
    {
    }
}
// Bulgarian sequence model for text encoded as windows-1251.
public class Win1251BulgarianModel : BulgarianModel
{
    // Character Mapping Table: 256 entries, one per byte value, 16 per row;
    // the trailing comment on each row is the value of the row's first byte.
    // The table is never reassigned, hence readonly.
    private static readonly byte[] WIN1251_CHAR_TO_ORDER_MAP = {
        255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, //00
        255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, //10
        253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, //20
        252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, //30
        253, 77, 90, 99,100, 72,109,107,101, 79,185, 81,102, 76, 94, 82, //40
        110,186,108, 91, 74,119, 84, 96,111,187,115,253,253,253,253,253, //50
        253, 65, 69, 70, 66, 63, 68,112,103, 92,194,104, 95, 86, 87, 71, //60
        116,195, 85, 93, 97,113,196,197,198,199,200,253,253,253,253,253, //70
        206,207,208,209,210,211,212,213,120,214,215,216,217,218,219,220, //80
        221, 78, 64, 83,121, 98,117,105,222,223,224,225,226,227,228,229, //90
        88,230,231,232,233,122, 89,106,234,235,236,237,238, 45,239,240, //a0
        73, 80,118,114,241,242,243,244,245, 62, 58,246,247,248,249,250, //b0
        31, 32, 35, 43, 37, 44, 55, 47, 40, 59, 33, 46, 38, 36, 41, 30, //c0
        39, 28, 34, 51, 48, 49, 53, 50, 54, 57, 61,251, 67,252, 60, 56, //d0
        1, 18, 9, 20, 11, 3, 23, 15, 2, 26, 12, 10, 14, 6, 4, 13, //e0
        7, 8, 5, 19, 29, 25, 22, 21, 27, 24, 17, 75, 52,253, 42, 16, //f0
    };

    // Passes the windows-1251 table and charset name to BulgarianModel.
    public Win1251BulgarianModel() : base(WIN1251_CHAR_TO_ORDER_MAP, "windows-1251")
    {
    }
}
}

View file

@ -0,0 +1,345 @@
/* ***** BEGIN LICENSE BLOCK *****
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
*
* The contents of this file are subject to the Mozilla Public License Version
* 1.1 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
* for the specific language governing rights and limitations under the
* License.
*
* The Original Code is Mozilla Universal charset detector code.
*
* The Initial Developer of the Original Code is
* Netscape Communications Corporation.
* Portions created by the Initial Developer are Copyright (C) 2001
* the Initial Developer. All Rights Reserved.
*
* Contributor(s):
* Shy Shalom <shooshX@gmail.com>
* Rudi Pettazzi <rudi.pettazzi@gmail.com> (C# port)
*
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
* in which case the provisions of the GPL or the LGPL are applicable instead
* of those above. If you wish to allow use of your version of this file only
* under the terms of either the GPL or the LGPL, and not to allow others to
* use your version of this file under the terms of the MPL, indicate your
* decision by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL or the LGPL. If you do not delete
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the MPL, the GPL or the LGPL.
*
* ***** END LICENSE BLOCK ***** */
namespace UniversalDetector.Core
{
public abstract class CyrillicModel : SequenceModel
{
// Model Table:
// total sequences: 100%
// first 512 sequences: 97.6601%
// first 1024 sequences: 2.3389%
// rest sequences: 0.1237%
// negative sequences: 0.0009%
protected readonly static byte[] RUSSIAN_LANG_MODEL = {
0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,1,3,3,3,3,1,3,3,3,2,3,2,3,3,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,3,2,2,2,2,2,0,0,2,
3,3,3,2,3,3,3,3,3,3,3,3,3,3,2,3,3,0,0,3,3,3,3,3,3,3,3,3,2,3,2,0,
0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,3,3,2,2,3,3,3,3,3,3,3,3,3,2,3,3,0,0,3,3,3,3,3,3,3,3,2,3,3,1,0,
0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,2,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,0,0,3,3,3,3,3,3,3,3,3,3,3,2,1,
0,0,0,0,0,0,0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,0,0,3,3,3,3,3,3,3,3,3,3,3,2,1,
0,0,0,0,0,1,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,3,3,3,3,3,3,3,2,2,2,3,1,3,3,1,3,3,3,3,2,2,3,0,2,2,2,3,3,2,1,0,
0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,
3,3,3,3,3,3,2,3,3,3,3,3,2,2,3,2,3,3,3,2,1,2,2,0,1,2,2,2,2,2,2,0,
0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,3,0,2,2,3,3,2,1,2,0,
0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,1,0,0,2,0,0,0,0,0,0,0,0,0,
3,3,3,3,3,3,2,3,3,1,2,3,2,2,3,2,3,3,3,3,2,2,3,0,3,2,2,3,1,1,1,0,
0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,3,3,3,3,3,3,3,2,2,3,3,3,3,3,2,3,3,3,3,2,2,2,0,3,3,3,2,2,2,2,0,
0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,3,3,3,3,3,3,3,3,3,2,3,2,3,3,3,3,3,3,2,3,2,2,0,1,3,2,1,2,2,1,0,
0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,
3,3,3,3,3,3,3,3,3,3,3,2,1,1,3,0,1,1,1,1,2,1,1,0,2,2,2,1,2,0,1,0,
0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,3,3,3,3,3,2,3,3,2,2,2,2,1,3,2,3,2,3,2,1,2,2,0,1,1,2,1,2,1,2,0,
0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,2,3,3,3,2,2,2,2,0,2,2,2,2,3,1,1,0,
0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,
3,2,3,2,2,3,3,3,3,3,3,3,3,3,1,3,2,0,0,3,3,3,3,2,3,3,3,3,2,3,2,0,
0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
2,3,3,3,3,3,2,2,3,3,0,2,1,0,3,2,3,2,3,0,0,1,2,0,0,1,0,1,2,1,1,0,
0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,0,3,0,2,3,3,3,3,2,3,3,3,3,1,2,2,0,0,2,3,2,2,2,3,2,3,2,2,3,0,0,
0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,2,3,0,2,3,2,3,0,1,2,3,3,2,0,2,3,0,0,2,3,2,2,0,1,3,1,3,2,2,1,0,
0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,1,3,0,2,3,3,3,3,3,3,3,3,2,1,3,2,0,0,2,2,3,3,3,2,3,3,0,2,2,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,3,3,3,3,3,2,2,3,3,2,2,2,3,3,0,0,1,1,1,1,1,2,0,0,1,1,1,1,0,1,0,
0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,3,3,3,3,3,2,2,3,3,3,3,3,3,3,0,3,2,3,3,2,3,2,0,2,1,0,1,1,0,1,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,
3,3,3,3,3,3,2,3,3,3,2,2,2,2,3,1,3,2,3,1,1,2,1,0,2,2,2,2,1,3,1,0,
0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,
2,2,3,3,3,3,3,1,2,2,1,3,1,0,3,0,0,3,0,0,0,1,1,0,1,2,1,0,0,0,0,0,
0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,2,2,1,1,3,3,3,2,2,1,2,2,3,1,1,2,0,0,2,2,1,3,0,0,2,1,1,2,1,1,0,
0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,2,3,3,3,3,1,2,2,2,1,2,1,3,3,1,1,2,1,2,1,2,2,0,2,0,0,1,1,0,1,0,
0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
2,3,3,3,3,3,2,1,3,2,2,3,2,0,3,2,0,3,0,1,0,1,1,0,0,1,1,1,1,0,1,0,
0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,3,2,3,3,3,2,2,2,3,3,1,2,1,2,1,0,1,0,1,1,0,1,0,0,2,1,1,1,0,1,0,
0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,
3,1,1,2,1,2,3,3,2,2,1,2,2,3,0,2,1,0,0,2,2,3,2,1,2,2,2,2,2,3,1,0,
0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,3,3,3,3,1,1,0,1,1,2,2,1,1,3,0,0,1,3,1,1,1,0,0,0,1,0,1,1,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
2,1,3,3,3,2,0,0,0,2,1,0,1,0,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
2,0,1,0,0,2,3,2,2,2,1,2,2,2,1,2,1,0,0,1,1,1,0,2,0,1,1,1,0,0,1,1,
1,0,0,0,0,0,1,2,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,
2,3,3,3,3,0,0,0,0,1,0,0,0,0,3,0,1,2,1,0,0,0,0,0,0,0,1,1,0,0,1,1,
1,0,1,0,1,2,0,0,1,1,2,1,0,1,1,1,1,0,1,1,1,1,0,1,0,0,1,0,0,1,1,0,
2,2,3,2,2,2,3,1,2,2,2,2,2,2,2,2,1,1,1,1,1,1,1,0,1,0,1,1,1,0,2,1,
1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,0,1,0,1,1,0,1,1,1,0,1,1,0,
3,3,3,2,2,2,2,3,2,2,1,1,2,2,2,2,1,1,3,1,2,1,2,0,0,1,1,0,1,0,2,1,
1,1,1,1,1,2,1,0,1,1,1,1,0,1,0,0,1,1,0,0,1,0,1,0,0,1,0,0,0,1,1,0,
2,0,0,1,0,3,2,2,2,2,1,2,1,2,1,2,0,0,0,2,1,2,2,1,1,2,2,0,1,1,0,2,
1,1,1,1,1,0,1,1,1,2,1,1,1,2,1,0,1,2,1,1,1,1,0,1,1,1,0,0,1,0,0,1,
1,3,2,2,2,1,1,1,2,3,0,0,0,0,2,0,2,2,1,0,0,0,0,0,0,1,0,0,0,0,1,1,
1,0,1,1,0,1,0,1,1,0,1,1,0,2,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,1,1,0,
2,3,2,3,2,1,2,2,2,2,1,0,0,0,2,0,0,1,1,0,0,0,0,0,0,0,1,1,0,0,2,1,
1,1,2,1,0,2,0,0,1,0,1,0,0,1,0,0,1,1,0,1,1,0,0,0,0,0,1,0,0,0,0,0,
3,0,0,1,0,2,2,2,3,2,2,2,2,2,2,2,0,0,0,2,1,2,1,1,1,2,2,0,0,0,1,2,
1,1,1,1,1,0,1,2,1,1,1,1,1,1,1,0,1,1,1,1,1,1,0,1,1,1,1,1,1,0,0,1,
2,3,2,3,3,2,0,1,1,1,0,0,1,0,2,0,1,1,3,1,0,0,0,0,0,0,0,1,0,0,2,1,
1,1,1,1,1,1,1,0,1,0,1,1,1,1,0,1,1,1,0,0,1,1,0,1,0,0,0,0,0,0,1,0,
2,3,3,3,3,1,2,2,2,2,0,1,1,0,2,1,1,1,2,1,0,1,1,0,0,1,0,1,0,0,2,0,
0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
2,3,3,3,2,0,0,1,1,2,2,1,0,0,2,0,1,1,3,0,0,1,0,0,0,0,0,1,0,1,2,1,
1,1,2,0,1,1,1,0,1,0,1,1,0,1,0,1,1,1,1,0,1,0,0,0,0,0,0,1,0,1,1,0,
1,3,2,3,2,1,0,0,2,2,2,0,1,0,2,0,1,1,1,0,1,0,0,0,3,0,1,1,0,0,2,1,
1,1,1,0,1,1,0,0,0,0,1,1,0,1,0,0,2,1,1,0,1,0,0,0,1,0,1,0,0,1,1,0,
3,1,2,1,1,2,2,2,2,2,2,1,2,2,1,1,0,0,0,2,2,2,0,0,0,1,2,1,0,1,0,1,
2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,2,1,1,1,0,1,0,1,1,0,1,1,1,0,0,1,
3,0,0,0,0,2,0,1,1,1,1,1,1,1,0,1,0,0,0,1,1,1,0,1,0,1,1,0,0,1,0,1,
1,1,0,0,1,0,0,0,1,0,1,1,0,0,1,0,1,0,1,0,0,0,0,1,0,0,0,1,0,0,0,1,
1,3,3,2,2,0,0,0,2,2,0,0,0,1,2,0,1,1,2,0,0,0,0,0,0,0,0,1,0,0,2,1,
0,1,1,0,0,1,1,0,0,0,1,1,0,1,1,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,
2,3,2,3,2,0,0,0,0,1,1,0,0,0,2,0,2,0,2,0,0,0,0,0,1,0,0,1,0,0,1,1,
1,1,2,0,1,2,1,0,1,1,2,1,1,1,1,1,2,1,1,0,1,0,0,1,1,1,1,1,0,1,1,0,
1,3,2,2,2,1,0,0,2,2,1,0,1,2,2,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,1,
0,0,1,1,0,1,1,0,0,1,1,0,1,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,
1,0,0,1,0,2,3,1,2,2,2,2,2,2,1,1,0,0,0,1,0,1,0,2,1,1,1,0,0,0,0,1,
1,1,0,1,1,0,1,1,1,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,
2,0,2,0,0,1,0,3,2,1,2,1,2,2,0,1,0,0,0,2,1,0,0,2,1,1,1,1,0,2,0,2,
2,1,1,1,1,1,1,1,1,1,1,1,1,2,1,0,1,1,1,1,0,0,0,1,1,1,1,0,1,0,0,1,
1,2,2,2,2,1,0,0,1,0,0,0,0,0,2,0,1,1,1,1,0,0,0,0,1,0,1,2,0,0,2,0,
1,0,1,1,1,2,1,0,1,0,1,1,0,0,1,0,1,1,1,0,1,0,0,0,1,0,0,1,0,1,1,0,
2,1,2,2,2,0,3,0,1,1,0,0,0,0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
0,0,0,1,1,1,0,0,1,0,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,
1,2,2,3,2,2,0,0,1,1,2,0,1,2,1,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,
0,1,1,0,0,1,1,0,0,1,1,0,0,1,1,0,1,1,0,0,1,0,0,0,0,0,0,0,0,1,1,0,
2,2,1,1,2,1,2,2,2,2,2,1,2,2,0,1,0,0,0,1,2,2,2,1,2,1,1,1,1,1,2,1,
1,1,1,1,1,1,1,1,1,1,0,0,1,1,1,0,1,1,1,0,0,0,0,1,1,1,0,1,1,0,0,1,
1,2,2,2,2,0,1,0,2,2,0,0,0,0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,2,0,
0,0,1,0,0,1,0,0,0,0,1,0,1,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,
0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1,2,2,2,2,0,0,0,2,2,2,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,1,
0,1,1,0,0,1,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1,2,2,2,2,0,0,0,0,1,0,0,1,1,2,0,0,0,0,1,0,1,0,0,1,0,0,2,0,0,0,1,
0,0,1,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,
1,2,2,2,1,1,2,0,2,1,1,1,1,0,2,2,0,0,0,0,0,0,0,0,0,1,1,0,0,0,1,1,
0,0,1,0,1,1,0,0,0,0,1,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,
1,0,2,1,2,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,
0,0,1,0,1,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,
1,0,0,0,0,2,0,1,2,1,0,1,1,1,0,1,0,0,0,1,0,1,0,0,1,0,1,0,0,0,0,1,
0,0,0,0,0,1,0,0,1,1,0,0,1,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,
2,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
1,0,0,0,1,0,0,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
1,1,1,0,1,0,1,0,0,1,1,1,1,0,0,0,1,0,0,0,0,1,0,0,0,1,0,1,0,0,0,0,
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
1,1,0,1,1,0,1,0,1,0,0,0,0,1,1,0,1,1,0,0,0,0,0,1,0,1,1,0,1,0,0,0,
0,1,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,
};
// Builds a Cyrillic sequence model on top of the shared RUSSIAN_LANG_MODEL
// table. charToOrderMap and name are supplied by the encoding-specific
// subclass and passed through to SequenceModel unchanged. The 0.976601f
// argument equals the "first 512 sequences" ratio listed above the table.
// NOTE(review): the meaning of the `false` argument is defined by
// SequenceModel and is not visible here — confirm before changing it.
public CyrillicModel(byte[] charToOrderMap, string name)
: base(charToOrderMap, RUSSIAN_LANG_MODEL, 0.976601f, false, name)
{
}
}
// Cyrillic (RUSSIAN_LANG_MODEL) sequence model for text encoded as KOI8-R.
public class Koi8rModel : CyrillicModel
{
// Byte-to-order table: 256 entries, one per byte value, 16 per row; the
// trailing comment on each row is the value of the row's first byte.
// NOTE(review): 252-255 presumably carry the same digit / symbol /
// line-break / control meanings as in the other model tables — confirm.
private readonly static byte[] KOI8R_CHAR_TO_ORDER_MAP = {
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, //00
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, //10
+253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, //20
252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, //30
253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, //40
155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, //50
253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, //60
67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, //70
191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206, //80
207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222, //90
223,224,225, 68,226,227,228,229,230,231,232,233,234,235,236,237, //a0
238,239,240,241,242,243,244,245,246,247,248,249,250,251,252,253, //b0
27, 3, 21, 28, 13, 2, 39, 19, 26, 4, 23, 11, 8, 12, 5, 1, //c0
15, 16, 9, 7, 6, 14, 24, 10, 17, 18, 20, 25, 30, 29, 22, 54, //d0
59, 37, 44, 58, 41, 48, 53, 46, 55, 42, 60, 36, 49, 38, 31, 34, //e0
35, 43, 45, 32, 40, 52, 56, 33, 61, 62, 51, 57, 47, 63, 50, 70, //f0
};
// Passes the KOI8-R table and the charset name "KOI8-R" to CyrillicModel.
public Koi8rModel() : base(KOI8R_CHAR_TO_ORDER_MAP, "KOI8-R")
{
}
}
// Cyrillic (RUSSIAN_LANG_MODEL) sequence model for text encoded as windows-1251.
public class Win1251Model : CyrillicModel
{
// Byte-to-order table: 256 entries, one per byte value, 16 per row; the
// trailing comment on each row is the value of the row's first byte.
// NOTE(review): 252-255 presumably carry the same digit / symbol /
// line-break / control meanings as in the other model tables — confirm.
private readonly static byte[] WIN1251_CHAR_TO_ORDER_MAP = {
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, //00
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, //10
+253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, //20
252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, //30
253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, //40
155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, //50
253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, //60
67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, //70
191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206, //80
207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222, //90
223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238, //a0
239,240,241,242,243,244,245,246, 68,247,248,249,250,251,252,253, //b0
37, 44, 33, 46, 41, 48, 56, 51, 42, 60, 36, 49, 38, 31, 34, 35, //c0
45, 32, 40, 52, 53, 55, 58, 50, 57, 63, 70, 62, 61, 47, 59, 43, //d0
3, 21, 10, 19, 13, 2, 24, 20, 4, 23, 11, 8, 12, 5, 1, 15, //e0
9, 7, 6, 14, 39, 26, 28, 22, 25, 29, 54, 18, 17, 30, 27, 16, //f0
};
// Passes the windows-1251 table and charset name to CyrillicModel.
public Win1251Model() : base(WIN1251_CHAR_TO_ORDER_MAP, "windows-1251")
{
}
}
// Cyrillic (RUSSIAN_LANG_MODEL) sequence model for text encoded as ISO-8859-5.
public class Latin5Model : CyrillicModel
{
// Byte-to-order table: 256 entries, one per byte value, 16 per row; the
// trailing comment on each row is the value of the row's first byte.
// NOTE(review): 252-255 presumably carry the same digit / symbol /
// line-break / control meanings as in the other model tables — confirm.
private readonly static byte[] LATIN5_CHAR_TO_ORDER_MAP = {
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, //00
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, //10
+253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, //20
252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, //30
253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, //40
155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, //50
253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, //60
67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, //70
191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206, //80
207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222, //90
223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238, //a0
37, 44, 33, 46, 41, 48, 56, 51, 42, 60, 36, 49, 38, 31, 34, 35, //b0
45, 32, 40, 52, 53, 55, 58, 50, 57, 63, 70, 62, 61, 47, 59, 43, //c0
3, 21, 10, 19, 13, 2, 24, 20, 4, 23, 11, 8, 12, 5, 1, 15, //d0
9, 7, 6, 14, 39, 26, 28, 22, 25, 29, 54, 18, 17, 30, 27, 16, //e0
239, 68,240,241,242,243,244,245,246,247,248,249,250,251,252,255, //f0
};
// Passes the ISO-8859-5 table and charset name to CyrillicModel.
public Latin5Model() : base(LATIN5_CHAR_TO_ORDER_MAP, "ISO-8859-5")
{
}
}
// Cyrillic (RUSSIAN_LANG_MODEL) sequence model for text encoded as x-mac-cyrillic.
public class MacCyrillicModel : CyrillicModel
{
// Byte-to-order table: 256 entries, one per byte value, 16 per row; the
// trailing comment on each row is the value of the row's first byte.
// NOTE(review): 252-255 presumably carry the same digit / symbol /
// line-break / control meanings as in the other model tables — confirm.
private readonly static byte[] MACCYRILLIC_CHAR_TO_ORDER_MAP = {
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, //00
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, //10
+253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, //20
252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, //30
253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, //40
155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, //50
253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, //60
67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, //70
37, 44, 33, 46, 41, 48, 56, 51, 42, 60, 36, 49, 38, 31, 34, 35, //80
45, 32, 40, 52, 53, 55, 58, 50, 57, 63, 70, 62, 61, 47, 59, 43, //90
191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206, //a0
207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222, //b0
223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238, //c0
239,240,241,242,243,244,245,246,247,248,249,250,251,252, 68, 16, //d0
3, 21, 10, 19, 13, 2, 24, 20, 4, 23, 11, 8, 12, 5, 1, 15, //e0
9, 7, 6, 14, 39, 26, 28, 22, 25, 29, 54, 18, 17, 30, 27,255, //f0
};
// Passes the x-mac-cyrillic table and charset name to CyrillicModel.
public MacCyrillicModel() : base(MACCYRILLIC_CHAR_TO_ORDER_MAP,
"x-mac-cyrillic")
{
}
}
// Cyrillic (RUSSIAN_LANG_MODEL) sequence model for text encoded as IBM855.
public class Ibm855Model : CyrillicModel
{
// Byte-to-order table: 256 entries, one per byte value, 16 per row; the
// trailing comment on each row is the value of the row's first byte.
// NOTE(review): 252-255 presumably carry the same digit / symbol /
// line-break / control meanings as in the other model tables — confirm.
private readonly static byte[] IBM855_BYTE_TO_ORDER_MAP = {
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, //00
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, //10
+253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, //20
252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, //30
253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, //40
155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, //50
253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, //60
67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, //70
191,192,193,194, 68,195,196,197,198,199,200,201,202,203,204,205, //80
206,207,208,209,210,211,212,213,214,215,216,217, 27, 59, 54, 70, //90
3, 37, 21, 44, 28, 58, 13, 41, 2, 48, 39, 53, 19, 46,218,219, //a0
220,221,222,223,224, 26, 55, 4, 42,225,226,227,228, 23, 60,229, //b0
230,231,232,233,234,235, 11, 36,236,237,238,239,240,241,242,243, //c0
8, 49, 12, 38, 5, 31, 1, 34, 15,244,245,246,247, 35, 16,248, //d0
43, 9, 45, 7, 32, 6, 40, 14, 52, 24, 56, 10, 33, 17, 61,249, //e0
250, 18, 62, 20, 51, 25, 57, 30, 47, 29, 63, 22, 50,251,252,255, //f0
};
// Passes the IBM855 table and charset name to CyrillicModel.
public Ibm855Model() : base(IBM855_BYTE_TO_ORDER_MAP, "IBM855")
{
}
}
/// <summary>
/// Cyrillic model variant registered under the charset name "IBM866".
/// It only contributes the byte-to-order table; everything else comes from
/// the <see cref="CyrillicModel"/> base constructor.
/// </summary>
public class Ibm866Model : CyrillicModel
{
    // One entry per byte value (16 rows x 16 columns = 256 entries); the
    // trailing comment on each row is the high nibble of the bytes it covers.
    // In the rows shared by every table of this family, 0x0A and 0x0D map to
    // 254, the digits 0x30-0x39 map to 252, the remaining ASCII punctuation
    // maps to 253 and the other control bytes map to 255.
    // NOTE(review): the smaller values are presumably per-letter frequency
    // orders consumed by the base sequence model - confirm against CyrillicModel.
    private static readonly byte[] IBM866_CHAR_TO_ORDER_MAP =
    {
        255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, // 00
        255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, // 10
        253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, // 20
        252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, // 30
        253,142,143,144,145,146,147,148,149,150,151,152, 74,153, 75,154, // 40
        155,156,157,158,159,160,161,162,163,164,165,253,253,253,253,253, // 50
        253, 71,172, 66,173, 65,174, 76,175, 64,176,177, 77, 72,178, 69, // 60
        67,179, 78, 73,180,181, 79,182,183,184,185,253,253,253,253,253, // 70
        37, 44, 33, 46, 41, 48, 56, 51, 42, 60, 36, 49, 38, 31, 34, 35, // 80
        45, 32, 40, 52, 53, 55, 58, 50, 57, 63, 70, 62, 61, 47, 59, 43, // 90
        3, 21, 10, 19, 13, 2, 24, 20, 4, 23, 11, 8, 12, 5, 1, 15, // a0
        191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206, // b0
        207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222, // c0
        223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238, // d0
        9, 7, 6, 14, 39, 26, 28, 22, 25, 29, 54, 18, 17, 30, 27, 16, // e0
        239, 68,240,241,242,243,244,245,246,247,248,249,250,251,252,255, // f0
    };

    /// <summary>
    /// Hands the IBM866 table and the charset name to the base model.
    /// </summary>
    public Ibm866Model()
        : base(IBM866_CHAR_TO_ORDER_MAP, "IBM866")
    {
    }
}
}

View file

@ -0,0 +1,244 @@
/* ***** BEGIN LICENSE BLOCK *****
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
*
* The contents of this file are subject to the Mozilla Public License Version
* 1.1 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
* for the specific language governing rights and limitations under the
* License.
*
* The Original Code is Mozilla Universal charset detector code.
*
* The Initial Developer of the Original Code is
* Netscape Communications Corporation.
* Portions created by the Initial Developer are Copyright (C) 2001
* the Initial Developer. All Rights Reserved.
*
* Contributor(s):
* Shy Shalom <shooshX@gmail.com>
* Rudi Pettazzi <rudi.pettazzi@gmail.com> (C# port)
*
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
* in which case the provisions of the GPL or the LGPL are applicable instead
* of those above. If you wish to allow use of your version of this file only
* under the terms of either the GPL or the LGPL, and not to allow others to
* use your version of this file under the terms of the MPL, indicate your
* decision by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL or the LGPL. If you do not delete
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the MPL, the GPL or the LGPL.
*
* ***** END LICENSE BLOCK ***** */
namespace UniversalDetector.Core
{
public abstract class GreekModel : SequenceModel
{
// Model Table:
// total sequences: 100%
// first 512 sequences: 98.2851%
// first 1024 sequences:1.7001%
// rest sequences: 0.0359%
// negative sequences: 0.0148%
private readonly static byte[] GREEK_LANG_MODEL = {
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,3,2,2,3,3,3,3,3,3,3,3,1,3,3,3,0,2,2,3,3,0,3,0,3,2,0,3,3,3,0,
3,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,3,3,3,3,3,0,3,3,0,3,2,3,3,0,3,2,3,3,3,0,0,3,0,3,0,3,3,2,0,0,0,
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,
0,2,3,2,2,3,3,3,3,3,3,3,3,0,3,3,3,3,0,2,3,3,0,3,3,3,3,2,3,3,3,0,
2,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,2,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,0,2,1,3,3,3,3,2,3,3,2,3,3,2,0,
0,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,3,3,3,3,0,3,3,3,3,3,3,0,3,3,0,3,3,3,3,3,3,3,3,3,3,0,3,2,3,3,0,
2,0,1,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,
0,3,3,3,3,3,2,3,0,0,0,0,3,3,0,3,1,3,3,3,0,3,3,0,3,3,3,3,0,0,0,0,
2,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,3,3,3,3,3,0,3,0,3,3,3,3,3,0,3,2,2,2,3,0,2,3,3,3,3,3,2,3,3,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,3,3,3,3,3,3,2,2,2,3,3,3,3,0,3,1,3,3,3,3,2,3,3,3,3,3,3,3,2,2,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,3,3,3,3,3,2,0,3,0,0,0,3,3,2,3,3,3,3,3,0,0,3,2,3,0,2,3,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,3,0,3,3,3,3,0,0,3,3,0,2,3,0,3,0,3,3,3,0,0,3,0,3,0,2,2,3,3,0,0,
0,0,1,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,3,3,3,3,3,2,0,3,2,3,3,3,3,0,3,3,3,3,3,0,3,3,2,3,2,3,3,2,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,3,3,2,3,2,3,3,3,3,3,3,0,2,3,2,3,2,2,2,3,2,3,3,2,3,0,2,2,2,3,0,
2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,3,0,0,0,3,3,3,2,3,3,0,0,3,0,3,0,0,0,3,2,0,3,0,3,0,0,2,0,2,0,
0,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,3,3,3,3,0,3,3,3,3,3,3,0,3,3,0,3,0,0,0,3,3,0,3,3,3,0,0,1,2,3,0,
3,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,3,3,3,3,3,2,0,0,3,2,2,3,3,0,3,3,3,3,3,2,1,3,0,3,2,3,3,2,1,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,3,3,0,2,3,3,3,3,3,3,0,0,3,0,3,0,0,0,3,3,0,3,2,3,0,0,3,3,3,0,
3,0,0,0,2,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,3,3,3,3,0,3,3,3,3,3,3,0,0,3,0,3,0,0,0,3,2,0,3,2,3,0,0,3,2,3,0,
2,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,3,1,2,2,3,3,3,3,3,3,0,2,3,0,3,0,0,0,3,3,0,3,0,2,0,0,2,3,1,0,
2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,3,0,3,3,3,3,0,3,0,3,3,2,3,0,3,3,3,3,3,3,0,3,3,3,0,2,3,0,0,3,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,3,0,3,3,3,0,0,3,0,0,0,3,3,0,3,0,2,3,3,0,0,3,0,3,0,3,3,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,3,0,0,0,3,3,3,3,3,3,0,0,3,0,2,0,0,0,3,3,0,3,0,3,0,0,2,0,2,0,
0,0,0,0,1,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,3,3,3,3,3,3,0,3,0,2,0,3,2,0,3,2,3,2,3,0,0,3,2,3,2,3,3,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,3,0,0,2,3,3,3,3,3,0,0,0,3,0,2,1,0,0,3,2,2,2,0,3,0,0,2,2,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,3,0,3,3,3,2,0,3,0,3,0,3,3,0,2,1,2,3,3,0,0,3,0,3,0,3,3,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,2,3,3,3,0,3,3,3,3,3,3,0,2,3,0,3,0,0,0,2,1,0,2,2,3,0,0,2,2,2,0,
0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,3,0,0,2,3,3,3,2,3,0,0,1,3,0,2,0,0,0,0,3,0,1,0,2,0,0,1,1,1,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,3,3,3,3,3,1,0,3,0,0,0,3,2,0,3,2,3,3,3,0,0,3,0,3,2,2,2,1,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,3,0,3,3,3,0,0,3,0,0,0,0,2,0,2,3,3,2,2,2,2,3,0,2,0,2,2,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,3,3,3,3,2,0,0,0,0,0,0,2,3,0,2,0,2,3,2,0,0,3,0,3,0,3,1,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,3,2,3,3,2,2,3,0,2,0,3,0,0,0,2,0,0,0,0,1,2,0,2,0,2,0,
0,2,0,2,0,2,2,0,0,1,0,2,2,2,0,2,2,2,0,2,2,2,0,0,2,0,0,1,0,0,0,0,
0,2,0,3,3,2,0,0,0,0,0,0,1,3,0,2,0,2,2,2,0,0,2,0,3,0,0,2,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,3,0,2,3,2,0,2,2,0,2,0,2,2,0,2,0,2,2,2,0,0,0,0,0,0,2,3,0,0,0,2,
0,1,2,0,0,0,0,2,2,0,0,0,2,1,0,2,2,0,0,0,0,0,0,1,0,2,0,0,0,0,0,0,
0,0,2,1,0,2,3,2,2,3,2,3,2,0,0,3,3,3,0,0,3,2,0,0,0,1,1,0,2,0,2,2,
0,2,0,2,0,2,2,0,0,2,0,2,2,2,0,2,2,2,2,0,0,2,0,0,0,2,0,1,0,0,0,0,
0,3,0,3,3,2,2,0,3,0,0,0,2,2,0,2,2,2,1,2,0,0,1,2,2,0,0,3,0,0,0,2,
0,1,2,0,0,0,1,2,0,0,0,0,0,0,0,2,2,0,1,0,0,2,0,0,0,2,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,2,3,3,2,2,0,0,0,2,0,2,3,3,0,2,0,0,0,0,0,0,2,2,2,0,2,2,0,2,0,2,
0,2,2,0,0,2,2,2,2,1,0,0,2,2,0,2,0,0,2,0,0,0,0,0,0,2,0,0,0,0,0,0,
0,2,0,3,2,3,0,0,0,3,0,0,2,2,0,2,0,2,2,2,0,0,2,0,0,0,0,0,0,0,0,2,
0,0,2,2,0,0,2,2,2,0,0,0,0,0,0,2,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,2,0,0,3,2,0,2,2,2,2,2,0,0,0,2,0,0,0,0,2,0,1,0,0,2,0,1,0,0,0,
0,2,2,2,0,2,2,0,1,2,0,2,2,2,0,2,2,2,2,1,2,2,0,0,2,0,0,0,0,0,0,0,
0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,
0,2,0,2,0,2,2,0,0,0,0,1,2,1,0,0,2,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,3,2,3,0,0,2,0,0,0,2,2,0,2,0,0,0,1,0,0,2,0,2,0,2,2,0,0,0,0,
0,0,2,0,0,0,0,2,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,
0,2,2,3,2,2,0,0,0,0,0,0,1,3,0,2,0,2,2,0,0,0,1,0,2,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,2,0,2,0,3,2,0,2,0,0,0,0,0,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
0,0,2,0,0,0,0,1,1,0,0,2,1,2,0,2,2,0,1,0,0,1,0,0,0,2,0,0,0,0,0,0,
0,3,0,2,2,2,0,0,2,0,0,0,2,0,0,0,2,3,0,2,0,0,0,0,0,0,2,2,0,0,0,2,
0,1,2,0,0,0,1,2,2,1,0,0,0,2,0,0,2,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,2,1,2,0,2,2,0,2,0,0,2,0,0,0,0,1,2,1,0,2,1,0,0,0,0,0,0,0,0,0,0,
0,0,2,0,0,0,3,1,2,2,0,2,0,0,0,0,2,0,0,0,2,0,0,3,0,0,0,0,2,2,2,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,2,1,0,2,0,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,1,0,0,0,0,0,0,2,
0,2,2,0,0,2,2,2,2,2,0,1,2,0,0,0,2,2,0,1,0,2,0,0,2,2,0,0,0,0,0,0,
0,0,0,0,1,0,0,0,0,0,0,0,3,0,0,2,0,0,0,0,0,0,0,0,2,0,2,0,0,0,0,2,
0,1,2,0,0,0,0,2,2,1,0,1,0,1,0,2,2,2,1,0,0,0,0,0,0,1,0,0,0,0,0,0,
0,2,0,1,2,0,0,0,0,0,0,0,0,0,0,2,0,0,2,2,0,0,0,0,1,0,0,0,0,0,0,2,
0,2,2,0,0,0,0,2,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,2,0,0,0,
0,2,2,2,2,0,0,0,3,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,2,0,0,0,0,0,0,1,
0,0,2,0,0,0,0,1,2,0,0,0,0,0,0,2,2,1,1,0,0,0,0,0,0,1,0,0,0,0,0,0,
0,2,0,2,2,2,0,0,2,0,0,0,0,0,0,0,2,2,2,0,0,0,2,0,0,0,0,0,0,0,0,2,
0,0,1,0,0,0,0,2,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,
0,3,0,2,0,0,0,0,0,0,0,0,2,0,0,0,0,0,2,0,0,0,0,0,0,0,2,0,0,0,0,2,
0,0,2,0,0,0,0,2,2,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,2,0,2,2,1,0,0,0,0,0,0,2,0,0,2,0,2,2,2,0,0,0,0,0,0,2,0,0,0,0,2,
0,0,2,0,0,2,0,2,2,0,0,0,0,2,0,2,0,0,0,0,0,2,0,0,0,2,0,0,0,0,0,0,
0,0,3,0,0,0,2,2,0,2,2,0,0,0,0,0,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,2,0,0,0,0,0,
0,2,2,2,2,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,1,
0,0,0,0,0,0,0,2,1,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,2,2,0,0,0,0,0,2,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,
0,2,0,0,0,2,0,0,0,0,0,1,0,0,0,0,2,2,0,0,0,1,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,2,0,0,0,
0,2,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,2,0,2,0,0,0,
0,0,0,0,0,0,0,0,2,1,0,0,0,0,0,0,2,0,0,0,1,2,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
};
/// <summary>
/// Initializes the Greek sequence model shared by the concrete charset models.
/// </summary>
/// <param name="charToOrderMap">Byte-to-order table supplied by the derived charset model.</param>
/// <param name="name">Charset name supplied by the derived charset model.</param>
// The constructor is protected because the type is abstract: only derived
// models can ever invoke it, so a public constructor was misleading.
// 0.982851f is the "first 512 sequences" figure (98.2851%) quoted in the
// model-table comment at the top of this class.
// NOTE(review): the literal 'false' is presumably SequenceModel's
// keep-English-letter switch - confirm against the SequenceModel constructor.
protected GreekModel(byte[] charToOrderMap, string name)
    : base(charToOrderMap, GREEK_LANG_MODEL, 0.982851f, false, name)
{
}
}
/// <summary>
/// Greek model variant registered under the charset name "ISO-8859-7".
/// It only contributes the byte-to-order table; everything else comes from
/// the <see cref="GreekModel"/> base constructor.
/// </summary>
public class Latin7Model : GreekModel
{
    // Character mapping table: one entry per byte value (16 rows x 16
    // columns); the trailing comment on each row is the high nibble.
    //
    // Reserved order values:
    //   255: control characters that usually do not exist in any text
    //   254: carriage return / line feed (bytes 0x0D and 0x0A)
    //   253: symbol (punctuation) that does not belong to a word
    //   252: digits 0 - 9
    private static readonly byte[] LATIN7_CHAR_TO_ORDER_MAP =
    {
        255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, // 00
        255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, // 10
        253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, // 20
        252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, // 30
        253, 82,100,104, 94, 98,101,116,102,111,187,117, 92, 88,113, 85, // 40
        79,118,105, 83, 67,114,119, 95, 99,109,188,253,253,253,253,253, // 50
        253, 72, 70, 80, 81, 60, 96, 93, 89, 68,120, 97, 77, 86, 69, 55, // 60
        78,115, 65, 66, 58, 76,106,103, 87,107,112,253,253,253,253,253, // 70
        255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, // 80
        255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, // 90
        253,233, 90,253,253,253,253,253,253,253,253,253,253, 74,253,253, // a0
        253,253,253,253,247,248, 61, 36, 46, 71, 73,253, 54,253,108,123, // b0
        110, 31, 51, 43, 41, 34, 91, 40, 52, 47, 44, 53, 38, 49, 59, 39, // c0
        35, 48,250, 37, 33, 45, 56, 50, 84, 57,120,121, 17, 18, 22, 15, // d0
        124, 1, 29, 20, 21, 3, 32, 13, 25, 5, 11, 16, 10, 6, 30, 4, // e0
        9, 8, 14, 7, 2, 12, 28, 23, 42, 24, 64, 75, 19, 26, 27,253, // f0
    };

    /// <summary>
    /// Hands the ISO-8859-7 table and the charset name to the base model.
    /// </summary>
    public Latin7Model()
        : base(LATIN7_CHAR_TO_ORDER_MAP, "ISO-8859-7")
    {
    }
}
/// <summary>
/// Greek model variant registered under the charset name "windows-1253".
/// It only contributes the byte-to-order table; everything else comes from
/// the <see cref="GreekModel"/> base constructor.
/// </summary>
public class Win1253Model : GreekModel
{
    // Character mapping table: one entry per byte value (16 rows x 16
    // columns); the trailing comment on each row is the high nibble.
    // 0x0A and 0x0D map to 254, the digits 0x30-0x39 map to 252, ASCII
    // punctuation maps to 253 and the remaining control bytes map to 255.
    private static readonly byte[] WIN1253_CHAR_TO_ORDER_MAP =
    {
        255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, // 00
        255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, // 10
        253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, // 20
        252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, // 30
        253, 82,100,104, 94, 98,101,116,102,111,187,117, 92, 88,113, 85, // 40
        79,118,105, 83, 67,114,119, 95, 99,109,188,253,253,253,253,253, // 50
        253, 72, 70, 80, 81, 60, 96, 93, 89, 68,120, 97, 77, 86, 69, 55, // 60
        78,115, 65, 66, 58, 76,106,103, 87,107,112,253,253,253,253,253, // 70
        255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, // 80
        255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, // 90
        253,233, 61,253,253,253,253,253,253,253,253,253,253, 74,253,253, // a0
        253,253,253,253,247,253,253, 36, 46, 71, 73,253, 54,253,108,123, // b0
        110, 31, 51, 43, 41, 34, 91, 40, 52, 47, 44, 53, 38, 49, 59, 39, // c0
        35, 48,250, 37, 33, 45, 56, 50, 84, 57,120,121, 17, 18, 22, 15, // d0
        124, 1, 29, 20, 21, 3, 32, 13, 25, 5, 11, 16, 10, 6, 30, 4, // e0
        9, 8, 14, 7, 2, 12, 28, 23, 42, 24, 64, 75, 19, 26, 27,253, // f0
    };

    /// <summary>
    /// Hands the windows-1253 table and the charset name to the base model.
    /// </summary>
    public Win1253Model()
        : base(WIN1253_CHAR_TO_ORDER_MAP, "windows-1253")
    {
    }
}
}

View file

@ -0,0 +1,220 @@
/* ***** BEGIN LICENSE BLOCK *****
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
*
* The contents of this file are subject to the Mozilla Public License Version
* 1.1 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
* for the specific language governing rights and limitations under the
* License.
*
* The Original Code is Mozilla Universal charset detector code.
*
* The Initial Developer of the Original Code is
* Netscape Communications Corporation.
* Portions created by the Initial Developer are Copyright (C) 2001
* the Initial Developer. All Rights Reserved.
*
* Contributor(s):
* Shy Shalom <shooshX@gmail.com>
* Rudi Pettazzi <rudi.pettazzi@gmail.com> (C# port)
*
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
* in which case the provisions of the GPL or the LGPL are applicable instead
* of those above. If you wish to allow use of your version of this file only
* under the terms of either the GPL or the LGPL, and not to allow others to
* use your version of this file under the terms of the MPL, indicate your
* decision by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL or the LGPL. If you do not delete
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the MPL, the GPL or the LGPL.
*
* ***** END LICENSE BLOCK ***** */
namespace UniversalDetector.Core
{
public abstract class HebrewModel : SequenceModel
{
//Model Table:
//total sequences: 100%
//first 512 sequences: 98.4004%
//first 1024 sequences: 1.5981%
//rest sequences: 0.087%
//negative sequences: 0.0015%
private readonly static byte[] HEBREW_LANG_MODEL = {
0,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,2,3,2,1,2,0,1,0,0,
3,0,3,1,0,0,1,3,2,0,1,1,2,0,2,2,2,1,1,1,1,2,1,1,1,2,0,0,2,2,0,1,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,
1,2,1,2,1,2,0,0,2,0,0,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,
1,2,1,3,1,1,0,0,2,0,0,0,1,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,0,1,2,2,1,3,
1,2,1,1,2,2,0,0,2,2,0,0,0,0,1,0,1,0,0,0,1,0,0,0,0,0,0,1,0,1,1,0,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,2,2,2,3,2,
1,2,1,2,2,2,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,2,3,2,2,3,2,2,2,1,2,2,2,2,
1,2,1,1,2,2,0,1,2,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,0,2,2,2,2,2,
0,2,0,2,2,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,0,2,2,2,
0,2,1,2,2,2,0,0,2,1,0,0,0,0,1,0,1,0,0,0,0,0,0,2,0,0,0,0,0,0,1,0,
3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,2,1,2,3,2,2,2,
1,2,1,2,2,2,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,0,
3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,1,0,2,0,2,
0,2,1,2,2,2,0,0,1,2,0,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,2,0,0,1,0,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,2,3,2,2,3,2,1,2,1,1,1,
0,1,1,1,1,1,3,0,1,0,0,0,0,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,
3,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,0,1,1,0,0,1,0,0,1,0,0,0,0,
0,0,1,0,0,0,0,0,2,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,2,2,2,
0,2,0,1,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,
3,3,3,3,3,3,3,3,3,2,3,3,3,2,1,2,3,3,2,3,3,3,3,2,3,2,1,2,0,2,1,2,
0,2,0,2,2,2,0,0,1,2,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,
3,3,3,3,3,3,3,3,3,2,3,3,3,1,2,2,3,3,2,3,2,3,2,2,3,1,2,2,0,2,2,2,
0,2,1,2,2,2,0,0,1,2,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,1,0,0,1,0,
3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,3,2,2,2,3,3,3,3,1,3,2,2,2,
0,2,0,1,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,3,3,3,2,3,2,2,2,1,2,2,0,2,2,2,2,
0,2,0,2,2,2,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,
3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,1,3,2,3,3,2,3,3,2,2,1,2,2,2,2,2,2,
0,2,1,2,1,2,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,
3,3,3,3,3,3,2,3,2,3,3,2,3,3,3,3,2,3,2,3,3,3,3,3,2,2,2,2,2,2,2,1,
0,2,0,1,2,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,
3,3,3,3,3,3,3,3,3,2,1,2,3,3,3,3,3,3,3,2,3,2,3,2,1,2,3,0,2,1,2,2,
0,2,1,1,2,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,2,0,
3,3,3,3,3,3,3,3,3,2,3,3,3,3,2,1,3,1,2,2,2,1,2,3,3,1,2,1,2,2,2,2,
0,1,1,1,1,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,2,0,0,0,0,0,0,0,0,
3,3,3,3,3,3,3,3,3,3,0,2,3,3,3,1,3,3,3,1,2,2,2,2,1,1,2,2,2,2,2,2,
0,2,0,1,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,
3,3,3,3,3,3,2,3,3,3,2,2,3,3,3,2,1,2,3,2,3,2,2,2,2,1,2,1,1,1,2,2,
0,2,1,1,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,
3,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,0,0,1,0,0,0,0,0,
1,0,1,0,0,0,0,0,2,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,3,3,3,3,2,3,3,2,3,1,2,2,2,2,3,2,3,1,1,2,2,1,2,2,1,1,0,2,2,2,2,
0,1,0,1,2,2,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,
3,0,0,1,1,0,1,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,2,2,0,
0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,0,1,0,1,0,1,1,0,1,1,0,0,0,1,1,0,1,1,1,0,0,0,0,0,0,1,0,0,0,0,0,
0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,0,0,0,1,1,0,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,
3,2,2,1,2,2,2,2,2,2,2,1,2,2,1,2,2,1,1,1,1,1,1,1,1,2,1,1,0,3,3,3,
0,3,0,2,2,2,2,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,
2,2,2,3,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,2,2,1,2,2,2,1,1,1,2,0,1,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
2,2,2,2,2,2,2,2,2,2,2,1,2,2,2,2,2,2,2,2,2,2,2,0,2,2,0,0,0,0,0,0,
0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
2,3,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,2,1,0,2,1,0,
0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,1,1,1,1,1,1,1,1,1,1,0,0,1,1,1,1,0,1,1,1,1,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,
0,3,1,1,2,2,2,2,2,1,2,2,2,1,1,2,2,2,2,2,2,2,1,2,2,1,0,1,1,1,1,0,
0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,2,1,1,1,1,2,1,1,2,1,0,1,1,1,1,1,1,1,1,1,1,1,0,1,0,0,0,0,0,0,0,
0,0,2,0,0,0,0,0,0,0,0,1,1,0,0,0,0,1,1,0,0,1,1,0,0,0,0,0,0,1,0,0,
2,1,1,2,2,2,2,2,2,2,2,2,2,2,1,2,2,2,2,2,1,2,1,2,1,1,1,1,0,0,0,0,
0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1,2,1,2,2,2,2,2,2,2,2,2,2,1,2,1,2,1,1,2,1,1,1,2,1,2,1,2,0,1,0,1,
0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,3,1,2,2,2,1,2,2,2,2,2,2,2,2,1,2,1,1,1,1,1,1,2,1,2,1,1,0,1,0,1,
0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
2,1,2,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,
0,2,0,1,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,
3,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
2,1,1,1,1,1,1,1,0,1,1,0,1,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,2,0,1,1,1,0,1,0,0,0,1,1,0,1,1,0,0,0,0,0,1,1,0,0,
0,1,1,1,2,1,2,2,2,0,2,0,2,0,1,1,2,1,1,1,1,2,1,0,1,1,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,
1,0,1,0,0,0,0,0,1,0,1,2,2,0,1,0,0,1,1,2,2,1,2,0,2,0,0,0,1,2,0,1,
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,2,0,2,1,2,0,2,0,0,1,1,1,1,1,1,0,1,0,0,0,1,0,0,1,
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,1,0,0,0,0,0,1,0,2,1,1,0,1,0,0,1,1,1,2,2,0,0,1,0,0,0,1,0,0,1,
1,1,2,1,0,1,1,1,0,1,0,1,1,1,1,0,0,0,1,0,1,0,0,0,0,0,0,0,0,2,2,1,
0,2,0,1,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
2,1,0,0,1,0,1,1,1,1,0,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1,1,1,1,1,1,1,1,1,2,1,0,1,1,1,1,1,1,1,1,1,1,1,0,1,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,1,1,1,0,1,1,0,1,0,0,0,1,1,0,1,
2,0,1,0,1,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,1,0,1,1,1,0,1,0,0,1,1,2,1,1,2,0,1,0,0,0,1,1,0,1,
1,0,0,1,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,1,0,1,1,2,0,1,0,0,0,0,2,1,1,2,0,2,0,0,0,1,1,0,1,
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,1,0,2,1,1,0,1,0,0,2,2,1,2,1,1,0,1,0,0,0,1,1,0,1,
2,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,1,2,2,0,0,0,0,0,1,1,0,1,0,0,1,0,0,0,0,1,0,1,
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,1,2,2,0,0,0,0,2,1,1,1,0,2,1,1,0,0,0,2,1,0,1,
1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,1,0,1,1,2,0,1,0,0,1,1,0,2,1,1,0,1,0,0,0,1,1,0,1,
2,2,1,1,1,0,1,1,0,1,1,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,1,0,2,1,1,0,1,0,0,1,1,0,1,2,1,0,2,0,0,0,1,1,0,1,
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,
0,1,0,0,2,0,2,1,1,0,1,0,1,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,1,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,1,0,1,1,2,0,1,0,0,1,1,1,0,1,0,0,1,0,0,0,1,0,0,1,
1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1,0,0,0,0,0,0,0,1,0,1,1,0,0,1,0,0,2,1,1,1,1,1,0,1,0,0,0,0,1,0,1,
0,1,1,1,2,1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,1,2,1,0,0,0,0,0,1,1,1,1,1,0,1,0,0,0,1,1,0,0,
};
/// <summary>
/// Initializes the Hebrew sequence model shared by the concrete charset models.
/// </summary>
/// <param name="charToOrderMap">Byte-to-order table supplied by the derived charset model.</param>
/// <param name="name">Charset name supplied by the derived charset model.</param>
// The constructor is protected because the type is abstract: only derived
// models can ever invoke it, so a public constructor was misleading.
// 0.984004f is the "first 512 sequences" figure (98.4004%) quoted in the
// model-table comment at the top of this class.
// NOTE(review): the literal 'false' is presumably SequenceModel's
// keep-English-letter switch - confirm against the SequenceModel constructor.
protected HebrewModel(byte[] charToOrderMap, string name)
    : base(charToOrderMap, HEBREW_LANG_MODEL, 0.984004f, false, name)
{
}
}
/// <summary>
/// Hebrew model variant registered under the charset name "windows-1255".
/// It only contributes the byte-to-order table; everything else comes from
/// the <see cref="HebrewModel"/> base constructor.
/// </summary>
public class Win1255Model : HebrewModel
{
    // Windows-1255 character mapping table: one entry per byte value
    // (16 rows x 16 columns); the trailing comment on each row is the
    // high nibble.
    //
    // Reserved order values:
    //   255: control characters that usually do not exist in any text
    //   254: carriage return / line feed (bytes 0x0D and 0x0A)
    //   253: symbol (punctuation) that does not belong to a word
    //   252: digits 0 - 9
    private static readonly byte[] WIN1255_CHAR_TO_ORDER_MAP =
    {
        255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, // 00
        255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, // 10
        253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, // 20
        252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, // 30
        253, 69, 91, 79, 80, 92, 89, 97, 90, 68,111,112, 82, 73, 95, 85, // 40
        78,121, 86, 71, 67,102,107, 84,114,103,115,253,253,253,253,253, // 50
        253, 50, 74, 60, 61, 42, 76, 70, 64, 53,105, 93, 56, 65, 54, 49, // 60
        66,110, 51, 43, 44, 63, 81, 77, 98, 75,108,253,253,253,253,253, // 70
        124,202,203,204,205, 40, 58,206,207,208,209,210,211,212,213,214, // 80
        215, 83, 52, 47, 46, 72, 32, 94,216,113,217,109,218,219,220,221, // 90
        34,116,222,118,100,223,224,117,119,104,125,225,226, 87, 99,227, // a0
        106,122,123,228, 55,229,230,101,231,232,120,233, 48, 39, 57,234, // b0
        30, 59, 41, 88, 33, 37, 36, 31, 29, 35,235, 62, 28,236,126,237, // c0
        238, 38, 45,239,240,241,242,243,127,244,245,246,247,248,249,250, // d0
        9, 8, 20, 16, 3, 2, 24, 14, 22, 1, 25, 15, 4, 11, 6, 23, // e0
        12, 19, 13, 26, 18, 27, 21, 17, 7, 10, 5,251,252,128, 96,253, // f0
    };

    /// <summary>
    /// Hands the windows-1255 table and the charset name to the base model.
    /// </summary>
    public Win1255Model()
        : base(WIN1255_CHAR_TO_ORDER_MAP, "windows-1255")
    {
    }
}
}

View file

@ -0,0 +1,238 @@
/* ***** BEGIN LICENSE BLOCK *****
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
*
* The contents of this file are subject to the Mozilla Public License Version
* 1.1 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
* for the specific language governing rights and limitations under the
* License.
*
* The Original Code is Mozilla Universal charset detector code.
*
* The Initial Developer of the Original Code is
* Netscape Communications Corporation.
* Portions created by the Initial Developer are Copyright (C) 2001
* the Initial Developer. All Rights Reserved.
*
* Contributor(s):
* Shy Shalom <shooshX@gmail.com>
* Rudi Pettazzi <rudi.pettazzi@gmail.com> (C# port)
*
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
* in which case the provisions of the GPL or the LGPL are applicable instead
* of those above. If you wish to allow use of your version of this file only
* under the terms of either the GPL or the LGPL, and not to allow others to
* use your version of this file under the terms of the MPL, indicate your
* decision by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL or the LGPL. If you do not delete
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the MPL, the GPL or the LGPL.
*
* ***** END LICENSE BLOCK ***** */
namespace UniversalDetector.Core
{
/// <summary>
/// Shared base for the Hungarian single-byte sequence models. It owns the
/// Hungarian language model table and forwards it, together with the
/// encoding-specific character-to-order map and charset name supplied by the
/// derived class, to <see cref="SequenceModel"/>.
/// </summary>
public abstract class HungarianModel : SequenceModel
{
// Model table statistics (as recorded by the original authors):
//   total sequences:       100%
//   first 512 sequences:   94.7368%
//   first 1024 sequences:  5.2623%
//   rest sequences:        0.8894%
//   negative sequences:    0.0009%
// The table holds 4096 entries (128 rows of 32 as laid out here), each a
// category value in the range 0..3.
// NOTE(review): presumably consumed as a 64x64 matrix indexed by the orders of
// two consecutive characters - confirm against SequenceModel and its prober.
private readonly static byte[] HUNGARIAN_LANG_MODEL = {
0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,2,2,3,3,1,1,2,2,2,2,2,1,2,
3,2,2,3,3,3,3,3,2,3,3,3,3,3,3,1,2,3,3,3,3,2,3,3,1,1,3,3,0,1,1,1,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,
3,2,1,3,3,3,3,3,2,3,3,3,3,3,1,1,2,3,3,3,3,3,3,3,1,1,3,2,0,1,1,1,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,
3,3,3,3,3,3,3,3,3,3,3,1,1,2,3,3,3,1,3,3,3,3,3,1,3,3,2,2,0,3,2,3,
0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,
3,3,3,3,3,3,2,3,3,3,2,3,3,2,3,3,3,3,3,2,3,3,2,2,3,2,3,2,0,3,2,2,
0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,
3,3,3,3,3,3,2,3,3,3,3,3,2,3,3,3,1,2,3,2,2,3,1,2,3,3,2,2,0,3,3,3,
0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
3,3,3,3,3,3,3,3,3,3,2,2,3,3,3,3,3,3,2,3,3,3,3,2,3,3,3,3,0,2,3,2,
0,0,0,1,1,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
3,3,3,3,3,3,3,3,3,3,3,1,1,1,3,3,2,1,3,2,2,3,2,1,3,2,2,1,0,3,3,1,
0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
3,2,2,3,3,3,3,3,1,2,3,3,3,3,1,2,1,3,3,3,3,2,2,3,1,1,3,2,0,1,1,1,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,
3,3,3,3,3,3,3,3,2,2,3,3,3,3,3,2,1,3,3,3,3,3,2,2,1,3,3,3,0,1,1,2,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,2,3,3,2,3,3,3,2,0,3,2,3,
0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,1,0,
3,3,3,3,3,3,2,3,3,3,2,3,2,3,3,3,1,3,2,2,2,3,1,1,3,3,1,1,0,3,3,2,
0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
3,3,3,3,3,3,3,2,3,3,3,2,3,2,3,3,3,2,3,3,3,3,3,1,2,3,2,2,0,2,2,2,
0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
3,3,3,2,2,2,3,1,3,3,2,2,1,3,3,3,1,1,3,1,2,3,2,3,2,2,2,1,0,2,2,2,
0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,
3,1,1,3,3,3,3,3,1,2,3,3,3,3,1,2,1,3,3,3,2,2,3,2,1,0,3,2,0,1,1,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,1,1,3,3,3,3,3,1,2,3,3,3,3,1,1,0,3,3,3,3,0,2,3,0,0,2,1,0,1,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,3,3,3,3,3,2,2,3,3,2,2,2,2,3,3,0,1,2,3,2,3,2,2,3,2,1,2,0,2,2,2,
0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,
3,3,3,3,3,3,1,2,3,3,3,2,1,2,3,3,2,2,2,3,2,3,3,1,3,3,1,1,0,2,3,2,
0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
3,3,3,1,2,2,2,2,3,3,3,1,1,1,3,3,1,1,3,1,1,3,2,1,2,3,1,1,0,2,2,2,
0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
3,3,3,2,1,2,1,1,3,3,1,1,1,1,3,3,1,1,2,2,1,2,1,1,2,2,1,1,0,2,2,1,
0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
3,3,3,1,1,2,1,1,3,3,1,0,1,1,3,3,2,0,1,1,2,3,1,0,2,2,1,0,0,1,3,2,
0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
3,2,1,3,3,3,3,3,1,2,3,2,3,3,2,1,1,3,2,3,2,1,2,2,0,1,2,1,0,0,1,1,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,
3,3,3,3,2,2,2,2,3,1,2,2,1,1,3,3,0,3,2,1,2,3,2,1,3,3,1,1,0,2,1,3,
0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
3,3,3,2,2,2,3,2,3,3,3,2,1,1,3,3,1,1,1,2,2,3,2,3,2,2,2,1,0,2,2,1,
0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
1,0,0,3,3,3,3,3,0,0,3,3,2,3,0,0,0,2,3,3,1,0,1,2,0,0,1,1,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,1,2,3,3,3,3,3,1,2,3,3,2,2,1,1,0,3,3,2,2,1,2,2,1,0,2,2,0,1,1,1,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,3,2,2,1,3,1,2,3,3,2,2,1,1,2,2,1,1,1,1,3,2,1,1,1,1,2,1,0,1,2,1,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,
2,3,3,1,1,1,1,1,3,3,3,0,1,1,3,3,1,1,1,1,1,2,2,0,3,1,1,2,0,2,1,1,
0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,
3,1,0,1,2,1,2,2,0,1,2,3,1,2,0,0,0,2,1,1,1,1,1,2,0,0,1,1,0,0,0,0,
1,2,1,2,2,2,1,2,1,2,0,2,0,2,2,1,1,2,1,1,2,1,1,1,0,1,0,0,0,1,1,0,
1,1,1,2,3,2,3,3,0,1,2,2,3,1,0,1,0,2,1,2,2,0,1,1,0,0,1,1,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1,0,0,3,3,2,2,1,0,0,3,2,3,2,0,0,0,1,1,3,0,0,1,1,0,0,2,1,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,1,1,2,2,3,3,1,0,1,3,2,3,1,1,1,0,1,1,1,1,1,3,1,0,0,2,2,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,1,1,1,2,2,2,1,0,1,2,3,3,2,0,0,0,2,1,1,1,2,1,1,1,0,1,1,1,0,0,0,
1,2,2,2,2,2,1,1,1,2,0,2,1,1,1,1,1,2,1,1,1,1,1,1,0,1,1,1,0,0,1,1,
3,2,2,1,0,0,1,1,2,2,0,3,0,1,2,1,1,0,0,1,1,1,0,1,1,1,1,0,2,1,1,1,
2,2,1,1,1,2,1,2,1,1,1,1,1,1,1,2,1,1,1,2,3,1,1,1,1,1,1,1,1,1,0,1,
2,3,3,0,1,0,0,0,3,3,1,0,0,1,2,2,1,0,0,0,0,2,0,0,1,1,1,0,2,1,1,1,
2,1,1,1,1,1,1,2,1,1,0,1,1,0,1,1,1,0,1,2,1,1,0,1,1,1,1,1,1,1,0,1,
2,3,3,0,1,0,0,0,2,2,0,0,0,0,1,2,2,0,0,0,0,1,0,0,1,1,0,0,2,0,1,0,
2,1,1,1,1,2,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,2,0,1,1,1,1,1,0,1,
3,2,2,0,1,0,1,0,2,3,2,0,0,1,2,2,1,0,0,1,1,1,0,0,2,1,0,1,2,2,1,1,
2,1,1,1,1,1,1,2,1,1,1,1,1,1,0,2,1,0,1,1,0,1,1,1,0,1,1,2,1,1,0,1,
2,2,2,0,0,1,0,0,2,2,1,1,0,0,2,1,1,0,0,0,1,2,0,0,2,1,0,0,2,1,1,1,
2,1,1,1,1,2,1,2,1,1,1,2,2,1,1,2,1,1,1,2,1,1,1,1,1,1,1,1,1,1,0,1,
1,2,3,0,0,0,1,0,3,2,1,0,0,1,2,1,1,0,0,0,0,2,1,0,1,1,0,0,2,1,2,1,
1,1,0,0,0,1,0,1,1,1,1,1,2,0,0,1,0,0,0,2,0,0,1,1,1,1,1,1,1,1,0,1,
3,0,0,2,1,2,2,1,0,0,2,1,2,2,0,0,0,2,1,1,1,0,1,1,0,0,1,1,2,0,0,0,
1,2,1,2,2,1,1,2,1,2,0,1,1,1,1,1,1,1,1,1,2,1,1,0,0,1,1,1,1,0,0,1,
1,3,2,0,0,0,1,0,2,2,2,0,0,0,2,2,1,0,0,0,0,3,1,1,1,1,0,0,2,1,1,1,
2,1,0,1,1,1,0,1,1,1,1,1,1,1,0,2,1,0,0,1,0,1,1,0,1,1,1,1,1,1,0,1,
2,3,2,0,0,0,1,0,2,2,0,0,0,0,2,1,1,0,0,0,0,2,1,0,1,1,0,0,2,1,1,0,
2,1,1,1,1,2,1,2,1,2,0,1,1,1,0,2,1,1,1,2,1,1,1,1,0,1,1,1,1,1,0,1,
3,1,1,2,2,2,3,2,1,1,2,2,1,1,0,1,0,2,2,1,1,1,1,1,0,0,1,1,0,1,1,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
2,2,2,0,0,0,0,0,2,2,0,0,0,0,2,2,1,0,0,0,1,1,0,0,1,2,0,0,2,1,1,1,
2,2,1,1,1,2,1,2,1,1,0,1,1,1,1,2,1,1,1,2,1,1,1,1,0,1,2,1,1,1,0,1,
1,0,0,1,2,3,2,1,0,0,2,0,1,1,0,0,0,1,1,1,1,0,1,1,0,0,1,0,0,0,0,0,
1,2,1,2,1,2,1,1,1,2,0,2,1,1,1,0,1,2,0,0,1,1,1,0,0,0,0,0,0,0,0,0,
2,3,2,0,0,0,0,0,1,1,2,1,0,0,1,1,1,0,0,0,0,2,0,0,1,1,0,0,2,1,1,1,
2,1,1,1,1,1,1,2,1,0,1,1,1,1,0,2,1,1,1,1,1,1,0,1,0,1,1,1,1,1,0,1,
1,2,2,0,1,1,1,0,2,2,2,0,0,0,3,2,1,0,0,0,1,1,0,0,1,1,0,1,1,1,0,0,
1,1,0,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,2,1,1,1,0,0,1,1,1,0,1,0,1,
2,1,0,2,1,1,2,2,1,1,2,1,1,1,0,0,0,1,1,0,1,1,1,1,0,0,1,1,1,0,0,0,
1,2,2,2,2,2,1,1,1,2,0,2,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,0,0,0,1,0,
1,2,3,0,0,0,1,0,2,2,0,0,0,0,2,2,0,0,0,0,0,1,0,0,1,0,0,0,2,0,1,0,
2,1,1,1,1,1,0,2,0,0,0,1,2,1,1,1,1,0,1,2,0,1,0,1,0,1,1,1,0,1,0,1,
2,2,2,0,0,0,1,0,2,1,2,0,0,0,1,1,2,0,0,0,0,1,0,0,1,1,0,0,2,1,0,1,
2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,2,0,1,1,1,1,1,0,1,
1,2,2,0,0,0,1,0,2,2,2,0,0,0,1,1,0,0,0,0,0,1,1,0,2,0,0,1,1,1,0,1,
1,0,1,1,1,1,1,1,0,1,1,1,1,0,0,1,0,0,1,1,0,1,0,1,1,1,1,1,0,0,0,1,
1,0,0,1,0,1,2,1,0,0,1,1,1,2,0,0,0,1,1,0,1,0,1,1,0,0,1,0,0,0,0,0,
0,2,1,2,1,1,1,1,1,2,0,2,0,1,1,0,1,2,1,0,1,1,1,0,0,0,0,0,0,1,0,0,
2,1,1,0,1,2,0,0,1,1,1,0,0,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,2,1,0,1,
2,2,1,1,1,1,1,2,1,1,0,1,1,1,1,2,1,1,1,2,1,1,0,1,0,1,1,1,1,1,0,1,
1,2,2,0,0,0,0,0,1,1,0,0,0,0,2,1,0,0,0,0,0,2,0,0,2,2,0,0,2,0,0,1,
2,1,1,1,1,1,1,1,0,1,1,0,1,1,0,1,0,0,0,1,1,1,1,0,0,1,1,1,1,0,0,1,
1,1,2,0,0,3,1,0,2,1,1,1,0,0,1,1,1,0,0,0,1,1,0,0,0,1,0,0,1,0,1,0,
1,2,1,0,1,1,1,2,1,1,0,1,1,1,1,1,0,0,0,1,1,1,1,1,0,1,0,0,0,1,0,0,
2,1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,0,0,2,0,0,0,
2,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,2,1,1,0,0,1,1,1,1,1,0,1,
2,1,1,1,2,1,1,1,0,1,1,2,1,0,0,0,0,1,1,1,1,0,1,0,0,0,0,1,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1,1,0,1,1,1,1,1,0,0,1,1,2,1,0,0,0,1,1,0,0,0,1,1,0,0,1,0,1,0,0,0,
1,2,1,1,1,1,1,1,1,1,0,1,0,1,1,1,1,1,1,0,1,1,1,0,0,0,0,0,0,1,0,0,
2,0,0,0,1,1,1,1,0,0,1,1,0,0,0,0,0,1,1,1,2,0,0,1,0,0,1,0,1,0,0,0,
0,1,1,1,1,1,1,1,1,2,0,1,1,1,1,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0,0,
1,0,0,1,1,1,1,1,0,0,2,1,0,1,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,
0,1,1,1,1,1,1,0,1,1,0,1,0,1,1,0,1,1,0,0,1,1,1,0,0,0,0,0,0,0,0,0,
1,0,0,1,1,1,0,0,0,0,1,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,
0,1,1,1,1,1,0,0,1,1,0,1,0,1,0,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0,0,
0,0,0,1,0,0,0,0,0,0,1,1,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,1,1,1,0,1,0,0,1,1,0,1,0,1,1,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0,0,
2,1,1,1,1,1,1,1,1,1,1,0,0,1,1,1,0,0,1,0,0,1,0,1,0,1,1,1,0,0,1,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1,0,0,1,1,1,1,0,0,0,1,1,1,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,
0,1,1,1,1,1,1,0,1,1,0,1,0,1,0,0,1,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,
};
/// <summary>
/// Passes the supplied character-to-order map and charset name to the base
/// class together with the shared Hungarian language model.
/// </summary>
/// <param name="charToOrderMap">Encoding-specific byte-to-order lookup table.</param>
/// <param name="name">Charset name forwarded to the base class.</param>
public HungarianModel(byte[] charToOrderMap, string name)
// 0.947368f equals the "first 512 sequences" figure (94.7368%) noted above the table.
// NOTE(review): the meaning of the literal `false` is defined by SequenceModel,
// which is not visible here - confirm.
: base(charToOrderMap, HUNGARIAN_LANG_MODEL, 0.947368f,
false, name)
{
}
}
/// <summary>
/// Hungarian sequence model for text encoded as ISO-8859-2. Supplies the
/// byte-to-order map for that encoding to <see cref="HungarianModel"/>.
/// </summary>
public class Latin2HungarianModel : HungarianModel
{
// 256-entry lookup indexed by byte value (16 rows of 16; the trailing //NN
// comments give the starting byte of the row in hex).
// Visible marker values: 254 at bytes 0x0A and 0x0D, 255 for the remaining
// bytes below 0x20, 252 for bytes 0x30-0x39 (ASCII digits), 253 for the ASCII
// punctuation positions. Smaller values are per-letter orders.
private readonly static byte[] LATIN2_CHAR_TO_ORDER_MAP = {
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, //00
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, //10
+253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, //20
252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, //30
253, 28, 40, 54, 45, 32, 50, 49, 38, 39, 53, 36, 41, 34, 35, 47,
46, 71, 43, 33, 37, 57, 48, 64, 68, 55, 52,253,253,253,253,253,
253, 2, 18, 26, 17, 1, 27, 12, 20, 9, 22, 7, 6, 13, 4, 8,
23, 67, 10, 5, 3, 21, 19, 65, 62, 16, 11,253,253,253,253,253,
159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,
175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,
191,192,193,194,195,196,197, 75,198,199,200,201,202,203,204,205,
79,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,
221, 51, 81,222, 78,223,224,225,226, 44,227,228,229, 61,230,231,
232,233,234, 58,235, 66, 59,236,237,238, 60, 69, 63,239,240,241,
82, 14, 74,242, 70, 80,243, 72,244, 15, 83, 77, 84, 30, 76, 85,
245,246,247, 25, 73, 42, 24,248,249,250, 31, 56, 29,251,252,253,
};
/// <summary>
/// Creates the model, reporting the charset name "ISO-8859-2".
/// </summary>
public Latin2HungarianModel() : base(LATIN2_CHAR_TO_ORDER_MAP, "ISO-8859-2")
{
}
}
/// <summary>
/// Hungarian sequence model for text encoded as windows-1250. Supplies the
/// byte-to-order map for that encoding to <see cref="HungarianModel"/>.
/// </summary>
public class Win1250HungarianModel : HungarianModel
{
// 256-entry lookup indexed by byte value (16 rows of 16; the trailing //NN
// comments give the starting byte of the row in hex).
// Visible marker values: 254 at bytes 0x0A and 0x0D, 255 for the remaining
// bytes below 0x20, 252 for bytes 0x30-0x39 (ASCII digits), 253 for the ASCII
// punctuation positions. Smaller values are per-letter orders.
private readonly static byte[] WIN1250_CHAR_TO_ORDER_MAP = {
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, //00
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, //10
+253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, //20
252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, //30
253, 28, 40, 54, 45, 32, 50, 49, 38, 39, 53, 36, 41, 34, 35, 47,
46, 72, 43, 33, 37, 57, 48, 64, 68, 55, 52,253,253,253,253,253,
253, 2, 18, 26, 17, 1, 27, 12, 20, 9, 22, 7, 6, 13, 4, 8,
23, 67, 10, 5, 3, 21, 19, 65, 62, 16, 11,253,253,253,253,253,
161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,
177,178,179,180, 78,181, 69,182,183,184,185,186,187,188,189,190,
191,192,193,194,195,196,197, 76,198,199,200,201,202,203,204,205,
81,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,
221, 51, 83,222, 80,223,224,225,226, 44,227,228,229, 61,230,231,
232,233,234, 58,235, 66, 59,236,237,238, 60, 70, 63,239,240,241,
84, 14, 75,242, 71, 82,243, 73,244, 15, 85, 79, 86, 30, 77, 87,
245,246,247, 25, 74, 42, 24,248,249,250, 31, 56, 29,251,252,253,
};
/// <summary>
/// Creates the model, reporting the charset name "windows-1250".
/// </summary>
public Win1250HungarianModel() : base(WIN1250_CHAR_TO_ORDER_MAP, "windows-1250")
{
}
}
}

View file

@ -0,0 +1,213 @@
/* ***** BEGIN LICENSE BLOCK *****
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
*
* The contents of this file are subject to the Mozilla Public License Version
* 1.1 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
* for the specific language governing rights and limitations under the
* License.
*
* The Original Code is Mozilla Universal charset detector code.
*
* The Initial Developer of the Original Code is
* Netscape Communications Corporation.
* Portions created by the Initial Developer are Copyright (C) 2001
* the Initial Developer. All Rights Reserved.
*
* Contributor(s):
* Shy Shalom <shooshX@gmail.com>
* Rudi Pettazzi <rudi.pettazzi@gmail.com> (C# port)
*
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
* in which case the provisions of the GPL or the LGPL are applicable instead
* of those above. If you wish to allow use of your version of this file only
* under the terms of either the GPL or the LGPL, and not to allow others to
* use your version of this file under the terms of the MPL, indicate your
* decision by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL or the LGPL. If you do not delete
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the MPL, the GPL or the LGPL.
*
* ***** END LICENSE BLOCK ***** */
namespace UniversalDetector.Core
{
/// <summary>
/// Sequence model for Thai text in the TIS-620 encoding. Holds both the
/// byte-to-order map and the language model table and hands them to
/// <see cref="SequenceModel"/>.
/// </summary>
public class ThaiModel : SequenceModel
{
/****************************************************************
Marker values used in the character-to-order map:
255: Control characters that usually do not exist in any text
254: Carriage return / line feed (bytes 0x0D and 0x0A below)
253: Symbol (punctuation) that does not belong to a word
252: Digits 0 - 9
*****************************************************************/
// The following result for Thai was collected from a limited sample (1M).
// 256-entry lookup indexed by byte value (16 rows of 16; the trailing //NN
// comments give the starting byte of the row in hex).
private readonly static byte[] TIS620_CHAR_TO_ORDER_MAP = {
255,255,255,255,255,255,255,255,255,255,254,255,255,254,255,255, //00
255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, //10
+253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253, //20
252,252,252,252,252,252,252,252,252,252,253,253,253,253,253,253, //30
253,182,106,107,100,183,184,185,101, 94,186,187,108,109,110,111, //40
188,189,190, 89, 95,112,113,191,192,193,194,253,253,253,253,253, //50
253, 64, 72, 73,114, 74,115,116,102, 81,201,117, 90,103, 78, 82, //60
96,202, 91, 79, 84,104,105, 97, 98, 92,203,253,253,253,253,253, //70
209,210,211,212,213, 88,214,215,216,217,218,219,220,118,221,222,
223,224, 99, 85, 83,225,226,227,228,229,230,231,232,233,234,235,
236, 5, 30,237, 24,238, 75, 8, 26, 52, 34, 51,119, 47, 58, 57,
49, 53, 55, 43, 20, 19, 44, 14, 48, 3, 17, 25, 39, 62, 31, 54,
45, 9, 16, 2, 61, 15,239, 12, 42, 46, 18, 21, 76, 4, 66, 63,
22, 10, 1, 36, 23, 13, 40, 27, 32, 35, 86,240,241,242,243,244,
11, 28, 41, 29, 33,245, 50, 37, 6, 7, 67, 77, 38, 93,246,247,
68, 56, 59, 65, 69, 60, 70, 80, 71, 87,248,249,250,251,252,253,
};
// Model table statistics (as recorded by the original authors):
//   total sequences:       100%
//   first 512 sequences:   92.6386%
//   first 1024 sequences:  7.3177%
//   rest sequences:        1.0230%
//   negative sequences:    0.0436%
// The table holds 4096 entries (128 rows of 32 as laid out here), each a
// category value in the range 0..3.
// NOTE(review): presumably consumed as a 64x64 matrix indexed by the orders of
// two consecutive characters - confirm against SequenceModel and its prober.
private readonly static byte[] THAI_LANG_MODEL = {
0,1,3,3,3,3,0,0,3,3,0,3,3,0,3,3,3,3,3,3,3,3,0,0,3,3,3,0,3,3,3,3,
0,3,3,0,0,0,1,3,0,3,3,2,3,3,0,1,2,3,3,3,3,0,2,0,2,0,0,3,2,1,2,2,
3,0,3,3,2,3,0,0,3,3,0,3,3,0,3,3,3,3,3,3,3,3,3,0,3,2,3,0,2,2,2,3,
0,2,3,0,0,0,0,1,0,1,2,3,1,1,3,2,2,0,1,1,0,0,1,0,0,0,0,0,0,0,1,1,
3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,2,2,2,2,2,2,2,3,3,2,3,2,3,3,2,2,2,
3,1,2,3,0,3,3,2,2,1,2,3,3,1,2,0,1,3,0,1,0,0,1,0,0,0,0,0,0,0,1,1,
3,3,2,2,3,3,3,3,1,2,3,3,3,3,3,2,2,2,2,3,3,2,2,3,3,2,2,3,2,3,2,2,
3,3,1,2,3,1,2,2,3,3,1,0,2,1,0,0,3,1,2,1,0,0,1,0,0,0,0,0,0,1,0,1,
3,3,3,3,3,3,2,2,3,3,3,3,2,3,2,2,3,3,2,2,3,2,2,2,2,1,1,3,1,2,1,1,
3,2,1,0,2,1,0,1,0,1,1,0,1,1,0,0,1,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,
3,3,3,2,3,2,3,3,2,2,3,2,3,3,2,3,1,1,2,3,2,2,2,3,2,2,2,2,2,1,2,1,
2,2,1,1,3,3,2,1,0,1,2,2,0,1,3,0,0,0,1,1,0,0,0,0,0,2,3,0,0,2,1,1,
3,3,2,3,3,2,0,0,3,3,0,3,3,0,2,2,3,1,2,2,1,1,1,0,2,2,2,0,2,2,1,1,
0,2,1,0,2,0,0,2,0,1,0,0,1,0,0,0,1,1,1,1,0,0,0,0,0,0,0,0,0,0,1,0,
3,3,2,3,3,2,0,0,3,3,0,2,3,0,2,1,2,2,2,2,1,2,0,0,2,2,2,0,2,2,1,1,
0,2,1,0,2,0,0,2,0,1,1,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,
3,3,2,3,2,3,2,0,2,2,1,3,2,1,3,2,1,2,3,2,2,3,0,2,3,2,2,1,2,2,2,2,
1,2,2,0,0,0,0,2,0,1,2,0,1,1,1,0,1,0,3,1,1,0,0,0,0,0,0,0,0,0,1,0,
3,3,2,3,3,2,3,2,2,2,3,2,2,3,2,2,1,2,3,2,2,3,1,3,2,2,2,3,2,2,2,3,
3,2,1,3,0,1,1,1,0,2,1,1,1,1,1,0,1,0,1,1,0,0,0,0,0,0,0,0,0,2,0,0,
1,0,0,3,0,3,3,3,3,3,0,0,3,0,2,2,3,3,3,3,3,0,0,0,1,1,3,0,0,0,0,2,
0,0,1,0,0,0,0,0,0,0,2,3,0,0,0,3,0,2,0,0,0,0,0,3,0,0,0,0,0,0,0,0,
2,0,3,3,3,3,0,0,2,3,0,0,3,0,3,3,2,3,3,3,3,3,0,0,3,3,3,0,0,0,3,3,
0,0,3,0,0,0,0,2,0,0,2,1,1,3,0,0,1,0,0,2,3,0,1,0,0,0,0,0,0,0,1,0,
3,3,3,3,2,3,3,3,3,3,3,3,1,2,1,3,3,2,2,1,2,2,2,3,1,1,2,0,2,1,2,1,
2,2,1,0,0,0,1,1,0,1,0,1,1,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,
3,0,2,1,2,3,3,3,0,2,0,2,2,0,2,1,3,2,2,1,2,1,0,0,2,2,1,0,2,1,2,2,
0,1,1,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,3,3,3,2,1,3,3,1,1,3,0,2,3,1,1,3,2,1,1,2,0,2,2,3,2,1,1,1,1,1,2,
3,0,0,1,3,1,2,1,2,0,3,0,0,0,1,0,3,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,
3,3,1,1,3,2,3,3,3,1,3,2,1,3,2,1,3,2,2,2,2,1,3,3,1,2,1,3,1,2,3,0,
2,1,1,3,2,2,2,1,2,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,
3,3,2,3,2,3,3,2,3,2,3,2,3,3,2,1,0,3,2,2,2,1,2,2,2,1,2,2,1,2,1,1,
2,2,2,3,0,1,3,1,1,1,1,0,1,1,0,2,1,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,3,3,3,2,3,2,2,1,1,3,2,3,2,3,2,0,3,2,2,1,2,0,2,2,2,1,2,2,2,2,1,
3,2,1,2,2,1,0,2,0,1,0,0,1,1,0,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,1,
3,3,3,3,3,2,3,1,2,3,3,2,2,3,0,1,1,2,0,3,3,2,2,3,0,1,1,3,0,0,0,0,
3,1,0,3,3,0,2,0,2,1,0,0,3,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,3,3,2,3,2,3,3,0,1,3,1,1,2,1,2,1,1,3,1,1,0,2,3,1,1,1,1,1,1,1,1,
3,1,1,2,2,2,2,1,1,1,0,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
3,2,2,1,1,2,1,3,3,2,3,2,2,3,2,2,3,1,2,2,1,2,0,3,2,1,2,2,2,2,2,1,
3,2,1,2,2,2,1,1,1,1,0,0,1,1,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,3,3,3,3,3,3,3,1,3,3,0,2,1,0,3,2,0,0,3,1,0,1,1,0,1,0,0,0,0,0,1,
1,0,0,1,0,3,2,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,0,2,2,2,3,0,0,1,3,0,3,2,0,3,2,2,3,3,3,3,3,1,0,2,2,2,0,2,2,1,2,
0,2,3,0,0,0,0,1,0,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,
3,0,2,3,1,3,3,2,3,3,0,3,3,0,3,2,2,3,2,3,3,3,0,0,2,2,3,0,1,1,1,3,
0,0,3,0,0,0,2,2,0,1,3,0,1,2,2,2,3,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,
3,2,3,3,2,0,3,3,2,2,3,1,3,2,1,3,2,0,1,2,2,0,2,3,2,1,0,3,0,0,0,0,
3,0,0,2,3,1,3,0,0,3,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,1,3,2,2,2,1,2,0,1,3,1,1,3,1,3,0,0,2,1,1,1,1,2,1,1,1,0,2,1,0,1,
1,2,0,0,0,3,1,1,0,0,0,0,1,0,1,0,0,1,0,1,0,0,0,0,0,3,1,0,0,0,1,0,
3,3,3,3,2,2,2,2,2,1,3,1,1,1,2,0,1,1,2,1,2,1,3,2,0,0,3,1,1,1,1,1,
3,1,0,2,3,0,0,0,3,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,2,3,0,3,3,0,2,0,0,0,0,0,0,0,3,0,0,1,0,0,0,0,0,0,0,0,0,0,0,
0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,2,3,1,3,0,0,1,2,0,0,2,0,3,3,2,3,3,3,2,3,0,0,2,2,2,0,0,0,2,2,
0,0,1,0,0,0,0,3,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,
0,0,0,3,0,2,0,0,0,0,0,0,0,0,0,0,1,2,3,1,3,3,0,0,1,0,3,0,0,0,0,0,
0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,3,1,2,3,1,2,3,1,0,3,0,2,2,1,0,2,1,1,2,0,1,0,0,1,1,1,1,0,1,0,0,
1,0,0,0,0,1,1,0,3,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,3,3,3,2,1,0,1,1,1,3,1,2,2,2,2,2,2,1,1,1,1,0,3,1,0,1,3,1,1,1,1,
1,1,0,2,0,1,3,1,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,2,0,1,
3,0,2,2,1,3,3,2,3,3,0,1,1,0,2,2,1,2,1,3,3,1,0,0,3,2,0,0,0,0,2,1,
0,1,0,0,0,0,1,2,0,1,1,3,1,1,2,2,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,
0,0,3,0,0,1,0,0,0,3,0,0,3,0,3,1,0,1,1,1,3,2,0,0,0,3,0,0,0,0,2,0,
0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,2,0,0,0,0,0,0,0,0,0,
3,3,1,3,2,1,3,3,1,2,2,0,1,2,1,0,1,2,0,0,0,0,0,3,0,0,0,3,0,0,0,0,
3,0,0,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,0,1,2,0,3,3,3,2,2,0,1,1,0,1,3,0,0,0,2,2,0,0,0,0,3,1,0,1,0,0,0,
0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,0,2,3,1,2,0,0,2,1,0,3,1,0,1,2,0,1,1,1,1,3,0,0,3,1,1,0,2,2,1,1,
0,2,0,0,0,0,0,1,0,1,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,0,0,3,1,2,0,0,2,2,0,1,2,0,1,0,1,3,1,2,1,0,0,0,2,0,3,0,0,0,1,0,
0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,0,1,1,2,2,0,0,0,2,0,2,1,0,1,1,0,1,1,1,2,1,0,0,1,1,1,0,2,1,1,1,
0,1,1,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,1,
0,0,0,2,0,1,3,1,1,1,1,0,0,0,0,3,2,0,1,0,0,0,1,2,0,0,0,1,0,0,0,0,
0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,3,3,3,3,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1,0,2,3,2,2,0,0,0,1,0,0,0,0,2,3,2,1,2,2,3,0,0,0,2,3,1,0,0,0,1,1,
0,0,1,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,
3,3,2,2,0,1,0,0,0,0,2,0,2,0,1,0,0,0,1,1,0,0,0,2,1,0,1,0,1,1,0,0,
0,1,0,2,0,0,1,0,3,0,1,0,0,0,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,3,1,0,0,1,0,0,0,0,0,1,1,2,0,0,0,0,1,0,0,1,3,1,0,0,0,0,1,1,0,0,
0,1,0,0,0,0,3,0,0,0,0,0,0,3,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,
3,3,1,1,1,1,2,3,0,0,2,1,1,1,1,1,0,2,1,1,0,0,0,2,1,0,1,2,1,1,0,1,
2,1,0,3,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1,3,1,0,0,0,0,0,0,0,3,0,0,0,3,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,
0,0,0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,3,2,0,0,0,0,0,0,1,2,1,0,1,1,0,2,0,0,1,0,0,2,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,2,0,0,0,1,3,0,1,0,0,0,2,0,0,0,0,0,0,0,1,2,0,0,0,0,0,
3,3,0,0,1,1,2,0,0,1,2,1,0,1,1,1,0,1,1,0,0,2,1,1,0,1,0,0,1,1,1,0,
0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,
2,2,2,1,0,0,0,0,1,0,0,0,0,3,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,
2,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
2,3,0,0,1,1,0,0,0,2,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1,1,0,1,2,0,1,2,0,0,1,1,0,2,0,1,0,0,1,0,0,0,0,1,0,0,0,2,0,0,0,0,
1,0,0,1,0,1,1,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,1,0,0,0,0,0,0,0,1,1,0,1,1,0,2,1,3,0,0,0,0,1,1,0,0,0,0,0,0,0,3,
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
2,0,1,0,1,0,0,2,0,0,2,0,0,1,1,2,0,0,1,1,0,0,0,1,0,0,0,1,1,0,0,0,
1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,
1,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,1,1,0,0,0,
2,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
2,0,0,0,0,2,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,3,0,0,0,
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,1,0,0,0,0,
1,0,0,0,0,0,0,0,0,1,0,0,0,0,2,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,1,1,0,0,2,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
};
/// <summary>
/// Creates the Thai model.
/// </summary>
/// <param name="charToOrderMap">Not used: the TIS-620 map declared in this class is always passed to the base class.</param>
/// <param name="name">Not used: the charset name "TIS-620" is always passed to the base class.</param>
public ThaiModel(byte[] charToOrderMap, string name)
// NOTE(review): both constructor arguments are ignored here, unlike the
// sibling models which forward them. Confirm whether that is intentional
// before changing it - callers are not visible from this file.
// 0.926386f equals the "first 512 sequences" figure (92.6386%) noted above the table.
: base(TIS620_CHAR_TO_ORDER_MAP, THAI_LANG_MODEL,
0.926386f, false, "TIS-620")
{
}
}
}

View file

@ -0,0 +1,180 @@
/* ***** BEGIN LICENSE BLOCK *****
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
*
* The contents of this file are subject to the Mozilla Public License Version
* 1.1 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
* for the specific language governing rights and limitations under the
* License.
*
* The Original Code is Mozilla Universal charset detector code.
*
* The Initial Developer of the Original Code is
* Netscape Communications Corporation.
* Portions created by the Initial Developer are Copyright (C) 2001
* the Initial Developer. All Rights Reserved.
*
* Contributor(s):
* Shy Shalom <shooshX@gmail.com>
* Rudi Pettazzi <rudi.pettazzi@gmail.com> (C# port)
*
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
* in which case the provisions of the GPL or the LGPL are applicable instead
* of those above. If you wish to allow use of your version of this file only
* under the terms of either the GPL or the LGPL, and not to allow others to
* use your version of this file under the terms of the MPL, indicate your
* decision by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL or the LGPL. If you do not delete
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the MPL, the GPL or the LGPL.
*
* ***** END LICENSE BLOCK ***** */
using System;
namespace UniversalDetector.Core
{
// TODO: Using trigrams the detector should be able to discriminate between
// latin-1 and iso8859-2
/// <summary>
/// Prober for windows-1252 text. Each byte is mapped to a character class and
/// every pair of consecutive classes is scored with a small likelihood table;
/// the confidence is derived from how often "very likely" and "very unlikely"
/// pairs were seen.
/// </summary>
public class Latin1Prober : CharsetProber
{
    // Number of likelihood categories stored in Latin1ClassModel (values 0..3).
    private const int FREQ_CAT_NUM = 4;

    // Character classes.
    private const int UDF = 0; // undefined
    private const int OTH = 1; // other
    private const int ASC = 2; // ascii capital letter
    private const int ASS = 3; // ascii small letter
    private const int ACV = 4; // accent capital vowel
    private const int ACO = 5; // accent capital other
    private const int ASV = 6; // accent small vowel
    private const int ASO = 7; // accent small other
    private const int CLASS_NUM = 8; // total classes

    // Byte value -> character class (256 entries).
    private static readonly byte[] Latin1_CharToClass = {
        OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, // 00 - 07
        OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, // 08 - 0F
        OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, // 10 - 17
        OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, // 18 - 1F
        OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, // 20 - 27
        OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, // 28 - 2F
        OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, // 30 - 37
        OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, // 38 - 3F
        OTH, ASC, ASC, ASC, ASC, ASC, ASC, ASC, // 40 - 47
        ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, // 48 - 4F
        ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, // 50 - 57
        ASC, ASC, ASC, OTH, OTH, OTH, OTH, OTH, // 58 - 5F
        OTH, ASS, ASS, ASS, ASS, ASS, ASS, ASS, // 60 - 67
        ASS, ASS, ASS, ASS, ASS, ASS, ASS, ASS, // 68 - 6F
        ASS, ASS, ASS, ASS, ASS, ASS, ASS, ASS, // 70 - 77
        ASS, ASS, ASS, OTH, OTH, OTH, OTH, OTH, // 78 - 7F
        OTH, UDF, OTH, ASO, OTH, OTH, OTH, OTH, // 80 - 87
        OTH, OTH, ACO, OTH, ACO, UDF, ACO, UDF, // 88 - 8F
        UDF, OTH, OTH, OTH, OTH, OTH, OTH, OTH, // 90 - 97
        OTH, OTH, ASO, OTH, ASO, UDF, ASO, ACO, // 98 - 9F
        OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, // A0 - A7
        OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, // A8 - AF
        OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, // B0 - B7
        OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, // B8 - BF
        ACV, ACV, ACV, ACV, ACV, ACV, ACO, ACO, // C0 - C7
        ACV, ACV, ACV, ACV, ACV, ACV, ACV, ACV, // C8 - CF
        ACO, ACO, ACV, ACV, ACV, ACV, ACV, OTH, // D0 - D7
        ACV, ACV, ACV, ACV, ACV, ACO, ACO, ACO, // D8 - DF
        ASV, ASV, ASV, ASV, ASV, ASV, ASO, ASO, // E0 - E7
        ASV, ASV, ASV, ASV, ASV, ASV, ASV, ASV, // E8 - EF
        ASO, ASO, ASV, ASV, ASV, ASV, ASV, OTH, // F0 - F7
        ASV, ASV, ASV, ASV, ASV, ASO, ASO, ASO, // F8 - FF
    };

    // Likelihood of a (previous class, current class) pair, stored row-major
    // with CLASS_NUM columns:
    //   0 : illegal
    //   1 : very unlikely
    //   2 : normal
    //   3 : very likely
    private static readonly byte[] Latin1ClassModel = {
        /*       UDF OTH ASC ASS ACV ACO ASV ASO */
        /*UDF*/ 0, 0, 0, 0, 0, 0, 0, 0,
        /*OTH*/ 0, 3, 3, 3, 3, 3, 3, 3,
        /*ASC*/ 0, 3, 3, 3, 3, 3, 3, 3,
        /*ASS*/ 0, 3, 3, 3, 1, 1, 3, 3,
        /*ACV*/ 0, 3, 3, 3, 1, 2, 1, 2,
        /*ACO*/ 0, 3, 3, 3, 3, 3, 3, 3,
        /*ASV*/ 0, 3, 1, 3, 1, 1, 1, 3,
        /*ASO*/ 0, 3, 1, 3, 1, 1, 3, 3,
    };

    // Class of the most recently processed byte.
    private byte lastCharClass;

    // Occurrence count per likelihood category, indexed by the category value.
    private int[] freqCounter = new int[FREQ_CAT_NUM];

    /// <summary>
    /// Creates the prober in its initial (detecting) state.
    /// </summary>
    public Latin1Prober()
    {
        Reset();
    }

    /// <summary>
    /// Returns the charset name reported by this prober.
    /// </summary>
    public override string GetCharsetName()
    {
        return "windows-1252";
    }

    /// <summary>
    /// Returns the prober to the detecting state and zeroes all counters.
    /// </summary>
    public override void Reset()
    {
        state = ProbingState.Detecting;
        lastCharClass = OTH;
        Array.Clear(freqCounter, 0, freqCounter.Length);
    }

    /// <summary>
    /// Feeds a slice of the input to the prober and returns the resulting state.
    /// </summary>
    /// <param name="buf">Input buffer.</param>
    /// <param name="offset">Index of the first byte to examine.</param>
    /// <param name="len">Number of bytes to examine.</param>
    public override ProbingState HandleData(byte[] buf, int offset, int len)
    {
        byte[] filtered = FilterWithEnglishLetters(buf, offset, len);

        foreach (byte current in filtered)
        {
            byte currentClass = Latin1_CharToClass[current];
            byte likelihood = Latin1ClassModel[lastCharClass * CLASS_NUM + currentClass];

            // An illegal class pair rules this charset out; stop scanning.
            if (likelihood == 0)
            {
                state = ProbingState.NotMe;
                break;
            }

            freqCounter[likelihood]++;
            lastCharClass = currentClass;
        }

        return state;
    }

    /// <summary>
    /// Computes the confidence from the pair counters: the share of
    /// "very likely" pairs, penalised 20x for each "very unlikely" pair,
    /// floored at zero and then halved.
    /// </summary>
    public override float GetConfidence()
    {
        if (state == ProbingState.NotMe)
        {
            return 0.01f;
        }

        int total = 0;
        foreach (int count in freqCounter)
        {
            total += count;
        }

        if (total <= 0)
        {
            return 0.0f;
        }

        float confidence = freqCounter[3] * 1.0f / total;
        confidence -= freqCounter[1] * 20.0f / total;

        if (confidence < 0.0f)
        {
            return 0.0f;
        }

        // lower the confidence of latin1 so that other more accurate detector
        // can take priority.
        return confidence * 0.5f;
    }

    /// <summary>
    /// Diagnostic hook; the console output is disabled, so this does nothing.
    /// </summary>
    public override void DumpStatus()
    {
    }
}
}

View file

@ -0,0 +1,175 @@
/* ***** BEGIN LICENSE BLOCK *****
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
*
* The contents of this file are subject to the Mozilla Public License Version
* 1.1 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
* for the specific language governing rights and limitations under the
* License.
*
* The Original Code is Mozilla Universal charset detector code.
*
* The Initial Developer of the Original Code is
* Netscape Communications Corporation.
* Portions created by the Initial Developer are Copyright (C) 2001
* the Initial Developer. All Rights Reserved.
*
* Contributor(s):
* Shy Shalom <shooshX@gmail.com>
* Rudi Pettazzi <rudi.pettazzi@gmail.com> (C# port)
*
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
* in which case the provisions of the GPL or the LGPL are applicable instead
* of those above. If you wish to allow use of your version of this file only
* under the terms of either the GPL or the LGPL, and not to allow others to
* use your version of this file under the terms of the MPL, indicate your
* decision by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL or the LGPL. If you do not delete
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the MPL, the GPL or the LGPL.
*
* ***** END LICENSE BLOCK ***** */
using System;
namespace UniversalDetector.Core
{
/// <summary>
/// Multi-byte charsets probers
/// </summary>
/// <summary>
/// Multi-byte charsets probers. Runs a fixed set of multi-byte probers over
/// the same input and reports the one with the highest confidence.
/// </summary>
public class MBCSGroupProber : CharsetProber
{
    private const int PROBERS_NUM = 7;

    // Display names matching the prober slots; only used for diagnostics.
    private static readonly string[] ProberName =
        { "UTF8", "SJIS", "EUCJP", "GB18030", "EUCKR", "Big5", "EUCTW" };

    private CharsetProber[] probers = new CharsetProber[PROBERS_NUM];

    // isActive[i] is false once probers[i] has answered NotMe.
    private bool[] isActive = new bool[PROBERS_NUM];

    // Index of the currently preferred prober, or -1 when none was chosen yet.
    private int bestGuess;

    // Number of probers still marked active.
    private int activeNum;

    /// <summary>
    /// Creates the seven child probers and resets the group.
    /// </summary>
    public MBCSGroupProber()
    {
        probers[0] = new UTF8Prober();
        probers[1] = new SJISProber();
        probers[2] = new EUCJPProber();
        probers[3] = new GB18030Prober();
        probers[4] = new EUCKRProber();
        probers[5] = new Big5Prober();
        probers[6] = new EUCTWProber();
        Reset();
    }

    /// <summary>
    /// Returns the charset name of the preferred prober. If none has been
    /// chosen yet, the confidences are evaluated first and slot 0 is used as
    /// a last resort.
    /// </summary>
    public override string GetCharsetName()
    {
        if (bestGuess == -1)
        {
            // Called for its side effect: it may update bestGuess.
            GetConfidence();
            if (bestGuess == -1)
            {
                bestGuess = 0;
            }
        }

        return probers[bestGuess].GetCharsetName();
    }

    /// <summary>
    /// Resets every child prober, re-activates all non-null slots and clears
    /// the current best guess.
    /// </summary>
    public override void Reset()
    {
        activeNum = 0;

        for (int slot = 0; slot < probers.Length; slot++)
        {
            CharsetProber prober = probers[slot];
            bool present = prober != null;

            if (present)
            {
                prober.Reset();
                ++activeNum;
            }

            isActive[slot] = present;
        }

        bestGuess = -1;
        state = ProbingState.Detecting;
    }

    /// <summary>
    /// Filters the input down to high bytes (plus the single byte following
    /// each run of them) and feeds the result to every active child prober.
    /// </summary>
    /// <param name="buf">Input buffer.</param>
    /// <param name="offset">Index of the first byte to examine.</param>
    /// <param name="len">Number of bytes to examine.</param>
    /// <returns>The state of the group after processing.</returns>
    public override ProbingState HandleData(byte[] buf, int offset, int len)
    {
        // do filtering to reduce load to probers
        byte[] filtered = new byte[len];
        int filteredLen = 0;

        // assume previous is not ascii, it will do no harm except add some noise
        bool keepNext = true;
        int end = offset + len;

        for (int pos = offset; pos < end; pos++)
        {
            byte current = buf[pos];
            bool isHighByte = (current & 0x80) != 0;

            // Keep every high byte, and one ASCII byte right after a kept high byte.
            if (isHighByte || keepNext)
            {
                filtered[filteredLen++] = current;
                keepNext = isHighByte;
            }
        }

        for (int slot = 0; slot < probers.Length; slot++)
        {
            if (!isActive[slot])
            {
                continue;
            }

            ProbingState childState = probers[slot].HandleData(filtered, 0, filteredLen);

            if (childState == ProbingState.FoundIt)
            {
                bestGuess = slot;
                state = ProbingState.FoundIt;
                break;
            }

            if (childState != ProbingState.NotMe)
            {
                continue;
            }

            // This prober ruled itself out; stop once none are left.
            isActive[slot] = false;
            activeNum--;
            if (activeNum <= 0)
            {
                state = ProbingState.NotMe;
                break;
            }
        }

        return state;
    }

    /// <summary>
    /// Returns 0.99 when a child has found its charset, 0.01 when all were
    /// ruled out, otherwise the highest confidence among the active children
    /// (remembering that child as the best guess).
    /// </summary>
    public override float GetConfidence()
    {
        if (state == ProbingState.FoundIt)
        {
            return 0.99f;
        }

        if (state == ProbingState.NotMe)
        {
            return 0.01f;
        }

        float bestConf = 0.0f;

        for (int slot = 0; slot < PROBERS_NUM; slot++)
        {
            if (!isActive[slot])
            {
                continue;
            }

            float candidate = probers[slot].GetConfidence();
            if (candidate > bestConf)
            {
                bestConf = candidate;
                bestGuess = slot;
            }
        }

        return bestConf;
    }

    /// <summary>
    /// Diagnostic hook; the console output is disabled, but the confidence
    /// queries are still issued exactly as before.
    /// </summary>
    public override void DumpStatus()
    {
        GetConfidence();

        for (int slot = 0; slot < PROBERS_NUM; slot++)
        {
            if (isActive[slot])
            {
                // Result intentionally discarded (printing is disabled).
                probers[slot].GetConfidence();
            }
        }
    }
}
}

View file

@ -0,0 +1,640 @@
/* ***** BEGIN LICENSE BLOCK *****
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
*
* The contents of this file are subject to the Mozilla Public License Version
* 1.1 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
* for the specific language governing rights and limitations under the
* License.
*
* The Original Code is Mozilla Universal charset detector code.
*
* The Initial Developer of the Original Code is
* Netscape Communications Corporation.
* Portions created by the Initial Developer are Copyright (C) 2001
* the Initial Developer. All Rights Reserved.
*
* Contributor(s):
* Shy Shalom <shooshX@gmail.com>
* Rudi Pettazzi <rudi.pettazzi@gmail.com> (C# port)
*
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
* in which case the provisions of the GPL or the LGPL are applicable instead
* of those above. If you wish to allow use of your version of this file only
* under the terms of either the GPL or the LGPL, and not to allow others to
* use your version of this file under the terms of the MPL, indicate your
* decision by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL or the LGPL. If you do not delete
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the MPL, the GPL or the LGPL.
*
* ***** END LICENSE BLOCK ***** */
namespace UniversalDetector.Core
{
/// <summary>
/// Table data describing UTF-8 for the SMModel base class: a byte-to-class
/// table, a packed state table and a per-class length table, registered
/// under the charset name "UTF-8".
/// </summary>
public class UTF8SMModel : SMModel
{
// Byte-class table: one 4-bit class per byte value, eight values packed per
// int by Pack4bits. The trailing comment on each row gives the byte range.
private readonly static int[] UTF8_cls = {
BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 00 - 07
BitPackage.Pack4bits(1,1,1,1,1,1,0,0), // 08 - 0f
BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 10 - 17
BitPackage.Pack4bits(1,1,1,0,1,1,1,1), // 18 - 1f
BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 20 - 27
BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 28 - 2f
BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 30 - 37
BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 38 - 3f
BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 40 - 47
BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 48 - 4f
BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 50 - 57
BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 58 - 5f
BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 60 - 67
BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 68 - 6f
BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 70 - 77
BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 78 - 7f
BitPackage.Pack4bits(2,2,2,2,3,3,3,3), // 80 - 87
BitPackage.Pack4bits(4,4,4,4,4,4,4,4), // 88 - 8f
BitPackage.Pack4bits(4,4,4,4,4,4,4,4), // 90 - 97
BitPackage.Pack4bits(4,4,4,4,4,4,4,4), // 98 - 9f
BitPackage.Pack4bits(5,5,5,5,5,5,5,5), // a0 - a7
BitPackage.Pack4bits(5,5,5,5,5,5,5,5), // a8 - af
BitPackage.Pack4bits(5,5,5,5,5,5,5,5), // b0 - b7
BitPackage.Pack4bits(5,5,5,5,5,5,5,5), // b8 - bf
BitPackage.Pack4bits(0,0,6,6,6,6,6,6), // c0 - c7
BitPackage.Pack4bits(6,6,6,6,6,6,6,6), // c8 - cf
BitPackage.Pack4bits(6,6,6,6,6,6,6,6), // d0 - d7
BitPackage.Pack4bits(6,6,6,6,6,6,6,6), // d8 - df
BitPackage.Pack4bits(7,8,8,8,8,8,8,8), // e0 - e7
BitPackage.Pack4bits(8,8,8,8,8,9,8,8), // e8 - ef
BitPackage.Pack4bits(10,11,11,11,11,11,11,11), // f0 - f7
BitPackage.Pack4bits(12,13,13,13,14,15,0,0) // f8 - ff
};
// Packed state table built from ERROR/START/ITSME and numbered states; the
// trailing comment on each row is the index range of the packed entries.
// NOTE(review): presumably looked up by (state, byte class) by the state
// machine that consumes SMModel - confirm against SMModel.
private readonly static int[] UTF8_st = {
BitPackage.Pack4bits(ERROR,START,ERROR,ERROR,ERROR,ERROR, 12, 10),//00-07
BitPackage.Pack4bits( 9, 11, 8, 7, 6, 5, 4, 3),//08-0f
BitPackage.Pack4bits(ERROR,ERROR,ERROR,ERROR,ERROR,ERROR,ERROR,ERROR),//10-17
BitPackage.Pack4bits(ERROR,ERROR,ERROR,ERROR,ERROR,ERROR,ERROR,ERROR),//18-1f
BitPackage.Pack4bits(ITSME,ITSME,ITSME,ITSME,ITSME,ITSME,ITSME,ITSME),//20-27
BitPackage.Pack4bits(ITSME,ITSME,ITSME,ITSME,ITSME,ITSME,ITSME,ITSME),//28-2f
BitPackage.Pack4bits(ERROR,ERROR, 5, 5, 5, 5,ERROR,ERROR),//30-37
BitPackage.Pack4bits(ERROR,ERROR,ERROR,ERROR,ERROR,ERROR,ERROR,ERROR),//38-3f
BitPackage.Pack4bits(ERROR,ERROR,ERROR, 5, 5, 5,ERROR,ERROR),//40-47
BitPackage.Pack4bits(ERROR,ERROR,ERROR,ERROR,ERROR,ERROR,ERROR,ERROR),//48-4f
BitPackage.Pack4bits(ERROR,ERROR, 7, 7, 7, 7,ERROR,ERROR),//50-57
BitPackage.Pack4bits(ERROR,ERROR,ERROR,ERROR,ERROR,ERROR,ERROR,ERROR),//58-5f
BitPackage.Pack4bits(ERROR,ERROR,ERROR,ERROR, 7, 7,ERROR,ERROR),//60-67
BitPackage.Pack4bits(ERROR,ERROR,ERROR,ERROR,ERROR,ERROR,ERROR,ERROR),//68-6f
BitPackage.Pack4bits(ERROR,ERROR, 9, 9, 9, 9,ERROR,ERROR),//70-77
BitPackage.Pack4bits(ERROR,ERROR,ERROR,ERROR,ERROR,ERROR,ERROR,ERROR),//78-7f
BitPackage.Pack4bits(ERROR,ERROR,ERROR,ERROR,ERROR, 9,ERROR,ERROR),//80-87
BitPackage.Pack4bits(ERROR,ERROR,ERROR,ERROR,ERROR,ERROR,ERROR,ERROR),//88-8f
BitPackage.Pack4bits(ERROR,ERROR, 12, 12, 12, 12,ERROR,ERROR),//90-97
BitPackage.Pack4bits(ERROR,ERROR,ERROR,ERROR,ERROR,ERROR,ERROR,ERROR),//98-9f
BitPackage.Pack4bits(ERROR,ERROR,ERROR,ERROR,ERROR, 12,ERROR,ERROR),//a0-a7
BitPackage.Pack4bits(ERROR,ERROR,ERROR,ERROR,ERROR,ERROR,ERROR,ERROR),//a8-af
BitPackage.Pack4bits(ERROR,ERROR, 12, 12, 12,ERROR,ERROR,ERROR),//b0-b7
BitPackage.Pack4bits(ERROR,ERROR,ERROR,ERROR,ERROR,ERROR,ERROR,ERROR),//b8-bf
BitPackage.Pack4bits(ERROR,ERROR,START,START,START,START,ERROR,ERROR),//c0-c7
BitPackage.Pack4bits(ERROR,ERROR,ERROR,ERROR,ERROR,ERROR,ERROR,ERROR) //c8-cf
};
// One entry per byte class (16 entries, classes 0-15).
// NOTE(review): presumably the length in bytes of a character whose lead
// byte belongs to that class - confirm against SMModel.
private readonly static int[] UTF8CharLenTable =
{0, 1, 0, 0, 0, 0, 2, 3, 3, 3, 4, 4, 5, 5, 6, 6 };
// Wraps both packed tables in BitPackage instances configured with the 4-bit
// constants and passes them to the base constructor together with 16 (the
// number of entries in UTF8CharLenTable), the length table and the name.
public UTF8SMModel() : base(
new BitPackage(BitPackage.INDEX_SHIFT_4BITS,
BitPackage.SHIFT_MASK_4BITS,
BitPackage.BIT_SHIFT_4BITS,
BitPackage.UNIT_MASK_4BITS, UTF8_cls),
16,
new BitPackage(BitPackage.INDEX_SHIFT_4BITS,
BitPackage.SHIFT_MASK_4BITS,
BitPackage.BIT_SHIFT_4BITS,
BitPackage.UNIT_MASK_4BITS, UTF8_st),
UTF8CharLenTable, "UTF-8")
{
}
}
/// <summary>
/// Table data describing GB18030 for the SMModel base class: a byte-to-class
/// table, a packed state table and a per-class length table, registered
/// under the charset name "GB18030".
/// </summary>
public class GB18030SMModel : SMModel
{
// Byte-class table: one 4-bit class per byte value, eight values packed per
// int by Pack4bits. The trailing comment on each row gives the byte range.
private readonly static int[] GB18030_cls = {
BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 00 - 07
BitPackage.Pack4bits(1,1,1,1,1,1,0,0), // 08 - 0f
BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 10 - 17
BitPackage.Pack4bits(1,1,1,0,1,1,1,1), // 18 - 1f
BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 20 - 27
BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 28 - 2f
BitPackage.Pack4bits(3,3,3,3,3,3,3,3), // 30 - 37
BitPackage.Pack4bits(3,3,1,1,1,1,1,1), // 38 - 3f
BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 40 - 47
BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 48 - 4f
BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 50 - 57
BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 58 - 5f
BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 60 - 67
BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 68 - 6f
BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 70 - 77
BitPackage.Pack4bits(2,2,2,2,2,2,2,4), // 78 - 7f
BitPackage.Pack4bits(5,6,6,6,6,6,6,6), // 80 - 87
BitPackage.Pack4bits(6,6,6,6,6,6,6,6), // 88 - 8f
BitPackage.Pack4bits(6,6,6,6,6,6,6,6), // 90 - 97
BitPackage.Pack4bits(6,6,6,6,6,6,6,6), // 98 - 9f
BitPackage.Pack4bits(6,6,6,6,6,6,6,6), // a0 - a7
BitPackage.Pack4bits(6,6,6,6,6,6,6,6), // a8 - af
BitPackage.Pack4bits(6,6,6,6,6,6,6,6), // b0 - b7
BitPackage.Pack4bits(6,6,6,6,6,6,6,6), // b8 - bf
BitPackage.Pack4bits(6,6,6,6,6,6,6,6), // c0 - c7
BitPackage.Pack4bits(6,6,6,6,6,6,6,6), // c8 - cf
BitPackage.Pack4bits(6,6,6,6,6,6,6,6), // d0 - d7
BitPackage.Pack4bits(6,6,6,6,6,6,6,6), // d8 - df
BitPackage.Pack4bits(6,6,6,6,6,6,6,6), // e0 - e7
BitPackage.Pack4bits(6,6,6,6,6,6,6,6), // e8 - ef
BitPackage.Pack4bits(6,6,6,6,6,6,6,6), // f0 - f7
BitPackage.Pack4bits(6,6,6,6,6,6,6,0) // f8 - ff
};
// Packed state table built from ERROR/START/ITSME and numbered states; the
// trailing comment on each row is the index range of the packed entries.
// NOTE(review): presumably looked up by (state, byte class) by the state
// machine that consumes SMModel - confirm against SMModel.
private readonly static int[] GB18030_st = {
BitPackage.Pack4bits(ERROR,START,START,START,START,START, 3,ERROR),//00-07
BitPackage.Pack4bits(ERROR,ERROR,ERROR,ERROR,ERROR,ERROR,ITSME,ITSME),//08-0f
BitPackage.Pack4bits(ITSME,ITSME,ITSME,ITSME,ITSME,ERROR,ERROR,START),//10-17
BitPackage.Pack4bits( 4,ERROR,START,START,ERROR,ERROR,ERROR,ERROR),//18-1f
BitPackage.Pack4bits(ERROR,ERROR, 5,ERROR,ERROR,ERROR,ITSME,ERROR),//20-27
BitPackage.Pack4bits(ERROR,ERROR,START,START,START,START,START,START) //28-2f
};
// To be accurate, the length of class 6 can be either 2 or 4.
// But it is not necessary to discriminate between the two since
// it is used for frequency analysis only, and we are validating
// each code range there as well. So it is safe to set it to
// 2 here.
// One entry per byte class (7 entries, classes 0-6).
private readonly static int[] GB18030CharLenTable = {0, 1, 1, 1, 1, 1, 2};
// Wraps both packed tables in BitPackage instances configured with the 4-bit
// constants and passes them to the base constructor together with 7 (the
// number of entries in GB18030CharLenTable), the length table and the name.
public GB18030SMModel() : base(
new BitPackage(BitPackage.INDEX_SHIFT_4BITS,
BitPackage.SHIFT_MASK_4BITS,
BitPackage.BIT_SHIFT_4BITS,
BitPackage.UNIT_MASK_4BITS, GB18030_cls),
7,
new BitPackage(BitPackage.INDEX_SHIFT_4BITS,
BitPackage.SHIFT_MASK_4BITS,
BitPackage.BIT_SHIFT_4BITS,
BitPackage.UNIT_MASK_4BITS, GB18030_st),
GB18030CharLenTable, "GB18030")
{
}
}
/// <summary>
/// Table data describing Big5 for the SMModel base class: a byte-to-class
/// table, a packed state table and a per-class length table, registered
/// under the charset name "Big5".
/// </summary>
public class BIG5SMModel : SMModel
{
// Byte-class table: one 4-bit class per byte value, eight values packed per
// int by Pack4bits. The trailing comment on each row gives the byte range.
private readonly static int[] BIG5_cls = {
BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 00 - 07
BitPackage.Pack4bits(1,1,1,1,1,1,0,0), // 08 - 0f
BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 10 - 17
BitPackage.Pack4bits(1,1,1,0,1,1,1,1), // 18 - 1f
BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 20 - 27
BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 28 - 2f
BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 30 - 37
BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 38 - 3f
BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 40 - 47
BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 48 - 4f
BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 50 - 57
BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 58 - 5f
BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 60 - 67
BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 68 - 6f
BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 70 - 77
BitPackage.Pack4bits(2,2,2,2,2,2,2,1), // 78 - 7f
BitPackage.Pack4bits(4,4,4,4,4,4,4,4), // 80 - 87
BitPackage.Pack4bits(4,4,4,4,4,4,4,4), // 88 - 8f
BitPackage.Pack4bits(4,4,4,4,4,4,4,4), // 90 - 97
BitPackage.Pack4bits(4,4,4,4,4,4,4,4), // 98 - 9f
BitPackage.Pack4bits(4,3,3,3,3,3,3,3), // a0 - a7
BitPackage.Pack4bits(3,3,3,3,3,3,3,3), // a8 - af
BitPackage.Pack4bits(3,3,3,3,3,3,3,3), // b0 - b7
BitPackage.Pack4bits(3,3,3,3,3,3,3,3), // b8 - bf
BitPackage.Pack4bits(3,3,3,3,3,3,3,3), // c0 - c7
BitPackage.Pack4bits(3,3,3,3,3,3,3,3), // c8 - cf
BitPackage.Pack4bits(3,3,3,3,3,3,3,3), // d0 - d7
BitPackage.Pack4bits(3,3,3,3,3,3,3,3), // d8 - df
BitPackage.Pack4bits(3,3,3,3,3,3,3,3), // e0 - e7
BitPackage.Pack4bits(3,3,3,3,3,3,3,3), // e8 - ef
BitPackage.Pack4bits(3,3,3,3,3,3,3,3), // f0 - f7
BitPackage.Pack4bits(3,3,3,3,3,3,3,0) // f8 - ff
};
// Packed state table built from ERROR/START/ITSME and numbered states; the
// trailing comment on each row is the index range of the packed entries.
// NOTE(review): presumably looked up by (state, byte class) by the state
// machine that consumes SMModel - confirm against SMModel.
private readonly static int[] BIG5_st = {
BitPackage.Pack4bits(ERROR,START,START, 3,ERROR,ERROR,ERROR,ERROR),//00-07
BitPackage.Pack4bits(ERROR,ERROR,ITSME,ITSME,ITSME,ITSME,ITSME,ERROR),//08-0f
BitPackage.Pack4bits(ERROR,START,START,START,START,START,START,START) //10-17
};
// One entry per byte class (5 entries, classes 0-4).
// NOTE(review): presumably the length in bytes of a character whose lead
// byte belongs to that class - confirm against SMModel.
private readonly static int[] BIG5CharLenTable = {0, 1, 1, 2, 0};
// Wraps both packed tables in BitPackage instances configured with the 4-bit
// constants and passes them to the base constructor together with 5 (the
// number of entries in BIG5CharLenTable), the length table and the name.
public BIG5SMModel() : base(
new BitPackage(BitPackage.INDEX_SHIFT_4BITS,
BitPackage.SHIFT_MASK_4BITS,
BitPackage.BIT_SHIFT_4BITS,
BitPackage.UNIT_MASK_4BITS, BIG5_cls),
5,
new BitPackage(BitPackage.INDEX_SHIFT_4BITS,
BitPackage.SHIFT_MASK_4BITS,
BitPackage.BIT_SHIFT_4BITS,
BitPackage.UNIT_MASK_4BITS, BIG5_st),
BIG5CharLenTable, "Big5")
{
}
}
/// <summary>
/// Table data describing EUC-JP for the SMModel base class: a byte-to-class
/// table, a packed state table and a per-class length table, registered
/// under the charset name "EUC-JP".
/// </summary>
public class EUCJPSMModel : SMModel
{
// Byte-class table: one 4-bit class per byte value, eight values packed per
// int by Pack4bits. The trailing comment on each row gives the byte range.
private readonly static int[] EUCJP_cls = {
// Earlier 00 - 07 row kept for reference; it put byte 0x00 in class 5,
// whereas the active row below puts it in class 4.
//BitPacket.Pack4bits(5,4,4,4,4,4,4,4), // 00 - 07
BitPackage.Pack4bits(4,4,4,4,4,4,4,4), // 00 - 07
BitPackage.Pack4bits(4,4,4,4,4,4,5,5), // 08 - 0f
BitPackage.Pack4bits(4,4,4,4,4,4,4,4), // 10 - 17
BitPackage.Pack4bits(4,4,4,5,4,4,4,4), // 18 - 1f
BitPackage.Pack4bits(4,4,4,4,4,4,4,4), // 20 - 27
BitPackage.Pack4bits(4,4,4,4,4,4,4,4), // 28 - 2f
BitPackage.Pack4bits(4,4,4,4,4,4,4,4), // 30 - 37
BitPackage.Pack4bits(4,4,4,4,4,4,4,4), // 38 - 3f
BitPackage.Pack4bits(4,4,4,4,4,4,4,4), // 40 - 47
BitPackage.Pack4bits(4,4,4,4,4,4,4,4), // 48 - 4f
BitPackage.Pack4bits(4,4,4,4,4,4,4,4), // 50 - 57
BitPackage.Pack4bits(4,4,4,4,4,4,4,4), // 58 - 5f
BitPackage.Pack4bits(4,4,4,4,4,4,4,4), // 60 - 67
BitPackage.Pack4bits(4,4,4,4,4,4,4,4), // 68 - 6f
BitPackage.Pack4bits(4,4,4,4,4,4,4,4), // 70 - 77
BitPackage.Pack4bits(4,4,4,4,4,4,4,4), // 78 - 7f
BitPackage.Pack4bits(5,5,5,5,5,5,5,5), // 80 - 87
BitPackage.Pack4bits(5,5,5,5,5,5,1,3), // 88 - 8f
BitPackage.Pack4bits(5,5,5,5,5,5,5,5), // 90 - 97
BitPackage.Pack4bits(5,5,5,5,5,5,5,5), // 98 - 9f
BitPackage.Pack4bits(5,2,2,2,2,2,2,2), // a0 - a7
BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // a8 - af
BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // b0 - b7
BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // b8 - bf
BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // c0 - c7
BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // c8 - cf
BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // d0 - d7
BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // d8 - df
BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // e0 - e7
BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // e8 - ef
BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // f0 - f7
BitPackage.Pack4bits(0,0,0,0,0,0,0,5) // f8 - ff
};
// Packed state table built from ERROR/START/ITSME and numbered states; the
// trailing comment on each row is the index range of the packed entries.
// NOTE(review): presumably looked up by (state, byte class) by the state
// machine that consumes SMModel - confirm against SMModel.
private readonly static int[] EUCJP_st = {
BitPackage.Pack4bits( 3, 4, 3, 5,START,ERROR,ERROR,ERROR),//00-07
BitPackage.Pack4bits(ERROR,ERROR,ERROR,ERROR,ITSME,ITSME,ITSME,ITSME),//08-0f
BitPackage.Pack4bits(ITSME,ITSME,START,ERROR,START,ERROR,ERROR,ERROR),//10-17
BitPackage.Pack4bits(ERROR,ERROR,START,ERROR,ERROR,ERROR, 3,ERROR),//18-1f
BitPackage.Pack4bits( 3,ERROR,ERROR,ERROR,START,START,START,START) //20-27
};
// One entry per byte class (6 entries, classes 0-5).
// NOTE(review): presumably the length in bytes of a character whose lead
// byte belongs to that class - confirm against SMModel.
private readonly static int[] EUCJPCharLenTable = { 2, 2, 2, 3, 1, 0 };
// Wraps both packed tables in BitPackage instances configured with the 4-bit
// constants and passes them to the base constructor together with 6 (the
// number of entries in EUCJPCharLenTable), the length table and the name.
public EUCJPSMModel() : base(
new BitPackage(BitPackage.INDEX_SHIFT_4BITS,
BitPackage.SHIFT_MASK_4BITS,
BitPackage.BIT_SHIFT_4BITS,
BitPackage.UNIT_MASK_4BITS, EUCJP_cls),
6,
new BitPackage(BitPackage.INDEX_SHIFT_4BITS,
BitPackage.SHIFT_MASK_4BITS,
BitPackage.BIT_SHIFT_4BITS,
BitPackage.UNIT_MASK_4BITS, EUCJP_st),
EUCJPCharLenTable, "EUC-JP")
{
}
}
/// <summary>
/// Table data describing EUC-KR for the SMModel base class: a byte-to-class
/// table, a packed state table and a per-class length table, registered
/// under the charset name "EUC-KR".
/// </summary>
public class EUCKRSMModel : SMModel
{
// Byte-class table: one 4-bit class per byte value, eight values packed per
// int by Pack4bits. The trailing comment on each row gives the byte range.
private readonly static int[] EUCKR_cls = {
// Earlier 00 - 07 row kept for reference; it put byte 0x00 in class 0,
// whereas the active row below puts it in class 1.
//BitPacket.Pack4bits(0,1,1,1,1,1,1,1), // 00 - 07
BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 00 - 07
BitPackage.Pack4bits(1,1,1,1,1,1,0,0), // 08 - 0f
BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 10 - 17
BitPackage.Pack4bits(1,1,1,0,1,1,1,1), // 18 - 1f
BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 20 - 27
BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 28 - 2f
BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 30 - 37
BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 38 - 3f
BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 40 - 47
BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 48 - 4f
BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 50 - 57
BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 58 - 5f
BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 60 - 67
BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 68 - 6f
BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 70 - 77
BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 78 - 7f
BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 80 - 87
BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 88 - 8f
BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 90 - 97
BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 98 - 9f
BitPackage.Pack4bits(0,2,2,2,2,2,2,2), // a0 - a7
BitPackage.Pack4bits(2,2,2,2,2,3,3,3), // a8 - af
BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // b0 - b7
BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // b8 - bf
BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // c0 - c7
BitPackage.Pack4bits(2,3,2,2,2,2,2,2), // c8 - cf
BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // d0 - d7
BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // d8 - df
BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // e0 - e7
BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // e8 - ef
BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // f0 - f7
BitPackage.Pack4bits(2,2,2,2,2,2,2,0) // f8 - ff
};
// Packed state table built from ERROR/START/ITSME and numbered states; the
// trailing comment on each row is the index range of the packed entries.
// NOTE(review): presumably looked up by (state, byte class) by the state
// machine that consumes SMModel - confirm against SMModel.
private readonly static int[] EUCKR_st = {
BitPackage.Pack4bits(ERROR,START, 3,ERROR,ERROR,ERROR,ERROR,ERROR),//00-07
BitPackage.Pack4bits(ITSME,ITSME,ITSME,ITSME,ERROR,ERROR,START,START) //08-0f
};
// One entry per byte class (4 entries, classes 0-3).
// NOTE(review): presumably the length in bytes of a character whose lead
// byte belongs to that class - confirm against SMModel.
private readonly static int[] EUCKRCharLenTable = { 0, 1, 2, 0 };
// Wraps both packed tables in BitPackage instances configured with the 4-bit
// constants and passes them to the base constructor together with 4 (the
// number of entries in EUCKRCharLenTable), the length table and the name.
public EUCKRSMModel() : base(
new BitPackage(BitPackage.INDEX_SHIFT_4BITS,
BitPackage.SHIFT_MASK_4BITS,
BitPackage.BIT_SHIFT_4BITS,
BitPackage.UNIT_MASK_4BITS, EUCKR_cls),
4,
new BitPackage(BitPackage.INDEX_SHIFT_4BITS,
BitPackage.SHIFT_MASK_4BITS,
BitPackage.BIT_SHIFT_4BITS,
BitPackage.UNIT_MASK_4BITS, EUCKR_st),
EUCKRCharLenTable, "EUC-KR")
{
}
}
/// <summary>
/// Table data describing EUC-TW for the SMModel base class: a byte-to-class
/// table, a packed state table and a per-class length table, registered
/// under the charset name "EUC-TW".
/// </summary>
public class EUCTWSMModel : SMModel
{
// Byte-class table: one 4-bit class per byte value, eight values packed per
// int by Pack4bits. The trailing comment on each row gives the byte range.
private readonly static int[] EUCTW_cls = {
BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 00 - 07
BitPackage.Pack4bits(2,2,2,2,2,2,0,0), // 08 - 0f
BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 10 - 17
BitPackage.Pack4bits(2,2,2,0,2,2,2,2), // 18 - 1f
BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 20 - 27
BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 28 - 2f
BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 30 - 37
BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 38 - 3f
BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 40 - 47
BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 48 - 4f
BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 50 - 57
BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 58 - 5f
BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 60 - 67
BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 68 - 6f
BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 70 - 77
BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 78 - 7f
BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 80 - 87
BitPackage.Pack4bits(0,0,0,0,0,0,6,0), // 88 - 8f
BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 90 - 97
BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 98 - 9f
BitPackage.Pack4bits(0,3,4,4,4,4,4,4), // a0 - a7
BitPackage.Pack4bits(5,5,1,1,1,1,1,1), // a8 - af
BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // b0 - b7
BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // b8 - bf
BitPackage.Pack4bits(1,1,3,1,3,3,3,3), // c0 - c7
BitPackage.Pack4bits(3,3,3,3,3,3,3,3), // c8 - cf
BitPackage.Pack4bits(3,3,3,3,3,3,3,3), // d0 - d7
BitPackage.Pack4bits(3,3,3,3,3,3,3,3), // d8 - df
BitPackage.Pack4bits(3,3,3,3,3,3,3,3), // e0 - e7
BitPackage.Pack4bits(3,3,3,3,3,3,3,3), // e8 - ef
BitPackage.Pack4bits(3,3,3,3,3,3,3,3), // f0 - f7
BitPackage.Pack4bits(3,3,3,3,3,3,3,0) // f8 - ff
};
// Packed state table built from ERROR/START/ITSME and numbered states; the
// trailing comment on each row is the index range of the packed entries.
// NOTE(review): presumably looked up by (state, byte class) by the state
// machine that consumes SMModel - confirm against SMModel.
private readonly static int[] EUCTW_st = {
BitPackage.Pack4bits(ERROR,ERROR,START, 3, 3, 3, 4,ERROR),//00-07
BitPackage.Pack4bits(ERROR,ERROR,ERROR,ERROR,ERROR,ERROR,ITSME,ITSME),//08-0f
BitPackage.Pack4bits(ITSME,ITSME,ITSME,ITSME,ITSME,ERROR,START,ERROR),//10-17
BitPackage.Pack4bits(START,START,START,ERROR,ERROR,ERROR,ERROR,ERROR),//18-1f
BitPackage.Pack4bits( 5,ERROR,ERROR,ERROR,START,ERROR,START,START),//20-27
BitPackage.Pack4bits(START,ERROR,START,START,START,START,START,START) //28-2f
};
// One entry per byte class (7 entries, classes 0-6).
// NOTE(review): presumably the length in bytes of a character whose lead
// byte belongs to that class - confirm against SMModel.
private readonly static int[] EUCTWCharLenTable = { 0, 0, 1, 2, 2, 2, 3 };
// Wraps both packed tables in BitPackage instances configured with the 4-bit
// constants and passes them to the base constructor together with 7 (the
// number of entries in EUCTWCharLenTable), the length table and the name.
public EUCTWSMModel() : base(
new BitPackage(BitPackage.INDEX_SHIFT_4BITS,
BitPackage.SHIFT_MASK_4BITS,
BitPackage.BIT_SHIFT_4BITS,
BitPackage.UNIT_MASK_4BITS, EUCTW_cls),
7,
new BitPackage(BitPackage.INDEX_SHIFT_4BITS,
BitPackage.SHIFT_MASK_4BITS,
BitPackage.BIT_SHIFT_4BITS,
BitPackage.UNIT_MASK_4BITS, EUCTW_st),
EUCTWCharLenTable, "EUC-TW")
{
}
}
/// <summary>
/// Table data describing Shift_JIS for the SMModel base class: a
/// byte-to-class table, a packed state table and a per-class length table,
/// registered under the charset name "Shift_JIS".
/// </summary>
public class SJISSMModel : SMModel
{
// Byte-class table: one 4-bit class per byte value, eight values packed per
// int by Pack4bits. The trailing comment on each row gives the byte range.
private readonly static int[] SJIS_cls = {
// Earlier 00 - 07 row kept for reference; it put byte 0x00 in class 0,
// whereas the active row below puts it in class 1.
//BitPacket.Pack4bits(0,1,1,1,1,1,1,1), // 00 - 07
BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 00 - 07
BitPackage.Pack4bits(1,1,1,1,1,1,0,0), // 08 - 0f
BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 10 - 17
BitPackage.Pack4bits(1,1,1,0,1,1,1,1), // 18 - 1f
BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 20 - 27
BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 28 - 2f
BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 30 - 37
BitPackage.Pack4bits(1,1,1,1,1,1,1,1), // 38 - 3f
BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 40 - 47
BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 48 - 4f
BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 50 - 57
BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 58 - 5f
BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 60 - 67
BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 68 - 6f
BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // 70 - 77
BitPackage.Pack4bits(2,2,2,2,2,2,2,1), // 78 - 7f
BitPackage.Pack4bits(3,3,3,3,3,3,3,3), // 80 - 87
BitPackage.Pack4bits(3,3,3,3,3,3,3,3), // 88 - 8f
BitPackage.Pack4bits(3,3,3,3,3,3,3,3), // 90 - 97
BitPackage.Pack4bits(3,3,3,3,3,3,3,3), // 98 - 9f
// 0xa0 is illegal in the Shift_JIS encoding, but some pages do
// contain this byte, so we need to be more forgiving of errors.
BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // a0 - a7
BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // a8 - af
BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // b0 - b7
BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // b8 - bf
BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // c0 - c7
BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // c8 - cf
BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // d0 - d7
BitPackage.Pack4bits(2,2,2,2,2,2,2,2), // d8 - df
BitPackage.Pack4bits(3,3,3,3,3,3,3,3), // e0 - e7
BitPackage.Pack4bits(3,3,3,3,3,4,4,4), // e8 - ef
BitPackage.Pack4bits(4,4,4,4,4,4,4,4), // f0 - f7
BitPackage.Pack4bits(4,4,4,4,4,0,0,0) // f8 - ff
};
// Packed state table built from ERROR/START/ITSME and numbered states; the
// trailing comment on each row is the index range of the packed entries.
// NOTE(review): presumably looked up by (state, byte class) by the state
// machine that consumes SMModel - confirm against SMModel.
private readonly static int[] SJIS_st = {
BitPackage.Pack4bits(ERROR,START,START, 3,ERROR,ERROR,ERROR,ERROR),//00-07
BitPackage.Pack4bits(ERROR,ERROR,ERROR,ERROR,ITSME,ITSME,ITSME,ITSME),//08-0f
BitPackage.Pack4bits(ITSME,ITSME,ERROR,ERROR,START,START,START,START) //10-17
};
// Six entries, although SJIS_cls above only assigns classes 0-4.
// NOTE(review): presumably the length in bytes of a character whose lead
// byte belongs to that class - confirm against SMModel.
private readonly static int[] SJISCharLenTable = { 0, 1, 1, 2, 0, 0 };
// Wraps both packed tables in BitPackage instances configured with the 4-bit
// constants and passes them to the base constructor together with 6 (the
// number of entries in SJISCharLenTable), the length table and the name.
public SJISSMModel() : base(
new BitPackage(BitPackage.INDEX_SHIFT_4BITS,
BitPackage.SHIFT_MASK_4BITS,
BitPackage.BIT_SHIFT_4BITS,
BitPackage.UNIT_MASK_4BITS, SJIS_cls),
6,
new BitPackage(BitPackage.INDEX_SHIFT_4BITS,
BitPackage.SHIFT_MASK_4BITS,
BitPackage.BIT_SHIFT_4BITS,
BitPackage.UNIT_MASK_4BITS, SJIS_st),
SJISCharLenTable, "Shift_JIS")
{
}
}
/// <summary>
/// Table data for the UCS-2 big-endian model of the SMModel base class: a
/// byte-to-class table, a packed state table and a per-class length table,
/// registered under the charset name "UTF-16BE".
/// </summary>
public class UCS2BESMModel : SMModel
{
// Byte-class table: one 4-bit class per byte value, eight values packed per
// int by Pack4bits. The trailing comment on each row gives the byte range.
// Most bytes are class 0; only 0x0a, 0x0d, 0x1b, 0x29-0x2d, 0xfe and 0xff
// get a class of their own.
private readonly static int[] UCS2BE_cls = {
BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 00 - 07
BitPackage.Pack4bits(0,0,1,0,0,2,0,0), // 08 - 0f
BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 10 - 17
BitPackage.Pack4bits(0,0,0,3,0,0,0,0), // 18 - 1f
BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 20 - 27
BitPackage.Pack4bits(0,3,3,3,3,3,0,0), // 28 - 2f
BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 30 - 37
BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 38 - 3f
BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 40 - 47
BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 48 - 4f
BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 50 - 57
BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 58 - 5f
BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 60 - 67
BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 68 - 6f
BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 70 - 77
BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 78 - 7f
BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 80 - 87
BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 88 - 8f
BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 90 - 97
BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 98 - 9f
BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // a0 - a7
BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // a8 - af
BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // b0 - b7
BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // b8 - bf
BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // c0 - c7
BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // c8 - cf
BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // d0 - d7
BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // d8 - df
BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // e0 - e7
BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // e8 - ef
BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // f0 - f7
BitPackage.Pack4bits(0,0,0,0,0,0,4,5) // f8 - ff
};
// Packed state table built from ERROR/START/ITSME and numbered states; the
// trailing comment on each row is the index range of the packed entries.
// NOTE(review): presumably looked up by (state, byte class) by the state
// machine that consumes SMModel - confirm against SMModel.
private readonly static int[] UCS2BE_st = {
BitPackage.Pack4bits( 5, 7, 7,ERROR, 4, 3,ERROR,ERROR),//00-07
BitPackage.Pack4bits(ERROR,ERROR,ERROR,ERROR,ITSME,ITSME,ITSME,ITSME),//08-0f
BitPackage.Pack4bits(ITSME,ITSME, 6, 6, 6, 6,ERROR,ERROR),//10-17
BitPackage.Pack4bits( 6, 6, 6, 6, 6,ITSME, 6, 6),//18-1f
BitPackage.Pack4bits( 6, 6, 6, 6, 5, 7, 7,ERROR),//20-27
BitPackage.Pack4bits( 5, 8, 6, 6,ERROR, 6, 6, 6),//28-2f
BitPackage.Pack4bits( 6, 6, 6, 6,ERROR,ERROR,START,START) //30-37
};
// One entry per byte class (6 entries, classes 0-5).
// NOTE(review): presumably the length in bytes of a character whose lead
// byte belongs to that class - confirm against SMModel.
private readonly static int[] UCS2BECharLenTable = { 2, 2, 2, 0, 2, 2 };
// Wraps both packed tables in BitPackage instances configured with the 4-bit
// constants and passes them to the base constructor together with 6 (the
// number of entries in UCS2BECharLenTable), the length table and the name.
public UCS2BESMModel() : base(
new BitPackage(BitPackage.INDEX_SHIFT_4BITS,
BitPackage.SHIFT_MASK_4BITS,
BitPackage.BIT_SHIFT_4BITS,
BitPackage.UNIT_MASK_4BITS, UCS2BE_cls),
6,
new BitPackage(BitPackage.INDEX_SHIFT_4BITS,
BitPackage.SHIFT_MASK_4BITS,
BitPackage.BIT_SHIFT_4BITS,
BitPackage.UNIT_MASK_4BITS, UCS2BE_st),
UCS2BECharLenTable, "UTF-16BE")
{
}
}
/// <summary>
/// Table data for the UCS-2 little-endian model of the SMModel base class: a
/// byte-to-class table, a packed state table and a per-class length table,
/// registered under the charset name "UTF-16LE".
/// </summary>
public class UCS2LESMModel : SMModel
{
// Byte-class table: one 4-bit class per byte value, eight values packed per
// int by Pack4bits. The trailing comment on each row gives the byte range.
// Most bytes are class 0; only 0x0a, 0x0d, 0x1b, 0x29-0x2d, 0xfe and 0xff
// get a class of their own.
private readonly static int[] UCS2LE_cls = {
BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 00 - 07
BitPackage.Pack4bits(0,0,1,0,0,2,0,0), // 08 - 0f
BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 10 - 17
BitPackage.Pack4bits(0,0,0,3,0,0,0,0), // 18 - 1f
BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 20 - 27
BitPackage.Pack4bits(0,3,3,3,3,3,0,0), // 28 - 2f
BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 30 - 37
BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 38 - 3f
BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 40 - 47
BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 48 - 4f
BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 50 - 57
BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 58 - 5f
BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 60 - 67
BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 68 - 6f
BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 70 - 77
BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 78 - 7f
BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 80 - 87
BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 88 - 8f
BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 90 - 97
BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // 98 - 9f
BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // a0 - a7
BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // a8 - af
BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // b0 - b7
BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // b8 - bf
BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // c0 - c7
BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // c8 - cf
BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // d0 - d7
BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // d8 - df
BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // e0 - e7
BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // e8 - ef
BitPackage.Pack4bits(0,0,0,0,0,0,0,0), // f0 - f7
BitPackage.Pack4bits(0,0,0,0,0,0,4,5) // f8 - ff
};
// Packed state table built from ERROR/START/ITSME and numbered states; the
// trailing comment on each row is the index range of the packed entries.
// NOTE(review): presumably looked up by (state, byte class) by the state
// machine that consumes SMModel - confirm against SMModel.
private readonly static int[] UCS2LE_st = {
BitPackage.Pack4bits( 6, 6, 7, 6, 4, 3,ERROR,ERROR),//00-07
BitPackage.Pack4bits(ERROR,ERROR,ERROR,ERROR,ITSME,ITSME,ITSME,ITSME),//08-0f
BitPackage.Pack4bits(ITSME,ITSME, 5, 5, 5,ERROR,ITSME,ERROR),//10-17
BitPackage.Pack4bits( 5, 5, 5,ERROR, 5,ERROR, 6, 6),//18-1f
BitPackage.Pack4bits( 7, 6, 8, 8, 5, 5, 5,ERROR),//20-27
BitPackage.Pack4bits( 5, 5, 5,ERROR,ERROR,ERROR, 5, 5),//28-2f
BitPackage.Pack4bits( 5, 5, 5,ERROR, 5,ERROR,START,START) //30-37
};
// One entry per byte class (6 entries, classes 0-5).
// NOTE(review): presumably the length in bytes of a character whose lead
// byte belongs to that class - confirm against SMModel.
private readonly static int[] UCS2LECharLenTable = { 2, 2, 2, 2, 2, 2 };
// Wraps both packed tables in BitPackage instances configured with the 4-bit
// constants and passes them to the base constructor together with 6 (the
// number of entries in UCS2LECharLenTable), the length table and the name.
public UCS2LESMModel() : base(
new BitPackage(BitPackage.INDEX_SHIFT_4BITS,
BitPackage.SHIFT_MASK_4BITS,
BitPackage.BIT_SHIFT_4BITS,
BitPackage.UNIT_MASK_4BITS, UCS2LE_cls),
6,
new BitPackage(BitPackage.INDEX_SHIFT_4BITS,
BitPackage.SHIFT_MASK_4BITS,
BitPackage.BIT_SHIFT_4BITS,
BitPackage.UNIT_MASK_4BITS, UCS2LE_st),
UCS2LECharLenTable, "UTF-16LE")
{
}
}
}

View file

@ -0,0 +1,180 @@
/* ***** BEGIN LICENSE BLOCK *****
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
*
* The contents of this file are subject to the Mozilla Public License Version
* 1.1 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
* for the specific language governing rights and limitations under the
* License.
*
* The Original Code is Mozilla Universal charset detector code.
*
* The Initial Developer of the Original Code is
* Netscape Communications Corporation.
* Portions created by the Initial Developer are Copyright (C) 2001
* the Initial Developer. All Rights Reserved.
*
* Contributor(s):
* Shy Shalom <shooshX@gmail.com>
* Rudi Pettazzi <rudi.pettazzi@gmail.com> (C# port)
*
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
* in which case the provisions of the GPL or the LGPL are applicable instead
* of those above. If you wish to allow use of your version of this file only
* under the terms of either the GPL or the LGPL, and not to allow others to
* use your version of this file under the terms of the MPL, indicate your
* decision by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL or the LGPL. If you do not delete
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the MPL, the GPL or the LGPL.
*
* ***** END LICENSE BLOCK ***** */
using System;
namespace UniversalDetector.Core
{
/// <summary>
/// Composite prober for single-byte charsets. It feeds the same filtered
/// input to thirteen child probers and reports the child with the highest
/// confidence.
/// </summary>
public class SBCSGroupProber : CharsetProber
{
    private const int PROBERS_NUM = 13;
    private CharsetProber[] probers = new CharsetProber[PROBERS_NUM];
    // isActive[i] is false once probers[i] has answered NotMe.
    private bool[] isActive = new bool[PROBERS_NUM];
    // Index of the currently preferred child prober, or -1 when undecided.
    private int bestGuess;
    // Number of child probers that are still active.
    private int activeNum;

    /// <summary>
    /// Creates the child probers and puts the group into its initial state.
    /// </summary>
    public SBCSGroupProber()
    {
        probers[0] = new SingleByteCharSetProber(new Win1251Model());
        probers[1] = new SingleByteCharSetProber(new Koi8rModel());
        probers[2] = new SingleByteCharSetProber(new Latin5Model());
        probers[3] = new SingleByteCharSetProber(new MacCyrillicModel());
        probers[4] = new SingleByteCharSetProber(new Ibm866Model());
        probers[5] = new SingleByteCharSetProber(new Ibm855Model());
        probers[6] = new SingleByteCharSetProber(new Latin7Model());
        probers[7] = new SingleByteCharSetProber(new Win1253Model());
        probers[8] = new SingleByteCharSetProber(new Latin5BulgarianModel());
        probers[9] = new SingleByteCharSetProber(new Win1251BulgarianModel());

        // The Hebrew prober is wired to two Win1255 model probers.
        HebrewProber hebprober = new HebrewProber();
        probers[10] = hebprober;
        // Logical
        probers[11] = new SingleByteCharSetProber(new Win1255Model(), false, hebprober);
        // Visual
        probers[12] = new SingleByteCharSetProber(new Win1255Model(), true, hebprober);
        hebprober.SetModelProbers(probers[11], probers[12]);

        // Latin2 stays disabled until a Latin1 prober is available; otherwise
        // all Latin1 text would be detected as Latin2 because of their similarity.
        //probers[13] = new SingleByteCharSetProber(new Latin2HungarianModel());
        //probers[14] = new SingleByteCharSetProber(new Win1250HungarianModel());
        Reset();
    }

    /// <summary>
    /// Filters the input with FilterWithoutEnglishLetters and hands the result
    /// to every active child prober.
    /// </summary>
    /// <param name="buf">Input buffer.</param>
    /// <param name="offset">Index of the first byte to examine.</param>
    /// <param name="len">Number of bytes to examine.</param>
    /// <returns>The state of the group after processing the data.</returns>
    public override ProbingState HandleData(byte[] buf, int offset, int len)
    {
        ProbingState st = ProbingState.NotMe;
        // The same filtered buffer is fed to every child. KeepEnglishLetters
        // of the individual probers is deliberately ignored, since none of
        // the probers here recognizes a language written with English letters.
        byte[] newBuf = FilterWithoutEnglishLetters(buf, offset, len);
        if (newBuf.Length == 0)
            return state; // Nothing to see here, move on.

        for (int i = 0; i < PROBERS_NUM; i++) {
            if (!isActive[i])
                continue;
            st = probers[i].HandleData(newBuf, 0, newBuf.Length);
            if (st == ProbingState.FoundIt) {
                // A child is certain: the whole group is done.
                bestGuess = i;
                state = ProbingState.FoundIt;
                break;
            } else if (st == ProbingState.NotMe) {
                // Retire this child; give up only once none are left.
                isActive[i] = false;
                activeNum--;
                if (activeNum <= 0) {
                    state = ProbingState.NotMe;
                    break;
                }
            }
        }
        return state;
    }

    /// <summary>
    /// Returns 0.99 when a child has found its charset, 0.01 when the group has
    /// been ruled out, and otherwise the highest confidence among the active
    /// children (recording that child as the best guess).
    /// </summary>
    public override float GetConfidence()
    {
        float bestConf = 0.0f, cf;
        switch (state) {
        case ProbingState.FoundIt:
            return 0.99f; //sure yes
        case ProbingState.NotMe:
            return 0.01f; //sure no
        default:
            for (int i = 0; i < PROBERS_NUM; i++)
            {
                if (!isActive[i])
                    continue;
                cf = probers[i].GetConfidence();
                if (bestConf < cf)
                {
                    bestConf = cf;
                    bestGuess = i;
                }
            }
            break;
        }
        return bestConf;
    }

    /// <summary>
    /// Diagnostic hook. Console output is disabled, so this only re-evaluates
    /// the group confidence and forwards the call to each active child.
    /// </summary>
    public override void DumpStatus()
    {
        GetConfidence();
        // Console.WriteLine(" SBCS Group Prober --------begin status");
        for (int i = 0; i < PROBERS_NUM; i++) {
            if (isActive[i])
                probers[i].DumpStatus();
            //else
            //Console.WriteLine(" inactive: [{0}] (i.e. confidence is too low).", probers[i].GetCharsetName());
        }
        //Console.WriteLine(" SBCS Group found best match [{0}] confidence {1}.", probers[bestGuess].GetCharsetName(), GetConfidence());
    }

    /// <summary>
    /// Resets every child prober, marks all of them active again, re-counts
    /// the active probers and clears the current best guess.
    /// </summary>
    public override void Reset ()
    {
        // Assign the field, not a local: HandleData relies on this count to
        // decide when every child has been ruled out.
        activeNum = 0;
        for (int i = 0; i < PROBERS_NUM; i++) {
            if (probers[i] != null) {
                probers[i].Reset();
                isActive[i] = true;
                activeNum++;
            } else {
                isActive[i] = false;
            }
        }
        bestGuess = -1;
        state = ProbingState.Detecting;
    }

    /// <summary>
    /// Returns the charset name of the preferred child prober. When no child
    /// has been chosen yet, confidences are evaluated first; if that still
    /// selects nothing, the first prober is used.
    /// </summary>
    public override string GetCharsetName()
    {
        //if we have no answer yet
        if (bestGuess == -1) {
            GetConfidence();
            //no charset seems positive
            if (bestGuess == -1)
                bestGuess = 0;
        }
        return probers[bestGuess].GetCharsetName();
    }
}
}

View file

@ -0,0 +1,170 @@
/* ***** BEGIN LICENSE BLOCK *****
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
*
* The contents of this file are subject to the Mozilla Public License Version
* 1.1 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
* for the specific language governing rights and limitations under the
* License.
*
* The Original Code is Mozilla Universal charset detector code.
*
* The Initial Developer of the Original Code is
* Netscape Communications Corporation.
* Portions created by the Initial Developer are Copyright (C) 2001
* the Initial Developer. All Rights Reserved.
*
* Contributor(s):
* Shy Shalom <shooshX@gmail.com>
* Rudi Pettazzi <rudi.pettazzi@gmail.com> (C# port)
*
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
* in which case the provisions of the GPL or the LGPL are applicable instead
* of those above. If you wish to allow use of your version of this file only
* under the terms of either the GPL or the LGPL, and not to allow others to
* use your version of this file under the terms of the MPL, indicate your
* decision by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL or the LGPL. If you do not delete
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the MPL, the GPL or the LGPL.
*
* ***** END LICENSE BLOCK ***** */
using System;
namespace UniversalDetector.Core
{
/// <summary>
/// Prober for one single-byte charset. It maps each byte to a frequency order
/// through a <see cref="SequenceModel"/>, counts how often consecutive frequent
/// characters form sequences of each precedence category, and derives a
/// confidence from the share of sequences in the positive category.
/// </summary>
public class SingleByteCharSetProber : CharsetProber
{
    private const int SAMPLE_SIZE = 64;
    private const int SB_ENOUGH_REL_THRESHOLD = 1024;
    private const float POSITIVE_SHORTCUT_THRESHOLD = 0.95f;
    private const float NEGATIVE_SHORTCUT_THRESHOLD = 0.05f;
    private const int SYMBOL_CAT_ORDER = 250;
    private const int NUMBER_OF_SEQ_CAT = 4;
    private const int POSITIVE_CAT = NUMBER_OF_SEQ_CAT-1;
    // Only referenced by the "negative approach" confidence formula, which is not used.
    private const int NEGATIVE_CAT = 0;

    protected SequenceModel model;

    // When true, the two orders of a pair are swapped before the model lookup.
    bool reversed;

    // Order of the previously seen character.
    byte lastOrder;

    // Number of character pairs looked up in the model.
    int totalSeqs;

    // Number of characters whose order is below SYMBOL_CAT_ORDER.
    int totalChar;

    // Pair counts, indexed by the precedence category returned by the model.
    int[] seqCounters = new int[NUMBER_OF_SEQ_CAT];

    // Number of characters that fall inside the sampling range.
    int freqChar;

    // Optional auxiliary prober that decides the reported name; supplied by the group prober.
    CharsetProber nameProber;

    /// <summary>
    /// Creates a non-reversed prober that reports the model's own charset name.
    /// </summary>
    public SingleByteCharSetProber(SequenceModel model)
        : this(model, false, null)
    {
    }

    /// <summary>
    /// Creates a prober for the given model.
    /// </summary>
    /// <param name="model">sequence model used for order and precedence lookups</param>
    /// <param name="reversed">true to swap each pair before the lookup</param>
    /// <param name="nameProber">prober asked for the charset name, or null</param>
    public SingleByteCharSetProber(SequenceModel model, bool reversed,
                                   CharsetProber nameProber)
    {
        this.model = model;
        this.reversed = reversed;
        this.nameProber = nameProber;
        Reset();
    }

    /// <summary>
    /// Updates the statistics with <paramref name="len"/> bytes starting at
    /// <paramref name="offset"/> and, once more than SB_ENOUGH_REL_THRESHOLD pairs
    /// have been seen, lets a very high or very low confidence settle the state.
    /// </summary>
    public override ProbingState HandleData(byte[] buf, int offset, int len)
    {
        int end = offset + len;
        for (int pos = offset; pos < end; pos++)
        {
            byte current = model.GetOrder(buf[pos]);

            if (current < SYMBOL_CAT_ORDER)
            {
                totalChar++;
            }

            if (current < SAMPLE_SIZE)
            {
                freqChar++;
                if (lastOrder < SAMPLE_SIZE)
                {
                    totalSeqs++;
                    // Reversed probers look the pair up with its letters swapped.
                    int pairIndex = reversed
                        ? current * SAMPLE_SIZE + lastOrder
                        : lastOrder * SAMPLE_SIZE + current;
                    seqCounters[model.GetPrecedence(pairIndex)]++;
                }
            }

            lastOrder = current;
        }

        if (state != ProbingState.Detecting || totalSeqs <= SB_ENOUGH_REL_THRESHOLD)
        {
            return state;
        }

        float confidence = GetConfidence();
        if (confidence > POSITIVE_SHORTCUT_THRESHOLD)
        {
            state = ProbingState.FoundIt;
        }
        else if (confidence < NEGATIVE_SHORTCUT_THRESHOLD)
        {
            state = ProbingState.NotMe;
        }
        return state;
    }

    /// <summary>
    /// Intentionally silent; the console output is disabled.
    /// </summary>
    public override void DumpStatus()
    {
        //Console.WriteLine("  SBCS: {0} [{1}]", GetConfidence(), GetCharsetName());
    }

    /// <summary>
    /// "Positive approach" confidence: the share of pairs in the positive category,
    /// normalised by the model's typical positive ratio and scaled by the fraction
    /// of counted characters inside the sampling range. Results of 1.0 or more are
    /// reported as 0.99; without any pairs the result is 0.01.
    /// </summary>
    public override float GetConfidence()
    {
        if (totalSeqs <= 0)
        {
            return 0.01f;
        }

        float ratio = 1.0f * seqCounters[POSITIVE_CAT] / totalSeqs / model.TypicalPositiveRatio;
        ratio = ratio * freqChar / totalChar;
        return ratio >= 1.0f ? 0.99f : ratio;
    }

    /// <summary>
    /// Clears all counters and returns to the detecting state.
    /// </summary>
    public override void Reset()
    {
        state = ProbingState.Detecting;
        // 255 is outside the sampling range, so the first character never forms a pair.
        lastOrder = 255;
        Array.Clear(seqCounters, 0, NUMBER_OF_SEQ_CAT);
        totalSeqs = 0;
        totalChar = 0;
        freqChar = 0;
    }

    /// <summary>
    /// Returns the name chosen by the auxiliary name prober when one was supplied,
    /// otherwise the model's charset name.
    /// </summary>
    public override string GetCharsetName()
    {
        if (nameProber != null)
        {
            return nameProber.GetCharsetName();
        }
        return model.CharsetName;
    }
}
}

View file

@ -0,0 +1,116 @@
/* ***** BEGIN LICENSE BLOCK *****
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
*
* The contents of this file are subject to the Mozilla Public License Version
* 1.1 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
* for the specific language governing rights and limitations under the
* License.
*
* The Original Code is Mozilla Universal charset detector code.
*
* The Initial Developer of the Original Code is
* Netscape Communications Corporation.
* Portions created by the Initial Developer are Copyright (C) 2001
* the Initial Developer. All Rights Reserved.
*
* Contributor(s):
* Shy Shalom <shooshX@gmail.com>
* Rudi Pettazzi <rudi.pettazzi@gmail.com> (C# port)
*
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
* in which case the provisions of the GPL or the LGPL are applicable instead
* of those above. If you wish to allow use of your version of this file only
* under the terms of either the GPL or the LGPL, and not to allow others to
* use your version of this file under the terms of the MPL, indicate your
* decision by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL or the LGPL. If you do not delete
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the MPL, the GPL or the LGPL.
*
* ***** END LICENSE BLOCK ***** */
namespace UniversalDetector.Core
{
/// <summary>
/// Prober for the Shift-JIS encoding. Observed characteristics used for detection:
/// 1. kana characters (or hankaku?) often have a high frequency of appearance
/// 2. kana characters often appear in groups
/// 3. certain combinations of kana are never used in the Japanese language
/// </summary>
public class SJISProber : CharsetProber
{
    private CodingStateMachine codingSM;
    private SJISContextAnalyser contextAnalyser;
    private SJISDistributionAnalyser distributionAnalyser;
    // lastChar[0] holds the final byte of the previous buffer so that a character
    // split across two HandleData calls can be handed to the analysers in one piece.
    private byte[] lastChar = new byte[2];

    public SJISProber()
    {
        codingSM = new CodingStateMachine(new SJISSMModel());
        distributionAnalyser = new SJISDistributionAnalyser();
        contextAnalyser = new SJISContextAnalyser();
        Reset();
    }

    public override string GetCharsetName()
    {
        return "Shift-JIS";
    }

    /// <summary>
    /// Runs the bytes through the coding state machine and feeds every completed
    /// character to the context and distribution analysers.
    /// </summary>
    /// <param name="buf">input buffer</param>
    /// <param name="offset">offset of the first byte to examine</param>
    /// <param name="len">number of bytes to examine</param>
    /// <returns>
    /// NotMe on a state machine error, FoundIt when the state machine is certain or
    /// the analysers are confident enough, otherwise the unchanged current state.
    /// </returns>
    public override ProbingState HandleData(byte[] buf, int offset, int len)
    {
        int codingState;
        int max = offset + len;

        for (int i = offset; i < max; i++)
        {
            codingState = codingSM.NextState(buf[i]);
            if (codingState == SMModel.ERROR)
            {
                state = ProbingState.NotMe;
                break;
            }
            if (codingState == SMModel.ITSME)
            {
                state = ProbingState.FoundIt;
                break;
            }
            if (codingState == SMModel.START)
            {
                int charLen = codingSM.CurrentCharLen;
                if (i == offset)
                {
                    // The character may have started in the previous buffer:
                    // complete it in lastChar with the first byte of this one.
                    lastChar[1] = buf[offset];
                    contextAnalyser.HandleOneChar(lastChar, 2-charLen, charLen);
                    distributionAnalyser.HandleOneChar(lastChar, 0, charLen);
                }
                else
                {
                    contextAnalyser.HandleOneChar(buf, i+1-charLen, charLen);
                    distributionAnalyser.HandleOneChar(buf, i-1, charLen);
                }
            }
        }

        // Remember the final byte for the next call. With an empty range there is no
        // final byte: buf[max-1] would be out of bounds at offset 0, or a byte that
        // does not belong to this call otherwise.
        if (len > 0)
            lastChar[0] = buf[max-1];

        if (state == ProbingState.Detecting)
            if (contextAnalyser.GotEnoughData() && GetConfidence() > SHORTCUT_THRESHOLD)
                state = ProbingState.FoundIt;
        return state;
    }

    /// <summary>
    /// Resets the state machine and both analysers and returns to the detecting state.
    /// </summary>
    public override void Reset()
    {
        codingSM.Reset();
        state = ProbingState.Detecting;
        contextAnalyser.Reset();
        distributionAnalyser.Reset();
    }

    /// <summary>
    /// Returns the larger of the context and distribution analyser confidences.
    /// </summary>
    public override float GetConfidence()
    {
        float contxtCf = contextAnalyser.GetConfidence();
        float distribCf = distributionAnalyser.GetConfidence();
        return (contxtCf > distribCf ? contxtCf : distribCf);
    }
}
}

View file

@ -0,0 +1,83 @@
/* ***** BEGIN LICENSE BLOCK *****
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
*
* The contents of this file are subject to the Mozilla Public License Version
* 1.1 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
* for the specific language governing rights and limitations under the
* License.
*
* The Original Code is Mozilla Universal charset detector code.
*
* The Initial Developer of the Original Code is
* Netscape Communications Corporation.
* Portions created by the Initial Developer are Copyright (C) 2001
* the Initial Developer. All Rights Reserved.
*
* Contributor(s):
* Kohei TAKETA <k-tak@void.in> (Java port)
* Rudi Pettazzi <rudi.pettazzi@gmail.com> (C# port)
*
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
* in which case the provisions of the GPL or the LGPL are applicable instead
* of those above. If you wish to allow use of your version of this file only
* under the terms of either the GPL or the LGPL, and not to allow others to
* use your version of this file under the terms of the MPL, indicate your
* decision by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL or the LGPL. If you do not delete
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the MPL, the GPL or the LGPL.
*
* ***** END LICENSE BLOCK ***** */
using System;
namespace UniversalDetector.Core
{
/// <summary>
/// Base class for a coding state machine model: a packed byte-class table, a
/// packed state table, a character-length table and a name.
/// </summary>
public abstract class SMModel
{
    // State codes that probers compare a state machine's result against.
    public const int START = 0;
    public const int ERROR = 1;
    public const int ITSME = 2;

    public BitPackage classTable;
    public BitPackage stateTable;
    public int[] charLenTable;

    private readonly string modelName;
    private readonly int factor;

    /// <summary>
    /// Stores the tables and metadata that make up the model.
    /// </summary>
    public SMModel(BitPackage classTable, int classFactor,
                   BitPackage stateTable, int[] charLenTable, string name)
    {
        this.classTable = classTable;
        this.stateTable = stateTable;
        this.charLenTable = charLenTable;
        factor = classFactor;
        modelName = name;
    }

    /// <summary>
    /// Name given to the model at construction.
    /// </summary>
    public string Name
    {
        get { return modelName; }
    }

    /// <summary>
    /// Class factor given to the model at construction.
    /// </summary>
    public int ClassFactor
    {
        get { return factor; }
    }

    /// <summary>
    /// Looks up the class of a byte in the packed class table.
    /// </summary>
    public int GetClass(byte b)
    {
        int index = b;
        return classTable.Unpack(index);
    }
}
}

View file

@ -0,0 +1,97 @@
/* ***** BEGIN LICENSE BLOCK *****
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
*
* The contents of this file are subject to the Mozilla Public License Version
* 1.1 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
* for the specific language governing rights and limitations under the
* License.
*
* The Original Code is Mozilla Universal charset detector code.
*
* The Initial Developer of the Original Code is
* Netscape Communications Corporation.
* Portions created by the Initial Developer are Copyright (C) 2001
* the Initial Developer. All Rights Reserved.
*
* Contributor(s):
* Shy Shalom <shooshX@gmail.com>
* Rudi Pettazzi <rudi.pettazzi@gmail.com> (C# port)
*
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
* in which case the provisions of the GPL or the LGPL are applicable instead
* of those above. If you wish to allow use of your version of this file only
* under the terms of either the GPL or the LGPL, and not to allow others to
* use your version of this file under the terms of the MPL, indicate your
* decision by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL or the LGPL. If you do not delete
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the MPL, the GPL or the LGPL.
*
* ***** END LICENSE BLOCK ***** */
using System;
namespace UniversalDetector.Core
{
/// <summary>
/// Base class for the language model of a single-byte charset: a byte-to-order
/// map, a precedence matrix for two-character sequences and a few descriptive
/// values.
/// </summary>
public abstract class SequenceModel
{
    // [256] table used to find a char's order
    protected byte[] charToOrderMap;

    // [SAMPLE_SIZE][SAMPLE_SIZE] table, stored flat, used to find the
    // frequency of a 2-char sequence
    protected byte[] precedenceMatrix;

    // freqSeqs / totalSeqs
    protected float typicalPositiveRatio;

    // marked "not used" by the original authors
    protected bool keepEnglishLetter;

    protected string charsetName;

    /// <summary>
    /// Stores the tables and descriptive values of the model.
    /// </summary>
    public SequenceModel(
        byte[] charToOrderMap,
        byte[] precedenceMatrix,
        float typicalPositiveRatio,
        bool keepEnglishLetter,
        string charsetName)
    {
        this.charsetName = charsetName;
        this.keepEnglishLetter = keepEnglishLetter;
        this.typicalPositiveRatio = typicalPositiveRatio;
        this.precedenceMatrix = precedenceMatrix;
        this.charToOrderMap = charToOrderMap;
    }

    /// <summary>
    /// Typical positive ratio supplied at construction.
    /// </summary>
    public float TypicalPositiveRatio
    {
        get { return typicalPositiveRatio; }
    }

    /// <summary>
    /// Keep-English-letter flag supplied at construction.
    /// </summary>
    public bool KeepEnglishLetter
    {
        get { return keepEnglishLetter; }
    }

    /// <summary>
    /// Charset name supplied at construction.
    /// </summary>
    public string CharsetName
    {
        get { return charsetName; }
    }

    /// <summary>
    /// Returns the order stored for the given byte.
    /// </summary>
    public byte GetOrder(byte b)
    {
        byte order = charToOrderMap[b];
        return order;
    }

    /// <summary>
    /// Returns the precedence stored at the given flat position of the matrix.
    /// </summary>
    public byte GetPrecedence(int pos)
    {
        byte precedence = precedenceMatrix[pos];
        return precedence;
    }
}
}

View file

@ -0,0 +1,112 @@
/* ***** BEGIN LICENSE BLOCK *****
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
*
* The contents of this file are subject to the Mozilla Public License Version
* 1.1 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
* for the specific language governing rights and limitations under the
* License.
*
* The Original Code is Mozilla Universal charset detector code.
*
* The Initial Developer of the Original Code is
* Netscape Communications Corporation.
* Portions created by the Initial Developer are Copyright (C) 2001
* the Initial Developer. All Rights Reserved.
*
* Contributor(s):
* Shy Shalom <shooshX@gmail.com>
* Rudi Pettazzi <rudi.pettazzi@gmail.com> (C# port)
*
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
* in which case the provisions of the GPL or the LGPL are applicable instead
* of those above. If you wish to allow use of your version of this file only
* under the terms of either the GPL or the LGPL, and not to allow others to
* use your version of this file under the terms of the MPL, indicate your
* decision by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL or the LGPL. If you do not delete
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the MPL, the GPL or the LGPL.
*
* ***** END LICENSE BLOCK ***** */
namespace UniversalDetector.Core
{
/// <summary>
/// Prober for UTF-8. It validates the input with a coding state machine and
/// grows more confident with every multi-byte character it sees.
/// </summary>
public class UTF8Prober : CharsetProber
{
    // Factor applied to the remaining doubt for each multi-byte character seen.
    private static float ONE_CHAR_PROB = 0.50f;
    private CodingStateMachine codingSM;
    // Number of completed characters that were at least two bytes long.
    private int numOfMBChar;

    public UTF8Prober()
    {
        numOfMBChar = 0;
        codingSM = new CodingStateMachine(new UTF8SMModel());
        Reset();
    }

    public override string GetCharsetName()
    {
        return "UTF-8";
    }

    /// <summary>
    /// Resets the state machine and the multi-byte counter and returns to the
    /// detecting state.
    /// </summary>
    public override void Reset()
    {
        codingSM.Reset();
        numOfMBChar = 0;
        state = ProbingState.Detecting;
    }

    /// <summary>
    /// Runs the bytes through the state machine, counting completed multi-byte
    /// characters; stops early on an error (NotMe) or a certain match (FoundIt).
    /// While still detecting, a confidence above the shortcut threshold also
    /// yields FoundIt.
    /// </summary>
    public override ProbingState HandleData(byte[] buf, int offset, int len)
    {
        int end = offset + len;
        for (int pos = offset; pos < end; pos++)
        {
            int next = codingSM.NextState(buf[pos]);

            if (next == SMModel.ERROR)
            {
                state = ProbingState.NotMe;
                break;
            }
            else if (next == SMModel.ITSME)
            {
                state = ProbingState.FoundIt;
                break;
            }
            else if (next == SMModel.START && codingSM.CurrentCharLen >= 2)
            {
                numOfMBChar++;
            }
        }

        if (state == ProbingState.Detecting && GetConfidence() > SHORTCUT_THRESHOLD)
        {
            state = ProbingState.FoundIt;
        }
        return state;
    }

    /// <summary>
    /// Returns 0.99 once six or more multi-byte characters have been seen;
    /// otherwise one minus a doubt that starts at 0.99 and is multiplied by
    /// ONE_CHAR_PROB for each multi-byte character.
    /// </summary>
    public override float GetConfidence()
    {
        if (numOfMBChar >= 6)
        {
            return 0.99f;
        }

        float unlike = 0.99f;
        int remaining = numOfMBChar;
        while (remaining > 0)
        {
            unlike *= ONE_CHAR_PROB;
            remaining--;
        }
        return 1.0f - unlike;
    }
}
}

View file

@ -0,0 +1,257 @@
/* ***** BEGIN LICENSE BLOCK *****
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
*
* The contents of this file are subject to the Mozilla Public License Version
* 1.1 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
* for the specific language governing rights and limitations under the
* License.
*
* The Original Code is Mozilla Universal charset detector code.
*
* The Initial Developer of the Original Code is
* Netscape Communications Corporation.
* Portions created by the Initial Developer are Copyright (C) 2001
* the Initial Developer. All Rights Reserved.
*
* Contributor(s):
* Shy Shalom <shooshX@gmail.com>
* Rudi Pettazzi <rudi.pettazzi@gmail.com> (C# port)
*
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
* in which case the provisions of the GPL or the LGPL are applicable instead
* of those above. If you wish to allow use of your version of this file only
* under the terms of either the GPL or the LGPL, and not to allow others to
* use your version of this file under the terms of the MPL, indicate your
* decision by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL or the LGPL. If you do not delete
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the MPL, the GPL or the LGPL.
*
* ***** END LICENSE BLOCK ***** */
namespace UniversalDetector.Core
{
/// <summary>
/// Kind of input seen so far: only ASCII, ASCII with an escape sequence
/// introducer, or at least one high byte.
/// </summary>
enum InputState { PureASCII=0, EscASCII=1, Highbyte=2 }

/// <summary>
/// Core of the charset detector. Data is supplied through <see cref="Feed"/>;
/// the result is delivered to <see cref="Report"/> when <see cref="DataEnd"/> is called.
/// </summary>
public abstract class UniversalDetector
{
    protected const int FILTER_CHINESE_SIMPLIFIED = 1;
    protected const int FILTER_CHINESE_TRADITIONAL = 2;
    protected const int FILTER_JAPANESE = 4;
    protected const int FILTER_KOREAN = 8;
    protected const int FILTER_NON_CJK = 16;
    protected const int FILTER_ALL = 31;
    protected static int FILTER_CHINESE =
        FILTER_CHINESE_SIMPLIFIED | FILTER_CHINESE_TRADITIONAL;
    protected static int FILTER_CJK =
        FILTER_JAPANESE | FILTER_KOREAN | FILTER_CHINESE_SIMPLIFIED
        | FILTER_CHINESE_TRADITIONAL;

    protected const float SHORTCUT_THRESHOLD = 0.95f;
    protected const float MINIMUM_THRESHOLD = 0.20f;

    internal InputState inputState;
    // True until the first call to Feed after construction or Reset.
    protected bool start;
    // True once a non-empty buffer has been fed.
    protected bool gotData;
    // True once a charset has been determined; further input is ignored.
    protected bool done;
    // Last non-high byte seen, used to recognise the two-byte HZ introducer "~{".
    protected byte lastChar;
    protected int bestGuess;
    protected const int PROBERS_NUM = 3;
    protected int languageFilter;
    protected CharsetProber[] charsetProbers = new CharsetProber[PROBERS_NUM];
    protected CharsetProber escCharsetProber;
    protected string detectedCharset;

    public UniversalDetector(int languageFilter)
    {
        this.start = true;
        this.inputState = InputState.PureASCII;
        this.lastChar = 0x00;
        this.bestGuess = -1;
        this.languageFilter = languageFilter;
    }

    /// <summary>
    /// Feeds a block of bytes to the detector.
    /// </summary>
    /// <param name="buf">input buffer</param>
    /// <param name="offset">offset of the first byte to examine</param>
    /// <param name="len">number of bytes to examine</param>
    public virtual void Feed(byte[] buf, int offset, int len)
    {
        if (done)
        {
            return;
        }

        if (len > 0)
            gotData = true;

        // If the data starts with BOM, we know it is UTF.
        // Only the first fed block is checked, and only when it holds more than 3 bytes.
        if (start)
        {
            start = false;
            if (len > 3)
            {
                // The BOM is read relative to offset, not from the start of the array.
                byte b0 = buf[offset];
                byte b1 = buf[offset + 1];
                byte b2 = buf[offset + 2];
                byte b3 = buf[offset + 3];

                switch (b0)
                {
                    case 0xEF:
                        if (0xBB == b1 && 0xBF == b2)
                            detectedCharset = "UTF-8";
                        break;
                    case 0xFE:
                        if (0xFF == b1 && 0x00 == b2 && 0x00 == b3)
                            // FE FF 00 00  UCS-4, unusual octet order BOM (3412)
                            detectedCharset = "X-ISO-10646-UCS-4-3412";
                        else if (0xFF == b1)
                            detectedCharset = "UTF-16BE";
                        break;
                    case 0x00:
                        if (0x00 == b1 && 0xFE == b2 && 0xFF == b3)
                            detectedCharset = "UTF-32BE";
                        else if (0x00 == b1 && 0xFF == b2 && 0xFE == b3)
                            // 00 00 FF FE  UCS-4, unusual octet order BOM (2143)
                            detectedCharset = "X-ISO-10646-UCS-4-2143";
                        break;
                    case 0xFF:
                        if (0xFE == b1 && 0x00 == b2 && 0x00 == b3)
                            detectedCharset = "UTF-32LE";
                        else if (0xFE == b1)
                            detectedCharset = "UTF-16LE";
                        break;
                } // switch
            }
            if (detectedCharset != null)
            {
                done = true;
                return;
            }
        }

        // Scan exactly the fed range [offset, offset + len).
        int end = offset + len;
        for (int i = offset; i < end; i++)
        {
            byte current = buf[i];

            // other than 0xa0, if every other character is ascii, the page is ascii
            if ((current & 0x80) != 0 && current != 0xA0)
            {
                // we got a non-ascii byte (high-byte)
                if (inputState != InputState.Highbyte)
                {
                    inputState = InputState.Highbyte;

                    // kill EscCharsetProber if it is active
                    if (escCharsetProber != null)
                    {
                        escCharsetProber = null;
                    }

                    // start multibyte and singlebyte charset prober
                    if (charsetProbers[0] == null)
                        charsetProbers[0] = new MBCSGroupProber();
                    if (charsetProbers[1] == null)
                        charsetProbers[1] = new SBCSGroupProber();
                    if (charsetProbers[2] == null)
                        charsetProbers[2] = new Latin1Prober();
                }
            }
            else
            {
                // 0x1B is the ESC control character; 0x7E 0x7B is the HZ introducer "~{".
                if (inputState == InputState.PureASCII &&
                    (current == 0x1B || (current == 0x7B && lastChar == 0x7E)))
                {
                    // found escape character or HZ "~{"
                    inputState = InputState.EscASCII;
                }
                lastChar = current;
            }
        }

        ProbingState st = ProbingState.NotMe;

        switch (inputState)
        {
            case InputState.EscASCII:
                if (escCharsetProber == null)
                {
                    escCharsetProber = new EscCharsetProber();
                }
                st = escCharsetProber.HandleData(buf, offset, len);
                if (st == ProbingState.FoundIt)
                {
                    done = true;
                    detectedCharset = escCharsetProber.GetCharsetName();
                }
                break;
            case InputState.Highbyte:
                for (int i = 0; i < PROBERS_NUM; i++)
                {
                    if (charsetProbers[i] != null)
                    {
                        st = charsetProbers[i].HandleData(buf, offset, len);
#if DEBUG
                        charsetProbers[i].DumpStatus();
#endif
                        if (st == ProbingState.FoundIt)
                        {
                            done = true;
                            detectedCharset = charsetProbers[i].GetCharsetName();
                            return;
                        }
                    }
                }
                break;
            default:
                // pure ascii
                break;
        }
        return;
    }

    /// <summary>
    /// Notify detector that no further data is available.
    /// </summary>
    public virtual void DataEnd()
    {
        if (!gotData)
        {
            // we haven't got any data yet, return immediately
            // caller program sometimes call DataEnd before anything has
            // been sent to detector
            return;
        }

        if (detectedCharset != null)
        {
            done = true;
            Report(detectedCharset, 1.0f);
            return;
        }

        if (inputState == InputState.Highbyte)
        {
            float proberConfidence = 0.0f;
            float maxProberConfidence = 0.0f;
            int maxProber = 0;
            for (int i = 0; i < PROBERS_NUM; i++)
            {
                if (charsetProbers[i] != null)
                {
                    proberConfidence = charsetProbers[i].GetConfidence();
                    if (proberConfidence > maxProberConfidence)
                    {
                        maxProberConfidence = proberConfidence;
                        maxProber = i;
                    }
                }
            }

            // Nothing is reported when even the best prober is below the minimum threshold.
            if (maxProberConfidence > MINIMUM_THRESHOLD)
            {
                Report(charsetProbers[maxProber].GetCharsetName(), maxProberConfidence);
            }
        }
        else if (inputState == InputState.PureASCII)
        {
            Report("ASCII", 1.0f);
        }
    }

    /// <summary>
    /// Clear internal state of charset detector.
    /// In the original interface this method is protected.
    /// </summary>
    public virtual void Reset()
    {
        done = false;
        start = true;
        detectedCharset = null;
        gotData = false;
        bestGuess = -1;
        inputState = InputState.PureASCII;
        lastChar = 0x00;
        if (escCharsetProber != null)
            escCharsetProber.Reset();
        for (int i = 0; i < PROBERS_NUM; i++)
            if (charsetProbers[i] != null)
                charsetProbers[i].Reset();
    }

    /// <summary>
    /// Receives the detection result.
    /// </summary>
    /// <param name="charset">name of the detected charset</param>
    /// <param name="confidence">confidence attached to the result</param>
    protected abstract void Report(string charset, float confidence);
}
}

View file

@ -0,0 +1,75 @@
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* ***** BEGIN LICENSE BLOCK *****
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
*
* The contents of this file are subject to the Mozilla Public License Version
* 1.1 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
* for the specific language governing rights and limitations under the
* License.
*
* The Original Code is mozilla.org code.
*
* The Initial Developer of the Original Code is
* Netscape Communications Corporation.
* Portions created by the Initial Developer are Copyright (C) 1998
* the Initial Developer. All Rights Reserved.
*
* Contributor(s):
* Rudi Pettazzi <rudi.pettazzi@gmail.com> (C# port)
*
* Alternatively, the contents of this file may be used under the terms of
* either of the GNU General Public License Version 2 or later (the "GPL"),
* or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
* in which case the provisions of the GPL or the LGPL are applicable instead
* of those above. If you wish to allow use of your version of this file only
* under the terms of either the GPL or the LGPL, and not to allow others to
* use your version of this file under the terms of the MPL, indicate your
* decision by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL or the LGPL. If you do not delete
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the MPL, the GPL or the LGPL.
*
* ***** END LICENSE BLOCK ***** */
namespace UniversalDetector
{
/// <summary>
/// Indicates how confident the detection module is about the result it returned.
/// </summary>
/// <remarks>
/// NoAnswerYet: the detector has not found an answer yet based on the data it
///   has received.
///
/// BestAnswer: the returned answer is the best one within the knowledge of the
///   detector; in other words, the tests for all other candidates failed.
///   For example, the (Shift_JIS/EUC-JP/ISO-2022-JP) detection module may return
///   this with the answer "Shift_JIS" if it receives bytes > 0x80 (which makes the
///   ISO-2022-JP test fail) and byte 0x82 (which makes the EUC-JP test fail).
///
/// SureAnswer: the detector is 100% sure about the answer.
///   Example 1: the Shift_JIS/ISO-2022-JP/EUC-JP detector returns this with
///   ISO-2022-JP when it hits one of the following escape sequences:
///     ESC ( J
///     ESC $ @
///     ESC $ B
///   Example 2: a detector which can detect UCS2 returns this with UCS2 when the
///   first 2 bytes are a BOM mark.
///   Example 3: the Korean detector returns ISO-2022-KR when it hits
///     ESC $ ) C
/// </remarks>
public enum DetectionConfidence
{
    NoAnswerYet = 0,
    BestAnswer = 1,
    SureAnswer = 2,
    NoAnswerMatch = 3
}
}

View file

@ -0,0 +1,88 @@
/* ***** BEGIN LICENSE BLOCK *****
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
*
* The contents of this file are subject to the Mozilla Public License Version
* 1.1 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
* for the specific language governing rights and limitations under the
* License.
*
* The Original Code is Mozilla Universal charset detector code.
*
* The Initial Developer of the Original Code is
* Netscape Communications Corporation.
* Portions created by the Initial Developer are Copyright (C) 2001
* the Initial Developer. All Rights Reserved.
*
* Contributor(s):
* Shy Shalom <shooshX@gmail.com>
* Rudi Pettazzi <rudi.pettazzi@gmail.com> (C# port)
*
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
* in which case the provisions of the GPL or the LGPL are applicable instead
* of those above. If you wish to allow use of your version of this file only
* under the terms of either the GPL or the LGPL, and not to allow others to
* use your version of this file under the terms of the MPL, indicate your
* decision by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL or the LGPL. If you do not delete
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the MPL, the GPL or the LGPL.
*
* ***** END LICENSE BLOCK ***** */
using System.IO;
namespace UniversalDetector
{
/// <summary>
/// Contract of a charset detector: data is fed in blocks or as a stream, the end
/// of the data is signalled explicitly, and the result is read from
/// <see cref="Charset"/> and <see cref="Confidence"/>.
/// </summary>
public interface ICharsetDetector
{
    /// <summary>
    /// Gets the detected charset. The value can be null.
    /// </summary>
    string Charset { get; }

    /// <summary>
    /// Gets the confidence of the detected charset, if any.
    /// </summary>
    float Confidence { get; }

    /// <summary>
    /// Returns true if the detector has found a result and is sure about it.
    /// </summary>
    /// <returns>true if the detector has detected the encoding</returns>
    bool IsDone();

    /// <summary>
    /// Feeds a byte stream to the detector.
    /// </summary>
    /// <param name="stream">an input stream</param>
    void Feed(Stream stream);

    /// <summary>
    /// Feeds a block of bytes to the detector.
    /// </summary>
    /// <param name="buf">input buffer</param>
    /// <param name="offset">offset into buffer</param>
    /// <param name="len">number of available bytes</param>
    void Feed(byte[] buf, int offset, int len);

    /// <summary>
    /// Tells the detector that there is no more data and that it must take its
    /// decision.
    /// </summary>
    void DataEnd();

    /// <summary>
    /// Resets the state of the detector.
    /// </summary>
    void Reset();
}
}

View file

@ -318,8 +318,7 @@ namespace Emby.Dlna.PlayTo
CanSeek = info.MediaSource == null ? _device.Duration.HasValue : info.MediaSource.RunTimeTicks.HasValue,
PlayMethod = info.IsDirectStream ? PlayMethod.DirectStream : PlayMethod.Transcode,
QueueableMediaTypes = new List<string> { mediaInfo.MediaType }
PlayMethod = info.IsDirectStream ? PlayMethod.DirectStream : PlayMethod.Transcode
};
}

View file

@ -30,8 +30,8 @@ namespace Emby.Dlna.Profiles
MaxIconWidth = 48;
MaxIconHeight = 48;
MaxStreamingBitrate = 24000000;
MaxStaticBitrate = 24000000;
MaxStreamingBitrate = 30000000;
MaxStaticBitrate = 30000000;
MusicStreamingTranscodingBitrate = 192000;
EnableAlbumArtInDidl = false;

View file

@ -22,8 +22,8 @@
<MaxAlbumArtHeight>480</MaxAlbumArtHeight>
<MaxIconWidth>48</MaxIconWidth>
<MaxIconHeight>48</MaxIconHeight>
<MaxStreamingBitrate>24000000</MaxStreamingBitrate>
<MaxStaticBitrate>24000000</MaxStaticBitrate>
<MaxStreamingBitrate>30000000</MaxStreamingBitrate>
<MaxStaticBitrate>30000000</MaxStaticBitrate>
<MusicStreamingTranscodingBitrate>192000</MusicStreamingTranscodingBitrate>
<MaxStaticMusicBitrate xsi:nil="true" />
<XDlnaDoc>DMS-1.50</XDlnaDoc>
@ -40,9 +40,9 @@
<DirectPlayProfile container="" type="Photo" />
</DirectPlayProfiles>
<TranscodingProfiles>
<TranscodingProfile container="mp3" type="Audio" audioCodec="mp3" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" />
<TranscodingProfile container="ts" type="Video" videoCodec="h264" audioCodec="aac" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" />
<TranscodingProfile container="jpeg" type="Photo" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" />
<TranscodingProfile container="mp3" type="Audio" audioCodec="mp3" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" minSegments="0" segmentLength="0" breakOnNonKeyFrames="false" />
<TranscodingProfile container="ts" type="Video" videoCodec="h264" audioCodec="aac" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" minSegments="0" segmentLength="0" breakOnNonKeyFrames="false" />
<TranscodingProfile container="jpeg" type="Photo" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" minSegments="0" segmentLength="0" breakOnNonKeyFrames="false" />
</TranscodingProfiles>
<ContainerProfiles />
<CodecProfiles />

View file

@ -16,8 +16,8 @@
<MaxAlbumArtHeight>480</MaxAlbumArtHeight>
<MaxIconWidth>48</MaxIconWidth>
<MaxIconHeight>48</MaxIconHeight>
<MaxStreamingBitrate>24000000</MaxStreamingBitrate>
<MaxStaticBitrate>24000000</MaxStaticBitrate>
<MaxStreamingBitrate>30000000</MaxStreamingBitrate>
<MaxStaticBitrate>30000000</MaxStaticBitrate>
<MusicStreamingTranscodingBitrate>192000</MusicStreamingTranscodingBitrate>
<MaxStaticMusicBitrate xsi:nil="true" />
<XDlnaDoc>DMS-1.50</XDlnaDoc>
@ -33,9 +33,9 @@
<DirectPlayProfile container="mp3,wma,aac,wav,flac" type="Audio" />
</DirectPlayProfiles>
<TranscodingProfiles>
<TranscodingProfile container="mp3" type="Audio" audioCodec="mp3" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" />
<TranscodingProfile container="ts" type="Video" videoCodec="h264" audioCodec="aac" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" />
<TranscodingProfile container="jpeg" type="Photo" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" />
<TranscodingProfile container="mp3" type="Audio" audioCodec="mp3" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" minSegments="0" segmentLength="0" breakOnNonKeyFrames="false" />
<TranscodingProfile container="ts" type="Video" videoCodec="h264" audioCodec="aac" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" minSegments="0" segmentLength="0" breakOnNonKeyFrames="false" />
<TranscodingProfile container="jpeg" type="Photo" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" minSegments="0" segmentLength="0" breakOnNonKeyFrames="false" />
</TranscodingProfiles>
<ContainerProfiles />
<CodecProfiles />

View file

@ -21,8 +21,8 @@
<MaxAlbumArtHeight>480</MaxAlbumArtHeight>
<MaxIconWidth>48</MaxIconWidth>
<MaxIconHeight>48</MaxIconHeight>
<MaxStreamingBitrate>24000000</MaxStreamingBitrate>
<MaxStaticBitrate>24000000</MaxStaticBitrate>
<MaxStreamingBitrate>30000000</MaxStreamingBitrate>
<MaxStaticBitrate>30000000</MaxStaticBitrate>
<MusicStreamingTranscodingBitrate>192000</MusicStreamingTranscodingBitrate>
<MaxStaticMusicBitrate xsi:nil="true" />
<XDlnaDoc>DMS-1.50</XDlnaDoc>
@ -37,9 +37,9 @@
<DirectPlayProfile container="mp3,flac,m4a,wma" type="Audio" />
</DirectPlayProfiles>
<TranscodingProfiles>
<TranscodingProfile container="mp3" type="Audio" audioCodec="mp3" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" />
<TranscodingProfile container="ts" type="Video" videoCodec="h264" audioCodec="aac" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" />
<TranscodingProfile container="jpeg" type="Photo" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" />
<TranscodingProfile container="mp3" type="Audio" audioCodec="mp3" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" minSegments="0" segmentLength="0" breakOnNonKeyFrames="false" />
<TranscodingProfile container="ts" type="Video" videoCodec="h264" audioCodec="aac" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" minSegments="0" segmentLength="0" breakOnNonKeyFrames="false" />
<TranscodingProfile container="jpeg" type="Photo" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" minSegments="0" segmentLength="0" breakOnNonKeyFrames="false" />
</TranscodingProfiles>
<ContainerProfiles />
<CodecProfiles />

View file

@ -22,8 +22,8 @@
<MaxAlbumArtHeight>480</MaxAlbumArtHeight>
<MaxIconWidth>48</MaxIconWidth>
<MaxIconHeight>48</MaxIconHeight>
<MaxStreamingBitrate>24000000</MaxStreamingBitrate>
<MaxStaticBitrate>24000000</MaxStaticBitrate>
<MaxStreamingBitrate>30000000</MaxStreamingBitrate>
<MaxStaticBitrate>30000000</MaxStaticBitrate>
<MusicStreamingTranscodingBitrate>192000</MusicStreamingTranscodingBitrate>
<MaxStaticMusicBitrate xsi:nil="true" />
<XDlnaDoc>DMS-1.50</XDlnaDoc>
@ -39,8 +39,8 @@
<DirectPlayProfile container="jpeg,jpg" type="Photo" />
</DirectPlayProfiles>
<TranscodingProfiles>
<TranscodingProfile container="mpeg" type="Video" videoCodec="mpeg2video" audioCodec="mp2" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" />
<TranscodingProfile container="jpeg" type="Photo" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" />
<TranscodingProfile container="mpeg" type="Video" videoCodec="mpeg2video" audioCodec="mp2" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" minSegments="0" segmentLength="0" breakOnNonKeyFrames="false" />
<TranscodingProfile container="jpeg" type="Photo" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" minSegments="0" segmentLength="0" breakOnNonKeyFrames="false" />
</TranscodingProfiles>
<ContainerProfiles />
<CodecProfiles>

View file

@ -23,8 +23,8 @@
<MaxAlbumArtHeight>480</MaxAlbumArtHeight>
<MaxIconWidth>48</MaxIconWidth>
<MaxIconHeight>48</MaxIconHeight>
<MaxStreamingBitrate>24000000</MaxStreamingBitrate>
<MaxStaticBitrate>24000000</MaxStaticBitrate>
<MaxStreamingBitrate>30000000</MaxStreamingBitrate>
<MaxStaticBitrate>30000000</MaxStaticBitrate>
<MusicStreamingTranscodingBitrate>192000</MusicStreamingTranscodingBitrate>
<MaxStaticMusicBitrate xsi:nil="true" />
<XDlnaDoc>DMS-1.50</XDlnaDoc>
@ -41,9 +41,9 @@
<DirectPlayProfile container="jpeg" type="Photo" />
</DirectPlayProfiles>
<TranscodingProfiles>
<TranscodingProfile container="mp3" type="Audio" audioCodec="mp3" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" />
<TranscodingProfile container="mp4" type="Video" videoCodec="h264" audioCodec="aac" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" />
<TranscodingProfile container="jpeg" type="Photo" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" />
<TranscodingProfile container="mp3" type="Audio" audioCodec="mp3" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" minSegments="0" segmentLength="0" breakOnNonKeyFrames="false" />
<TranscodingProfile container="mp4" type="Video" videoCodec="h264" audioCodec="aac" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" minSegments="0" segmentLength="0" breakOnNonKeyFrames="false" />
<TranscodingProfile container="jpeg" type="Photo" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" minSegments="0" segmentLength="0" breakOnNonKeyFrames="false" />
</TranscodingProfiles>
<ContainerProfiles />
<CodecProfiles>

View file

@ -23,7 +23,7 @@
<MaxIconWidth>48</MaxIconWidth>
<MaxIconHeight>48</MaxIconHeight>
<MaxStreamingBitrate>100000000</MaxStreamingBitrate>
<MaxStaticBitrate>24000000</MaxStaticBitrate>
<MaxStaticBitrate>30000000</MaxStaticBitrate>
<MusicStreamingTranscodingBitrate>1280000</MusicStreamingTranscodingBitrate>
<MaxStaticMusicBitrate xsi:nil="true" />
<XDlnaDoc>DMS-1.50</XDlnaDoc>
@ -40,9 +40,9 @@
<DirectPlayProfile container="" type="Photo" />
</DirectPlayProfiles>
<TranscodingProfiles>
<TranscodingProfile container="mp3" type="Audio" audioCodec="mp3" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" />
<TranscodingProfile container="ts" type="Video" videoCodec="h264" audioCodec="aac" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" />
<TranscodingProfile container="jpeg" type="Photo" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" />
<TranscodingProfile container="mp3" type="Audio" audioCodec="mp3" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" minSegments="0" segmentLength="0" breakOnNonKeyFrames="false" />
<TranscodingProfile container="ts" type="Video" videoCodec="h264" audioCodec="aac" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" minSegments="0" segmentLength="0" breakOnNonKeyFrames="false" />
<TranscodingProfile container="jpeg" type="Photo" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" minSegments="0" segmentLength="0" breakOnNonKeyFrames="false" />
</TranscodingProfiles>
<ContainerProfiles />
<CodecProfiles />

View file

@ -22,8 +22,8 @@
<MaxAlbumArtHeight>480</MaxAlbumArtHeight>
<MaxIconWidth>48</MaxIconWidth>
<MaxIconHeight>48</MaxIconHeight>
<MaxStreamingBitrate>24000000</MaxStreamingBitrate>
<MaxStaticBitrate>24000000</MaxStaticBitrate>
<MaxStreamingBitrate>30000000</MaxStreamingBitrate>
<MaxStaticBitrate>30000000</MaxStaticBitrate>
<MusicStreamingTranscodingBitrate>192000</MusicStreamingTranscodingBitrate>
<MaxStaticMusicBitrate xsi:nil="true" />
<XDlnaDoc>DMS-1.50</XDlnaDoc>
@ -42,9 +42,9 @@
<DirectPlayProfile container="jpeg" type="Photo" />
</DirectPlayProfiles>
<TranscodingProfiles>
<TranscodingProfile container="mp3" type="Audio" audioCodec="mp3" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" />
<TranscodingProfile container="ts" type="Video" videoCodec="h264" audioCodec="ac3,aac,mp3" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" />
<TranscodingProfile container="jpeg" type="Photo" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" />
<TranscodingProfile container="mp3" type="Audio" audioCodec="mp3" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" minSegments="0" segmentLength="0" breakOnNonKeyFrames="false" />
<TranscodingProfile container="ts" type="Video" videoCodec="h264" audioCodec="ac3,aac,mp3" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" minSegments="0" segmentLength="0" breakOnNonKeyFrames="false" />
<TranscodingProfile container="jpeg" type="Photo" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" minSegments="0" segmentLength="0" breakOnNonKeyFrames="false" />
</TranscodingProfiles>
<ContainerProfiles>
<ContainerProfile type="Photo">

View file

@ -20,8 +20,8 @@
<MaxAlbumArtHeight>480</MaxAlbumArtHeight>
<MaxIconWidth>48</MaxIconWidth>
<MaxIconHeight>48</MaxIconHeight>
<MaxStreamingBitrate>24000000</MaxStreamingBitrate>
<MaxStaticBitrate>24000000</MaxStaticBitrate>
<MaxStreamingBitrate>30000000</MaxStreamingBitrate>
<MaxStaticBitrate>30000000</MaxStaticBitrate>
<MusicStreamingTranscodingBitrate>192000</MusicStreamingTranscodingBitrate>
<MaxStaticMusicBitrate xsi:nil="true" />
<XDlnaDoc>DMS-1.50</XDlnaDoc>
@ -37,9 +37,9 @@
<DirectPlayProfile container="avi,mp4,mkv,ts,m4v" type="Video" />
</DirectPlayProfiles>
<TranscodingProfiles>
<TranscodingProfile container="mp3" type="Audio" audioCodec="mp3" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" />
<TranscodingProfile container="ts" type="Video" videoCodec="h264" audioCodec="aac" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" />
<TranscodingProfile container="jpeg" type="Photo" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" />
<TranscodingProfile container="mp3" type="Audio" audioCodec="mp3" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" minSegments="0" segmentLength="0" breakOnNonKeyFrames="false" />
<TranscodingProfile container="ts" type="Video" videoCodec="h264" audioCodec="aac" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" minSegments="0" segmentLength="0" breakOnNonKeyFrames="false" />
<TranscodingProfile container="jpeg" type="Photo" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" minSegments="0" segmentLength="0" breakOnNonKeyFrames="false" />
</TranscodingProfiles>
<ContainerProfiles />
<CodecProfiles />

View file

@ -22,8 +22,8 @@
<MaxAlbumArtHeight>480</MaxAlbumArtHeight>
<MaxIconWidth>48</MaxIconWidth>
<MaxIconHeight>48</MaxIconHeight>
<MaxStreamingBitrate>24000000</MaxStreamingBitrate>
<MaxStaticBitrate>24000000</MaxStaticBitrate>
<MaxStreamingBitrate>30000000</MaxStreamingBitrate>
<MaxStaticBitrate>30000000</MaxStaticBitrate>
<MusicStreamingTranscodingBitrate>192000</MusicStreamingTranscodingBitrate>
<MaxStaticMusicBitrate xsi:nil="true" />
<XDlnaDoc>DMS-1.50</XDlnaDoc>
@ -43,9 +43,9 @@
<DirectPlayProfile container="ogg" audioCodec="vorbis" type="Audio" />
</DirectPlayProfiles>
<TranscodingProfiles>
<TranscodingProfile container="mp3" type="Audio" audioCodec="mp3" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" />
<TranscodingProfile container="ts" type="Video" videoCodec="h264" audioCodec="aac" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" />
<TranscodingProfile container="jpeg" type="Photo" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" />
<TranscodingProfile container="mp3" type="Audio" audioCodec="mp3" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" minSegments="0" segmentLength="0" breakOnNonKeyFrames="false" />
<TranscodingProfile container="ts" type="Video" videoCodec="h264" audioCodec="aac" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" minSegments="0" segmentLength="0" breakOnNonKeyFrames="false" />
<TranscodingProfile container="jpeg" type="Photo" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" minSegments="0" segmentLength="0" breakOnNonKeyFrames="false" />
</TranscodingProfiles>
<ContainerProfiles />
<CodecProfiles />

View file

@ -23,8 +23,8 @@
<MaxAlbumArtHeight>480</MaxAlbumArtHeight>
<MaxIconWidth>48</MaxIconWidth>
<MaxIconHeight>48</MaxIconHeight>
<MaxStreamingBitrate>24000000</MaxStreamingBitrate>
<MaxStaticBitrate>24000000</MaxStaticBitrate>
<MaxStreamingBitrate>30000000</MaxStreamingBitrate>
<MaxStaticBitrate>30000000</MaxStaticBitrate>
<MusicStreamingTranscodingBitrate>192000</MusicStreamingTranscodingBitrate>
<MaxStaticMusicBitrate xsi:nil="true" />
<XDlnaDoc>DMS-1.50</XDlnaDoc>
@ -50,9 +50,9 @@
<DirectPlayProfile container="jpeg" type="Photo" />
</DirectPlayProfiles>
<TranscodingProfiles>
<TranscodingProfile container="mp3" type="Audio" audioCodec="mp3" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" />
<TranscodingProfile container="ts" type="Video" videoCodec="h264" audioCodec="ac3" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" />
<TranscodingProfile container="jpeg" type="Photo" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" />
<TranscodingProfile container="mp3" type="Audio" audioCodec="mp3" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" minSegments="0" segmentLength="0" breakOnNonKeyFrames="false" />
<TranscodingProfile container="ts" type="Video" videoCodec="h264" audioCodec="ac3" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" minSegments="0" segmentLength="0" breakOnNonKeyFrames="false" />
<TranscodingProfile container="jpeg" type="Photo" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" minSegments="0" segmentLength="0" breakOnNonKeyFrames="false" />
</TranscodingProfiles>
<ContainerProfiles>
<ContainerProfile type="Photo">

View file

@ -16,8 +16,8 @@
<MaxAlbumArtHeight>480</MaxAlbumArtHeight>
<MaxIconWidth>48</MaxIconWidth>
<MaxIconHeight>48</MaxIconHeight>
<MaxStreamingBitrate>24000000</MaxStreamingBitrate>
<MaxStaticBitrate>24000000</MaxStaticBitrate>
<MaxStreamingBitrate>30000000</MaxStreamingBitrate>
<MaxStaticBitrate>30000000</MaxStaticBitrate>
<MusicStreamingTranscodingBitrate>192000</MusicStreamingTranscodingBitrate>
<MaxStaticMusicBitrate xsi:nil="true" />
<XDlnaDoc>DMS-1.50</XDlnaDoc>
@ -38,9 +38,9 @@
<DirectPlayProfile container="jpeg,gif,bmp,png" type="Photo" />
</DirectPlayProfiles>
<TranscodingProfiles>
<TranscodingProfile container="mp3" type="Audio" audioCodec="mp3" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" />
<TranscodingProfile container="mp4" type="Video" videoCodec="h264" audioCodec="aac" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" />
<TranscodingProfile container="jpeg" type="Photo" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" />
<TranscodingProfile container="mp3" type="Audio" audioCodec="mp3" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" minSegments="0" segmentLength="0" breakOnNonKeyFrames="false" />
<TranscodingProfile container="mp4" type="Video" videoCodec="h264" audioCodec="aac" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" minSegments="0" segmentLength="0" breakOnNonKeyFrames="false" />
<TranscodingProfile container="jpeg" type="Photo" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" minSegments="0" segmentLength="0" breakOnNonKeyFrames="false" />
</TranscodingProfiles>
<ContainerProfiles />
<CodecProfiles>

View file

@ -22,8 +22,8 @@
<MaxAlbumArtHeight>480</MaxAlbumArtHeight>
<MaxIconWidth>48</MaxIconWidth>
<MaxIconHeight>48</MaxIconHeight>
<MaxStreamingBitrate>24000000</MaxStreamingBitrate>
<MaxStaticBitrate>24000000</MaxStaticBitrate>
<MaxStreamingBitrate>30000000</MaxStreamingBitrate>
<MaxStaticBitrate>30000000</MaxStaticBitrate>
<MusicStreamingTranscodingBitrate>192000</MusicStreamingTranscodingBitrate>
<MaxStaticMusicBitrate xsi:nil="true" />
<XDlnaDoc>DMS-1.50</XDlnaDoc>
@ -50,9 +50,9 @@
<DirectPlayProfile container="jpeg" type="Photo" />
</DirectPlayProfiles>
<TranscodingProfiles>
<TranscodingProfile container="mp3" type="Audio" audioCodec="mp3" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" />
<TranscodingProfile container="ts" type="Video" videoCodec="h264" audioCodec="ac3" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" />
<TranscodingProfile container="jpeg" type="Photo" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" />
<TranscodingProfile container="mp3" type="Audio" audioCodec="mp3" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" minSegments="0" segmentLength="0" breakOnNonKeyFrames="false" />
<TranscodingProfile container="ts" type="Video" videoCodec="h264" audioCodec="ac3" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" minSegments="0" segmentLength="0" breakOnNonKeyFrames="false" />
<TranscodingProfile container="jpeg" type="Photo" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" minSegments="0" segmentLength="0" breakOnNonKeyFrames="false" />
</TranscodingProfiles>
<ContainerProfiles>
<ContainerProfile type="Photo">

View file

@ -22,8 +22,8 @@
<MaxAlbumArtHeight>480</MaxAlbumArtHeight>
<MaxIconWidth>48</MaxIconWidth>
<MaxIconHeight>48</MaxIconHeight>
<MaxStreamingBitrate>24000000</MaxStreamingBitrate>
<MaxStaticBitrate>24000000</MaxStaticBitrate>
<MaxStreamingBitrate>30000000</MaxStreamingBitrate>
<MaxStaticBitrate>30000000</MaxStaticBitrate>
<MusicStreamingTranscodingBitrate>192000</MusicStreamingTranscodingBitrate>
<MaxStaticMusicBitrate xsi:nil="true" />
<XDlnaDoc>DMS-1.50</XDlnaDoc>
@ -42,9 +42,9 @@
<DirectPlayProfile container="mp3,wav" type="Audio" />
</DirectPlayProfiles>
<TranscodingProfiles>
<TranscodingProfile container="mp3" type="Audio" audioCodec="mp3" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" />
<TranscodingProfile container="ts" type="Video" videoCodec="h264" audioCodec="ac3,aac,mp3,dts,dca" estimateContentLength="false" enableMpegtsM2TsMode="true" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" />
<TranscodingProfile container="jpeg" type="Photo" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" />
<TranscodingProfile container="mp3" type="Audio" audioCodec="mp3" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" minSegments="0" segmentLength="0" breakOnNonKeyFrames="false" />
<TranscodingProfile container="ts" type="Video" videoCodec="h264" audioCodec="ac3,aac,mp3,dts,dca" estimateContentLength="false" enableMpegtsM2TsMode="true" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" minSegments="0" segmentLength="0" breakOnNonKeyFrames="false" />
<TranscodingProfile container="jpeg" type="Photo" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" minSegments="0" segmentLength="0" breakOnNonKeyFrames="false" />
</TranscodingProfiles>
<ContainerProfiles />
<CodecProfiles />

View file

@ -26,8 +26,8 @@
<MaxAlbumArtHeight>480</MaxAlbumArtHeight>
<MaxIconWidth>48</MaxIconWidth>
<MaxIconHeight>48</MaxIconHeight>
<MaxStreamingBitrate>24000000</MaxStreamingBitrate>
<MaxStaticBitrate>24000000</MaxStaticBitrate>
<MaxStreamingBitrate>30000000</MaxStreamingBitrate>
<MaxStaticBitrate>30000000</MaxStaticBitrate>
<MusicStreamingTranscodingBitrate>192000</MusicStreamingTranscodingBitrate>
<MaxStaticMusicBitrate xsi:nil="true" />
<XDlnaDoc>DMS-1.50</XDlnaDoc>
@ -52,9 +52,9 @@
<DirectPlayProfile container="jpeg,png,gif" type="Photo" />
</DirectPlayProfiles>
<TranscodingProfiles>
<TranscodingProfile container="mp3" type="Audio" audioCodec="mp3" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" />
<TranscodingProfile container="mkv" type="Video" videoCodec="h264" audioCodec="ac3,aac,mp3" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" />
<TranscodingProfile container="jpeg" type="Photo" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" />
<TranscodingProfile container="mp3" type="Audio" audioCodec="mp3" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" minSegments="0" segmentLength="0" breakOnNonKeyFrames="false" />
<TranscodingProfile container="mkv" type="Video" videoCodec="h264" audioCodec="ac3,aac,mp3" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" minSegments="0" segmentLength="0" breakOnNonKeyFrames="false" />
<TranscodingProfile container="jpeg" type="Photo" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" minSegments="0" segmentLength="0" breakOnNonKeyFrames="false" />
</TranscodingProfiles>
<ContainerProfiles>
<ContainerProfile type="Photo">

View file

@ -26,8 +26,8 @@
<MaxAlbumArtHeight>480</MaxAlbumArtHeight>
<MaxIconWidth>48</MaxIconWidth>
<MaxIconHeight>48</MaxIconHeight>
<MaxStreamingBitrate>24000000</MaxStreamingBitrate>
<MaxStaticBitrate>24000000</MaxStaticBitrate>
<MaxStreamingBitrate>30000000</MaxStreamingBitrate>
<MaxStaticBitrate>30000000</MaxStaticBitrate>
<MusicStreamingTranscodingBitrate>192000</MusicStreamingTranscodingBitrate>
<MaxStaticMusicBitrate xsi:nil="true" />
<XDlnaDoc>DMS-1.50</XDlnaDoc>
@ -52,9 +52,9 @@
<DirectPlayProfile container="jpeg,png,gif" type="Photo" />
</DirectPlayProfiles>
<TranscodingProfiles>
<TranscodingProfile container="mp3" type="Audio" audioCodec="mp3" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" />
<TranscodingProfile container="mkv" type="Video" videoCodec="h264" audioCodec="ac3,aac,mp3" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" />
<TranscodingProfile container="jpeg" type="Photo" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" />
<TranscodingProfile container="mp3" type="Audio" audioCodec="mp3" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" minSegments="0" segmentLength="0" breakOnNonKeyFrames="false" />
<TranscodingProfile container="mkv" type="Video" videoCodec="h264" audioCodec="ac3,aac,mp3" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" minSegments="0" segmentLength="0" breakOnNonKeyFrames="false" />
<TranscodingProfile container="jpeg" type="Photo" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" minSegments="0" segmentLength="0" breakOnNonKeyFrames="false" />
</TranscodingProfiles>
<ContainerProfiles>
<ContainerProfile type="Photo">

View file

@ -24,8 +24,8 @@
<MaxAlbumArtHeight>480</MaxAlbumArtHeight>
<MaxIconWidth>48</MaxIconWidth>
<MaxIconHeight>48</MaxIconHeight>
<MaxStreamingBitrate>24000000</MaxStreamingBitrate>
<MaxStaticBitrate>24000000</MaxStaticBitrate>
<MaxStreamingBitrate>30000000</MaxStreamingBitrate>
<MaxStaticBitrate>30000000</MaxStaticBitrate>
<MusicStreamingTranscodingBitrate>192000</MusicStreamingTranscodingBitrate>
<MaxStaticMusicBitrate xsi:nil="true" />
<XDlnaDoc>DMS-1.50</XDlnaDoc>
@ -50,9 +50,9 @@
<DirectPlayProfile container="jpeg,png,gif" type="Photo" />
</DirectPlayProfiles>
<TranscodingProfiles>
<TranscodingProfile container="mp3" type="Audio" audioCodec="mp3" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" />
<TranscodingProfile container="mkv" type="Video" videoCodec="h264" audioCodec="ac3,aac,mp3" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" />
<TranscodingProfile container="jpeg" type="Photo" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" />
<TranscodingProfile container="mp3" type="Audio" audioCodec="mp3" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" minSegments="0" segmentLength="0" breakOnNonKeyFrames="false" />
<TranscodingProfile container="mkv" type="Video" videoCodec="h264" audioCodec="ac3,aac,mp3" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" minSegments="0" segmentLength="0" breakOnNonKeyFrames="false" />
<TranscodingProfile container="jpeg" type="Photo" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" minSegments="0" segmentLength="0" breakOnNonKeyFrames="false" />
</TranscodingProfiles>
<ContainerProfiles>
<ContainerProfile type="Photo">

View file

@ -24,8 +24,8 @@
<MaxAlbumArtHeight>480</MaxAlbumArtHeight>
<MaxIconWidth>48</MaxIconWidth>
<MaxIconHeight>48</MaxIconHeight>
<MaxStreamingBitrate>24000000</MaxStreamingBitrate>
<MaxStaticBitrate>24000000</MaxStaticBitrate>
<MaxStreamingBitrate>30000000</MaxStreamingBitrate>
<MaxStaticBitrate>30000000</MaxStaticBitrate>
<MusicStreamingTranscodingBitrate>192000</MusicStreamingTranscodingBitrate>
<MaxStaticMusicBitrate xsi:nil="true" />
<XDlnaDoc>DMS-1.50</XDlnaDoc>
@ -50,9 +50,9 @@
<DirectPlayProfile container="jpeg,png,gif" type="Photo" />
</DirectPlayProfiles>
<TranscodingProfiles>
<TranscodingProfile container="mp3" type="Audio" audioCodec="mp3" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" />
<TranscodingProfile container="mkv" type="Video" videoCodec="h264" audioCodec="ac3,aac,mp3" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" />
<TranscodingProfile container="jpeg" type="Photo" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" />
<TranscodingProfile container="mp3" type="Audio" audioCodec="mp3" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" minSegments="0" segmentLength="0" breakOnNonKeyFrames="false" />
<TranscodingProfile container="mkv" type="Video" videoCodec="h264" audioCodec="ac3,aac,mp3" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" minSegments="0" segmentLength="0" breakOnNonKeyFrames="false" />
<TranscodingProfile container="jpeg" type="Photo" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" minSegments="0" segmentLength="0" breakOnNonKeyFrames="false" />
</TranscodingProfiles>
<ContainerProfiles>
<ContainerProfile type="Photo">

View file

@ -24,8 +24,8 @@
<MaxAlbumArtHeight>480</MaxAlbumArtHeight>
<MaxIconWidth>48</MaxIconWidth>
<MaxIconHeight>48</MaxIconHeight>
<MaxStreamingBitrate>24000000</MaxStreamingBitrate>
<MaxStaticBitrate>24000000</MaxStaticBitrate>
<MaxStreamingBitrate>30000000</MaxStreamingBitrate>
<MaxStaticBitrate>30000000</MaxStaticBitrate>
<MusicStreamingTranscodingBitrate>192000</MusicStreamingTranscodingBitrate>
<MaxStaticMusicBitrate xsi:nil="true" />
<XDlnaDoc>DMS-1.50</XDlnaDoc>
@ -47,9 +47,9 @@
<DirectPlayProfile container="jpeg" type="Photo" />
</DirectPlayProfiles>
<TranscodingProfiles>
<TranscodingProfile container="mp3" type="Audio" audioCodec="mp3" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" />
<TranscodingProfile container="ts" type="Video" videoCodec="mpeg2video" audioCodec="ac3" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" />
<TranscodingProfile container="jpeg" type="Photo" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" />
<TranscodingProfile container="mp3" type="Audio" audioCodec="mp3" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" minSegments="0" segmentLength="0" breakOnNonKeyFrames="false" />
<TranscodingProfile container="ts" type="Video" videoCodec="mpeg2video" audioCodec="ac3" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" minSegments="0" segmentLength="0" breakOnNonKeyFrames="false" />
<TranscodingProfile container="jpeg" type="Photo" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" minSegments="0" segmentLength="0" breakOnNonKeyFrames="false" />
</TranscodingProfiles>
<ContainerProfiles>
<ContainerProfile type="Photo">

View file

@ -23,8 +23,8 @@
<MaxAlbumArtHeight>480</MaxAlbumArtHeight>
<MaxIconWidth>48</MaxIconWidth>
<MaxIconHeight>48</MaxIconHeight>
<MaxStreamingBitrate>24000000</MaxStreamingBitrate>
<MaxStaticBitrate>24000000</MaxStaticBitrate>
<MaxStreamingBitrate>30000000</MaxStreamingBitrate>
<MaxStaticBitrate>30000000</MaxStaticBitrate>
<MusicStreamingTranscodingBitrate>192000</MusicStreamingTranscodingBitrate>
<MaxStaticMusicBitrate xsi:nil="true" />
<XDlnaDoc>DMS-1.50</XDlnaDoc>
@ -45,9 +45,9 @@
<DirectPlayProfile container="mp3" audioCodec="mp3" type="Audio" />
</DirectPlayProfiles>
<TranscodingProfiles>
<TranscodingProfile container="mp3" type="Audio" audioCodec="mp3" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" />
<TranscodingProfile container="ts" type="Video" videoCodec="h264" audioCodec="ac3" estimateContentLength="false" enableMpegtsM2TsMode="true" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" />
<TranscodingProfile container="jpeg" type="Photo" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" />
<TranscodingProfile container="mp3" type="Audio" audioCodec="mp3" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" minSegments="0" segmentLength="0" breakOnNonKeyFrames="false" />
<TranscodingProfile container="ts" type="Video" videoCodec="h264" audioCodec="ac3" estimateContentLength="false" enableMpegtsM2TsMode="true" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" minSegments="0" segmentLength="0" breakOnNonKeyFrames="false" />
<TranscodingProfile container="jpeg" type="Photo" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" minSegments="0" segmentLength="0" breakOnNonKeyFrames="false" />
</TranscodingProfiles>
<ContainerProfiles>
<ContainerProfile type="Photo">

View file

@ -23,8 +23,8 @@
<MaxAlbumArtHeight>480</MaxAlbumArtHeight>
<MaxIconWidth>48</MaxIconWidth>
<MaxIconHeight>48</MaxIconHeight>
<MaxStreamingBitrate>24000000</MaxStreamingBitrate>
<MaxStaticBitrate>24000000</MaxStaticBitrate>
<MaxStreamingBitrate>30000000</MaxStreamingBitrate>
<MaxStaticBitrate>30000000</MaxStaticBitrate>
<MusicStreamingTranscodingBitrate>192000</MusicStreamingTranscodingBitrate>
<MaxStaticMusicBitrate xsi:nil="true" />
<XDlnaDoc>DMS-1.50</XDlnaDoc>
@ -48,9 +48,9 @@
<DirectPlayProfile container="asf" audioCodec="wmav2,wmapro,wmavoice" type="Audio" />
</DirectPlayProfiles>
<TranscodingProfiles>
<TranscodingProfile container="mp3" type="Audio" audioCodec="mp3" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" />
<TranscodingProfile container="ts" type="Video" videoCodec="h264" audioCodec="ac3" estimateContentLength="false" enableMpegtsM2TsMode="true" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" />
<TranscodingProfile container="jpeg" type="Photo" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" />
<TranscodingProfile container="mp3" type="Audio" audioCodec="mp3" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" minSegments="0" segmentLength="0" breakOnNonKeyFrames="false" />
<TranscodingProfile container="ts" type="Video" videoCodec="h264" audioCodec="ac3" estimateContentLength="false" enableMpegtsM2TsMode="true" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" minSegments="0" segmentLength="0" breakOnNonKeyFrames="false" />
<TranscodingProfile container="jpeg" type="Photo" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" minSegments="0" segmentLength="0" breakOnNonKeyFrames="false" />
</TranscodingProfiles>
<ContainerProfiles>
<ContainerProfile type="Photo">

View file

@ -23,8 +23,8 @@
<MaxAlbumArtHeight>480</MaxAlbumArtHeight>
<MaxIconWidth>48</MaxIconWidth>
<MaxIconHeight>48</MaxIconHeight>
<MaxStreamingBitrate>24000000</MaxStreamingBitrate>
<MaxStaticBitrate>24000000</MaxStaticBitrate>
<MaxStreamingBitrate>30000000</MaxStreamingBitrate>
<MaxStaticBitrate>30000000</MaxStaticBitrate>
<MusicStreamingTranscodingBitrate>192000</MusicStreamingTranscodingBitrate>
<MaxStaticMusicBitrate xsi:nil="true" />
<XDlnaDoc>DMS-1.50</XDlnaDoc>
@ -50,9 +50,9 @@
<DirectPlayProfile container="jpeg" type="Photo" />
</DirectPlayProfiles>
<TranscodingProfiles>
<TranscodingProfile container="mp3" type="Audio" audioCodec="mp3" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" />
<TranscodingProfile container="ts" type="Video" videoCodec="h264" audioCodec="ac3" estimateContentLength="false" enableMpegtsM2TsMode="true" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" />
<TranscodingProfile container="jpeg" type="Photo" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" />
<TranscodingProfile container="mp3" type="Audio" audioCodec="mp3" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" minSegments="0" segmentLength="0" breakOnNonKeyFrames="false" />
<TranscodingProfile container="ts" type="Video" videoCodec="h264" audioCodec="ac3" estimateContentLength="false" enableMpegtsM2TsMode="true" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" minSegments="0" segmentLength="0" breakOnNonKeyFrames="false" />
<TranscodingProfile container="jpeg" type="Photo" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" minSegments="0" segmentLength="0" breakOnNonKeyFrames="false" />
</TranscodingProfiles>
<ContainerProfiles>
<ContainerProfile type="Photo">

View file

@ -23,8 +23,8 @@
<MaxAlbumArtHeight>480</MaxAlbumArtHeight>
<MaxIconWidth>48</MaxIconWidth>
<MaxIconHeight>48</MaxIconHeight>
<MaxStreamingBitrate>24000000</MaxStreamingBitrate>
<MaxStaticBitrate>24000000</MaxStaticBitrate>
<MaxStreamingBitrate>30000000</MaxStreamingBitrate>
<MaxStaticBitrate>30000000</MaxStaticBitrate>
<MusicStreamingTranscodingBitrate>192000</MusicStreamingTranscodingBitrate>
<MaxStaticMusicBitrate xsi:nil="true" />
<XDlnaDoc>DMS-1.50</XDlnaDoc>
@ -55,9 +55,9 @@
<DirectPlayProfile container="jpeg" type="Photo" />
</DirectPlayProfiles>
<TranscodingProfiles>
<TranscodingProfile container="mp3" type="Audio" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" />
<TranscodingProfile container="ts" type="Video" videoCodec="h264" audioCodec="ac3" estimateContentLength="false" enableMpegtsM2TsMode="true" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" />
<TranscodingProfile container="jpeg" type="Photo" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" />
<TranscodingProfile container="mp3" type="Audio" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" minSegments="0" segmentLength="0" breakOnNonKeyFrames="false" />
<TranscodingProfile container="ts" type="Video" videoCodec="h264" audioCodec="ac3" estimateContentLength="false" enableMpegtsM2TsMode="true" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" minSegments="0" segmentLength="0" breakOnNonKeyFrames="false" />
<TranscodingProfile container="jpeg" type="Photo" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" minSegments="0" segmentLength="0" breakOnNonKeyFrames="false" />
</TranscodingProfiles>
<ContainerProfiles>
<ContainerProfile type="Photo">

View file

@ -23,8 +23,8 @@
<MaxAlbumArtHeight>480</MaxAlbumArtHeight>
<MaxIconWidth>48</MaxIconWidth>
<MaxIconHeight>48</MaxIconHeight>
<MaxStreamingBitrate>24000000</MaxStreamingBitrate>
<MaxStaticBitrate>24000000</MaxStaticBitrate>
<MaxStreamingBitrate>30000000</MaxStreamingBitrate>
<MaxStaticBitrate>30000000</MaxStaticBitrate>
<MusicStreamingTranscodingBitrate>192000</MusicStreamingTranscodingBitrate>
<MaxStaticMusicBitrate xsi:nil="true" />
<XDlnaDoc>DMS-1.50</XDlnaDoc>
@ -55,9 +55,9 @@
<DirectPlayProfile container="jpeg" type="Photo" />
</DirectPlayProfiles>
<TranscodingProfiles>
<TranscodingProfile container="mp3" type="Audio" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" />
<TranscodingProfile container="ts" type="Video" videoCodec="h264" audioCodec="ac3" estimateContentLength="false" enableMpegtsM2TsMode="true" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" />
<TranscodingProfile container="jpeg" type="Photo" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" />
<TranscodingProfile container="mp3" type="Audio" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" minSegments="0" segmentLength="0" breakOnNonKeyFrames="false" />
<TranscodingProfile container="ts" type="Video" videoCodec="h264" audioCodec="ac3" estimateContentLength="false" enableMpegtsM2TsMode="true" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" minSegments="0" segmentLength="0" breakOnNonKeyFrames="false" />
<TranscodingProfile container="jpeg" type="Photo" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" minSegments="0" segmentLength="0" breakOnNonKeyFrames="false" />
</TranscodingProfiles>
<ContainerProfiles>
<ContainerProfile type="Photo">

View file

@ -23,8 +23,8 @@
<MaxAlbumArtHeight>480</MaxAlbumArtHeight>
<MaxIconWidth>48</MaxIconWidth>
<MaxIconHeight>48</MaxIconHeight>
<MaxStreamingBitrate>24000000</MaxStreamingBitrate>
<MaxStaticBitrate>24000000</MaxStaticBitrate>
<MaxStreamingBitrate>30000000</MaxStreamingBitrate>
<MaxStaticBitrate>30000000</MaxStaticBitrate>
<MusicStreamingTranscodingBitrate>192000</MusicStreamingTranscodingBitrate>
<MaxStaticMusicBitrate xsi:nil="true" />
<XDlnaDoc>DMS-1.50</XDlnaDoc>
@ -45,9 +45,9 @@
<DirectPlayProfile container="jpeg,png,gif,bmp,tiff" type="Photo" />
</DirectPlayProfiles>
<TranscodingProfiles>
<TranscodingProfile container="mp3" type="Audio" audioCodec="mp3" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" />
<TranscodingProfile container="ts" type="Video" videoCodec="h264" audioCodec="ac3,aac,mp3" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" />
<TranscodingProfile container="jpeg" type="Photo" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" />
<TranscodingProfile container="mp3" type="Audio" audioCodec="mp3" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" minSegments="0" segmentLength="0" breakOnNonKeyFrames="false" />
<TranscodingProfile container="ts" type="Video" videoCodec="h264" audioCodec="ac3,aac,mp3" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" minSegments="0" segmentLength="0" breakOnNonKeyFrames="false" />
<TranscodingProfile container="jpeg" type="Photo" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" minSegments="0" segmentLength="0" breakOnNonKeyFrames="false" />
</TranscodingProfiles>
<ContainerProfiles>
<ContainerProfile type="Photo">

View file

@ -23,8 +23,8 @@
<MaxAlbumArtHeight>480</MaxAlbumArtHeight>
<MaxIconWidth>48</MaxIconWidth>
<MaxIconHeight>48</MaxIconHeight>
<MaxStreamingBitrate>24000000</MaxStreamingBitrate>
<MaxStaticBitrate>24000000</MaxStaticBitrate>
<MaxStreamingBitrate>30000000</MaxStreamingBitrate>
<MaxStaticBitrate>30000000</MaxStaticBitrate>
<MusicStreamingTranscodingBitrate>192000</MusicStreamingTranscodingBitrate>
<MaxStaticMusicBitrate xsi:nil="true" />
<XDlnaDoc>DMS-1.50</XDlnaDoc>
@ -45,9 +45,9 @@
<DirectPlayProfile container="jpeg,png,gif,bmp,tiff" type="Photo" />
</DirectPlayProfiles>
<TranscodingProfiles>
<TranscodingProfile container="mp3" type="Audio" audioCodec="mp3" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Bytes" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" />
<TranscodingProfile container="ts" type="Video" videoCodec="h264" audioCodec="mp3" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" />
<TranscodingProfile container="jpeg" type="Photo" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" />
<TranscodingProfile container="mp3" type="Audio" audioCodec="mp3" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Bytes" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" minSegments="0" segmentLength="0" breakOnNonKeyFrames="false" />
<TranscodingProfile container="ts" type="Video" videoCodec="h264" audioCodec="mp3" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" minSegments="0" segmentLength="0" breakOnNonKeyFrames="false" />
<TranscodingProfile container="jpeg" type="Photo" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" minSegments="0" segmentLength="0" breakOnNonKeyFrames="false" />
</TranscodingProfiles>
<ContainerProfiles>
<ContainerProfile type="Photo">

View file

@ -22,8 +22,8 @@
<MaxAlbumArtHeight>480</MaxAlbumArtHeight>
<MaxIconWidth>48</MaxIconWidth>
<MaxIconHeight>48</MaxIconHeight>
<MaxStreamingBitrate>24000000</MaxStreamingBitrate>
<MaxStaticBitrate>24000000</MaxStaticBitrate>
<MaxStreamingBitrate>30000000</MaxStreamingBitrate>
<MaxStaticBitrate>30000000</MaxStaticBitrate>
<MusicStreamingTranscodingBitrate>192000</MusicStreamingTranscodingBitrate>
<MaxStaticMusicBitrate xsi:nil="true" />
<XDlnaDoc>DMS-1.50</XDlnaDoc>
@ -40,9 +40,9 @@
<DirectPlayProfile container="" type="Photo" />
</DirectPlayProfiles>
<TranscodingProfiles>
<TranscodingProfile container="mp3" type="Audio" audioCodec="mp3" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" />
<TranscodingProfile container="ts" type="Video" videoCodec="h264" audioCodec="aac" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" />
<TranscodingProfile container="jpeg" type="Photo" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" />
<TranscodingProfile container="mp3" type="Audio" audioCodec="mp3" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" minSegments="0" segmentLength="0" breakOnNonKeyFrames="false" />
<TranscodingProfile container="ts" type="Video" videoCodec="h264" audioCodec="aac" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" minSegments="0" segmentLength="0" breakOnNonKeyFrames="false" />
<TranscodingProfile container="jpeg" type="Photo" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" minSegments="0" segmentLength="0" breakOnNonKeyFrames="false" />
</TranscodingProfiles>
<ContainerProfiles />
<CodecProfiles />

View file

@ -23,8 +23,8 @@
<MaxAlbumArtHeight>480</MaxAlbumArtHeight>
<MaxIconWidth>48</MaxIconWidth>
<MaxIconHeight>48</MaxIconHeight>
<MaxStreamingBitrate>24000000</MaxStreamingBitrate>
<MaxStaticBitrate>24000000</MaxStaticBitrate>
<MaxStreamingBitrate>30000000</MaxStreamingBitrate>
<MaxStaticBitrate>30000000</MaxStaticBitrate>
<MusicStreamingTranscodingBitrate>192000</MusicStreamingTranscodingBitrate>
<MaxStaticMusicBitrate xsi:nil="true" />
<XDlnaDoc>DMS-1.50</XDlnaDoc>
@ -51,9 +51,9 @@
<DirectPlayProfile container="jpeg,png,gif,bmp,tiff" type="Photo" />
</DirectPlayProfiles>
<TranscodingProfiles>
<TranscodingProfile container="mp3" type="Audio" audioCodec="mp3" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" />
<TranscodingProfile container="ts" type="Video" videoCodec="h264" audioCodec="aac" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" />
<TranscodingProfile container="jpeg" type="Photo" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" />
<TranscodingProfile container="mp3" type="Audio" audioCodec="mp3" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" minSegments="0" segmentLength="0" breakOnNonKeyFrames="false" />
<TranscodingProfile container="ts" type="Video" videoCodec="h264" audioCodec="aac" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" minSegments="0" segmentLength="0" breakOnNonKeyFrames="false" />
<TranscodingProfile container="jpeg" type="Photo" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" minSegments="0" segmentLength="0" breakOnNonKeyFrames="false" />
</TranscodingProfiles>
<ContainerProfiles>
<ContainerProfile type="Photo">

View file

@ -24,8 +24,8 @@
<MaxAlbumArtHeight>480</MaxAlbumArtHeight>
<MaxIconWidth>48</MaxIconWidth>
<MaxIconHeight>48</MaxIconHeight>
<MaxStreamingBitrate>24000000</MaxStreamingBitrate>
<MaxStaticBitrate>24000000</MaxStaticBitrate>
<MaxStreamingBitrate>30000000</MaxStreamingBitrate>
<MaxStaticBitrate>30000000</MaxStaticBitrate>
<MusicStreamingTranscodingBitrate>192000</MusicStreamingTranscodingBitrate>
<MaxStaticMusicBitrate xsi:nil="true" />
<XDlnaDoc>DMS-1.50</XDlnaDoc>
@ -46,9 +46,9 @@
<DirectPlayProfile container="jpeg" type="Photo" />
</DirectPlayProfiles>
<TranscodingProfiles>
<TranscodingProfile container="mp3" type="Audio" audioCodec="mp3" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" />
<TranscodingProfile container="asf" type="Video" videoCodec="wmv2" audioCodec="wmav2" estimateContentLength="true" enableMpegtsM2TsMode="false" transcodeSeekInfo="Bytes" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" />
<TranscodingProfile container="jpeg" type="Photo" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" />
<TranscodingProfile container="mp3" type="Audio" audioCodec="mp3" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" minSegments="0" segmentLength="0" breakOnNonKeyFrames="false" />
<TranscodingProfile container="asf" type="Video" videoCodec="wmv2" audioCodec="wmav2" estimateContentLength="true" enableMpegtsM2TsMode="false" transcodeSeekInfo="Bytes" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" minSegments="0" segmentLength="0" breakOnNonKeyFrames="false" />
<TranscodingProfile container="jpeg" type="Photo" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" minSegments="0" segmentLength="0" breakOnNonKeyFrames="false" />
</TranscodingProfiles>
<ContainerProfiles>
<ContainerProfile type="Video" container="mp4,mov">

View file

@ -23,8 +23,8 @@
<MaxAlbumArtHeight>480</MaxAlbumArtHeight>
<MaxIconWidth>48</MaxIconWidth>
<MaxIconHeight>48</MaxIconHeight>
<MaxStreamingBitrate>24000000</MaxStreamingBitrate>
<MaxStaticBitrate>24000000</MaxStaticBitrate>
<MaxStreamingBitrate>30000000</MaxStreamingBitrate>
<MaxStaticBitrate>30000000</MaxStaticBitrate>
<MusicStreamingTranscodingBitrate>192000</MusicStreamingTranscodingBitrate>
<MaxStaticMusicBitrate xsi:nil="true" />
<XDlnaDoc>DMS-1.50</XDlnaDoc>
@ -46,9 +46,9 @@
<DirectPlayProfile container="jpeg" type="Photo" />
</DirectPlayProfiles>
<TranscodingProfiles>
<TranscodingProfile container="mp3" type="Audio" audioCodec="mp3" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" />
<TranscodingProfile container="jpeg" type="Photo" videoCodec="jpeg" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" />
<TranscodingProfile container="ts" type="Video" videoCodec="h264" audioCodec="aac" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" />
<TranscodingProfile container="mp3" type="Audio" audioCodec="mp3" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" minSegments="0" segmentLength="0" breakOnNonKeyFrames="false" />
<TranscodingProfile container="jpeg" type="Photo" videoCodec="jpeg" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" minSegments="0" segmentLength="0" breakOnNonKeyFrames="false" />
<TranscodingProfile container="ts" type="Video" videoCodec="h264" audioCodec="aac" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" minSegments="0" segmentLength="0" breakOnNonKeyFrames="false" />
</TranscodingProfiles>
<ContainerProfiles>
<ContainerProfile type="Video" container="mp4,mov">

View file

@ -22,8 +22,8 @@
<MaxAlbumArtHeight>480</MaxAlbumArtHeight>
<MaxIconWidth>48</MaxIconWidth>
<MaxIconHeight>48</MaxIconHeight>
<MaxStreamingBitrate>24000000</MaxStreamingBitrate>
<MaxStaticBitrate>24000000</MaxStaticBitrate>
<MaxStreamingBitrate>30000000</MaxStreamingBitrate>
<MaxStaticBitrate>30000000</MaxStaticBitrate>
<MusicStreamingTranscodingBitrate>192000</MusicStreamingTranscodingBitrate>
<MaxStaticMusicBitrate xsi:nil="true" />
<XDlnaDoc>DMS-1.50</XDlnaDoc>
@ -43,9 +43,9 @@
<DirectPlayProfile container="ogg" audioCodec="vorbis" type="Audio" />
</DirectPlayProfiles>
<TranscodingProfiles>
<TranscodingProfile container="mp3" type="Audio" audioCodec="mp3" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" />
<TranscodingProfile container="ts" type="Video" videoCodec="h264" audioCodec="aac" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" />
<TranscodingProfile container="jpeg" type="Photo" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" />
<TranscodingProfile container="mp3" type="Audio" audioCodec="mp3" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" minSegments="0" segmentLength="0" breakOnNonKeyFrames="false" />
<TranscodingProfile container="ts" type="Video" videoCodec="h264" audioCodec="aac" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" minSegments="0" segmentLength="0" breakOnNonKeyFrames="false" />
<TranscodingProfile container="jpeg" type="Photo" estimateContentLength="false" enableMpegtsM2TsMode="false" transcodeSeekInfo="Auto" copyTimestamps="false" context="Streaming" enableSubtitlesInManifest="false" minSegments="0" segmentLength="0" breakOnNonKeyFrames="false" />
</TranscodingProfiles>
<ContainerProfiles />
<CodecProfiles />

View file

@ -57,7 +57,6 @@ namespace Emby.Drawing
private readonly IJsonSerializer _jsonSerializer;
private readonly IServerApplicationPaths _appPaths;
private readonly IImageEncoder _imageEncoder;
private readonly SemaphoreSlim _imageProcessingSemaphore;
private readonly Func<ILibraryManager> _libraryManager;
public ImageProcessor(ILogger logger,
@ -102,8 +101,6 @@ namespace Emby.Drawing
}
_cachedImagedSizes = new ConcurrentDictionary<Guid, ImageSize>(sizeDictionary);
_logger.Info("ImageProcessor started with {0} max concurrent image processes", maxConcurrentImageProcesses);
_imageProcessingSemaphore = new SemaphoreSlim(maxConcurrentImageProcesses, maxConcurrentImageProcesses);
}
public string[] SupportedInputFormats
@ -238,8 +235,6 @@ namespace Emby.Drawing
var outputFormat = GetOutputFormat(options.SupportedOutputFormats[0]);
var cacheFilePath = GetCacheFilePath(originalImagePath, newSize, quality, dateModified, outputFormat, options.AddPlayedIndicator, options.PercentPlayed, options.UnplayedCount, options.Blur, options.BackgroundColor, options.ForegroundLayer);
//var imageProcessingLockTaken = false;
try
{
CheckDisposed();
@ -253,10 +248,6 @@ namespace Emby.Drawing
var tmpPath = Path.ChangeExtension(Path.Combine(_appPaths.TempDirectory, Guid.NewGuid().ToString("N")), Path.GetExtension(cacheFilePath));
_fileSystem.CreateDirectory(Path.GetDirectoryName(tmpPath));
//await _imageProcessingSemaphore.WaitAsync().ConfigureAwait(false);
//imageProcessingLockTaken = true;
_imageEncoder.EncodeImage(originalImagePath, tmpPath, AutoOrient(options.Item), newWidth, newHeight, quality, options, outputFormat);
CopyFile(tmpPath, cacheFilePath);
@ -273,13 +264,6 @@ namespace Emby.Drawing
// Just spit out the original file if all the options are default
return new Tuple<string, string, DateTime>(originalImagePath, MimeTypes.GetMimeType(originalImagePath), dateModified);
}
//finally
//{
// if (imageProcessingLockTaken)
// {
// _imageProcessingSemaphore.Release();
// }
//}
}
private void CopyFile(string src, string destination)
@ -786,24 +770,15 @@ namespace Emby.Drawing
var tmpPath = Path.Combine(_appPaths.TempDirectory, Path.ChangeExtension(Guid.NewGuid().ToString(), Path.GetExtension(enhancedImagePath)));
_fileSystem.CreateDirectory(Path.GetDirectoryName(tmpPath));
await _imageProcessingSemaphore.WaitAsync().ConfigureAwait(false);
await ExecuteImageEnhancers(supportedEnhancers, originalImagePath, tmpPath, item, imageType, imageIndex).ConfigureAwait(false);
try
{
await ExecuteImageEnhancers(supportedEnhancers, originalImagePath, tmpPath, item, imageType, imageIndex).ConfigureAwait(false);
try
{
_fileSystem.CopyFile(tmpPath, enhancedImagePath, true);
}
catch
{
}
_fileSystem.CopyFile(tmpPath, enhancedImagePath, true);
}
finally
catch
{
_imageProcessingSemaphore.Release();
}
return tmpPath;

View file

@ -564,7 +564,7 @@ namespace Emby.Server.Core
StringExtensions.LocalizationManager = LocalizationManager;
RegisterSingleInstance(LocalizationManager);
ITextEncoding textEncoding = new TextEncoding(FileSystemManager);
ITextEncoding textEncoding = new TextEncoding(FileSystemManager, LogManager.GetLogger("TextEncoding"));
RegisterSingleInstance(textEncoding);
Utilities.EncodingHelper = textEncoding;
RegisterSingleInstance<IBlurayExaminer>(() => new BdInfoExaminer(FileSystemManager, textEncoding));
@ -1366,7 +1366,7 @@ namespace Emby.Server.Core
SupportsLibraryMonitor = true,
EncoderLocationType = MediaEncoder.EncoderLocationType,
SystemArchitecture = EnvironmentInfo.SystemArchitecture,
SystemUpdateLevel = ConfigurationManager.CommonConfiguration.SystemUpdateLevel,
SystemUpdateLevel = SystemUpdateLevel,
PackageName = StartupOptions.GetOption("-package")
};
}
@ -1591,7 +1591,7 @@ namespace Emby.Server.Core
}
catch (NotImplementedException)
{
}
catch (Exception ex)
{
@ -1632,7 +1632,7 @@ namespace Emby.Server.Core
public override async Task<CheckForUpdateResult> CheckForApplicationUpdate(CancellationToken cancellationToken, IProgress<double> progress)
{
var cacheLength = TimeSpan.FromHours(3);
var updateLevel = ConfigurationManager.CommonConfiguration.SystemUpdateLevel;
var updateLevel = SystemUpdateLevel;
if (updateLevel == PackageVersionClass.Beta)
{

View file

@ -1990,24 +1990,6 @@ namespace Emby.Server.Implementations.Data
}
index++;
if (string.IsNullOrWhiteSpace(item.Tagline))
{
var movie = item as Movie;
if (movie != null && movie.Taglines.Count > 0)
{
movie.Tagline = movie.Taglines[0];
}
}
if (type == typeof(Person) && item.ProductionLocations.Count == 0)
{
var person = (Person)item;
if (!string.IsNullOrWhiteSpace(person.PlaceOfBirth))
{
item.ProductionLocations = new List<string> { person.PlaceOfBirth };
}
}
return item;
}

View file

@ -146,7 +146,7 @@ namespace Emby.Server.Implementations.Dto
if (channelTuples.Count > 0)
{
_livetvManager().AddChannelInfo(channelTuples, options, user);
await _livetvManager().AddChannelInfo(channelTuples, options, user).ConfigureAwait(false);
}
return list;
@ -161,7 +161,8 @@ namespace Emby.Server.Implementations.Dto
if (tvChannel != null)
{
var list = new List<Tuple<BaseItemDto, LiveTvChannel>> { new Tuple<BaseItemDto, LiveTvChannel>(dto, tvChannel) };
_livetvManager().AddChannelInfo(list, options, user);
var task = _livetvManager().AddChannelInfo(list, options, user);
Task.WaitAll(task);
}
else if (item is LiveTvProgram)
{

View file

@ -311,10 +311,6 @@
<HintPath>..\packages\SQLitePCLRaw.core.1.1.2\lib\portable-net45+netcore45+wpa81+MonoAndroid10+MonoTouch10+Xamarin.iOS10\SQLitePCLRaw.core.dll</HintPath>
<Private>True</Private>
</Reference>
<Reference Include="UniversalDetector, Version=1.0.0.0, Culture=neutral, processorArchitecture=MSIL">
<HintPath>..\packages\UniversalDetector.1.0.1\lib\portable-net45+sl4+wp71+win8+wpa81\UniversalDetector.dll</HintPath>
<Private>True</Private>
</Reference>
</ItemGroup>
<ItemGroup>
<EmbeddedResource Include="Localization\Core\ar.json" />

View file

@ -41,6 +41,7 @@ using SortOrder = MediaBrowser.Model.Entities.SortOrder;
using VideoResolver = MediaBrowser.Naming.Video.VideoResolver;
using MediaBrowser.Common.Configuration;
using MediaBrowser.Common.IO;
using MediaBrowser.Controller.LiveTv;
using MediaBrowser.Model.Tasks;
namespace Emby.Server.Implementations.Library
@ -375,11 +376,22 @@ namespace Emby.Server.Implementations.Library
throw new ArgumentNullException("item");
}
_logger.Debug("Deleting item, Type: {0}, Name: {1}, Path: {2}, Id: {3}",
item.GetType().Name,
item.Name ?? "Unknown name",
item.Path ?? string.Empty,
item.Id);
if (item is LiveTvProgram)
{
_logger.Debug("Deleting item, Type: {0}, Name: {1}, Path: {2}, Id: {3}",
item.GetType().Name,
item.Name ?? "Unknown name",
item.Path ?? string.Empty,
item.Id);
}
else
{
_logger.Info("Deleting item, Type: {0}, Name: {1}, Path: {2}, Id: {3}",
item.GetType().Name,
item.Name ?? "Unknown name",
item.Path ?? string.Empty,
item.Id);
}
var parent = item.Parent;
@ -2565,7 +2577,7 @@ namespace Emby.Server.Implementations.Library
}).OrderBy(i => i.Path).ToList();
}
private static readonly string[] ExtrasSubfolderNames = new[] { "extras", "specials", "shorts", "scenes", "featurettes", "behind the scenes", "deleted scenes" };
private static readonly string[] ExtrasSubfolderNames = new[] { "extras", "specials", "shorts", "scenes", "featurettes", "behind the scenes", "deleted scenes", "interviews" };
public IEnumerable<Video> FindExtras(BaseItem owner, List<FileSystemMetadata> fileSystemChildren, IDirectoryService directoryService)
{

View file

@ -9,13 +9,10 @@ using MediaBrowser.Model.Logging;
using MediaBrowser.Model.MediaInfo;
using MediaBrowser.Model.Serialization;
using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Linq;
using System.Threading;
using System.Threading.Tasks;
using MediaBrowser.Common.IO;
using MediaBrowser.Controller.IO;
using MediaBrowser.Model.IO;
using MediaBrowser.Model.Configuration;
using MediaBrowser.Model.Threading;
@ -365,12 +362,10 @@ namespace Emby.Server.Implementations.Library
private readonly Dictionary<string, LiveStreamInfo> _openStreams = new Dictionary<string, LiveStreamInfo>(StringComparer.OrdinalIgnoreCase);
private readonly SemaphoreSlim _liveStreamSemaphore = new SemaphoreSlim(1, 1);
public async Task<LiveStreamResponse> OpenLiveStream(LiveStreamRequest request, bool enableAutoClose, CancellationToken cancellationToken)
public async Task<LiveStreamResponse> OpenLiveStream(LiveStreamRequest request, CancellationToken cancellationToken)
{
await _liveStreamSemaphore.WaitAsync(cancellationToken).ConfigureAwait(false);
enableAutoClose = false;
try
{
var tuple = GetProvider(request.OpenToken);
@ -389,8 +384,6 @@ namespace Emby.Server.Implementations.Library
var info = new LiveStreamInfo
{
Date = DateTime.UtcNow,
EnableCloseTimer = enableAutoClose,
Id = mediaSource.LiveStreamId,
MediaSource = mediaSource,
DirectStreamProvider = mediaSourceTuple.Item2
@ -398,11 +391,6 @@ namespace Emby.Server.Implementations.Library
_openStreams[mediaSource.LiveStreamId] = info;
if (enableAutoClose)
{
StartCloseTimer();
}
var json = _jsonSerializer.SerializeToString(mediaSource);
_logger.Debug("Live stream opened: " + json);
var clone = _jsonSerializer.DeserializeFromString<MediaSourceInfo>(json);
@ -462,28 +450,6 @@ namespace Emby.Server.Implementations.Library
return result.Item1;
}
/// <summary>
/// Marks an open live stream as still in use by resetting its <c>Date</c> to the current UTC time.
/// Logs an error when no open stream is registered under the given id.
/// </summary>
/// <param name="id">The id of the live stream to look up in the open-stream table.</param>
/// <param name="cancellationToken">Token observed while waiting for the live stream semaphore.</param>
/// <returns>A task that completes once the stream has been refreshed (or the miss has been logged).</returns>
public async Task PingLiveStream(string id, CancellationToken cancellationToken)
{
    await _liveStreamSemaphore.WaitAsync(cancellationToken).ConfigureAwait(false);

    try
    {
        LiveStreamInfo stream;
        if (!_openStreams.TryGetValue(id, out stream))
        {
            _logger.Error("Failed to ping live stream {0}", id);
            return;
        }

        stream.Date = DateTime.UtcNow;
    }
    finally
    {
        // Always hand the semaphore back, including on the early return above.
        _liveStreamSemaphore.Release();
    }
}
private async Task CloseLiveStreamWithProvider(IMediaSourceProvider provider, string streamId)
{
_logger.Info("Closing live stream {0} with provider {1}", streamId, provider.GetType().Name);
@ -525,11 +491,6 @@ namespace Emby.Server.Implementations.Library
await CloseLiveStreamWithProvider(tuple.Item1, tuple.Item2).ConfigureAwait(false);
}
if (_openStreams.Count == 0)
{
StopCloseTimer();
}
}
}
finally
@ -558,66 +519,11 @@ namespace Emby.Server.Implementations.Library
return new Tuple<IMediaSourceProvider, string>(provider, keyId);
}
private ITimer _closeTimer;
private readonly TimeSpan _openStreamMaxAge = TimeSpan.FromSeconds(180);
/// <summary>
/// Replaces any existing close timer with a new one that invokes <see cref="CloseTimerCallback"/>.
/// </summary>
private void StartCloseTimer()
{
// Tear down the previous timer (if any) before creating a replacement.
StopCloseTimer();
// _openStreamMaxAge is passed for both timing arguments of Create
// (presumably initial delay and repeat period -- confirm against ITimerFactory).
_closeTimer = _timerFactory.Create(CloseTimerCallback, null, _openStreamMaxAge, _openStreamMaxAge);
}
/// <summary>
/// Disposes the current close timer, if one exists, and clears the field that holds it.
/// </summary>
private void StopCloseTimer()
{
    // Work on a local copy so the field can be cleared before the timer is disposed.
    var current = _closeTimer;
    if (current == null)
    {
        return;
    }

    _closeTimer = null;
    current.Dispose();
}
/// <summary>
/// Timer callback that closes open live streams which opted into the close timer and whose
/// <c>Date</c> is older than <c>_openStreamMaxAge</c>.
/// </summary>
/// <param name="state">Timer state object; not used.</param>
private async void CloseTimerCallback(object state)
{
    List<LiveStreamInfo> expired;

    // Snapshot the candidates under the semaphore; the actual closing happens after it is released.
    await _liveStreamSemaphore.WaitAsync().ConfigureAwait(false);
    try
    {
        expired = _openStreams.Values
            .Where(stream => stream.EnableCloseTimer)
            .Where(stream => DateTime.UtcNow - stream.Date > _openStreamMaxAge)
            .ToList();
    }
    finally
    {
        _liveStreamSemaphore.Release();
    }

    foreach (var stream in expired)
    {
        if (stream.Closed)
        {
            continue;
        }

        try
        {
            await CloseLiveStream(stream.Id).ConfigureAwait(false);
        }
        catch (Exception ex)
        {
            // A failure on one stream is logged and must not prevent the remaining ones from being closed.
            _logger.ErrorException("Error closing media source", ex);
        }
    }
}
/// <summary>
/// Stops the live stream close timer and then calls <c>Dispose(true)</c>.
/// </summary>
public void Dispose()
{
StopCloseTimer();
Dispose(true);
}
@ -644,8 +550,6 @@ namespace Emby.Server.Implementations.Library
private class LiveStreamInfo
{
public DateTime Date;
public bool EnableCloseTimer;
public string Id;
public bool Closed;
public MediaSourceInfo MediaSource;

View file

@ -219,7 +219,7 @@ namespace Emby.Server.Implementations.Library.Resolvers.Movies
if (string.Equals(collectionType, CollectionType.MusicVideos, StringComparison.OrdinalIgnoreCase))
{
return FindMovie<MusicVideo>(args.Path, args.Parent, files, args.DirectoryService, collectionType, false);
return FindMovie<MusicVideo>(args.Path, args.Parent, files, args.DirectoryService, collectionType, true);
}
if (string.Equals(collectionType, CollectionType.HomeVideos, StringComparison.OrdinalIgnoreCase))

View file

@ -39,6 +39,7 @@ using MediaBrowser.Model.FileOrganization;
using MediaBrowser.Model.System;
using MediaBrowser.Model.Threading;
using MediaBrowser.Model.Extensions;
using MediaBrowser.Model.Querying;
namespace Emby.Server.Implementations.LiveTv.EmbyTV
{
@ -1512,7 +1513,7 @@ namespace Emby.Server.Implementations.LiveTv.EmbyTV
_timerProvider.AddOrUpdate(timer, false);
SaveRecordingMetadata(timer, recordPath, seriesPath);
EnforceKeepUpTo(timer);
EnforceKeepUpTo(timer, seriesPath);
};
await recorder.Record(mediaStreamInfo, recordPath, duration, onStarted, cancellationToken)
@ -1583,12 +1584,16 @@ namespace Emby.Server.Implementations.LiveTv.EmbyTV
}, _logger);
}
private async void EnforceKeepUpTo(TimerInfo timer)
private async void EnforceKeepUpTo(TimerInfo timer, string seriesPath)
{
if (string.IsNullOrWhiteSpace(timer.SeriesTimerId))
{
return;
}
if (string.IsNullOrWhiteSpace(seriesPath))
{
return;
}
var seriesTimerId = timer.SeriesTimerId;
var seriesTimer = _seriesTimerProvider.GetAll().FirstOrDefault(i => string.Equals(i.Id, seriesTimerId, StringComparison.OrdinalIgnoreCase));
@ -1621,6 +1626,43 @@ namespace Emby.Server.Implementations.LiveTv.EmbyTV
.ToList();
await DeleteLibraryItemsForTimers(timersToDelete).ConfigureAwait(false);
var librarySeries = _libraryManager.FindByPath(seriesPath, true) as Folder;
if (librarySeries == null)
{
return;
}
var episodesToDelete = (await librarySeries.GetItems(new InternalItemsQuery
{
SortBy = new[] { ItemSortBy.DateCreated },
SortOrder = SortOrder.Descending,
IsVirtualItem = false,
IsFolder = false,
Recursive = true
}).ConfigureAwait(false))
.Items
.Where(i => i.LocationType == LocationType.FileSystem && _fileSystem.FileExists(i.Path))
.Skip(seriesTimer.KeepUpTo - 1)
.ToList();
foreach (var item in episodesToDelete)
{
try
{
await _libraryManager.DeleteItem(item, new DeleteOptions
{
DeleteFileLocation = true
}).ConfigureAwait(false);
}
catch (Exception ex)
{
_logger.ErrorException("Error deleting item", ex);
}
}
}
finally
{
@ -1658,7 +1700,8 @@ namespace Emby.Server.Implementations.LiveTv.EmbyTV
await _libraryManager.DeleteItem(libraryItem, new DeleteOptions
{
DeleteFileLocation = true
});
}).ConfigureAwait(false);
}
else
{

View file

@ -30,8 +30,6 @@ namespace Emby.Server.Implementations.LiveTv
var now = DateTime.UtcNow;
var allowVideoStreamCopy = mediaSource.MediaStreams.Any(i => i.Type == MediaStreamType.Video && i.AllowStreamCopy);
var info = await _mediaEncoder.GetMediaInfo(new MediaInfoRequest
{
InputPath = mediaSource.Path,
@ -76,25 +74,28 @@ namespace Emby.Server.Implementations.LiveTv
var videoStream = mediaSource.MediaStreams.FirstOrDefault(i => i.Type == MediaBrowser.Model.Entities.MediaStreamType.Video);
if (videoStream != null)
{
videoStream.AllowStreamCopy = allowVideoStreamCopy;
if (!videoStream.BitRate.HasValue)
{
var width = videoStream.Width ?? 1920;
if (width >= 1900)
if (width >= 3000)
{
videoStream.BitRate = 30000000;
}
else if (width >= 1900)
{
videoStream.BitRate = 20000000;
}
else if (width >= 1200)
{
videoStream.BitRate = 8000000;
}
else if (width >= 1260)
{
videoStream.BitRate = 3000000;
}
else if (width >= 700)
{
videoStream.BitRate = 1000000;
videoStream.BitRate = 2000000;
}
}

View file

@ -490,7 +490,6 @@ namespace Emby.Server.Implementations.LiveTv
if (stream.Type == MediaStreamType.Video)
{
stream.IsInterlaced = true;
stream.AllowStreamCopy = false;
}
}
}
@ -2284,7 +2283,7 @@ namespace Emby.Server.Implementations.LiveTv
};
}
public void AddChannelInfo(List<Tuple<BaseItemDto, LiveTvChannel>> tuples, DtoOptions options, User user)
public async Task AddChannelInfo(List<Tuple<BaseItemDto, LiveTvChannel>> tuples, DtoOptions options, User user)
{
var now = DateTime.UtcNow;
@ -2304,6 +2303,12 @@ namespace Emby.Server.Implementations.LiveTv
RemoveFields(options);
var currentProgramsList = new List<BaseItem>();
var currentChannelsDict = new Dictionary<string, BaseItemDto>();
var addCurrentProgram = options.AddCurrentProgram;
var addMediaSources = options.Fields.Contains(ItemFields.MediaSources);
foreach (var tuple in tuples)
{
var dto = tuple.Item1;
@ -2314,19 +2319,38 @@ namespace Emby.Server.Implementations.LiveTv
dto.ChannelType = channel.ChannelType;
dto.ServiceName = channel.ServiceName;
if (options.Fields.Contains(ItemFields.MediaSources))
currentChannelsDict[dto.Id] = dto;
if (addMediaSources)
{
dto.MediaSources = channel.GetMediaSources(true).ToList();
}
if (options.AddCurrentProgram)
if (addCurrentProgram)
{
var channelIdString = channel.Id.ToString("N");
var currentProgram = programs.FirstOrDefault(i => string.Equals(i.ChannelId, channelIdString));
if (currentProgram != null)
{
dto.CurrentProgram = _dtoService.GetBaseItemDto(currentProgram, options, user);
currentProgramsList.Add(currentProgram);
}
}
}
if (addCurrentProgram)
{
var currentProgramDtos = await _dtoService.GetBaseItemDtos(currentProgramsList, options, user).ConfigureAwait(false);
foreach (var programDto in currentProgramDtos)
{
if (!string.IsNullOrWhiteSpace(programDto.ChannelId))
{
BaseItemDto channelDto;
if (currentChannelsDict.TryGetValue(programDto.ChannelId, out channelDto))
{
channelDto.CurrentProgram = programDto;
}
}
}
}

View file

@ -183,19 +183,24 @@ namespace Emby.Server.Implementations.LiveTv
{
var width = videoStream.Width ?? 1920;
if (width >= 1900)
if (width >= 3000)
{
videoStream.BitRate = 30000000;
}
else if (width >= 1900)
{
videoStream.BitRate = 20000000;
}
else if (width >= 1200)
{
videoStream.BitRate = 8000000;
}
else if (width >= 1260)
{
videoStream.BitRate = 3000000;
}
else if (width >= 700)
{
videoStream.BitRate = 1000000;
videoStream.BitRate = 2000000;
}
}
}

View file

@ -145,6 +145,7 @@ namespace Emby.Server.Implementations.LiveTv.TunerHosts
},
RequiresOpening = true,
RequiresClosing = true,
RequiresLooping = true,
ReadAtNativeFramerate = false,

View file

@ -13,7 +13,6 @@ using MediaBrowser.Common.IO;
using MediaBrowser.Model.IO;
using MediaBrowser.Model.Services;
using MediaBrowser.Model.Text;
using UniversalDetector;
namespace Emby.Server.Implementations.ServerManager
{
@ -137,7 +136,8 @@ namespace Emby.Server.Implementations.ServerManager
{
return;
}
var charset = DetectCharset(bytes);
var charset = _textEncoding.GetDetectedEncodingName(bytes, null);
if (string.Equals(charset, "utf-8", StringComparison.OrdinalIgnoreCase))
{
@ -148,33 +148,6 @@ namespace Emby.Server.Implementations.ServerManager
OnReceiveInternal(_textEncoding.GetASCIIEncoding().GetString(bytes, 0, bytes.Length));
}
}
/// <summary>
/// Feeds the message bytes to a <c>CharsetDetector</c> and returns the charset it reports.
/// </summary>
/// <param name="bytes">The raw message bytes to inspect.</param>
/// <returns>
/// The detector's <c>Charset</c> value as-is (callers should be prepared for null or blank),
/// or null when an <see cref="IOException"/> occurs during detection.
/// </returns>
private string DetectCharset(byte[] bytes)
{
    try
    {
        using (var stream = _memoryStreamProvider.CreateNew(bytes))
        {
            var charsetDetector = new CharsetDetector();
            charsetDetector.Feed(stream);
            charsetDetector.DataEnd();

            return charsetDetector.Charset;
        }
    }
    catch (IOException ex)
    {
        _logger.ErrorException("Error attempting to determine web socket message charset", ex);
        return null;
    }
}
private void OnReceiveInternal(string message)
{

View file

@ -197,6 +197,8 @@ namespace Emby.Server.Implementations.Session
_logger.ErrorException("Error disposing session controller", ex);
}
}
info.Dispose();
}
/// <summary>
@ -308,10 +310,7 @@ namespace Emby.Server.Implementations.Session
/// <summary>
/// Updates the now playing item id.
/// </summary>
/// <param name="session">The session.</param>
/// <param name="info">The information.</param>
/// <param name="libraryItem">The library item.</param>
private async Task UpdateNowPlayingItem(SessionInfo session, PlaybackProgressInfo info, BaseItem libraryItem)
private async Task UpdateNowPlayingItem(SessionInfo session, PlaybackProgressInfo info, BaseItem libraryItem, bool updateLastCheckInTime)
{
if (string.IsNullOrWhiteSpace(info.MediaSourceId))
{
@ -350,7 +349,11 @@ namespace Emby.Server.Implementations.Session
session.NowPlayingItem = info.Item;
session.LastActivityDate = DateTime.UtcNow;
session.LastPlaybackCheckIn = DateTime.UtcNow;
if (updateLastCheckInTime)
{
session.LastPlaybackCheckIn = DateTime.UtcNow;
}
session.PlayState.IsPaused = info.IsPaused;
session.PlayState.PositionTicks = info.PositionTicks;
@ -415,7 +418,7 @@ namespace Emby.Server.Implementations.Session
if (!_activeConnections.TryGetValue(key, out sessionInfo))
{
sessionInfo = new SessionInfo
sessionInfo = new SessionInfo(this, _logger)
{
Client = appName,
DeviceId = deviceId,
@ -602,14 +605,14 @@ namespace Emby.Server.Implementations.Session
? null
: GetNowPlayingItem(session, info.ItemId);
await UpdateNowPlayingItem(session, info, libraryItem).ConfigureAwait(false);
await UpdateNowPlayingItem(session, info, libraryItem, true).ConfigureAwait(false);
if (!string.IsNullOrEmpty(session.DeviceId) && info.PlayMethod != PlayMethod.Transcode)
{
ClearTranscodingInfo(session.DeviceId);
}
session.QueueableMediaTypes = info.QueueableMediaTypes;
session.StartAutomaticProgress(_timerFactory, info);
var users = GetUsers(session);
@ -668,14 +671,15 @@ namespace Emby.Server.Implementations.Session
await _userDataManager.SaveUserData(userId, item, data, UserDataSaveReason.PlaybackStart, CancellationToken.None).ConfigureAwait(false);
}
/// <summary>
/// Reports playback progress for an item by forwarding to the two-argument overload
/// with its second argument (<c>isAutomated</c>) set to false.
/// </summary>
/// <param name="info">The info.</param>
/// <returns>The task returned by the two-argument overload.</returns>
public Task OnPlaybackProgress(PlaybackProgressInfo info)
{
return OnPlaybackProgress(info, false);
}
/// <summary>
/// Used to report playback progress for an item
/// </summary>
/// <param name="info">The info.</param>
/// <returns>Task.</returns>
/// <exception cref="System.ArgumentNullException"></exception>
/// <exception cref="System.ArgumentOutOfRangeException">positionTicks</exception>
public async Task OnPlaybackProgress(PlaybackProgressInfo info)
public async Task OnPlaybackProgress(PlaybackProgressInfo info, bool isAutomated)
{
if (info == null)
{
@ -688,7 +692,7 @@ namespace Emby.Server.Implementations.Session
? null
: GetNowPlayingItem(session, info.ItemId);
await UpdateNowPlayingItem(session, info, libraryItem).ConfigureAwait(false);
await UpdateNowPlayingItem(session, info, libraryItem, !isAutomated).ConfigureAwait(false);
var users = GetUsers(session);
@ -700,18 +704,6 @@ namespace Emby.Server.Implementations.Session
}
}
if (!string.IsNullOrWhiteSpace(info.LiveStreamId))
{
try
{
await _mediaSourceManager.PingLiveStream(info.LiveStreamId, CancellationToken.None).ConfigureAwait(false);
}
catch (Exception ex)
{
_logger.ErrorException("Error closing live stream", ex);
}
}
EventHelper.FireEventIfNotNull(PlaybackProgress, this, new PlaybackProgressEventArgs
{
Item = libraryItem,
@ -727,6 +719,11 @@ namespace Emby.Server.Implementations.Session
}, _logger);
if (!isAutomated)
{
session.StartAutomaticProgress(_timerFactory, info);
}
StartIdleCheckTimer();
}
@ -788,6 +785,8 @@ namespace Emby.Server.Implementations.Session
var session = GetSession(info.SessionId);
session.StopAutomaticProgress();
var libraryItem = string.IsNullOrWhiteSpace(info.ItemId)
? null
: GetNowPlayingItem(session, info.ItemId);
@ -1009,19 +1008,9 @@ namespace Emby.Server.Implementations.Session
}
}
if (command.PlayCommand != PlayCommand.PlayNow)
if (items.Any(i => !session.PlayableMediaTypes.Contains(i.MediaType, StringComparer.OrdinalIgnoreCase)))
{
if (items.Any(i => !session.QueueableMediaTypes.Contains(i.MediaType, StringComparer.OrdinalIgnoreCase)))
{
throw new ArgumentException(string.Format("{0} is unable to queue the requested media type.", session.DeviceName ?? session.Id));
}
}
else
{
if (items.Any(i => !session.PlayableMediaTypes.Contains(i.MediaType, StringComparer.OrdinalIgnoreCase)))
{
throw new ArgumentException(string.Format("{0} is unable to play the requested media type.", session.DeviceName ?? session.Id));
}
throw new ArgumentException(string.Format("{0} is unable to play the requested media type.", session.DeviceName ?? session.Id));
}
if (user != null && command.ItemIds.Length == 1 && user.Configuration.EnableNextEpisodeAutoPlay)
@ -1601,7 +1590,6 @@ namespace Emby.Server.Implementations.Session
LastActivityDate = session.LastActivityDate,
NowViewingItem = session.NowViewingItem,
ApplicationVersion = session.ApplicationVersion,
QueueableMediaTypes = session.QueueableMediaTypes,
PlayableMediaTypes = session.PlayableMediaTypes,
AdditionalUsers = session.AdditionalUsers,
SupportedCommands = session.SupportedCommands,

View file

@ -289,7 +289,6 @@ namespace Emby.Server.Implementations.Session
var itemId = vals[0];
var queueableMediaTypes = string.Empty;
var canSeek = true;
if (vals.Length > 1)
@ -298,15 +297,14 @@ namespace Emby.Server.Implementations.Session
}
if (vals.Length > 2)
{
queueableMediaTypes = vals[2];
// vals[2] used to be QueueableMediaTypes
}
var info = new PlaybackStartInfo
{
CanSeek = canSeek,
ItemId = itemId,
SessionId = session.Id,
QueueableMediaTypes = queueableMediaTypes.Split(',').ToList()
SessionId = session.Id
};
if (vals.Length > 3)

View file

@ -270,9 +270,14 @@ namespace Emby.Server.Implementations.Updates
}
}
/// <summary>
/// Gets the system update level reported by the application host.
/// </summary>
/// <returns>The value of <c>_applicationHost.SystemUpdateLevel</c>.</returns>
private PackageVersionClass GetSystemUpdateLevel()
{
return _applicationHost.SystemUpdateLevel;
}
private TimeSpan GetCacheLength()
{
switch (_config.CommonConfiguration.SystemUpdateLevel)
switch (GetSystemUpdateLevel())
{
case PackageVersionClass.Beta:
return TimeSpan.FromMinutes(30);
@ -424,10 +429,12 @@ namespace Emby.Server.Implementations.Updates
.ToList();
}
var systemUpdateLevel = GetSystemUpdateLevel();
// Figure out what needs to be installed
var packages = plugins.Select(p =>
{
var latestPluginInfo = GetLatestCompatibleVersion(catalog, p.Name, p.Id.ToString(), applicationVersion, _config.CommonConfiguration.SystemUpdateLevel);
var latestPluginInfo = GetLatestCompatibleVersion(catalog, p.Name, p.Id.ToString(), applicationVersion, systemUpdateLevel);
return latestPluginInfo != null && GetPackageVersion(latestPluginInfo) > p.Version ? latestPluginInfo : null;

View file

@ -4,5 +4,4 @@
<package id="MediaBrowser.Naming" version="1.0.5" targetFramework="portable45-net45+win8" />
<package id="SQLitePCL.pretty" version="1.1.0" targetFramework="portable45-net45+win8" />
<package id="SQLitePCLRaw.core" version="1.1.2" targetFramework="portable45-net45+win8" />
<package id="UniversalDetector" version="1.0.1" targetFramework="portable45-net45+win8" />
</packages>

View file

@ -16,6 +16,7 @@ using System.Linq;
using System.Threading;
using System.Threading.Tasks;
using MediaBrowser.Common.IO;
using MediaBrowser.Controller.MediaEncoding;
using MediaBrowser.Controller.Net;
using MediaBrowser.Model.Diagnostics;
using MediaBrowser.Model.IO;
@ -425,18 +426,6 @@ namespace MediaBrowser.Api
{
job.ChangeKillTimerIfStarted();
}
if (!string.IsNullOrWhiteSpace(job.LiveStreamId))
{
try
{
await _mediaSourceManager.PingLiveStream(job.LiveStreamId, CancellationToken.None).ConfigureAwait(false);
}
catch (Exception ex)
{
Logger.ErrorException("Error closing live stream", ex);
}
}
}
/// <summary>
@ -829,23 +818,4 @@ namespace MediaBrowser.Api
}
}
}
/// <summary>
/// Identifies the type of a transcoding job.
/// </summary>
public enum TranscodingJobType
{
/// <summary>
/// Progressive transcoding job.
/// </summary>
Progressive,
/// <summary>
/// HLS transcoding job (presumably HTTP Live Streaming -- confirm).
/// </summary>
Hls,
/// <summary>
/// DASH transcoding job (presumably MPEG-DASH -- confirm).
/// </summary>
Dash
}
}

View file

@ -134,6 +134,7 @@
<Compile Include="SearchService.cs" />
<Compile Include="Session\SessionsService.cs" />
<Compile Include="SimilarItemsHelper.cs" />
<Compile Include="SuggestionsService.cs" />
<Compile Include="System\ActivityLogService.cs" />
<Compile Include="System\ActivityLogWebSocketListener.cs" />
<Compile Include="System\SystemService.cs" />

View file

@ -169,7 +169,7 @@ namespace MediaBrowser.Api.Playback
{
OpenToken = state.MediaSource.OpenToken
}, false, cancellationTokenSource.Token).ConfigureAwait(false);
}, cancellationTokenSource.Token).ConfigureAwait(false);
EncodingHelper.AttachMediaSourceInfo(state, liveStreamResponse.MediaSource, state.RequestedUrl);

View file

@ -826,6 +826,11 @@ namespace MediaBrowser.Api.Playback.Hls
args += " -ab " + bitrate.Value.ToString(UsCulture);
}
if (state.OutputAudioSampleRate.HasValue)
{
args += " -ar " + state.OutputAudioSampleRate.Value.ToString(UsCulture);
}
args += " " + EncodingHelper.GetAudioFilterParam(state, ApiEntryPoint.Instance.GetEncodingOptions(), true);
return args;
@ -890,6 +895,11 @@ namespace MediaBrowser.Api.Playback.Hls
args += " -copyts";
}
if (!string.IsNullOrEmpty(state.OutputVideoSync))
{
args += " -vsync " + state.OutputVideoSync;
}
return args;
}
@ -932,7 +942,7 @@ namespace MediaBrowser.Api.Playback.Hls
}
var videoCodec = EncodingHelper.GetVideoEncoder(state, ApiEntryPoint.Instance.GetEncodingOptions());
var breakOnNonKeyFrames = state.Request.BreakOnNonKeyFrames && string.Equals(videoCodec, "copy", StringComparison.OrdinalIgnoreCase);
var breakOnNonKeyFrames = state.EnableBreakOnNonKeyFrames(videoCodec);
var breakOnNonKeyFramesArg = breakOnNonKeyFrames ? " -break_non_keyframes 1" : "";

View file

@ -6,6 +6,7 @@ using System.IO;
using System.Linq;
using System.Threading.Tasks;
using MediaBrowser.Common.IO;
using MediaBrowser.Controller.MediaEncoding;
using MediaBrowser.Model.IO;
using MediaBrowser.Model.Services;

View file

@ -6,10 +6,7 @@ using MediaBrowser.Controller.MediaEncoding;
using MediaBrowser.Model.IO;
using MediaBrowser.Model.Serialization;
using System;
using MediaBrowser.Common.IO;
using MediaBrowser.Controller.IO;
using MediaBrowser.Controller.Net;
using MediaBrowser.Model.IO;
using MediaBrowser.Model.Dlna;
using MediaBrowser.Model.Services;
@ -60,6 +57,11 @@ namespace MediaBrowser.Api.Playback.Hls
args += " -ab " + bitrate.Value.ToString(UsCulture);
}
if (state.OutputAudioSampleRate.HasValue)
{
args += " -ar " + state.OutputAudioSampleRate.Value.ToString(UsCulture);
}
args += " " + EncodingHelper.GetAudioFilterParam(state, ApiEntryPoint.Instance.GetEncodingOptions(), true);
return args;
@ -85,36 +87,42 @@ namespace MediaBrowser.Api.Playback.Hls
if (codec.Equals("copy", StringComparison.OrdinalIgnoreCase))
{
// if h264_mp4toannexb is ever added, do not use it for live tv
if (state.VideoStream != null && EncodingHelper.IsH264(state.VideoStream) && !string.Equals(state.VideoStream.NalLengthSize, "0", StringComparison.OrdinalIgnoreCase))
if (state.VideoStream != null && EncodingHelper.IsH264(state.VideoStream) &&
!string.Equals(state.VideoStream.NalLengthSize, "0", StringComparison.OrdinalIgnoreCase))
{
args += " -bsf:v h264_mp4toannexb";
}
args += " -flags -global_header";
return args;
}
var keyFrameArg = string.Format(" -force_key_frames \"expr:gte(t,n_forced*{0})\"",
state.SegmentLength.ToString(UsCulture));
var hasGraphicalSubs = state.SubtitleStream != null && !state.SubtitleStream.IsTextSubtitleStream && state.SubtitleDeliveryMethod == SubtitleDeliveryMethod.Encode;
var encodingOptions = ApiEntryPoint.Instance.GetEncodingOptions();
args += " " + EncodingHelper.GetVideoQualityParam(state, codec, encodingOptions, GetDefaultH264Preset()) + keyFrameArg;
// Add resolution params, if specified
if (!hasGraphicalSubs)
else
{
args += EncodingHelper.GetOutputSizeParam(state, codec);
}
var keyFrameArg = string.Format(" -force_key_frames \"expr:gte(t,n_forced*{0})\"",
state.SegmentLength.ToString(UsCulture));
// This is for internal graphical subs
if (hasGraphicalSubs)
{
args += EncodingHelper.GetGraphicalSubtitleParam(state, codec);
var hasGraphicalSubs = state.SubtitleStream != null && !state.SubtitleStream.IsTextSubtitleStream && state.SubtitleDeliveryMethod == SubtitleDeliveryMethod.Encode;
var encodingOptions = ApiEntryPoint.Instance.GetEncodingOptions();
args += " " + EncodingHelper.GetVideoQualityParam(state, codec, encodingOptions, GetDefaultH264Preset()) + keyFrameArg;
// Add resolution params, if specified
if (!hasGraphicalSubs)
{
args += EncodingHelper.GetOutputSizeParam(state, codec);
}
// This is for internal graphical subs
if (hasGraphicalSubs)
{
args += EncodingHelper.GetGraphicalSubtitleParam(state, codec);
}
}
args += " -flags -global_header";
if (!string.IsNullOrEmpty(state.OutputVideoSync))
{
args += " -vsync " + state.OutputVideoSync;
}
return args;
}

View file

@ -109,7 +109,7 @@ namespace MediaBrowser.Api.Playback
{
var authInfo = _authContext.GetAuthorizationInfo(Request);
var result = await _mediaSourceManager.OpenLiveStream(request, true, CancellationToken.None).ConfigureAwait(false);
var result = await _mediaSourceManager.OpenLiveStream(request, CancellationToken.None).ConfigureAwait(false);
var profile = request.DeviceProfile;
if (profile == null)

View file

@ -42,7 +42,6 @@ namespace MediaBrowser.Api.Playback
public int? SegmentLength { get; set; }
public int? MinSegments { get; set; }
public bool BreakOnNonKeyFrames { get; set; }
}
public class VideoStreamRequest : StreamRequest

View file

@ -104,14 +104,6 @@ namespace MediaBrowser.Api.Playback
}
}
/// <summary>
/// Gets a value indicating whether this state describes a segmented live stream,
/// i.e. the transcoding type is not progressive and no run time is known.
/// </summary>
public bool IsSegmentedLiveStream
{
    get
    {
        // Progressive jobs are never treated as segmented.
        if (TranscodingType == TranscodingJobType.Progressive)
        {
            return false;
        }

        // Without a known run time the stream is considered live.
        return !RunTimeTicks.HasValue;
    }
}
public int HlsListSize
{
get
@ -121,14 +113,12 @@ namespace MediaBrowser.Api.Playback
}
public string UserAgent { get; set; }
public TranscodingJobType TranscodingType { get; set; }
public StreamState(IMediaSourceManager mediaSourceManager, ILogger logger, TranscodingJobType transcodingType)
: base(logger)
: base(logger, transcodingType)
{
_mediaSourceManager = mediaSourceManager;
_logger = logger;
TranscodingType = transcodingType;
}
public string MimeType { get; set; }

View file

@ -0,0 +1,94 @@
using MediaBrowser.Controller.Dto;
using MediaBrowser.Controller.Entities;
using MediaBrowser.Controller.Net;
using MediaBrowser.Model.Dto;
using MediaBrowser.Model.Querying;
using MediaBrowser.Model.Services;
using System;
using System.Linq;
using System.Threading.Tasks;
using MediaBrowser.Controller.Library;
namespace MediaBrowser.Api
{
/// <summary>
/// Request object for the GET /Users/{UserId}/Suggestions route.
/// </summary>
/// <remarks>
/// NOTE(review): the declared response type is QueryResult&lt;BaseItem&gt;, while
/// SuggestionsService builds a QueryResult&lt;BaseItemDto&gt; - confirm which is intended.
/// </remarks>
[Route("/Users/{UserId}/Suggestions", "GET", Summary = "Gets items based on a query.")]
public class GetSuggestedItems : IReturn<QueryResult<BaseItem>>
{
    /// <summary>
    /// Gets or sets the comma delimited media types; see <see cref="GetMediaTypes"/>.
    /// </summary>
    public string MediaType { get; set; }

    /// <summary>
    /// Gets or sets the comma delimited item types; see <see cref="GetIncludeItemTypes"/>.
    /// </summary>
    public string Type { get; set; }

    /// <summary>
    /// Gets or sets the user id taken from the route.
    /// </summary>
    public string UserId { get; set; }

    /// <summary>
    /// Gets or sets a value indicating whether the total record count is requested.
    /// </summary>
    public bool EnableTotalRecordCount { get; set; }

    /// <summary>
    /// Gets or sets the optional start index of the result page.
    /// </summary>
    public int? StartIndex { get; set; }

    /// <summary>
    /// Gets or sets the optional maximum number of results.
    /// </summary>
    public int? Limit { get; set; }

    /// <summary>
    /// Splits <see cref="MediaType"/> on commas, dropping empty entries.
    /// </summary>
    /// <returns>The individual media types; an empty array when none were supplied.</returns>
    public string[] GetMediaTypes()
    {
        return SplitCommaDelimited(MediaType);
    }

    /// <summary>
    /// Splits <see cref="Type"/> on commas, dropping empty entries.
    /// </summary>
    /// <returns>The individual item types; an empty array when none were supplied.</returns>
    public string[] GetIncludeItemTypes()
    {
        return SplitCommaDelimited(Type);
    }

    /// <summary>
    /// Splits a comma delimited value, treating null as an empty string.
    /// </summary>
    private static string[] SplitCommaDelimited(string value)
    {
        var text = value ?? string.Empty;
        var separators = new[] { ',' };

        return text.Split(separators, StringSplitOptions.RemoveEmptyEntries);
    }
}
/// <summary>
/// API service that answers <see cref="GetSuggestedItems"/> requests with a
/// randomly sorted, recursive selection of non-virtual library items.
/// </summary>
public class SuggestionsService : BaseApiService
{
    private readonly IDtoService _dtoService;
    private readonly IAuthorizationContext _authContext;
    private readonly IUserManager _userManager;
    private readonly ILibraryManager _libraryManager;

    /// <summary>
    /// Initializes a new instance of the <see cref="SuggestionsService"/> class.
    /// </summary>
    /// <param name="dtoService">Converts library items into dtos.</param>
    /// <param name="authContext">Passed to GetDtoOptions when building dto options.</param>
    /// <param name="userManager">Used to look up the requesting user.</param>
    /// <param name="libraryManager">Used to query the library.</param>
    public SuggestionsService(IDtoService dtoService, IAuthorizationContext authContext, IUserManager userManager, ILibraryManager libraryManager)
    {
        _dtoService = dtoService;
        _authContext = authContext;
        _userManager = userManager;
        _libraryManager = libraryManager;
    }

    /// <summary>
    /// Handles the suggestions request.
    /// </summary>
    /// <param name="request">The request.</param>
    /// <returns>The query result passed through ToOptimizedResult.</returns>
    public async Task<object> Get(GetSuggestedItems request)
    {
        var suggestions = await GetResultItems(request).ConfigureAwait(false);

        return ToOptimizedResult(suggestions);
    }

    /// <summary>
    /// Queries the library and converts the matching items into dtos.
    /// </summary>
    /// <param name="request">The request.</param>
    /// <returns>The dto page together with the total record count reported by the query.</returns>
    /// <exception cref="InvalidOperationException">The dto service returned null.</exception>
    private async Task<QueryResult<BaseItemDto>> GetResultItems(GetSuggestedItems request)
    {
        // The user is only resolved when an id was supplied; otherwise the query runs without one.
        User user = null;
        if (!string.IsNullOrWhiteSpace(request.UserId))
        {
            user = _userManager.GetUserById(request.UserId);
        }

        var dtoOptions = GetDtoOptions(_authContext, request);
        var queryResult = GetItems(request, user, dtoOptions);

        var dtos = await _dtoService.GetBaseItemDtos(queryResult.Items, dtoOptions, user).ConfigureAwait(false);
        if (dtos == null)
        {
            throw new InvalidOperationException("GetBaseItemDtos returned null");
        }

        var response = new QueryResult<BaseItemDto>();
        response.TotalRecordCount = queryResult.TotalRecordCount;
        response.Items = dtos.ToArray();

        return response;
    }

    /// <summary>
    /// Runs the library query described by the request.
    /// </summary>
    /// <param name="request">The request supplying filters and paging.</param>
    /// <param name="user">The user the query is built for; may be null.</param>
    /// <param name="dtoOptions">The dto options attached to the query.</param>
    /// <returns>The raw library query result.</returns>
    private QueryResult<BaseItem> GetItems(GetSuggestedItems request, User user, DtoOptions dtoOptions)
    {
        var query = new InternalItemsQuery(user)
        {
            SortBy = new[] { ItemSortBy.Random },
            MediaTypes = request.GetMediaTypes(),
            IncludeItemTypes = request.GetIncludeItemTypes(),
            IsVirtualItem = false,
            StartIndex = request.StartIndex,
            Limit = request.Limit,
            DtoOptions = dtoOptions,
            EnableTotalRecordCount = request.EnableTotalRecordCount,
            Recursive = true
        };

        return _libraryManager.GetItemsResult(query);
    }
}
}

View file

@ -199,7 +199,6 @@ namespace MediaBrowser.Api
[ApiMember(Name = "EnableUserData", Description = "Optional, include user data", IsRequired = false, DataType = "boolean", ParameterType = "query", Verb = "GET")]
public bool? EnableUserData { get; set; }
}
[Route("/Shows/{Id}/Seasons", "GET", Summary = "Gets seasons for a tv series")]
@ -245,7 +244,6 @@ namespace MediaBrowser.Api
[ApiMember(Name = "EnableUserData", Description = "Optional, include user data", IsRequired = false, DataType = "boolean", ParameterType = "query", Verb = "GET")]
public bool? EnableUserData { get; set; }
}
/// <summary>
@ -427,11 +425,11 @@ namespace MediaBrowser.Api
{
var user = _userManager.GetUserById(request.UserId);
var series = _libraryManager.GetItemById(request.Id) as Series;
var series = GetSeries(request.Id, user);
if (series == null)
{
throw new ResourceNotFoundException("No series exists with Id " + request.Id);
throw new ResourceNotFoundException("Series not found");
}
var seasons = (await series.GetItems(new InternalItemsQuery(user)
@ -455,6 +453,16 @@ namespace MediaBrowser.Api
};
}
/// <summary>
/// Resolves a series from its id.
/// </summary>
/// <param name="seriesId">The series id; null, empty or whitespace yields null.</param>
/// <param name="user">The requesting user. Not used by this lookup.</param>
/// <returns>The series, or null when no id was given or the item is not a <see cref="Series"/>.</returns>
private Series GetSeries(string seriesId, User user)
{
    if (string.IsNullOrWhiteSpace(seriesId))
    {
        return null;
    }

    var item = _libraryManager.GetItemById(seriesId);

    return item as Series;
}
public async Task<object> Get(GetEpisodes request)
{
var user = _userManager.GetUserById(request.UserId);
@ -474,11 +482,11 @@ namespace MediaBrowser.Api
}
else if (request.Season.HasValue)
{
var series = _libraryManager.GetItemById(request.Id) as Series;
var series = GetSeries(request.Id, user);
if (series == null)
{
throw new ResourceNotFoundException("No series exists with Id " + request.Id);
throw new ResourceNotFoundException("Series not found");
}
var season = series.GetSeasons(user).FirstOrDefault(i => i.IndexNumber == request.Season.Value);
@ -494,11 +502,11 @@ namespace MediaBrowser.Api
}
else
{
var series = _libraryManager.GetItemById(request.Id) as Series;
var series = GetSeries(request.Id, user);
if (series == null)
{
throw new ResourceNotFoundException("No series exists with Id " + request.Id);
throw new ResourceNotFoundException("Series not found");
}
episodes = series.GetEpisodes(user);

View file

@ -109,13 +109,6 @@ namespace MediaBrowser.Api.UserLibrary
[ApiMember(Name = "CanSeek", Description = "Indicates if the client can seek", IsRequired = false, DataType = "boolean", ParameterType = "query", Verb = "POST")]
public bool CanSeek { get; set; }
/// <summary>
/// Gets or sets the id.
/// </summary>
/// <value>The id.</value>
[ApiMember(Name = "QueueableMediaTypes", Description = "A list of media types that can be queued from this item, comma delimited. Audio,Video,Book,Game", IsRequired = true, DataType = "string", ParameterType = "query", Verb = "POST", AllowMultiple = true)]
public string QueueableMediaTypes { get; set; }
[ApiMember(Name = "AudioStreamIndex", IsRequired = false, DataType = "int", ParameterType = "query", Verb = "POST")]
public int? AudioStreamIndex { get; set; }
@ -292,13 +285,10 @@ namespace MediaBrowser.Api.UserLibrary
/// <param name="request">The request.</param>
public void Post(OnPlaybackStart request)
{
var queueableMediaTypes = request.QueueableMediaTypes ?? string.Empty;
Post(new ReportPlaybackStart
{
CanSeek = request.CanSeek,
ItemId = request.Id,
QueueableMediaTypes = queueableMediaTypes.Split(',').ToList(),
MediaSourceId = request.MediaSourceId,
AudioStreamIndex = request.AudioStreamIndex,
SubtitleStreamIndex = request.SubtitleStreamIndex,

View file

@ -31,17 +31,6 @@ namespace MediaBrowser.Api.UserLibrary
public string PresetViews { get; set; }
}
/// <summary>
/// Request object for the GET /Users/{UserId}/SpecialViewOptions route,
/// declared to return a list of SpecialViewOption.
/// </summary>
[Route("/Users/{UserId}/SpecialViewOptions", "GET")]
public class GetSpecialViewOptions : IReturn<List<SpecialViewOption>>
{
/// <summary>
/// Gets or sets the user id.
/// </summary>
/// <value>The user id.</value>
[ApiMember(Name = "UserId", Description = "User Id", IsRequired = true, DataType = "string", ParameterType = "path", Verb = "GET")]
public string UserId { get; set; }
}
[Route("/Users/{UserId}/GroupingOptions", "GET")]
public class GetGroupingOptions : IReturn<List<SpecialViewOption>>
{
@ -114,29 +103,6 @@ namespace MediaBrowser.Api.UserLibrary
return ToOptimizedResult(result);
}
/// <summary>
/// Lists the user's root collection folders that are eligible for a special view,
/// as name/id options ordered by name.
/// </summary>
/// <param name="request">The request carrying the user id.</param>
/// <returns>The option list passed through ToOptimizedResult.</returns>
public async Task<object> Get(GetSpecialViewOptions request)
{
    var user = _userManager.GetUserById(request.UserId);

    var collectionFolders = user.RootFolder
        .GetChildren(user, true)
        .OfType<ICollectionFolder>();

    var options = collectionFolders
        .Where(folder => IsEligibleForSpecialView(folder))
        .Select(folder => new SpecialViewOption
        {
            Name = folder.Name,
            // "N" formats the id as 32 digits without hyphens.
            Id = folder.Id.ToString("N")
        })
        .OrderBy(option => option.Name)
        .ToList();

    return ToOptimizedResult(options);
}
public async Task<object> Get(GetGroupingOptions request)
{
var user = _userManager.GetUserById(request.UserId);
@ -159,11 +125,6 @@ namespace MediaBrowser.Api.UserLibrary
return ToOptimizedResult(list);
}
/// <summary>
/// Determines whether a collection folder qualifies for a special view by
/// delegating to UserView.IsEligibleForEnhancedView with the folder's collection type.
/// </summary>
/// <param name="view">The collection folder to check.</param>
/// <returns><c>true</c> if the folder's collection type is eligible; otherwise <c>false</c>.</returns>
private bool IsEligibleForSpecialView(ICollectionFolder view)
{
    var collectionType = view.CollectionType;

    return UserView.IsEligibleForEnhancedView(collectionType);
}
}
class SpecialViewOption

View file

@ -157,5 +157,7 @@ namespace MediaBrowser.Common
/// <param name="type">The type.</param>
/// <returns>System.Object.</returns>
object CreateInstance(Type type);
/// <summary>
/// Gets the system update level.
/// </summary>
/// <value>The system update level.</value>
PackageVersionClass SystemUpdateLevel { get; }
}
}

Some files were not shown because too many files have changed in this diff Show more