Visual Studio Reformat: Emby.Server.Implementations Part T-T

This commit is contained in:
Erwin de Haan 2019-01-13 20:22:56 +01:00
parent 0efc699e3d
commit 25f0315e91
39 changed files with 1054 additions and 892 deletions

View file

@ -1,14 +1,14 @@
using System;
using System.Collections.Generic;
using System.Linq;
using MediaBrowser.Controller.Configuration;
using MediaBrowser.Controller.Dto;
using MediaBrowser.Controller.Entities;
using MediaBrowser.Controller.Entities.TV;
using MediaBrowser.Controller.Library;
using MediaBrowser.Controller.TV;
using MediaBrowser.Model.Entities;
using MediaBrowser.Model.Querying;
using System;
using System.Collections.Generic;
using System.Linq;
using MediaBrowser.Controller.Configuration;
using MediaBrowser.Controller.Dto;
namespace Emby.Server.Implementations.TV
{

View file

@ -1,10 +1,8 @@
using System;
using System.Collections.Generic;
using System.IO;
using System.IO.Compression;
using NLangDetect.Core.Utils;
using MediaBrowser.Model.Serialization;
using System.Linq;
using MediaBrowser.Model.Serialization;
using NLangDetect.Core.Utils;
namespace NLangDetect.Core
{

View file

@ -1,8 +1,8 @@
using System;
using System.IO;
using System.IO.Compression;
using System.Xml;
using NLangDetect.Core.Utils;
using System.IO;
namespace NLangDetect.Core
{

View file

@ -1,10 +1,9 @@
using System;
using System.Collections.Generic;
using System.Globalization;
using System.IO;
using System.Reflection;
using System.Text.RegularExpressions;
using System.Linq;
using System;
using System.Text.RegularExpressions;
namespace NLangDetect.Core.Utils
{

View file

@ -1,9 +1,9 @@
using System;
using System.Text;
using MediaBrowser.Model.IO;
using Microsoft.Extensions.Logging;
using MediaBrowser.Model.Serialization;
using MediaBrowser.Model.Text;
using Microsoft.Extensions.Logging;
using NLangDetect.Core;
using UniversalDetector;

View file

@ -57,22 +57,29 @@ namespace UniversalDetector.Core
int codingState = 0;
int max = offset + len;
for (int i = offset; i < max; i++) {
for (int i = offset; i < max; i++)
{
codingState = codingSM.NextState(buf[i]);
if (codingState == SMModel.ERROR) {
if (codingState == SMModel.ERROR)
{
state = ProbingState.NotMe;
break;
}
if (codingState == SMModel.ITSME) {
if (codingState == SMModel.ITSME)
{
state = ProbingState.FoundIt;
break;
}
if (codingState == SMModel.START) {
if (codingState == SMModel.START)
{
int charLen = codingSM.CurrentCharLen;
if (i == offset) {
if (i == offset)
{
lastChar[1] = buf[offset];
distributionAnalyser.HandleOneChar(lastChar, 0, charLen);
} else {
}
else
{
distributionAnalyser.HandleOneChar(buf, i - 1, charLen);
}
}

View file

@ -97,9 +97,11 @@ namespace UniversalDetector.Core
{
//we only care about 2-bytes character in our distribution analysis
int order = (charLen == 2) ? GetOrder(buf, offset) : -1;
if (order >= 0) {
if (order >= 0)
{
totalChars++;
if (order < tableSize) { // order is valid
if (order < tableSize)
{ // order is valid
if (512 > charToFreqOrder[order])
freqChars++;
}
@ -124,7 +126,8 @@ namespace UniversalDetector.Core
// negative answer
if (totalChars <= 0 || freqChars <= MINIMUM_DATA_THRESHOLD)
return SURE_NO;
if (totalChars != freqChars) {
if (totalChars != freqChars)
{
float r = freqChars / ((totalChars - freqChars) * typicalDistributionRatio);
if (r < SURE_YES)
return r;
@ -2559,12 +2562,15 @@ namespace UniversalDetector.Core
/// </summary>
public override int GetOrder(byte[] buf, int offset)
{
if (buf[offset] >= 0xA4) {
if (buf[offset] >= 0xA4)
{
if (buf[offset + 1] >= 0xA1)
return 157 * (buf[offset] - 0xA4) + buf[offset + 1] - 0xA1 + 63;
else
return 157 * (buf[offset] - 0xA4) + buf[offset + 1] - 0x40;
} else {
}
else
{
return -1;
}
}

View file

@ -40,7 +40,8 @@ using System.IO;
namespace UniversalDetector.Core
{
public enum ProbingState {
public enum ProbingState
{
Detecting = 0, // no sure answer yet, but caller can ask for confidence
FoundIt = 1, // positive answer
NotMe = 2 // negative answer
@ -107,21 +108,27 @@ namespace UniversalDetector.Core
{
byte[] result = null;
using (MemoryStream ms = new MemoryStream(buf.Length)) {
using (MemoryStream ms = new MemoryStream(buf.Length))
{
bool meetMSB = false;
int max = offset + len;
int prev = offset;
int cur = offset;
while (cur < max) {
while (cur < max)
{
byte b = buf[cur];
if ((b & 0x80) != 0) {
if ((b & 0x80) != 0)
{
meetMSB = true;
} else if (b < CAPITAL_A || (b > CAPITAL_Z && b < SMALL_A)
|| b > SMALL_Z) {
if (meetMSB && cur > prev) {
}
else if (b < CAPITAL_A || (b > CAPITAL_Z && b < SMALL_A)
|| b > SMALL_Z)
{
if (meetMSB && cur > prev)
{
ms.Write(buf, prev, cur - prev);
ms.WriteByte(SPACE);
meetMSB = false;
@ -149,14 +156,16 @@ namespace UniversalDetector.Core
{
byte[] result = null;
using (MemoryStream ms = new MemoryStream(buf.Length)) {
using (MemoryStream ms = new MemoryStream(buf.Length))
{
bool inTag = false;
int max = offset + len;
int prev = offset;
int cur = offset;
while (cur < max) {
while (cur < max)
{
byte b = buf[cur];
@ -167,8 +176,10 @@ namespace UniversalDetector.Core
// it's ascii, but it's not a letter
if ((b & 0x80) == 0 && (b < CAPITAL_A || b > SMALL_Z
|| (b > CAPITAL_Z && b < SMALL_A))) {
if (cur > prev && !inTag) {
|| (b > CAPITAL_Z && b < SMALL_A)))
{
if (cur > prev && !inTag)
{
ms.Write(buf, prev, cur - prev);
ms.WriteByte(SPACE);
}

View file

@ -60,7 +60,8 @@ namespace UniversalDetector.Core
// for each byte we get its class, if it is first byte,
// we also get byte length
int byteCls = model.GetClass(b);
if (currentState == SMModel.START) {
if (currentState == SMModel.START)
{
currentBytePos = 0;
currentCharLen = model.charLenTable[byteCls];
}

View file

@ -62,23 +62,30 @@ namespace UniversalDetector.Core
int codingState;
int max = offset + len;
for (int i = offset; i < max; i++) {
for (int i = offset; i < max; i++)
{
codingState = codingSM.NextState(buf[i]);
if (codingState == SMModel.ERROR) {
if (codingState == SMModel.ERROR)
{
state = ProbingState.NotMe;
break;
}
if (codingState == SMModel.ITSME) {
if (codingState == SMModel.ITSME)
{
state = ProbingState.FoundIt;
break;
}
if (codingState == SMModel.START) {
if (codingState == SMModel.START)
{
int charLen = codingSM.CurrentCharLen;
if (i == offset) {
if (i == offset)
{
lastChar[1] = buf[offset];
contextAnalyser.HandleOneChar(lastChar, 0, charLen);
distributionAnalyser.HandleOneChar(lastChar, 0, charLen);
} else {
}
else
{
contextAnalyser.HandleOneChar(buf, i - 1, charLen);
distributionAnalyser.HandleOneChar(buf, i - 1, charLen);
}

View file

@ -60,22 +60,29 @@ namespace UniversalDetector.Core
int codingState;
int max = offset + len;
for (int i = offset; i < max; i++) {
for (int i = offset; i < max; i++)
{
codingState = codingSM.NextState(buf[i]);
if (codingState == SMModel.ERROR) {
if (codingState == SMModel.ERROR)
{
state = ProbingState.NotMe;
break;
}
if (codingState == SMModel.ITSME) {
if (codingState == SMModel.ITSME)
{
state = ProbingState.FoundIt;
break;
}
if (codingState == SMModel.START) {
if (codingState == SMModel.START)
{
int charLen = codingSM.CurrentCharLen;
if (i == offset) {
if (i == offset)
{
lastChar[1] = buf[offset];
distributionAnalyser.HandleOneChar(lastChar, 0, charLen);
} else {
}
else
{
distributionAnalyser.HandleOneChar(buf, i - 1, charLen);
}
}

View file

@ -56,22 +56,29 @@ namespace UniversalDetector.Core
int codingState;
int max = offset + len;
for (int i = 0; i < max; i++) {
for (int i = 0; i < max; i++)
{
codingState = codingSM.NextState(buf[i]);
if (codingState == SMModel.ERROR) {
if (codingState == SMModel.ERROR)
{
state = ProbingState.NotMe;
break;
}
if (codingState == SMModel.ITSME) {
if (codingState == SMModel.ITSME)
{
state = ProbingState.FoundIt;
break;
}
if (codingState == SMModel.START) {
if (codingState == SMModel.START)
{
int charLen = codingSM.CurrentCharLen;
if (i == offset) {
if (i == offset)
{
lastChar[1] = buf[offset];
distributionAnalyser.HandleOneChar(lastChar, 0, charLen);
} else {
}
else
{
distributionAnalyser.HandleOneChar(buf, i - 1, charLen);
}
}

View file

@ -67,22 +67,30 @@ namespace UniversalDetector.Core
{
int max = offset + len;
for (int i = offset; i < max && state == ProbingState.Detecting; i++) {
for (int j = activeSM - 1; j >= 0; j--) {
for (int i = offset; i < max && state == ProbingState.Detecting; i++)
{
for (int j = activeSM - 1; j >= 0; j--)
{
// byte is feed to all active state machine
int codingState = codingSM[j].NextState(buf[i]);
if (codingState == SMModel.ERROR) {
if (codingState == SMModel.ERROR)
{
// got negative answer for this state machine, make it inactive
activeSM--;
if (activeSM == 0) {
if (activeSM == 0)
{
state = ProbingState.NotMe;
return state;
} else if (j != activeSM) {
}
else if (j != activeSM)
{
CodingStateMachine t = codingSM[activeSM];
codingSM[activeSM] = codingSM[j];
codingSM[j] = t;
}
} else if (codingState == SMModel.ITSME) {
}
else if (codingState == SMModel.ITSME)
{
state = ProbingState.FoundIt;
detectedCharset = codingSM[j].ModelName;
return state;

View file

@ -64,22 +64,29 @@ namespace UniversalDetector.Core
int codingState = SMModel.START;
int max = offset + len;
for (int i = offset; i < max; i++) {
for (int i = offset; i < max; i++)
{
codingState = codingSM.NextState(buf[i]);
if (codingState == SMModel.ERROR) {
if (codingState == SMModel.ERROR)
{
state = ProbingState.NotMe;
break;
}
if (codingState == SMModel.ITSME) {
if (codingState == SMModel.ITSME)
{
state = ProbingState.FoundIt;
break;
}
if (codingState == SMModel.START) {
if (codingState == SMModel.START)
{
int charLen = codingSM.CurrentCharLen;
if (i == offset) {
if (i == offset)
{
lastChar[1] = buf[offset];
analyser.HandleOneChar(lastChar, 0, charLen);
} else {
}
else
{
analyser.HandleOneChar(buf, i - 1, charLen);
}
}
@ -87,7 +94,8 @@ namespace UniversalDetector.Core
lastChar[0] = buf[max - 1];
if (state == ProbingState.Detecting) {
if (state == ProbingState.Detecting)
{
if (analyser.GotEnoughData() && GetConfidence() > SHORTCUT_THRESHOLD)
state = ProbingState.FoundIt;
}

View file

@ -36,7 +36,6 @@
*
* ***** END LICENSE BLOCK ***** */
using System;
/**
* General ideas of the Hebrew charset recognition
@ -217,14 +216,17 @@ namespace UniversalDetector.Core
int max = offset + len;
for (int i = offset; i < max; i++) {
for (int i = offset; i < max; i++)
{
byte b = buf[i];
// a word just ended
if (b == 0x20) {
if (b == 0x20)
{
// *(curPtr-2) was not a space so prev is not a 1 letter word
if (beforePrev != 0x20) {
if (beforePrev != 0x20)
{
// case (1) [-2:not space][-1:final letter][cur:space]
if (IsFinal(prev))
finalCharLogicalScore++;
@ -233,7 +235,9 @@ namespace UniversalDetector.Core
finalCharVisualScore++;
}
} else {
}
else
{
// case (3) [-2:space][-1:final letter][cur:not space]
if ((beforePrev == 0x20) && (IsFinal(prev)) && (b != ' '))
++finalCharVisualScore;

View file

@ -181,16 +181,22 @@ namespace UniversalDetector.Core
// to record those bytes as well and analyse the character once it
// is complete, but since a character will not make much difference,
// skipping it will simplify our logic and improve performance.
for (int i = needToSkipCharNum+offset; i < max; ) {
for (int i = needToSkipCharNum + offset; i < max;)
{
int order = GetOrder(buf, i, out charLen);
i += charLen;
if (i > max) {
if (i > max)
{
needToSkipCharNum = i - max;
lastCharOrder = -1;
} else {
if (order != -1 && lastCharOrder != -1) {
}
else
{
if (order != -1 && lastCharOrder != -1)
{
totalRel++;
if (totalRel > MAX_REL_THRESHOLD) {
if (totalRel > MAX_REL_THRESHOLD)
{
done = true;
break;
}
@ -210,7 +216,8 @@ namespace UniversalDetector.Core
// Only 2-bytes characters are of our interest
int order = (charLen == 2) ? GetOrder(buf, offset) : -1;
if (order != -1 && lastCharOrder != -1) {
if (order != -1 && lastCharOrder != -1)
{
totalRel++;
// count this sequence to its category counter
relSample[jp2CharContext[lastCharOrder, order]]++;
@ -221,7 +228,8 @@ namespace UniversalDetector.Core
public void Reset()
{
totalRel = 0;
for (int i = 0; i < CATEGORIES_NUM; i++) {
for (int i = 0; i < CATEGORIES_NUM; i++)
{
relSample[i] = 0;
needToSkipCharNum = 0;
lastCharOrder = -1;
@ -254,7 +262,8 @@ namespace UniversalDetector.Core
charLen = 1;
// return its order if it is hiragana
if (buf[offset] == HIRAGANA_FIRST_BYTE) {
if (buf[offset] == HIRAGANA_FIRST_BYTE)
{
byte low = buf[offset + 1];
if (low >= 0x9F && low <= 0xF1)
return low - 0x9F;
@ -265,7 +274,8 @@ namespace UniversalDetector.Core
protected override int GetOrder(byte[] buf, int offset)
{
// We are only interested in Hiragana
if (buf[offset] == HIRAGANA_FIRST_BYTE) {
if (buf[offset] == HIRAGANA_FIRST_BYTE)
{
byte low = buf[offset + 1];
if (low >= 0x9F && low <= 0xF1)
return low - 0x9F;
@ -292,7 +302,8 @@ namespace UniversalDetector.Core
charLen = 1;
// return its order if it is hiragana
if (high == HIRAGANA_FIRST_BYTE) {
if (high == HIRAGANA_FIRST_BYTE)
{
byte low = buf[offset + 1];
if (low >= 0xA1 && low <= 0xF3)
return low - 0xA1;
@ -303,7 +314,8 @@ namespace UniversalDetector.Core
protected override int GetOrder(byte[] buf, int offset)
{
// We are only interested in Hiragana
if (buf[offset] == HIRAGANA_FIRST_BYTE) {
if (buf[offset] == HIRAGANA_FIRST_BYTE)
{
byte low = buf[offset + 1];
if (low >= 0xA1 && low <= 0xF3)
return low - 0xA1;

View file

@ -36,7 +36,6 @@
*
* ***** END LICENSE BLOCK ***** */
using System;
namespace UniversalDetector.Core
{
@ -135,10 +134,12 @@ namespace UniversalDetector.Core
byte[] newbuf = FilterWithEnglishLetters(buf, offset, len);
byte charClass, freq;
for (int i = 0; i < newbuf.Length; i++) {
for (int i = 0; i < newbuf.Length; i++)
{
charClass = Latin1_CharToClass[newbuf[i]];
freq = Latin1ClassModel[lastCharClass * CLASS_NUM + charClass];
if (freq == 0) {
if (freq == 0)
{
state = ProbingState.NotMe;
break;
}
@ -155,13 +156,17 @@ namespace UniversalDetector.Core
float confidence = 0.0f;
int total = 0;
for (int i = 0; i < FREQ_CAT_NUM; i++) {
for (int i = 0; i < FREQ_CAT_NUM; i++)
{
total += freqCounter[i];
}
if (total <= 0) {
if (total <= 0)
{
confidence = 0.0f;
} else {
}
else
{
confidence = freqCounter[3] * 1.0f / total;
confidence -= freqCounter[1] * 20.0f / total;
}

View file

@ -36,7 +36,6 @@
*
* ***** END LICENSE BLOCK ***** */
using System;
namespace UniversalDetector.Core
{
@ -67,7 +66,8 @@ namespace UniversalDetector.Core
public override string GetCharsetName()
{
if (bestGuess == -1) {
if (bestGuess == -1)
{
GetConfidence();
if (bestGuess == -1)
bestGuess = 0;
@ -78,12 +78,16 @@ namespace UniversalDetector.Core
public override void Reset()
{
activeNum = 0;
for (int i = 0; i < probers.Length; i++) {
if (probers[i] != null) {
for (int i = 0; i < probers.Length; i++)
{
if (probers[i] != null)
{
probers[i].Reset();
isActive[i] = true;
++activeNum;
} else {
}
else
{
isActive[i] = false;
}
}
@ -100,13 +104,18 @@ namespace UniversalDetector.Core
bool keepNext = true;
int max = offset + len;
for (int i = offset; i < max; i++) {
if ((buf[i] & 0x80) != 0) {
for (int i = offset; i < max; i++)
{
if ((buf[i] & 0x80) != 0)
{
highbyteBuf[hptr++] = buf[i];
keepNext = true;
} else {
}
else
{
//if previous is highbyte, keep this even it is a ASCII
if (keepNext) {
if (keepNext)
{
highbyteBuf[hptr++] = buf[i];
keepNext = false;
}
@ -115,18 +124,23 @@ namespace UniversalDetector.Core
ProbingState st = ProbingState.NotMe;
for (int i = 0; i < probers.Length; i++) {
for (int i = 0; i < probers.Length; i++)
{
if (!isActive[i])
continue;
st = probers[i].HandleData(highbyteBuf, 0, hptr);
if (st == ProbingState.FoundIt) {
if (st == ProbingState.FoundIt)
{
bestGuess = i;
state = ProbingState.FoundIt;
break;
} else if (st == ProbingState.NotMe) {
}
else if (st == ProbingState.NotMe)
{
isActive[i] = false;
activeNum--;
if (activeNum <= 0) {
if (activeNum <= 0)
{
state = ProbingState.NotMe;
break;
}
@ -140,16 +154,23 @@ namespace UniversalDetector.Core
float bestConf = 0.0f;
float cf = 0.0f;
if (state == ProbingState.FoundIt) {
if (state == ProbingState.FoundIt)
{
return 0.99f;
} else if (state == ProbingState.NotMe) {
}
else if (state == ProbingState.NotMe)
{
return 0.01f;
} else {
for (int i = 0; i < PROBERS_NUM; i++) {
}
else
{
for (int i = 0; i < PROBERS_NUM; i++)
{
if (!isActive[i])
continue;
cf = probers[i].GetConfidence();
if (bestConf < cf) {
if (bestConf < cf)
{
bestConf = cf;
bestGuess = i;
}
@ -162,10 +183,14 @@ namespace UniversalDetector.Core
{
float cf;
GetConfidence();
for (int i = 0; i < PROBERS_NUM; i++) {
if (!isActive[i]) {
for (int i = 0; i < PROBERS_NUM; i++)
{
if (!isActive[i])
{
//Console.WriteLine(" MBCS inactive: {0} (confidence is too low).", ProberName[i]);
} else {
}
else
{
cf = probers[i].GetConfidence();
//Console.WriteLine(" MBCS {0}: [{1}]", cf, ProberName[i]);
}

View file

@ -36,7 +36,6 @@
*
* ***** END LICENSE BLOCK ***** */
using System;
namespace UniversalDetector.Core
{
@ -88,19 +87,24 @@ namespace UniversalDetector.Core
if (newBuf.Length == 0)
return state; // Nothing to see here, move on.
for (int i = 0; i < PROBERS_NUM; i++) {
for (int i = 0; i < PROBERS_NUM; i++)
{
if (!isActive[i])
continue;
st = probers[i].HandleData(newBuf, 0, newBuf.Length);
if (st == ProbingState.FoundIt) {
if (st == ProbingState.FoundIt)
{
bestGuess = i;
state = ProbingState.FoundIt;
break;
} else if (st == ProbingState.NotMe) {
}
else if (st == ProbingState.NotMe)
{
isActive[i] = false;
activeNum--;
if (activeNum <= 0) {
if (activeNum <= 0)
{
state = ProbingState.NotMe;
break;
}
@ -112,7 +116,8 @@ namespace UniversalDetector.Core
public override float GetConfidence()
{
float bestConf = 0.0f, cf;
switch (state) {
switch (state)
{
case ProbingState.FoundIt:
return 0.99f; //sure yes
case ProbingState.NotMe:
@ -138,7 +143,8 @@ namespace UniversalDetector.Core
{
float cf = GetConfidence();
// Console.WriteLine(" SBCS Group Prober --------begin status");
for (int i = 0; i < PROBERS_NUM; i++) {
for (int i = 0; i < PROBERS_NUM; i++)
{
if (isActive[i])
probers[i].DumpStatus();
//else
@ -151,12 +157,16 @@ namespace UniversalDetector.Core
public override void Reset()
{
int activeNum = 0;
for (int i = 0; i < PROBERS_NUM; i++) {
if (probers[i] != null) {
for (int i = 0; i < PROBERS_NUM; i++)
{
if (probers[i] != null)
{
probers[i].Reset();
isActive[i] = true;
activeNum++;
} else {
}
else
{
isActive[i] = false;
}
}
@ -167,7 +177,8 @@ namespace UniversalDetector.Core
public override string GetCharsetName()
{
//if we have no answer yet
if (bestGuess == -1) {
if (bestGuess == -1)
{
GetConfidence();
//no charset seems positive
if (bestGuess == -1)

View file

@ -36,7 +36,6 @@
*
* ***** END LICENSE BLOCK ***** */
using System;
namespace UniversalDetector.Core
{
@ -89,16 +88,19 @@ namespace UniversalDetector.Core
{
int max = offset + len;
for (int i = offset; i < max; i++) {
for (int i = offset; i < max; i++)
{
byte order = model.GetOrder(buf[i]);
if (order < SYMBOL_CAT_ORDER)
totalChar++;
if (order < SAMPLE_SIZE) {
if (order < SAMPLE_SIZE)
{
freqChar++;
if (lastOrder < SAMPLE_SIZE) {
if (lastOrder < SAMPLE_SIZE)
{
totalSeqs++;
if (!reversed)
++(seqCounters[model.GetPrecedence(lastOrder * SAMPLE_SIZE + order)]);
@ -109,8 +111,10 @@ namespace UniversalDetector.Core
lastOrder = order;
}
if (state == ProbingState.Detecting) {
if (totalSeqs > SB_ENOUGH_REL_THRESHOLD) {
if (state == ProbingState.Detecting)
{
if (totalSeqs > SB_ENOUGH_REL_THRESHOLD)
{
float cf = GetConfidence();
if (cf > POSITIVE_SHORTCUT_THRESHOLD)
state = ProbingState.FoundIt;
@ -139,7 +143,8 @@ namespace UniversalDetector.Core
// POSITIVE_APPROACH
float r = 0.0f;
if (totalSeqs > 0) {
if (totalSeqs > 0)
{
r = 1.0f * seqCounters[POSITIVE_CAT] / totalSeqs / model.TypicalPositiveRatio;
r = r * freqChar / totalChar;
if (r >= 1.0f)

View file

@ -69,23 +69,30 @@ namespace UniversalDetector.Core
int codingState;
int max = offset + len;
for (int i = offset; i < max; i++) {
for (int i = offset; i < max; i++)
{
codingState = codingSM.NextState(buf[i]);
if (codingState == SMModel.ERROR) {
if (codingState == SMModel.ERROR)
{
state = ProbingState.NotMe;
break;
}
if (codingState == SMModel.ITSME) {
if (codingState == SMModel.ITSME)
{
state = ProbingState.FoundIt;
break;
}
if (codingState == SMModel.START) {
if (codingState == SMModel.START)
{
int charLen = codingSM.CurrentCharLen;
if (i == offset) {
if (i == offset)
{
lastChar[1] = buf[offset];
contextAnalyser.HandleOneChar(lastChar, 2 - charLen, charLen);
distributionAnalyser.HandleOneChar(lastChar, 0, charLen);
} else {
}
else
{
contextAnalyser.HandleOneChar(buf, i + 1 - charLen, charLen);
distributionAnalyser.HandleOneChar(buf, i - 1, charLen);
}

View file

@ -36,7 +36,6 @@
*
* ***** END LICENSE BLOCK ***** */
using System;
namespace UniversalDetector.Core
{

View file

@ -36,7 +36,6 @@
*
* ***** END LICENSE BLOCK ***** */
using System;
namespace UniversalDetector.Core
{

View file

@ -51,7 +51,8 @@ namespace UniversalDetector.Core
Reset();
}
public override string GetCharsetName() {
public override string GetCharsetName()
{
return "UTF-8";
}
@ -67,21 +68,25 @@ namespace UniversalDetector.Core
int codingState = SMModel.START;
int max = offset + len;
for (int i = offset; i < max; i++) {
for (int i = offset; i < max; i++)
{
codingState = codingSM.NextState(buf[i]);
if (codingState == SMModel.ERROR) {
if (codingState == SMModel.ERROR)
{
state = ProbingState.NotMe;
break;
}
if (codingState == SMModel.ITSME) {
if (codingState == SMModel.ITSME)
{
state = ProbingState.FoundIt;
break;
}
if (codingState == SMModel.START) {
if (codingState == SMModel.START)
{
if (codingSM.CurrentCharLen >= 2)
numOfMBChar++;
}
@ -98,11 +103,14 @@ namespace UniversalDetector.Core
float unlike = 0.99f;
float confidence = 0.0f;
if (numOfMBChar < 6) {
if (numOfMBChar < 6)
{
for (int i = 0; i < numOfMBChar; i++)
unlike *= ONE_CHAR_PROB;
confidence = 1.0f - unlike;
} else {
}
else
{
confidence = 0.99f;
}
return confidence;

View file

@ -70,7 +70,8 @@ namespace UniversalDetector.Core
protected CharsetProber escCharsetProber;
protected string detectedCharset;
public UniversalDetector(int languageFilter) {
public UniversalDetector(int languageFilter)
{
this.start = true;
this.inputState = InputState.PureASCII;
this.lastChar = 0x00;
@ -80,7 +81,8 @@ namespace UniversalDetector.Core
public virtual void Feed(byte[] buf, int offset, int len)
{
if (done) {
if (done)
{
return;
}
@ -88,10 +90,13 @@ namespace UniversalDetector.Core
gotData = true;
// If the data starts with BOM, we know it is UTF
if (start) {
if (start)
{
start = false;
if (len > 3) {
switch (buf[0]) {
if (len > 3)
{
switch (buf[0])
{
case 0xEF:
if (0xBB == buf[1] && 0xBF == buf[2])
detectedCharset = "UTF-8";
@ -118,22 +123,27 @@ namespace UniversalDetector.Core
break;
} // switch
}
if (detectedCharset != null) {
if (detectedCharset != null)
{
done = true;
return;
}
}
for (int i = 0; i < len; i++) {
for (int i = 0; i < len; i++)
{
// other than 0xa0, if every other character is ascii, the page is ascii
if ((buf[i] & 0x80) != 0 && buf[i] != 0xA0) {
if ((buf[i] & 0x80) != 0 && buf[i] != 0xA0)
{
// we got a non-ascii byte (high-byte)
if (inputState != InputState.Highbyte) {
if (inputState != InputState.Highbyte)
{
inputState = InputState.Highbyte;
// kill EscCharsetProber if it is active
if (escCharsetProber != null) {
if (escCharsetProber != null)
{
escCharsetProber = null;
}
@ -145,9 +155,12 @@ namespace UniversalDetector.Core
if (charsetProbers[2] == null)
charsetProbers[2] = new Latin1Prober();
}
} else {
}
else
{
if (inputState == InputState.PureASCII &&
(buf[i] == 0x33 || (buf[i] == 0x7B && lastChar == 0x7E))) {
(buf[i] == 0x33 || (buf[i] == 0x7B && lastChar == 0x7E)))
{
// found escape character or HZ "~{"
inputState = InputState.EscASCII;
}
@ -157,25 +170,31 @@ namespace UniversalDetector.Core
ProbingState st = ProbingState.NotMe;
switch (inputState) {
switch (inputState)
{
case InputState.EscASCII:
if (escCharsetProber == null) {
if (escCharsetProber == null)
{
escCharsetProber = new EscCharsetProber();
}
st = escCharsetProber.HandleData(buf, offset, len);
if (st == ProbingState.FoundIt) {
if (st == ProbingState.FoundIt)
{
done = true;
detectedCharset = escCharsetProber.GetCharsetName();
}
break;
case InputState.Highbyte:
for (int i = 0; i < PROBERS_NUM; i++) {
if (charsetProbers[i] != null) {
for (int i = 0; i < PROBERS_NUM; i++)
{
if (charsetProbers[i] != null)
{
st = charsetProbers[i].HandleData(buf, offset, len);
#if DEBUG
charsetProbers[i].DumpStatus();
#endif
if (st == ProbingState.FoundIt) {
if (st == ProbingState.FoundIt)
{
done = true;
detectedCharset = charsetProbers[i].GetCharsetName();
return;
@ -195,38 +214,47 @@ namespace UniversalDetector.Core
/// </summary>
public virtual void DataEnd()
{
if (!gotData) {
if (!gotData)
{
// we haven't got any data yet, return immediately
// caller program sometimes call DataEnd before anything has
// been sent to detector
return;
}
if (detectedCharset != null) {
if (detectedCharset != null)
{
done = true;
Report(detectedCharset, 1.0f);
return;
}
if (inputState == InputState.Highbyte) {
if (inputState == InputState.Highbyte)
{
float proberConfidence = 0.0f;
float maxProberConfidence = 0.0f;
int maxProber = 0;
for (int i = 0; i < PROBERS_NUM; i++) {
if (charsetProbers[i] != null) {
for (int i = 0; i < PROBERS_NUM; i++)
{
if (charsetProbers[i] != null)
{
proberConfidence = charsetProbers[i].GetConfidence();
if (proberConfidence > maxProberConfidence) {
if (proberConfidence > maxProberConfidence)
{
maxProberConfidence = proberConfidence;
maxProber = i;
}
}
}
if (maxProberConfidence > MINIMUM_THRESHOLD) {
if (maxProberConfidence > MINIMUM_THRESHOLD)
{
Report(charsetProbers[maxProber].GetCharsetName(), maxProberConfidence);
}
} else if (inputState == InputState.PureASCII) {
}
else if (inputState == InputState.PureASCII)
{
Report("ASCII", 1.0f);
}
}