NOTE: the source of this patch had been flattened and stripped of angle-bracket markup.
Generic type arguments, "<br />", "&lt;" / "&gt;" and the tags in cue 8 of unit.srt have
been restored. The XML elements of the .csproj hunks and the HTML tags in cues 4, 6 and 7
of unit.srt could not be recovered and are left as found. Blob ids on the "index" lines
are those of the original commit.

diff --git a/MediaBrowser.MediaEncoding/Subtitles/SrtParser.cs b/MediaBrowser.MediaEncoding/Subtitles/SrtParser.cs
index 410c0bbdd6..09bc52df41 100644
--- a/MediaBrowser.MediaEncoding/Subtitles/SrtParser.cs
+++ b/MediaBrowser.MediaEncoding/Subtitles/SrtParser.cs
@@ -1,17 +1,107 @@
 using System;
 using System.Collections.Generic;
+using System.Globalization;
 using System.IO;
-using System.Linq;
-using System.Text;
-using System.Threading.Tasks;
+using System.Text.RegularExpressions;
 
 namespace MediaBrowser.MediaEncoding.Subtitles
 {
+    /// <summary>
+    /// Parses SubRip (.srt) subtitle data into a <see cref="SubtitleTrackInfo"/>.
+    /// </summary>
     public class SrtParser : ISubtitleParser
     {
+        private readonly CultureInfo _usCulture = new CultureInfo("en-US");
+
+        /// <summary>
+        /// Reads every cue from the stream: an id line, a "start --> end" timing line,
+        /// then text lines up to the next empty line.
+        /// </summary>
+        /// <param name="stream">SRT data; disposed together with the reader.</param>
+        /// <returns>The cues in file order; cues without a usable timing line are skipped.</returns>
         public SubtitleTrackInfo Parse(Stream stream)
         {
-            throw new NotImplementedException();
+            var trackInfo = new SubtitleTrackInfo();
+            using (var reader = new StreamReader(stream))
+            {
+                string line;
+                while ((line = reader.ReadLine()) != null)
+                {
+                    if (string.IsNullOrWhiteSpace(line))
+                    {
+                        continue;
+                    }
+
+                    var subEvent = new SubtitleTrackEvent { Id = line };
+
+                    // A truncated file yields null here; treat it as an empty timing line
+                    // instead of passing null to Regex.Split.
+                    var time = Regex.Split(reader.ReadLine() ?? string.Empty, @"[\t ]*-->[\t ]*");
+                    var multiline = ReadCueText(reader);
+                    if (time.Length < 2)
+                    {
+                        // No "-->" separator: the cue has no end time, so drop it.
+                        continue;
+                    }
+
+                    subEvent.StartPositionTicks = GetTicks(time[0]);
+
+                    // Cut trailing display coordinates such as "X1:000 X2:000 Y1:050 Y2:100".
+                    var endTime = time[1];
+                    var idx = endTime.IndexOf(" ", StringComparison.Ordinal);
+                    if (idx > 0)
+                    {
+                        endTime = endTime.Substring(0, idx);
+                    }
+                    subEvent.EndPositionTicks = GetTicks(endTime);
+
+                    subEvent.Text = FormatText(string.Join(@"\N", multiline));
+                    trackInfo.TrackEvents.Add(subEvent);
+                }
+            }
+            return trackInfo;
+        }
+
+        /// <summary>Collects the lines of one cue, stopping at the first empty line or at end of input.</summary>
+        private static List<string> ReadCueText(TextReader reader)
+        {
+            var lines = new List<string>();
+            string line;
+            while (!string.IsNullOrEmpty(line = reader.ReadLine()))
+            {
+                lines.Add(line);
+            }
+            return lines;
+        }
+
+        /// <summary>
+        /// Strips SSA override blocks, HTML-escapes the text, re-enables the supported
+        /// tags (font, b, u, i, s) and converts "\N" line breaks to "&lt;br /&gt;".
+        /// </summary>
+        private static string FormatText(string text)
+        {
+            // Remove SSA override blocks such as {\pos(142,120)\b1}.
+            text = Regex.Replace(text, @"\{(\\[\w]+\(?([\w\d]+,?)+\)?)+\}", string.Empty, RegexOptions.IgnoreCase);
+            // Escape every angle bracket first ...
+            text = text.Replace("<", "&lt;").Replace(">", "&gt;");
+            // ... then turn the whitelisted tags (with their attributes) back into real tags.
+            text = Regex.Replace(text, "&lt;(\\/?(font|b|u|i|s))((\\s+(\\w|\\w[\\w\\-]*\\w)(\\s*=\\s*(?:\\\".*?\\\"|'.*?'|[^'\\\">\\s]+))?)+\\s*|\\s*)(\\/?)&gt;", "<$1$3$7>", RegexOptions.IgnoreCase);
+            // IgnoreCase makes this match both "\N" and "\n".
+            return Regex.Replace(text, @"\\N", "<br />", RegexOptions.IgnoreCase);
+        }
+
+        /// <summary>
+        /// Converts "hh:mm:ss.fff" or "hh:mm:ss,fff" to ticks; returns 0 when neither format matches.
+        /// </summary>
+        private long GetTicks(string time)
+        {
+            TimeSpan span;
+            if (TimeSpan.TryParseExact(time, @"hh\:mm\:ss\.fff", _usCulture, out span)
+                || TimeSpan.TryParseExact(time, @"hh\:mm\:ss\,fff", _usCulture, out span))
+            {
+                return span.Ticks;
+            }
+            return 0;
         }
     }
 }
diff --git a/MediaBrowser.MediaEncoding/Subtitles/SsaParser.cs b/MediaBrowser.MediaEncoding/Subtitles/SsaParser.cs
index ca7e58371c..996ef1c4e2 100644
--- a/MediaBrowser.MediaEncoding/Subtitles/SsaParser.cs
+++ b/MediaBrowser.MediaEncoding/Subtitles/SsaParser.cs
@@ -1,17 +1,117 @@
 using System;
 using System.Collections.Generic;
+using System.Globalization;
 using System.IO;
 using System.Linq;
-using System.Text;
-using System.Threading.Tasks;
+using System.Text.RegularExpressions;
 
 namespace MediaBrowser.MediaEncoding.Subtitles
 {
+    /// <summary>
+    /// Parses the [Events] section of SubStation Alpha (.ssa) subtitle data into a <see cref="SubtitleTrackInfo"/>.
+    /// </summary>
     public class SsaParser : ISubtitleParser
     {
+        private const string DialoguePrefix = "Dialogue:";
+
+        private readonly CultureInfo _usCulture = new CultureInfo("en-US");
+
+        /// <summary>
+        /// Reads the "Format:" header that follows "[Events]" and converts each
+        /// "Dialogue:" line up to the next section header into an event.
+        /// </summary>
+        /// <param name="stream">SSA data; disposed together with the reader.</param>
+        /// <returns>The events in file order, with ids numbered from 1; empty when there is no usable [Events] section.</returns>
         public SubtitleTrackInfo Parse(Stream stream)
         {
-            throw new NotImplementedException();
+            var trackInfo = new SubtitleTrackInfo();
+            var eventIndex = 1;
+            using (var reader = new StreamReader(stream))
+            {
+                string line;
+
+                // Stop at end of input as well: ReadLine() keeps returning null there,
+                // so a loop that only compares against "[Events]" would never terminate.
+                while ((line = reader.ReadLine()) != null && line != "[Events]")
+                {
+                }
+
+                var headers = line == null ? null : ParseFieldHeaders(reader.ReadLine());
+                if (headers == null)
+                {
+                    return trackInfo;
+                }
+
+                while ((line = reader.ReadLine()) != null)
+                {
+                    if (string.IsNullOrWhiteSpace(line))
+                    {
+                        continue;
+                    }
+                    if (line.StartsWith("[", StringComparison.Ordinal))
+                    {
+                        break;
+                    }
+                    if (!line.StartsWith(DialoguePrefix, StringComparison.Ordinal))
+                    {
+                        // Only "Dialogue:" lines are turned into events; anything else is skipped.
+                        continue;
+                    }
+
+                    var sections = line.Substring(DialoguePrefix.Length).TrimStart().Split(',');
+                    if (sections.Length <= headers.Values.Max())
+                    {
+                        continue;
+                    }
+
+                    var subEvent = new SubtitleTrackEvent { Id = eventIndex.ToString(_usCulture) };
+                    eventIndex++;
+
+                    subEvent.StartPositionTicks = GetTicks(sections[headers["Start"]]);
+                    subEvent.EndPositionTicks = GetTicks(sections[headers["End"]]);
+
+                    // The text may itself contain commas, so re-join everything from the Text column onwards.
+                    subEvent.Text = string.Join(",", sections.Skip(headers["Text"]));
+                    subEvent.Text = Regex.Replace(subEvent.Text, @"\{(\\[\w]+\(?([\w\d]+,?)+\)?)+\}", string.Empty, RegexOptions.IgnoreCase);
+                    subEvent.Text = Regex.Replace(subEvent.Text, @"\\N", "<br />", RegexOptions.IgnoreCase);
+
+                    trackInfo.TrackEvents.Add(subEvent);
+                }
+            }
+            return trackInfo;
+        }
+
+        /// <summary>Converts "h:mm:ss.ff" to ticks; returns 0 when the value does not match.</summary>
+        private long GetTicks(string time)
+        {
+            TimeSpan span;
+            return TimeSpan.TryParseExact(time, @"h\:mm\:ss\.ff", _usCulture, out span)
+                ? span.Ticks
+                : 0;
+        }
+
+        /// <summary>
+        /// Maps the Start, End and Text field names of a "Format:" line to their column index.
+        /// </summary>
+        /// <returns>The index map, or null when the line is missing, is not a Format line, or lacks one of the three fields.</returns>
+        private Dictionary<string, int> ParseFieldHeaders(string line)
+        {
+            const string formatPrefix = "Format:";
+            if (line == null || !line.StartsWith(formatPrefix, StringComparison.Ordinal))
+            {
+                return null;
+            }
+
+            var fields = line.Substring(formatPrefix.Length).Split(',').Select(x => x.Trim()).ToList();
+            var result = new Dictionary<string, int>
+            {
+                { "Start", fields.IndexOf("Start") },
+                { "End", fields.IndexOf("End") },
+                { "Text", fields.IndexOf("Text") }
+            };
+
+            // IndexOf returns -1 for a missing field, which would later be used as an array index.
+            return result.Values.Any(i => i < 0) ? null : result;
         }
     }
 }
diff --git a/MediaBrowser.Tests/MediaBrowser.Tests.csproj b/MediaBrowser.Tests/MediaBrowser.Tests.csproj
index 6ae7544b85..dad3677f2b 100644
--- a/MediaBrowser.Tests/MediaBrowser.Tests.csproj
+++ b/MediaBrowser.Tests/MediaBrowser.Tests.csproj
@@ -50,6 +50,8 @@
+
+
@@ -61,6 +63,10 @@
 {17e1f4e6-8abd-4fe5-9ecf-43d4b6087ba2}
 MediaBrowser.Controller
+
+ {0BD82FA6-EB8A-4452-8AF5-74F9C3849451}
+ MediaBrowser.MediaEncoding
+
 {7eeeb4bb-f3e8-48fc-b4c5-70f0fff8329b}
 MediaBrowser.Model
@@ -77,6 +83,14 @@
+
+
+ Always
+
+
+ Always
+
+
diff --git a/MediaBrowser.Tests/MediaEncoding/Subtitles/SrtParserTests.cs b/MediaBrowser.Tests/MediaEncoding/Subtitles/SrtParserTests.cs
new file mode 100644
index 0000000000..0d86fbdcd8
--- /dev/null
+++ b/MediaBrowser.Tests/MediaEncoding/Subtitles/SrtParserTests.cs
@@ -0,0 +1,108 @@
+using System;
+using System.Collections.Generic;
+using System.IO;
+using MediaBrowser.MediaEncoding.Subtitles;
+using Microsoft.VisualStudio.TestTools.UnitTesting;
+
+namespace MediaBrowser.Tests.MediaEncoding.Subtitles {
+
+    [TestClass]
+    public class SrtParserTests {
+
+        [TestMethod]
+        public void TestParse() {
+
+            var expectedSubs =
+                new SubtitleTrackInfo {
+                    TrackEvents = new List<SubtitleTrackEvent> {
+                        new SubtitleTrackEvent {
+                            Id = "1",
+                            StartPositionTicks = 24000000,
+                            EndPositionTicks = 52000000,
+                            Text =
+                                "[Background Music Playing]"
+                        },
+                        new SubtitleTrackEvent {
+                            Id = "2",
+                            StartPositionTicks = 157120000,
+                            EndPositionTicks = 173990000,
+                            Text =
+                                "Oh my god, Watch out!<br />It's coming!!"
+                        },
+                        new SubtitleTrackEvent {
+                            Id = "3",
+                            StartPositionTicks = 257120000,
+                            EndPositionTicks = 303990000,
+                            Text = "[Bird noises]"
+                        },
+                        new SubtitleTrackEvent {
+                            Id = "4",
+                            StartPositionTicks = 310000000,
+                            EndPositionTicks = 319990000,
+                            Text =
+                                "This text is RED and has not been positioned."
+                        },
+                        new SubtitleTrackEvent {
+                            Id = "5",
+                            StartPositionTicks = 320000000,
+                            EndPositionTicks = 329990000,
+                            Text =
+                                "This is a<br />new line, as is<br />this"
+                        },
+                        new SubtitleTrackEvent {
+                            Id = "6",
+                            StartPositionTicks = 330000000,
+                            EndPositionTicks = 339990000,
+                            Text =
+                                "This contains nested bold, italic, underline and strike-through HTML tags"
+                        },
+                        new SubtitleTrackEvent {
+                            Id = "7",
+                            StartPositionTicks = 340000000,
+                            EndPositionTicks = 349990000,
+                            Text =
+                                "Unclosed but supported HTML tags are left in,  SSA italics aren't"
+                        },
+                        new SubtitleTrackEvent {
+                            Id = "8",
+                            StartPositionTicks = 350000000,
+                            EndPositionTicks = 359990000,
+                            Text =
+                                "&lt;ggg&gt;Unsupported&lt;/ggg&gt; HTML tags are escaped and left in, even if &lt;hhh&gt;not closed."
+                        },
+                        new SubtitleTrackEvent {
+                            Id = "9",
+                            StartPositionTicks = 360000000,
+                            EndPositionTicks = 369990000,
+                            Text =
+                                "Multiple SSA tags are stripped"
+                        },
+                        new SubtitleTrackEvent {
+                            Id = "10",
+                            StartPositionTicks = 370000000,
+                            EndPositionTicks = 379990000,
+                            Text =
+                                "Greater than (&lt;) and less than (&gt;) are shown"
+                        }
+                    }
+                };
+
+            var sut = new SrtParser();
+
+            var stream = File.OpenRead(@"MediaEncoding\Subtitles\TestSubtitles\unit.srt");
+
+            var result = sut.Parse(stream);
+
+            Assert.IsNotNull(result);
+            Assert.AreEqual(expectedSubs.TrackEvents.Count,result.TrackEvents.Count);
+            for (int i = 0; i < expectedSubs.TrackEvents.Count; i++)
+            {
+                Assert.AreEqual(expectedSubs.TrackEvents[i].Id, result.TrackEvents[i].Id);
+                Assert.AreEqual(expectedSubs.TrackEvents[i].StartPositionTicks, result.TrackEvents[i].StartPositionTicks);
+                Assert.AreEqual(expectedSubs.TrackEvents[i].EndPositionTicks, result.TrackEvents[i].EndPositionTicks);
+                Assert.AreEqual(expectedSubs.TrackEvents[i].Text, result.TrackEvents[i].Text);
+            }
+
+        }
+    }
+}
\ No newline at end of file
diff --git a/MediaBrowser.Tests/MediaEncoding/Subtitles/SsaParserTests.cs b/MediaBrowser.Tests/MediaEncoding/Subtitles/SsaParserTests.cs
new file mode 100644
index 0000000000..51dc7f959e
--- /dev/null
+++ b/MediaBrowser.Tests/MediaEncoding/Subtitles/SsaParserTests.cs
@@ -0,0 +1,59 @@
+using System;
+using System.Collections.Generic;
+using System.IO;
+using MediaBrowser.MediaEncoding.Subtitles;
+using Microsoft.VisualStudio.TestTools.UnitTesting;
+
+namespace MediaBrowser.Tests.MediaEncoding.Subtitles {
+
+    [TestClass]
+    public class SsaParserTests {
+
+        [TestMethod]
+        public void TestParse() {
+
+            var expectedSubs =
+                new SubtitleTrackInfo {
+                    TrackEvents = new List<SubtitleTrackEvent> {
+                        new SubtitleTrackEvent {
+                            Id = "1",
+                            StartPositionTicks = 24000000,
+                            EndPositionTicks = 72000000,
+                            Text =
+                                "Senator, we're <br />making our final <br />approach into Coruscant."
+                        },
+                        new SubtitleTrackEvent {
+                            Id = "2",
+                            StartPositionTicks = 97100000,
+                            EndPositionTicks = 133900000,
+                            Text =
+                                "Very good, Lieutenant."
+                        },
+                        new SubtitleTrackEvent {
+                            Id = "3",
+                            StartPositionTicks = 150400000,
+                            EndPositionTicks = 180400000,
+                            Text = "It's <br />a <br />trap!"
+                        }
+                    }
+                };
+
+            var sut = new SsaParser();
+
+            var stream = File.OpenRead(@"MediaEncoding\Subtitles\TestSubtitles\data.ssa");
+
+            var result = sut.Parse(stream);
+
+            Assert.IsNotNull(result);
+            Assert.AreEqual(expectedSubs.TrackEvents.Count,result.TrackEvents.Count);
+            for (int i = 0; i < expectedSubs.TrackEvents.Count; i++)
+            {
+                Assert.AreEqual(expectedSubs.TrackEvents[i].Id, result.TrackEvents[i].Id);
+                Assert.AreEqual(expectedSubs.TrackEvents[i].StartPositionTicks, result.TrackEvents[i].StartPositionTicks);
+                Assert.AreEqual(expectedSubs.TrackEvents[i].EndPositionTicks, result.TrackEvents[i].EndPositionTicks);
+                Assert.AreEqual(expectedSubs.TrackEvents[i].Text, result.TrackEvents[i].Text);
+            }
+
+        }
+    }
+}
\ No newline at end of file
diff --git a/MediaBrowser.Tests/MediaEncoding/Subtitles/TestSubtitles/data.ssa b/MediaBrowser.Tests/MediaEncoding/Subtitles/TestSubtitles/data.ssa
new file mode 100644
index 0000000000..3114a844a5
--- /dev/null
+++ b/MediaBrowser.Tests/MediaEncoding/Subtitles/TestSubtitles/data.ssa
@@ -0,0 +1,23 @@
+[Script Info]
+Title: Testing subtitles for the SSA Format
+
+[V4 Styles]
+Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, TertiaryColour, BackColour, Bold, Italic, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, AlphaLevel, Encoding
+Style: Default,Arial,20,65535,65535,65535,-2147483640,-1,0,1,3,0,2,30,30,30,0,0
+Style: Titre_episode,Akbar,140,15724527,65535,65535,986895,-1,0,1,1,0,3,30,30,30,0,0
+Style: Wolf main,Wolf_Rain,56,15724527,15724527,15724527,4144959,0,0,1,1,2,2,5,5,30,0,0
+
+
+
+[Events]
+Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
+Dialogue: 0,0:00:02.40,0:00:07.20,Default,,0000,0000,0000,,Senator, {\kf89}we're \Nmaking our final \napproach into Coruscant.
+Dialogue: 0,0:00:09.71,0:00:13.39,Default,,0000,0000,0000,,{\pos(400,570)}Very good, Lieutenant.
+Dialogue: 0,0:00:15.04,0:00:18.04,Default,,0000,0000,0000,,It's \Na \ntrap!
+
+
+[Pictures]
+This section will be ignored
+
+[Fonts]
+This section will be ignored
\ No newline at end of file
diff --git a/MediaBrowser.Tests/MediaEncoding/Subtitles/TestSubtitles/unit.srt b/MediaBrowser.Tests/MediaEncoding/Subtitles/TestSubtitles/unit.srt
new file mode 100644
index 0000000000..5f6e5636ec
--- /dev/null
+++ b/MediaBrowser.Tests/MediaEncoding/Subtitles/TestSubtitles/unit.srt
@@ -0,0 +1,44 @@
+
+
+1
+00:00:02.400 --> 00:00:05.200
+[Background Music Playing]
+
+2
+00:00:15,712 --> 00:00:17,399 X1:000 X2:000 Y1:050 Y2:100
+Oh my god, Watch out!
+It's coming!!
+
+3
+00:00:25,712 --> 00:00:30,399
+[Bird noises]
+
+4
+00:00:31,000 --> 00:00:31,999
+This text is RED and has not been {\pos(142,120)}positioned.
+
+5
+00:00:32,000 --> 00:00:32,999
+This is a\nnew line, as is\Nthis
+
+6
+00:00:33,000 --> 00:00:33,999
+This contains nested bold, italic, underline and strike-through HTML tags
+
+7
+00:00:34,000 --> 00:00:34,999
+Unclosed but supported HTML tags are left in, {\i1} SSA italics aren't
+
+8
+00:00:35,000 --> 00:00:35,999
+<ggg>Unsupported</ggg> HTML tags are escaped and left in, even if <hhh>not closed.
+
+9
+00:00:36,000 --> 00:00:36,999
+Multiple {\pos(142,120)\b1}SSA tags are stripped
+
+10
+00:00:37,000 --> 00:00:37,999
+Greater than (<) and less than (>) are shown
+
+