mirror of
https://github.com/jellyfin/jellyfin.git
synced 2024-07-25 15:08:37 +02:00
Reduce number of TMDB lookups if filenames have punctuation chars
Previously, TMDB would be queried with the raw name, which would always fail, and then be queried again with the cleaned name. Now non-word characters are always cleaned out first. If the first query fails, the search is retried with more aggressive cleaning.
This commit is contained in:
parent
d016a9772f
commit
8bd356ab20
|
@ -5,6 +5,7 @@ using System.Linq;
|
||||||
using System.Net;
|
using System.Net;
|
||||||
using System.Threading;
|
using System.Threading;
|
||||||
using System.Threading.Tasks;
|
using System.Threading.Tasks;
|
||||||
|
using System.Text.RegularExpressions;
|
||||||
using MediaBrowser.Common.Net;
|
using MediaBrowser.Common.Net;
|
||||||
using MediaBrowser.Controller.Library;
|
using MediaBrowser.Controller.Library;
|
||||||
using MediaBrowser.Controller.Providers;
|
using MediaBrowser.Controller.Providers;
|
||||||
|
@ -19,6 +20,20 @@ namespace MediaBrowser.Providers.Tmdb.Movies
|
||||||
public class TmdbSearch
|
public class TmdbSearch
|
||||||
{
|
{
|
||||||
private static readonly CultureInfo EnUs = new CultureInfo("en-US");
|
private static readonly CultureInfo EnUs = new CultureInfo("en-US");
|
||||||
|
|
||||||
|
private static readonly Regex cleanEnclosed = new Regex(@"\p{Ps}.*\p{Pe}", RegexOptions.Compiled);
|
||||||
|
private static readonly Regex cleanNonWord = new Regex(@"[\W_]+", RegexOptions.Compiled);
|
||||||
|
private static readonly Regex cleanStopWords = new Regex(@"\b( # Start at word boundary
|
||||||
|
19[0-9]{2}|20[0-9]{2}| # 1900-2099
|
||||||
|
S[0-9]{2}| # Season
|
||||||
|
E[0-9]{2}| # Episode
|
||||||
|
(2160|1080|720|576|480)[ip]?| # Resolution
|
||||||
|
[xh]?264| # Encoding
|
||||||
|
(web|dvd|bd|hdtv|hd)rip| # *Rip
|
||||||
|
web|hdtv|mp4|bluray|ktr|dl|single|imageset|internal|doku|dubbed|retail|xxx|flac
|
||||||
|
).* # Match rest of string",
|
||||||
|
RegexOptions.Compiled | RegexOptions.IgnorePatternWhitespace | RegexOptions.IgnoreCase);
|
||||||
|
|
||||||
private const string Search3 = TmdbUtils.BaseTmdbApiUrl + @"3/search/{3}?api_key={1}&query={0}&language={2}";
|
private const string Search3 = TmdbUtils.BaseTmdbApiUrl + @"3/search/{3}?api_key={1}&query={0}&language={2}";
|
||||||
|
|
||||||
private readonly ILogger _logger;
|
private readonly ILogger _logger;
|
||||||
|
@ -61,19 +76,18 @@ namespace MediaBrowser.Providers.Tmdb.Movies
|
||||||
|
|
||||||
var tmdbImageUrl = tmdbSettings.images.GetImageUrl("original");
|
var tmdbImageUrl = tmdbSettings.images.GetImageUrl("original");
|
||||||
|
|
||||||
if (!string.IsNullOrWhiteSpace(name))
|
// Does this mean we are reparsing already parsed ItemLookupInfo?
|
||||||
{
|
var parsedName = _libraryManager.ParseName(name);
|
||||||
var parsedName = _libraryManager.ParseName(name);
|
var yearInName = parsedName.Year;
|
||||||
var yearInName = parsedName.Year;
|
name = parsedName.Name;
|
||||||
name = parsedName.Name;
|
year = year ?? yearInName;
|
||||||
year = year ?? yearInName;
|
|
||||||
}
|
|
||||||
|
|
||||||
_logger.LogInformation("MovieDbProvider: Finding id for item: " + name);
|
_logger.LogInformation("TmdbSearch: Finding id for item: {0} ({1})", name, year);
|
||||||
var language = idInfo.MetadataLanguage.ToLowerInvariant();
|
var language = idInfo.MetadataLanguage.ToLowerInvariant();
|
||||||
|
|
||||||
//nope - search for it
|
// Replace sequences of non-word characters with space
|
||||||
//var searchType = item is BoxSet ? "collection" : "movie";
|
// TMDB expects a space-separated list of words; make sure that is the case
|
||||||
|
name = cleanNonWord.Replace(name, " ").Trim();
|
||||||
|
|
||||||
var results = await GetSearchResults(name, searchType, year, language, tmdbImageUrl, cancellationToken).ConfigureAwait(false);
|
var results = await GetSearchResults(name, searchType, year, language, tmdbImageUrl, cancellationToken).ConfigureAwait(false);
|
||||||
|
|
||||||
|
@ -86,36 +100,35 @@ namespace MediaBrowser.Providers.Tmdb.Movies
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Ideally retrying alternatives should be done outside the search
|
||||||
|
// provider so that the retry logic can be common for all search
|
||||||
|
// providers
|
||||||
if (results.Count == 0)
|
if (results.Count == 0)
|
||||||
{
|
{
|
||||||
// try with dot and _ turned to space
|
name = parsedName.Name;
|
||||||
var originalName = name;
|
|
||||||
|
|
||||||
name = name.Replace(",", " ");
|
// Remove things enclosed in []{}() etc
|
||||||
name = name.Replace(".", " ");
|
name = cleanEnclosed.Replace(name, string.Empty);
|
||||||
name = name.Replace("_", " ");
|
|
||||||
name = name.Replace("-", " ");
|
|
||||||
name = name.Replace("!", " ");
|
|
||||||
name = name.Replace("?", " ");
|
|
||||||
|
|
||||||
var parenthIndex = name.IndexOf('(');
|
// Replace sequences of non-word characters with space
|
||||||
if (parenthIndex != -1)
|
name = cleanNonWord.Replace(name, " ");
|
||||||
{
|
|
||||||
name = name.Substring(0, parenthIndex);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
// Clean based on common stop words / tokens
|
||||||
|
name = cleanStopWords.Replace(name, string.Empty);
|
||||||
|
|
||||||
|
// Trim whitespace
|
||||||
name = name.Trim();
|
name = name.Trim();
|
||||||
|
|
||||||
// Search again if the new name is different
|
// Search again if the new name is different
|
||||||
if (!string.Equals(name, originalName))
|
if (!string.Equals(name, parsedName.Name) && !string.IsNullOrWhiteSpace(name))
|
||||||
{
|
{
|
||||||
|
_logger.LogInformation("TmdbSearch: Finding id for item: {0} ({1})", name, year);
|
||||||
results = await GetSearchResults(name, searchType, year, language, tmdbImageUrl, cancellationToken).ConfigureAwait(false);
|
results = await GetSearchResults(name, searchType, year, language, tmdbImageUrl, cancellationToken).ConfigureAwait(false);
|
||||||
|
|
||||||
if (results.Count == 0 && !string.Equals(language, "en", StringComparison.OrdinalIgnoreCase))
|
if (results.Count == 0 && !string.Equals(language, "en", StringComparison.OrdinalIgnoreCase))
|
||||||
{
|
{
|
||||||
//one more time, in english
|
//one more time, in english
|
||||||
results = await GetSearchResults(name, searchType, year, "en", tmdbImageUrl, cancellationToken).ConfigureAwait(false);
|
results = await GetSearchResults(name, searchType, year, "en", tmdbImageUrl, cancellationToken).ConfigureAwait(false);
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue