// Language-code normalisation helpers from lang_codes.go (package parsers),
// introduced by the patch "lang code parsing" (commit d4e5e65, Timon Ringwald,
// 2022-08-15). The file depends on the standard library "strings" package.
//
// The same patch also changes regexTorrentParser in torrent_parsing.go so that
// the Languages and Subtitles fields are filled with ParseLanguages(languages)
// and ParseLanguages(subtitles) instead of the raw slices.

// langSynonyms converts all irregular lang codes to ISO 639-1.
//
// Keys are stored in upper case. Three-letter (and longer) keys are language
// codes; two-letter keys are country codes used as a stand-in for a language.
var langSynonyms = map[string]string{
	// english
	"ENG": "en", // Erai-Raws
	"US":  "en", // Erai-Raws

	// portuguese
	"POR-BR": "pt", // Erai-Raws
	"POR":    "pt", // Erai-Raws
	"BR":     "pt", // Erai-Raws

	// spanish
	"SPA-LA": "es", // Erai-Raws
	"SPA":    "es", // Erai-Raws
	"MX":     "es", // Erai-Raws

	// arabic
	"ARA": "ar", // Erai-Raws
	"SA":  "ar", // Erai-Raws

	// french
	"FRE": "fr", // Erai-Raws

	// german
	"GER": "de", // Erai-Raws

	// italian
	"ITA": "it", // Erai-Raws

	// russian
	"RUS": "ru", // Erai-Raws

	// japanese
	"JPN": "ja", // Erai-Raws
	"JP":  "ja", // Erai-Raws

	// polish
	"POL": "pl", // Erai-Raws

	// dutch
	"DUT": "nl", // Erai-Raws

	// norwegian
	"NOB": "no", // Erai-Raws

	// turkish
	"TUR": "tr", // Erai-Raws

	// swedish
	"SWE": "sv", // Erai-Raws
	"SE":  "sv", // Erai-Raws

	// greek
	"GRE": "el", // Erai-Raws
	"GR":  "el", // Erai-Raws

	// hebrew
	"HEB": "he", // Erai-Raws
	"IL":  "he", // Erai-Raws

	// romanian
	"RUM": "ro", // Erai-Raws

	// indonesian
	"IND": "id", // Erai-Raws

	// thai
	"THA": "th", // Erai-Raws

	// korean
	"KOR": "ko", // Erai-Raws
	"KR":  "ko", // Erai-Raws

	// danish
	"DAN": "da", // Erai-Raws
	"DK":  "da", // Erai-Raws

	// chinese (simplified & traditional)
	"CHI": "zh", // Erai-Raws
	"CN":  "zh", // Erai-Raws

	// bulgarian
	"BUL": "bg", // Erai-Raws

	// vietnamese
	"VIE": "vi", // Erai-Raws
	"VN":  "vi", // Erai-Raws

	// hindi
	"HIN": "hi", // Erai-Raws
	"IN":  "hi", // Erai-Raws

	// tamil
	// NOTE(review): "TEL" is the ISO 639-2 code for Telugu (ISO 639-1 "te"),
	// not Tamil ("TAM"). The mapping is kept as it was; confirm it against
	// real release names before changing it.
	"TEL": "ta", // Erai-Raws
	"LK":  "ta", // Erai-Raws

	// ukrainian
	"UKR": "uk", // Erai-Raws
	"UA":  "uk", // Erai-Raws

	// hungarian
	"HUN": "hu", // Erai-Raws

	// czech
	"CES": "cs", // Erai-Raws
	"CZ":  "cs", // Erai-Raws

	// croatian
	"HRV": "hr", // Erai-Raws

	// malaysian
	"MAY": "ms", // Erai-Raws
	"MY":  "ms", // Erai-Raws

	// slovakian
	"SLK": "sk", // Erai-Raws

	// filipino
	"FIL": "tl", // Erai-Raws
	"PH":  "tl", // Erai-Raws
}

// ParseLanguage converts an irregular language code to ISO 639-1.
//
// Surrounding whitespace is ignored. The code is first looked up verbatim in
// langSynonyms. If that fails and the code is longer than two characters, it
// is looked up again in upper case, so "eng" and "Por-Br" resolve like "ENG"
// and "POR-BR". Codes without a synonym are returned in lower case.
func ParseLanguage(str string) string {
	code := strings.TrimSpace(str)

	if iso, ok := langSynonyms[code]; ok {
		return iso
	}

	// Two-letter keys are country codes whose lower-case spelling is often a
	// valid ISO 639-1 code for a different language ("se" is Northern Sami,
	// "br" is Breton), so only longer codes are matched case-insensitively.
	if len(code) > 2 {
		if iso, ok := langSynonyms[strings.ToUpper(code)]; ok {
			return iso
		}
	}

	return strings.ToLower(code)
}

// ParseLanguages converts multiple irregular language codes to ISO 639-1.
//
// It calls ParseLanguage for every element and returns the results in the
// same order. The result is always a non-nil slice with one entry per input.
func ParseLanguages(langCodes []string) []string {
	codes := make([]string, len(langCodes))
	for i, raw := range langCodes {
		codes[i] = ParseLanguage(raw)
	}
	return codes
}