Compare commits

...

2 Commits

Author SHA1 Message Date
Timon Ringwald
d4e5e65935 lang code parsing 2022-08-15 13:40:13 +02:00
Timon Ringwald
77db2c6683 default torrent values introduced 2022-08-15 13:06:19 +02:00
7 changed files with 198 additions and 16 deletions

View File

@ -4,5 +4,8 @@ import "git.milar.in/milarin/adverr"
// Error templates used by regexTorrentParser to reject inconsistent parser
// definitions. The format verbs in each template must match the arguments
// passed to New at the call site (assuming ErrTmpl.New formats its arguments
// printf-style, as the %s / %d templates here imply).
var (
	// ErrTorrentParserInsufficientData expects the regex (string).
	ErrTorrentParserInsufficientData = adverr.NewErrTmpl("ErrTorrentParserInsufficientData", "regex '%s' must at least provide title and episode")

	// ErrTorrentParserInsufficientLanguageData expects the regex (string).
	// The call site passes the regex, so the template needs a verb for it.
	ErrTorrentParserInsufficientLanguageData = adverr.NewErrTmpl("ErrTorrentParserInsufficientLanguageData", "no language reference in regex '%s' and no default language set")

	// ErrTorrentParserInsufficientSubtitleData expects the regex (string).
	ErrTorrentParserInsufficientSubtitleData = adverr.NewErrTmpl("ErrTorrentParserInsufficientSubtitleData", "no subtitle reference in regex '%s' and no default subtitle set")

	// ErrTorrentParserInsufficientResolutionData expects the regex (string).
	ErrTorrentParserInsufficientResolutionData = adverr.NewErrTmpl("ErrTorrentParserInsufficientResolutionData", "no resolution reference in regex '%s' and no default resolution set")

	// ErrTorrentParserInvalidGroupReference expects the referenced group index
	// (int) and the number of groups the regex actually has (int).
	ErrTorrentParserInvalidGroupReference = adverr.NewErrTmpl("ErrTorrentParserInvalidGroupReference", "options references group %d but regex only has %d groups")
)

2
go.mod
View File

@ -3,7 +3,7 @@ module git.milar.in/animan/parsers
go 1.18
require (
git.milar.in/animan/model v0.0.0-20220804154959-f8b66e8c9eda
git.milar.in/animan/model v0.0.0-20220815093549-fe9b565a9dd2
git.milar.in/milarin/adverr v0.2.1
)

2
go.sum
View File

@ -1,5 +1,7 @@
git.milar.in/animan/model v0.0.0-20220804154959-f8b66e8c9eda h1:9XG7Dn+UtyLwwNcuol10EdHcBQjYD+1WzmnJbRUJQfs=
git.milar.in/animan/model v0.0.0-20220804154959-f8b66e8c9eda/go.mod h1:oXDr3slnzXoccIrci2wotX0cWwMVuAQ9dSvmh1buE4c=
git.milar.in/animan/model v0.0.0-20220815093549-fe9b565a9dd2 h1:vNbYNp8VJQ6FBENgZMHA1IFOfPkQEA+TIaN8PFF8m48=
git.milar.in/animan/model v0.0.0-20220815093549-fe9b565a9dd2/go.mod h1:oXDr3slnzXoccIrci2wotX0cWwMVuAQ9dSvmh1buE4c=
git.milar.in/milarin/adverr v0.2.1 h1:eyXFGC+Ui/kcNt2+NqP3HiAplwxzqeNr9DfitsUb3c4=
git.milar.in/milarin/adverr v0.2.1/go.mod h1:wwfglcey4R3vqjNL/d8mbnvFJGzETRXzAEolIHZY32w=
git.milar.in/milarin/anilist v1.5.0 h1:fSiAXY/topNk4ISEp2QtcG9HHKLJfMc8w05iqc+Paf0=

144
lang_codes.go Normal file
View File

@ -0,0 +1,144 @@
package parsers
import "strings"
// langSynonyms maps irregular language codes to ISO 639-1 codes.
// Keys must be upper case: ParseLanguage upper-cases its input before the
// lookup. The trailing comment on each entry names the release group whose
// tags motivated it.
var langSynonyms = map[string]string{
	// english
	"ENG": "en", // Erai-Raws
	"US":  "en", // Erai-Raws
	// portuguese
	"POR-BR": "pt", // Erai-Raws
	"POR":    "pt", // Erai-Raws
	"BR":     "pt", // Erai-Raws
	// spanish
	"SPA-LA": "es", // Erai-Raws
	"SPA":    "es", // Erai-Raws
	"MX":     "es", // Erai-Raws
	// arabic
	"ARA": "ar", // Erai-Raws
	"SA":  "ar", // Erai-Raws
	// french
	"FRE": "fr", // Erai-Raws
	// german
	"GER": "de", // Erai-Raws
	// italian
	"ITA": "it", // Erai-Raws
	// russian
	"RUS": "ru", // Erai-Raws
	// japanese
	"JPN": "ja", // Erai-Raws
	"JP":  "ja", // Erai-Raws
	// polish
	"POL": "pl", // Erai-Raws
	// dutch
	"DUT": "nl", // Erai-Raws
	// norwegian
	"NOB": "no", // Erai-Raws
	// finnish
	"FIN": "fi", // ISO 639-2 code; without this entry "FIN" would lower-case to the non-ISO-639-1 "fin"
	// turkish
	"TUR": "tr", // Erai-Raws
	// swedish
	"SWE": "sv", // Erai-Raws
	"SE":  "sv", // Erai-Raws
	// greek
	"GRE": "el", // Erai-Raws
	"GR":  "el", // Erai-Raws
	// hebrew
	"HEB": "he", // Erai-Raws
	"IL":  "he", // Erai-Raws
	// romanian
	"RUM": "ro", // Erai-Raws
	// indonesian
	"IND": "id", // Erai-Raws
	// thai
	"THA": "th", // Erai-Raws
	// korean
	"KOR": "ko", // Erai-Raws
	"KR":  "ko", // Erai-Raws
	// danish
	"DAN": "da", // Erai-Raws
	"DK":  "da", // Erai-Raws
	// chinese (simplified & traditional)
	"CHI": "zh", // Erai-Raws
	"CN":  "zh", // Erai-Raws
	// bulgarian
	"BUL": "bg", // Erai-Raws
	// vietnamese
	"VIE": "vi", // Erai-Raws
	"VN":  "vi", // Erai-Raws
	// hindi
	"HIN": "hi", // Erai-Raws
	"IN":  "hi", // Erai-Raws
	// tamil
	// NOTE(review): "TEL" is the ISO 639-2 code for Telugu ("te"), not Tamil
	// ("ta") - confirm which language this tag denotes in the release names.
	"TEL": "ta", // Erai-Raws
	"LK":  "ta", // Erai-Raws
	// ukrainian
	"UKR": "uk", // Erai-Raws
	"UA":  "uk", // Erai-Raws
	// hungarian
	"HUN": "hu", // Erai-Raws
	// czech
	"CES": "cs", // Erai-Raws
	"CZ":  "cs", // Erai-Raws
	// croatian
	"HRV": "hr", // Erai-Raws
	// malaysian
	"MAY": "ms", // Erai-Raws
	"MY":  "ms", // Erai-Raws
	// slovakian
	"SLK": "sk", // Erai-Raws
	// filipino
	"FIL": "tl", // Erai-Raws
	"PH":  "tl", // Erai-Raws
}

// ParseLanguage converts an irregular language code to ISO 639-1.
//
// Surrounding whitespace is ignored and the lookup in langSynonyms is
// case-insensitive, so "ENG", "eng" and " Eng " all yield "en". Codes without
// a synonym entry are returned trimmed and lower-cased, on the assumption
// that they already are ISO 639-1 codes (e.g. "DE" becomes "de").
func ParseLanguage(str string) string {
	trimmed := strings.TrimSpace(str)
	// The table is keyed by upper-case codes; normalise before looking up.
	if code, ok := langSynonyms[strings.ToUpper(trimmed)]; ok {
		return code
	}
	return strings.ToLower(trimmed)
}
// ParseLanguages normalises a list of irregular language codes to ISO 639-1
// by applying ParseLanguage to every element. The result has the same length
// and order as langCodes and is never nil.
func ParseLanguages(langCodes []string) []string {
	normalized := make([]string, len(langCodes))
	for i := range langCodes {
		normalized[i] = ParseLanguage(langCodes[i])
	}
	return normalized
}

View File

@ -22,8 +22,9 @@ var Parsers = []model.Parser{
Name: 1,
Episode: 2,
Resolution: 3,
Languages: 4,
LanguageParser: SquareBracketsLanguageParser,
Subtitles: 4,
SubtitleParser: SquareBracketsLanguageParser,
DefaultLanguages: []string{"ja"},
},
),
FileParser: nil,

View File

@ -9,22 +9,38 @@ import (
// TorrentParseOptions tells regexTorrentParser which regex subgroup holds
// which piece of information, which functions interpret the language groups,
// and which values to fall back to when a group is not referenced.
type TorrentParseOptions struct {
	// Mandatory regex group references; regexTorrentParser panics if either is 0.
	Name, Episode int
	// Optional regex group references; 0 means the regex has no such group
	// and the corresponding Default* value is used instead.
	Languages, Subtitles, Resolution int

	// Parsers applied to the text matched by the Languages and Subtitles groups.
	LanguageParser, SubtitleParser LanguageParserFunc

	// Fallback language and subtitle codes used when the group reference is 0.
	DefaultLanguages, DefaultSubtitles []string
	// Fallback resolution used when the Resolution group reference is 0.
	DefaultResolution model.Resolution
}
func regexTorrentParser(regex string, options TorrentParseOptions) model.TorrentParserFunc {
pattern := regexp.MustCompile(regex)
// handle faulty regexes
if options.Name == 0 || options.Episode == 0 {
panic(ErrTorrentParserInsufficientData.New(regex))
} else if options.Languages == 0 && options.DefaultLanguages == nil {
panic(ErrTorrentParserInsufficientLanguageData.New(regex))
} else if options.Subtitles == 0 && options.DefaultSubtitles == nil {
panic(ErrTorrentParserInsufficientSubtitleData.New(regex))
} else if options.Resolution == 0 && options.DefaultResolution == 0 {
panic(ErrTorrentParserInsufficientResolutionData.New(regex))
}
// handle faulty group references
for _, g := range []int{options.Name, options.Episode, options.Languages, options.Subtitles, options.Resolution} {
if g > pattern.NumSubexp() {
panic(ErrTorrentParserInvalidGroupReference.New(g, pattern.NumSubexp()))
@ -32,6 +48,8 @@ func regexTorrentParser(regex string, options TorrentParseOptions) model.Torrent
}
return func(parser *model.Parser, torrent *model.Torrent) (ParsedTorrent *model.ParsedTorrent, ok bool) {
var err error
matches := pattern.FindStringSubmatch(torrent.Title)
if matches == nil {
@ -43,17 +61,31 @@ func regexTorrentParser(regex string, options TorrentParseOptions) model.Torrent
return nil, false
}
resolution, err := model.ParseResolution(matches[options.Resolution])
resolution := options.DefaultResolution
if options.Resolution != 0 {
resolution, err = model.ParseResolution(matches[options.Resolution])
if err != nil {
return nil, false
}
}
languages := options.DefaultLanguages
if options.Languages != 0 {
languages = options.LanguageParser(matches[options.Languages])
}
subtitles := options.DefaultSubtitles
if options.Subtitles != 0 {
subtitles = options.SubtitleParser(matches[options.Subtitles])
}
return &model.ParsedTorrent{
Title: matches[options.Name],
OriginalAnimeTitle: matches[options.Name],
Episode: episode,
Resolution: resolution,
Parser: parser,
Languages: options.LanguageParser(matches[options.Languages]),
Languages: ParseLanguages(languages),
Subtitles: ParseLanguages(subtitles),
Torrent: torrent,
}, true