parsers/lang_codes.go

149 lines
2.8 KiB
Go
Raw Normal View History

2022-08-15 13:40:13 +02:00
package parsers
import "strings"
// langSynonyms maps irregular language identifiers to their ISO 639-1
// two-letter code. Keys are lowercase; ParseLanguage lowercases its input
// before looking it up here.
//
// The trailing comment on each entry names the source the spelling comes
// from. Where ISO 639-2 defines both a bibliographic (B) and a terminology
// (T) code for a language, both spellings are listed.
//
// Several keys are country codes rather than language codes (e.g. "us",
// "jp"). Some of them collide with real ISO 639-1 language codes ("br"
// Breton, "sa" Sanskrit, "se" Northern Sami, "kr" Kanuri, "my" Burmese), so
// those languages are shadowed by the mappings below.
var langSynonyms = map[string]string{
	// english
	"eng": "en", // Erai-Raws | ffprobe
	"us":  "en", // Erai-Raws

	// portuguese
	"por-br": "pt", // Erai-Raws
	"por":    "pt", // Erai-Raws | ffprobe
	"br":     "pt", // Erai-Raws

	// spanish
	"spa-la": "es", // Erai-Raws
	"spa":    "es", // Erai-Raws | ffprobe
	"mx":     "es", // Erai-Raws

	// arabic
	"ara": "ar", // Erai-Raws | ffprobe
	"sa":  "ar", // Erai-Raws

	// french
	"fre": "fr", // Erai-Raws | ffprobe
	"fra": "fr", // ISO 639-2/T

	// german
	"ger": "de", // Erai-Raws | ffprobe
	"deu": "de", // ISO 639-2/T

	// italian
	"ita": "it", // Erai-Raws | ffprobe

	// finnish
	"fin": "fi", // ffprobe

	// russian
	"rus": "ru", // Erai-Raws

	// japanese
	"jpn": "ja", // Erai-Raws | ffprobe
	"jp":  "ja", // Erai-Raws

	// polish
	"pol": "pl", // Erai-Raws | ffprobe

	// dutch
	"dut": "nl", // Erai-Raws | ffprobe
	"nld": "nl", // ISO 639-2/T

	// norwegian
	"nob": "no", // Erai-Raws | ffprobe
	"nor": "no", // ISO 639-2

	// turkish
	"tur": "tr", // Erai-Raws | ffprobe

	// swedish
	"swe": "sv", // Erai-Raws | ffprobe
	"se":  "sv", // Erai-Raws

	// greek
	"gre": "el", // Erai-Raws | ffprobe
	"ell": "el", // ISO 639-2/T
	"gr":  "el", // Erai-Raws

	// hebrew
	"heb": "he", // Erai-Raws | ffprobe
	"il":  "he", // Erai-Raws

	// romanian
	"rum": "ro", // Erai-Raws
	"rom": "ro", // ffprobe
	"ron": "ro", // ISO 639-2/T

	// indonesian
	"ind": "id", // Erai-Raws

	// thai
	"tha": "th", // Erai-Raws | ffprobe

	// korean
	"kor": "ko", // Erai-Raws | ffprobe
	"kr":  "ko", // Erai-Raws

	// danish
	"dan": "da", // Erai-Raws | ffprobe
	"dk":  "da", // Erai-Raws

	// chinese (simplified & traditional)
	"chi": "zh", // Erai-Raws | ffprobe
	"zho": "zh", // ISO 639-2/T
	"cn":  "zh", // Erai-Raws

	// bulgarian
	"bul": "bg", // Erai-Raws | ffprobe

	// vietnamese
	"vie": "vi", // Erai-Raws
	"vn":  "vi", // Erai-Raws

	// hindi
	"hin": "hi", // Erai-Raws
	"in":  "hi", // Erai-Raws

	// tamil
	// NOTE(review): "tel" is the ISO 639-2 code for Telugu ("te"), not
	// Tamil. The mapping is kept unchanged for compatibility; confirm
	// against the Erai-Raws naming before altering it.
	"tel": "ta", // Erai-Raws
	"tam": "ta", // ISO 639-2
	"lk":  "ta", // Erai-Raws

	// ukrainian
	"ukr": "uk", // Erai-Raws
	"ua":  "uk", // Erai-Raws

	// hungarian
	"hun": "hu", // Erai-Raws

	// czech
	"ces": "cs", // Erai-Raws
	"cze": "cs", // ISO 639-2/B
	"cz":  "cs", // Erai-Raws

	// croatian
	"hrv": "hr", // Erai-Raws

	// malaysian
	"may": "ms", // Erai-Raws
	"msa": "ms", // ISO 639-2/T
	"my":  "ms", // Erai-Raws

	// slovakian
	"slk": "sk", // Erai-Raws
	"slo": "sk", // ISO 639-2/B

	// filipino
	"fil": "tl", // Erai-Raws
	"ph":  "tl", // Erai-Raws
}
// ParseLanguage converts irregular language codes to ISO 639-1
func ParseLanguage(str string) string {
2022-08-22 12:01:25 +02:00
if code, ok := langSynonyms[strings.ToLower(str)]; ok {
2022-08-15 13:40:13 +02:00
return code
}
return strings.ToLower(str)
}
// ParseLanguages normalizes every entry of langCodes by passing it through
// ParseLanguage. The returned slice is newly allocated, has the same length
// and order as langCodes, and is never nil.
func ParseLanguages(langCodes []string) []string {
	normalized := make([]string, len(langCodes))
	for i := range langCodes {
		normalized[i] = ParseLanguage(langCodes[i])
	}
	return normalized
}