commit f697e9268f384abf52687f6eb4d82be2ecf2ff56
Author: milarin
Date:   Fri Mar 3 14:37:16 2023 +0100

    initial commit

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..a48d947
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+*_test.go
diff --git a/api.go b/api.go
new file mode 100644
index 0000000..860c96f
--- /dev/null
+++ b/api.go
@@ -0,0 +1,249 @@
+// Package nuapi is a scraping client for novelupdates.com.
+package nuapi
+
+import (
+	"context"
+	"fmt"
+	"os"
+	"path"
+	"strconv"
+	"strings"
+	"time"
+
+	"git.milar.in/milarin/slices"
+	"github.com/PuerkitoBio/goquery"
+)
+
+// Api holds the request settings shared by all calls to novelupdates.com.
+type Api struct {
+	// UserAgent is sent as the User-Agent header of every request.
+	UserAgent string
+
+	// Cookie is sent as the Cookie header by requests that need a logged-in
+	// session, such as GetReadingList.
+	Cookie string
+}
+
+// NewApi returns an Api that uses the given session cookie and a desktop
+// Firefox user agent.
+func NewApi(cookie string) *Api {
+	return &Api{
+		Cookie:    cookie,
+		UserAgent: "Mozilla/5.0 (X11; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/110.0",
+	}
+}
+
+// GetReadingList fetches the reading list with index listIndex using the
+// session cookie of api. Table rows without a novel link are skipped.
+//
+// It returns an error if no cookie is set or the page cannot be fetched.
+func (api *Api) GetReadingList(ctx context.Context, listIndex int) (*ReadingList, error) {
+	doc, err := api.getWithCookie(ctx, fmt.Sprintf("https://www.novelupdates.com/reading-list/?list=%d", listIndex))
+	if err != nil {
+		return nil, err
+	}
+
+	listID := ReadingListID(doc.Find("#cssmenu ul li.active a").Text())
+
+	selection := doc.Find("table tbody tr")
+	entries := make([]ReadingListEntry, 0, selection.Length())
+	selection.Each(func(i int, s *goquery.Selection) {
+		link := s.Find("td:nth-child(2) a")
+		href, ok := link.Attr("href")
+		if !ok {
+			return
+		}
+
+		novelID := NovelID(path.Base(href))
+		novel := NovelEntry{
+			NovelID: novelID,
+			Name:    link.Text(),
+		}
+
+		currentChapterLink := s.Find("td:nth-child(3) a")
+		currentChapter := ReadingListChapterEntry{
+			NovelID: novelID,
+			ID:      ChapterID(currentChapterLink.Text()),
+			Link:    currentChapterLink.AttrOr("href", ""),
+		}
+
+		// The latest release must come from a different cell than the current
+		// chapter; reading the same cell twice would make
+		// NewChapterAvailable always report false.
+		// NOTE(review): column 4 is assumed to be the latest-release column -
+		// confirm against the live reading-list markup.
+		latestChapterLink := s.Find("td:nth-child(4) a")
+		latestChapter := ReadingListChapterEntry{
+			NovelID: novelID,
+			ID:      ChapterID(latestChapterLink.Text()),
+			Link:    latestChapterLink.AttrOr("href", ""),
+		}
+
+		entries = append(entries, ReadingListEntry{
+			Novel:          novel,
+			CurrentChapter: currentChapter,
+			LatestChapter:  latestChapter,
+		})
+	})
+
+	return &ReadingList{
+		ID:      listID,
+		Entries: entries,
+	}, nil
+}
+
+// GetNovelByID fetches the series page of novelID and extracts the novel's
+// title, description, cover, associated names, type, original language,
+// genres and tags from it.
+func (api *Api) GetNovelByID(novelID NovelID) (*Novel, error) {
+	doc, err := api.get(context.Background(), fmt.Sprintf("https://www.novelupdates.com/series/%s/", novelID))
+	if err != nil {
+		return nil, err
+	}
+
+	title := doc.Find(".seriestitlenu").Text()
+	description := doc.Find("#editdescription").Text()
+	cover := doc.Find(".wpb_wrapper img").AttrOr("src", "")
+
+	associatedNamesHtml, err := doc.Find("#editassociated").Html()
+	if err != nil {
+		return nil, err
+	}
+	// The names are separated by line-break elements, which the HTML
+	// renderer behind Selection.Html serializes as "<br/>".
+	associatedNames := strings.Split(strings.TrimSpace(associatedNamesHtml), "<br/>")
+
+	novelType := NovelType(doc.Find("#showtype a.genre.type").Text())
+	originalLanguage := Language(strings.ToLower(strings.Trim(doc.Find("#showtype a.genre.type + span").Text(), "()")))
+
+	genreElems := doc.Find("#seriesgenre a.genre")
+	genres := make([]GenreID, 0, genreElems.Length())
+	genreElems.Each(func(i int, s *goquery.Selection) {
+		href, ok := s.Attr("href")
+		if !ok {
+			return
+		}
+		genres = append(genres, GenreID(path.Base(href)))
+	})
+
+	tagElems := doc.Find("#showtags a.genre")
+	tags := make([]TagID, 0, tagElems.Length())
+	tagElems.Each(func(i int, s *goquery.Selection) {
+		href, ok := s.Attr("href")
+		if !ok {
+			return
+		}
+		tags = append(tags, TagID(path.Base(href)))
+	})
+
+	return &Novel{
+		ID:               novelID,
+		Name:             title,
+		AssociatedNames:  associatedNames,
+		Description:      description,
+		Cover:            cover,
+		Type:             novelType,
+		OriginalLanguage: originalLanguage,
+		Genres:           genres,
+		Tags:             tags,
+	}, nil
+}
+
+// GetChapterEntriesByNovelID streams the chapter releases of novelID through
+// the returned Cursor. Pages are requested from the highest page number down
+// to page 1, and each page is passed through slices.Reverse before sending.
+//
+// Errors are written to os.Stderr and end the stream. Callers that stop
+// reading early must call Close (or use First) so that the producing
+// goroutine can exit.
+func (api *Api) GetChapterEntriesByNovelID(novelID NovelID) *Cursor[NovelChapterEntry] {
+	ctx, cancelFunc := context.WithCancel(context.Background())
+	out := make(chan *NovelChapterEntry, 15)
+
+	go func() {
+		defer close(out)
+
+		doc, err := api.get(ctx, fmt.Sprintf("https://www.novelupdates.com/series/%s/?pg=%d", novelID, 1))
+		if err != nil {
+			fmt.Fprintln(os.Stderr, err)
+			return
+		}
+
+		// NOTE(review): if the page has no such pagination link, ParseInt
+		// fails on the empty text and the stream ends without entries -
+		// confirm whether single-page novels need a fallback.
+		pageCount, err := strconv.ParseInt(doc.Find(".digg_pagination a:nth-child(5)").Text(), 10, 64)
+		if err != nil {
+			fmt.Fprintln(os.Stderr, err)
+			return
+		}
+
+		for pageIndex := int(pageCount); pageIndex > 0; pageIndex-- {
+			if ctx.Err() != nil {
+				return
+			}
+
+			entries, err := api.getChapterEntriesByPageIndex(ctx, novelID, pageIndex)
+			if err != nil {
+				// A cancelled cursor is an expected way to stop; anything else
+				// is reported like the errors above instead of being dropped.
+				if ctx.Err() == nil {
+					fmt.Fprintln(os.Stderr, err)
+				}
+				return
+			}
+
+			for i := range entries {
+				// Give up on the send once the cursor is closed; a bare send
+				// would block forever when nobody reads the channel anymore.
+				select {
+				case out <- &entries[i]:
+				case <-ctx.Done():
+					return
+				}
+			}
+		}
+	}()
+
+	return &Cursor[NovelChapterEntry]{
+		ctx:        ctx,
+		cancelFunc: cancelFunc,
+		Chan:       out,
+	}
+}
+
+// getChapterEntriesByPageIndex fetches page pageIndex of the release table of
+// novelID and returns its rows after passing them through slices.Reverse.
+// Rows whose date does not parse with the layout "01/02/06" are skipped.
+func (api *Api) getChapterEntriesByPageIndex(ctx context.Context, novelID NovelID, pageIndex int) ([]NovelChapterEntry, error) {
+	doc, err := api.get(ctx, fmt.Sprintf("https://www.novelupdates.com/series/%s/?pg=%d", novelID, pageIndex))
+	if err != nil {
+		return nil, err
+	}
+
+	entryElems := doc.Find("#myTable tbody tr")
+	entries := make([]NovelChapterEntry, 0, entryElems.Length())
+	entryElems.Each(func(i int, s *goquery.Selection) {
+		td3 := s.Find("td:nth-child(3) a.chp-release")
+
+		chapterID := ChapterID(td3.Text())
+		groupID := GroupID(path.Base(s.Find("td:nth-child(2) a").AttrOr("href", "")))
+		// Presumably the hrefs are protocol-relative ("//..."), hence the prefix.
+		link := "https:" + td3.AttrOr("href", "")
+
+		date, err := time.Parse("01/02/06", strings.TrimSpace(s.Find("td:first-child").Text()))
+		if err != nil {
+			return
+		}
+
+		entries = append(entries, NovelChapterEntry{
+			NovelID: novelID,
+			ID:      chapterID,
+			Link:    link,
+			Date:    date,
+			Group:   groupID,
+		})
+	})
+
+	return slices.Reverse(entries), nil
+}
diff --git a/cursor.go b/cursor.go
new file mode 100644
index 0000000..319a328
--- /dev/null
+++ b/cursor.go
@@ -0,0 +1,45 @@
+package nuapi
+
+import (
+	"context"
+)
+
+// Cursor is a cancellable stream of values that are delivered over Chan.
+type Cursor[T any] struct {
+	ctx        context.Context
+	cancelFunc context.CancelFunc
+	Chan       <-chan *T
+}
+
+// First receives one value from the cursor and then cancels it. It returns
+// nil if the channel is closed before a value arrives.
+func (c *Cursor[T]) First() *T {
+	defer c.cancelFunc()
+	return <-c.Chan
+}
+
+// Close cancels the cursor's context.
+func (c *Cursor[T]) Close() {
+	c.cancelFunc()
+}
+
+// Next returns the next value of the stream. The boolean result is false once
+// the cursor has been cancelled or the channel has been closed.
+func (c *Cursor[T]) Next() (*T, bool) {
+	if c.ctx.Err() != nil {
+		return nil, false
+	}
+
+	value, ok := <-c.Chan
+	return value, ok
+}
+
+// Slice drains the cursor with Next and returns the received values. The
+// result is never nil, even when no value was received.
+func (c *Cursor[T]) Slice() []T {
+	s := make([]T, 0)
+	for value, ok := c.Next(); ok; value, ok = c.Next() {
+		s = append(s, *value)
+	}
+	return s
+}
diff --git a/genre.go b/genre.go
new file mode 100644
index 0000000..4b1eab8
--- /dev/null
+++ b/genre.go
@@ -0,0 +1,202 @@
+package nuapi
+
+// GenreID identifies a genre by the last path element of its URL.
+type GenreID string
+
+// Genres known to this package.
+const (
+	GenreAction        GenreID = "action"
+	GenreAdult         GenreID = "adult"
+	GenreAdventure     GenreID = "adventure"
+	GenreComedy        GenreID = "comedy"
+	GenreDrama         GenreID = "drama"
+	GenreEcchi         GenreID = "ecchi"
+	GenreFantasy       GenreID = "fantasy"
+	GenreGenderBender  GenreID = "gender-bender"
+	GenreHarem         GenreID = "harem"
+	GenreHistorical    GenreID = "historical"
+	GenreHorror        GenreID = "horror"
+	GenreJosei         GenreID = "josei"
+	GenreMartialArts   GenreID = "martial-arts"
+	GenreMature        GenreID = "mature"
+	GenreMecha         GenreID = "mecha"
+	GenreMystery       GenreID = "mystery"
+	GenrePsychological GenreID = "psychological"
+	GenreRomance       GenreID = "romance"
+	GenreSchoolLife    GenreID = "school-life"
+	GenreSciFi         GenreID = "sci-fi"
+	GenreSeinen        GenreID = "seinen"
+	GenreShoujo        GenreID = "shoujo"
+	GenreShoujoAi      GenreID = "shoujo-ai"
+	GenreShounen       GenreID = "shounen"
+	GenreShounenAi     GenreID = "shounen-ai"
+	GenreSliceOfLife   GenreID = "slice-of-life"
+	GenreSmut          GenreID = "smut"
+	GenreSports        GenreID = "sports"
+	GenreSupernatural  GenreID = "supernatural"
+	GenreTragedy       GenreID = "tragedy"
+	GenreWuxia         GenreID = "wuxia"
+	GenreXianxia       GenreID = "xianxia"
+	GenreXuanhuan      GenreID = "xuanhuan"
+	GenreYaoi          GenreID = "yaoi"
+	GenreYuri          GenreID = "yuri"
+)
+
+// String returns the display name of a known genre, e.g. "Martial Arts". For
+// any other value it returns the raw ID, because IDs are scraped from page
+// links and may therefore be values this package does not know about.
+func (g GenreID) String() string {
+	switch g {
+	case GenreAction:
+		return "Action"
+	case GenreAdult:
+		return "Adult"
+	case GenreAdventure:
+		return "Adventure"
+	case GenreComedy:
+		return "Comedy"
+	case GenreDrama:
+		return "Drama"
+	case GenreEcchi:
+		return "Ecchi"
+	case GenreFantasy:
+		return "Fantasy"
+	case GenreGenderBender:
+		return "Gender Bender"
+	case GenreHarem:
+		return "Harem"
+	case GenreHistorical:
+		return "Historical"
+	case GenreHorror:
+		return "Horror"
+	case GenreJosei:
+		return "Josei"
+	case GenreMartialArts:
+		return "Martial Arts"
+	case GenreMature:
+		return "Mature"
+	case GenreMecha:
+		return "Mecha"
+	case GenreMystery:
+		return "Mystery"
+	case GenrePsychological:
+		return "Psychological"
+	case GenreRomance:
+		return "Romance"
+	case GenreSchoolLife:
+		return "School Life"
+	case GenreSciFi:
+		return "Sci-fi"
+	case GenreSeinen:
+		return "Seinen"
+	case GenreShoujo:
+		return "Shoujo"
+	case GenreShoujoAi:
+		return "Shoujo Ai"
+	case GenreShounen:
+		return "Shounen"
+	case GenreShounenAi:
+		return "Shounen Ai"
+	case GenreSliceOfLife:
+		return "Slice of Life"
+	case GenreSmut:
+		return "Smut"
+	case GenreSports:
+		return "Sports"
+	case GenreSupernatural:
+		return "Supernatural"
+	case GenreTragedy:
+		return "Tragedy"
+	case GenreWuxia:
+		return "Wuxia"
+	case GenreXianxia:
+		return "Xianxia"
+	case GenreXuanhuan:
+		return "Xuanhuan"
+	case GenreYaoi:
+		return "Yaoi"
+	case GenreYuri:
+		return "Yuri"
+	default:
+		return string(g)
+	}
+}
+
+// ParseGenre converts a display name such as "Martial Arts" into its GenreID.
+// It panics if str is not the display name of a known genre.
+func ParseGenre(str string) GenreID {
+	switch str {
+	case "Action":
+		return GenreAction
+	case "Adult":
+		return GenreAdult
+	case "Adventure":
+		return GenreAdventure
+	case "Comedy":
+		return GenreComedy
+	case "Drama":
+		return GenreDrama
+	case "Ecchi":
+		return GenreEcchi
+	case "Fantasy":
+		return GenreFantasy
+	case "Gender Bender":
+		return GenreGenderBender
+	case "Harem":
+		return GenreHarem
+	case "Historical":
+		return GenreHistorical
+	case "Horror":
+		return GenreHorror
+	case "Josei":
+		return GenreJosei
+	case "Martial Arts":
+		return GenreMartialArts
+	case "Mature":
+		return GenreMature
+	case "Mecha":
+		return GenreMecha
+	case "Mystery":
+		return GenreMystery
+	case "Psychological":
+		return GenrePsychological
+	case "Romance":
+		return GenreRomance
+	case "School Life":
+		return GenreSchoolLife
+	case "Sci-fi":
+		return GenreSciFi
+	case "Seinen":
+		return GenreSeinen
+	case "Shoujo":
+		return GenreShoujo
+	case "Shoujo Ai":
+		return GenreShoujoAi
+	case "Shounen":
+		return GenreShounen
+	case "Shounen Ai":
+		return GenreShounenAi
+	case "Slice of Life":
+		return GenreSliceOfLife
+	case "Smut":
+		return GenreSmut
+	case "Sports":
+		return GenreSports
+	case "Supernatural":
+		return GenreSupernatural
+	case "Tragedy":
+		return GenreTragedy
+	case "Wuxia":
+		return GenreWuxia
+	case "Xianxia":
+		return GenreXianxia
+	case "Xuanhuan":
+		return GenreXuanhuan
+	case "Yaoi":
+		return GenreYaoi
+	case "Yuri":
+		return GenreYuri
+	default:
+		panic("invalid genre: " + str)
+	}
+}
diff --git a/go.mod b/go.mod
new file mode 100644
index 0000000..26d5142
--- /dev/null
+++ b/go.mod
@@ -0,0 +1,13 @@
+module git.milar.in/milarin/nu-api
+
+go 1.20
+
+require (
+	git.milar.in/milarin/slices v0.0.6
+	github.com/PuerkitoBio/goquery v1.8.1
+)
+
+require (
+	github.com/andybalholm/cascadia v1.3.1 // indirect
+	golang.org/x/net v0.7.0 // indirect
+)
diff --git a/go.sum b/go.sum
new file mode 100644
index 0000000..6fe43c5
--- /dev/null
+++ b/go.sum
@@ -0,0 +1,37 @@
+git.milar.in/milarin/slices v0.0.6 h1:AQoSarZ58WHYol9c6woWJSe8wFpPC2RC4cvIlZpfg9s=
+git.milar.in/milarin/slices v0.0.6/go.mod h1:NOr53AOeur/qscu/FBj3lsFR262PNYBccLYSTCAXRk4=
+github.com/PuerkitoBio/goquery v1.8.1 h1:uQxhNlArOIdbrH1tr0UXwdVFgDcZDrZVdcpygAcwmWM=
+github.com/PuerkitoBio/goquery v1.8.1/go.mod h1:Q8ICL1kNUJ2sXGoAhPGUdYDJvgQgHzJsnnd3H7Ho5jQ=
+github.com/andybalholm/cascadia v1.3.1 h1:nhxRkql1kdYCc8Snf7D5/D3spOX+dBgjA6u8x004T2c=
+github.com/andybalholm/cascadia v1.3.1/go.mod h1:R4bJ1UQfqADjvDa4P6HZHLh/3OxWWEqc0Sk8XGwHqvA=
+github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
+golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
+golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
+golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
+golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
+golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
+golang.org/x/net v0.0.0-20210916014120-12bc252f5db8/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
+golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
+golang.org/x/net v0.7.0 h1:rJrUqqhjsgNp7KqAIc25s9pZnjU7TUcSY7HcVZjdn1g=
+golang.org/x/net v0.7.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs=
+golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
+golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
+golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
+golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
+golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k=
+golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
+golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
+golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
+golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
+golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
+golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
+golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
+golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=
+golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
diff --git a/novelupdates.go b/novelupdates.go
new file mode 100644
index 0000000..cc442eb
--- /dev/null
+++ b/novelupdates.go
@@ -0,0 +1,58 @@
+package nuapi
+
+import (
+	"bytes"
+	"compress/gzip"
+	"context"
+	"errors"
+	"fmt"
+	"os/exec"
+
+	"github.com/PuerkitoBio/goquery"
+)
+
+// getWithCookie fetches url like get, but additionally sends api.Cookie as
+// the Cookie header. It returns an error without issuing a request if no
+// cookie is set.
+func (api *Api) getWithCookie(ctx context.Context, url string) (*goquery.Document, error) {
+	if api.Cookie == "" {
+		return nil, errors.New("no API cookie set")
+	}
+
+	return api.fetch(ctx, url, fmt.Sprintf("Cookie: %s", api.Cookie))
+}
+
+// get fetches url without a session cookie and parses the response as HTML.
+func (api *Api) get(ctx context.Context, url string) (*goquery.Document, error) {
+	return api.fetch(ctx, url)
+}
+
+// fetch runs the curl binary to request url with the User-Agent of api, the
+// given extra header lines and "Accept-Encoding: gzip", then gunzips the
+// output and parses it as HTML. Cancelling ctx kills the curl process.
+//
+// The response is expected to be gzip-compressed; any other body makes
+// gzip.NewReader fail and is returned as an error.
+func (api *Api) fetch(ctx context.Context, url string, extraHeaders ...string) (*goquery.Document, error) {
+	args := []string{
+		"-s", url,
+		"-H", fmt.Sprintf("User-Agent: %s", api.UserAgent),
+	}
+	for _, header := range extraHeaders {
+		args = append(args, "-H", header)
+	}
+	args = append(args, "-H", fmt.Sprintf("Accept-Encoding: %s", "gzip"))
+
+	data, err := exec.CommandContext(ctx, "curl", args...).Output()
+	if err != nil {
+		return nil, err
+	}
+
+	r, err := gzip.NewReader(bytes.NewReader(data))
+	if err != nil {
+		return nil, err
+	}
+	defer r.Close()
+
+	return goquery.NewDocumentFromReader(r)
+}
diff --git a/tag.go b/tag.go
new file mode 100644
index 0000000..b7df9b7
--- /dev/null
+++ b/tag.go
@@ -0,0 +1,4 @@
+package nuapi
+
+// TagID identifies a tag by the last path element of its URL.
+type TagID string
diff --git a/types.go b/types.go
new file mode 100644
index 0000000..be2e02c
--- /dev/null
+++ b/types.go
@@ -0,0 +1,107 @@
+package nuapi
+
+import (
+	"encoding/json"
+	"time"
+)
+
+// NovelID identifies a novel by the last path element of its series URL.
+type NovelID string
+
+// ChapterID identifies a chapter by the text of its release link.
+type ChapterID string
+
+// ReadingListID identifies a reading list by the text of its menu entry.
+type ReadingListID string
+
+// GroupID identifies a group by the last path element of its link.
+type GroupID string
+
+// ReadingList is a reading list together with the novels on it.
+type ReadingList struct {
+	ID      ReadingListID      `json:"id"`
+	Entries []ReadingListEntry `json:"entries"`
+}
+
+// ReadingListEntry pairs a novel on a reading list with the chapter the
+// reader is at and the latest released chapter.
+type ReadingListEntry struct {
+	Novel          NovelEntry              `json:"novel"`
+	CurrentChapter ReadingListChapterEntry `json:"current_chapter"`
+	LatestChapter  ReadingListChapterEntry `json:"latest_chapter"`
+}
+
+// NewChapterAvailable reports whether the latest chapter differs from the
+// current chapter.
+func (e ReadingListEntry) NewChapterAvailable() bool {
+	return e.CurrentChapter.ID != e.LatestChapter.ID
+}
+
+// ReadingListChapterEntry is a chapter referenced from a reading list.
+type ReadingListChapterEntry struct {
+	NovelID NovelID   `json:"novel_id"`
+	ID      ChapterID `json:"id"`
+	Link    string    `json:"link"`
+}
+
+// NovelEntry is the short form of a novel as it appears on a reading list.
+type NovelEntry struct {
+	NovelID NovelID `json:"id"`
+	Name    string  `json:"name"`
+}
+
+// Novel is the metadata scraped from a novel's series page.
+type Novel struct {
+	ID               NovelID   `json:"id"`
+	Name             string    `json:"name"`
+	AssociatedNames  []string  `json:"associated_names"`
+	Description      string    `json:"description"`
+	Cover            string    `json:"cover"`
+	Type             NovelType `json:"type"`
+	OriginalLanguage Language  `json:"original_language"`
+	Genres           []GenreID `json:"genres"`
+	Tags             []TagID   `json:"tags"`
+}
+
+// NovelType is the publication type of a novel as displayed on the site.
+type NovelType string
+
+// Novel types known to this package.
+const (
+	TypeLightNovel     NovelType = "Light Novel"
+	TypePublishedNovel NovelType = "Published Novel"
+	TypeWebNovel       NovelType = "Web Novel"
+)
+
+// Language is the lower-cased language code of a novel's original language.
+type Language string
+
+// Language codes known to this package.
+const (
+	LanguageJapanese   Language = "jp"
+	LanguageChinese    Language = "cn"
+	LanguageMalaysian  Language = "my"
+	LanguageFilipino   Language = "fil"
+	LanguageKhmer      Language = "khm"
+	LanguageThai       Language = "th"
+	LanguageIndonesian Language = "id"
+	LanguageKorean     Language = "kr"
+	LanguageVietnamese Language = "vn"
+)
+
+// NovelChapterEntry is one row of a novel's chapter release table.
+type NovelChapterEntry struct {
+	ID      ChapterID `json:"id"`
+	Link    string    `json:"link"`
+	NovelID NovelID   `json:"novel_id"`
+	Date    time.Time `json:"date"`
+	Group   GroupID   `json:"group"`
+}
+
+// String returns nce encoded as tab-indented JSON.
+func (nce NovelChapterEntry) String() string {
+	// The error is deliberately ignored: String cannot report it, and data
+	// is nil in that case, so the result is the empty string.
+	data, _ := json.MarshalIndent(nce, "", "\t")
+	return string(data)
+}