From f697e9268f384abf52687f6eb4d82be2ecf2ff56 Mon Sep 17 00:00:00 2001 From: milarin Date: Fri, 3 Mar 2023 14:37:16 +0100 Subject: [PATCH] initial commit --- .gitignore | 1 + api.go | 203 ++++++++++++++++++++++++++++++++++++++++++++++++ cursor.go | 37 +++++++++ genre.go | 195 ++++++++++++++++++++++++++++++++++++++++++++++ go.mod | 13 ++++ go.sum | 37 +++++++++ novelupdates.go | 57 ++++++++++++++ tag.go | 3 + types.go | 84 ++++++++++++++++++++ 9 files changed, 630 insertions(+) create mode 100644 .gitignore create mode 100644 api.go create mode 100644 cursor.go create mode 100644 genre.go create mode 100644 go.mod create mode 100644 go.sum create mode 100644 novelupdates.go create mode 100644 tag.go create mode 100644 types.go diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..a48d947 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +*_test.go diff --git a/api.go b/api.go new file mode 100644 index 0000000..860c96f --- /dev/null +++ b/api.go @@ -0,0 +1,203 @@ +package nuapi + +import ( + "context" + "fmt" + "os" + "path" + "strconv" + "strings" + "time" + + "git.milar.in/milarin/slices" + "github.com/PuerkitoBio/goquery" +) + +type Api struct { + UserAgent string + Cookie string +} + +func NewApi(cookie string) *Api { + return &Api{ + Cookie: cookie, + UserAgent: "Mozilla/5.0 (X11; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/110.0", + } +} + +func (api *Api) GetReadingList(ctx context.Context, listIndex int) (*ReadingList, error) { + doc, err := api.getWithCookie(ctx, fmt.Sprintf("https://www.novelupdates.com/reading-list/?list=%d", listIndex)) + if err != nil { + return nil, err + } + + listID := ReadingListID(doc.Find("#cssmenu ul li.active a").Text()) + + selection := doc.Find("table tbody tr") + entries := make([]ReadingListEntry, 0, selection.Length()) + selection.Each(func(i int, s *goquery.Selection) { + link := s.Find("td:nth-child(2) a") + href, ok := link.Attr("href") + if !ok { + return + } + + novelID := NovelID(path.Base(href)) + 
novel := NovelEntry{ + NovelID: novelID, + Name: link.Text(), + } + + currentChapterLink := s.Find("td:nth-child(3) a") + currentChapter := ReadingListChapterEntry{ + NovelID: novelID, + ID: ChapterID(currentChapterLink.Text()), + Link: currentChapterLink.AttrOr("href", ""), + } + + latestChapterLink := s.Find("td:nth-child(3) a") + latestChapter := ReadingListChapterEntry{ + NovelID: novelID, + ID: ChapterID(latestChapterLink.Text()), + Link: latestChapterLink.AttrOr("href", ""), + } + + entries = append(entries, ReadingListEntry{ + Novel: novel, + CurrentChapter: currentChapter, + LatestChapter: latestChapter, + }) + }) + + return &ReadingList{ + ID: listID, + Entries: entries, + }, nil +} + +func (api *Api) GetNovelByID(novelID NovelID) (*Novel, error) { + doc, err := api.get(context.Background(), fmt.Sprintf("https://www.novelupdates.com/series/%s/", novelID)) + if err != nil { + return nil, err + } + + title := doc.Find(".seriestitlenu").Text() + description := doc.Find("#editdescription").Text() + cover := doc.Find(".wpb_wrapper img").AttrOr("src", "") + + associatedNamesHtml, err := doc.Find("#editassociated").Html() + if err != nil { + return nil, err + } + associatedNames := strings.Split(strings.TrimSpace(associatedNamesHtml), "
") + + novelType := NovelType(doc.Find("#showtype a.genre.type").Text()) + originalLanguage := Language(strings.ToLower(strings.Trim(doc.Find("#showtype a.genre.type + span").Text(), "()"))) + + genreElems := doc.Find("#seriesgenre a.genre") + genres := make([]GenreID, 0, genreElems.Length()) + genreElems.Each(func(i int, s *goquery.Selection) { + href, ok := s.Attr("href") + if !ok { + return + } + genres = append(genres, GenreID(path.Base(href))) + }) + + tagElems := doc.Find("#showtags a.genre") + tags := make([]TagID, 0, genreElems.Length()) + tagElems.Each(func(i int, s *goquery.Selection) { + href, ok := s.Attr("href") + if !ok { + return + } + tags = append(tags, TagID(path.Base(href))) + }) + + return &Novel{ + ID: novelID, + Name: title, + AssociatedNames: associatedNames, + Description: description, + Cover: cover, + Type: novelType, + OriginalLanguage: originalLanguage, + Genres: genres, + Tags: tags, + }, nil +} + +func (api *Api) GetChapterEntriesByNovelID(novelID NovelID) *Cursor[NovelChapterEntry] { + ctx, cancelFunc := context.WithCancel(context.Background()) + out := make(chan *NovelChapterEntry, 15) + + go func() { + defer close(out) + + doc, err := api.get(ctx, fmt.Sprintf("https://www.novelupdates.com/series/%s/?pg=%d", novelID, 1)) + if err != nil { + fmt.Fprintln(os.Stderr, err) + return + } + + pageCount, err := strconv.ParseInt(doc.Find(".digg_pagination a:nth-child(5)").Text(), 10, 64) + if err != nil { + fmt.Fprintln(os.Stderr, err) + return + } + + for pageIndex := int(pageCount); pageIndex > 0; pageIndex-- { + if ctx.Err() != nil { + break + } + + entries, err := api.getChapterEntriesByPageIndex(ctx, novelID, pageIndex) + if err != nil { + return + } + + for _, entry := range entries { + entry := entry + out <- &entry + } + } + }() + + return &Cursor[NovelChapterEntry]{ + ctx: ctx, + cancelFunc: cancelFunc, + Chan: out, + } +} + +func (api *Api) getChapterEntriesByPageIndex(ctx context.Context, novelID NovelID, pageIndex int) 
// Cursor is a cancellable, pull-style view on a stream of values that a
// producer sends on Chan.
//
// The producer is expected to close Chan when it is done and to stop when the
// cursor's context is cancelled.
type Cursor[T any] struct {
	ctx        context.Context
	cancelFunc context.CancelFunc
	// Chan delivers the streamed values; it may also be ranged over directly.
	Chan <-chan *T
}

// First waits for the first value, returns it and closes the cursor.
// It returns nil when the channel is closed without delivering a value.
func (c *Cursor[T]) First() *T {
	defer c.cancelFunc()
	return <-c.Chan
}

// Close cancels the cursor's context. It may be called more than once.
func (c *Cursor[T]) Close() {
	c.cancelFunc()
}

// Next returns the next value of the stream. The boolean is false when the
// cursor was closed or the producer has closed the channel.
func (c *Cursor[T]) Next() (*T, bool) {
	// A closed cursor never yields values, even if some are still buffered.
	if c.ctx.Err() != nil {
		return nil, false
	}

	// Wait for a value or for cancellation, whichever comes first. A bare
	// receive would keep the caller blocked when Close is called from
	// another goroutine while the producer is still busy.
	select {
	case <-c.ctx.Done():
		return nil, false
	case value, ok := <-c.Chan:
		return value, ok
	}
}

// Slice drains the cursor and returns all remaining values. The result is
// never nil.
func (c *Cursor[T]) Slice() []T {
	s := make([]T, 0)
	for value, ok := c.Next(); ok; value, ok = c.Next() {
		s = append(s, *value)
	}
	return s
}
GenreID = "gender-bender" + GenreHarem GenreID = "harem" + GenreHistorical GenreID = "historical" + GenreHorror GenreID = "horror" + GenreJosei GenreID = "josei" + GenreMartialArts GenreID = "martial-arts" + GenreMature GenreID = "mature" + GenreMecha GenreID = "mecha" + GenreMystery GenreID = "mystery" + GenrePsychological GenreID = "psychological" + GenreRomance GenreID = "romance" + GenreSchoolLife GenreID = "school-life" + GenreSciFi GenreID = "sci-fi" + GenreSeinen GenreID = "seinen" + GenreShoujo GenreID = "shoujo" + GenreShoujoAi GenreID = "shoujo-ai" + GenreShounen GenreID = "shounen" + GenreShounenAi GenreID = "shounen-ai" + GenreSliceOfLife GenreID = "slice-of-life" + GenreSmut GenreID = "smut" + GenreSports GenreID = "sports" + GenreSupernatural GenreID = "supernatural" + GenreTragedy GenreID = "tragedy" + GenreWuxia GenreID = "wuxia" + GenreXianxia GenreID = "xianxia" + GenreXuanhuan GenreID = "xuanhuan" + GenreYaoi GenreID = "yaoi" + GenreYuri GenreID = "yuri" +) + +func (g GenreID) String() string { + switch g { + case GenreAction: + return "Action" + case GenreAdult: + return "Adult" + case GenreAdventure: + return "Adventure" + case GenreComedy: + return "Comedy" + case GenreDrama: + return "Drama" + case GenreEcchi: + return "Ecchi" + case GenreFantasy: + return "Fantasy" + case GenreGenderBender: + return "Gender Bender" + case GenreHarem: + return "Harem" + case GenreHistorical: + return "Historical" + case GenreHorror: + return "Horror" + case GenreJosei: + return "Josei" + case GenreMartialArts: + return "Martial Arts" + case GenreMature: + return "Mature" + case GenreMecha: + return "Mecha" + case GenreMystery: + return "Mystery" + case GenrePsychological: + return "Psychological" + case GenreRomance: + return "Romance" + case GenreSchoolLife: + return "School Life" + case GenreSciFi: + return "Sci-fi" + case GenreSeinen: + return "Seinen" + case GenreShoujo: + return "Shoujo" + case GenreShoujoAi: + return "Shoujo Ai" + case GenreShounen: + 
return "Shounen" + case GenreShounenAi: + return "Shounen Ai" + case GenreSliceOfLife: + return "Slice of Life" + case GenreSmut: + return "Smut" + case GenreSports: + return "Sports" + case GenreSupernatural: + return "Supernatural" + case GenreTragedy: + return "Tragedy" + case GenreWuxia: + return "Wuxia" + case GenreXianxia: + return "Xianxia" + case GenreXuanhuan: + return "Xuanhuan" + case GenreYaoi: + return "Yaoi" + case GenreYuri: + return "Yuri" + default: + panic("invalid genre: " + g) + } +} + +func ParseGenre(str string) GenreID { + switch str { + case "Action": + return GenreAction + case "Adult": + return GenreAdult + case "Adventure": + return GenreAdventure + case "Comedy": + return GenreComedy + case "Drama": + return GenreDrama + case "Ecchi": + return GenreEcchi + case "Fantasy": + return GenreFantasy + case "Gender Bender": + return GenreGenderBender + case "Harem": + return GenreHarem + case "Historical": + return GenreHistorical + case "Horror": + return GenreHorror + case "Josei": + return GenreJosei + case "Martial Arts": + return GenreMartialArts + case "Mature": + return GenreMature + case "Mecha": + return GenreMecha + case "Mystery": + return GenreMystery + case "Psychological": + return GenrePsychological + case "Romance": + return GenreRomance + case "School Life": + return GenreSchoolLife + case "Sci-fi": + return GenreSciFi + case "Seinen": + return GenreSeinen + case "Shoujo": + return GenreShoujo + case "Shoujo Ai": + return GenreShoujoAi + case "Shounen": + return GenreShounen + case "Shounen Ai": + return GenreShounenAi + case "Slice of Life": + return GenreSliceOfLife + case "Smut": + return GenreSmut + case "Sports": + return GenreSports + case "Supernatural": + return GenreSupernatural + case "Tragedy": + return GenreTragedy + case "Wuxia": + return GenreWuxia + case "Xianxia": + return GenreXianxia + case "Xuanhuan": + return GenreXuanhuan + case "Yaoi": + return GenreYaoi + case "Yuri": + return GenreYuri + default: + 
panic("invalid genre: " + str) + } +} diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..26d5142 --- /dev/null +++ b/go.mod @@ -0,0 +1,13 @@ +module git.milar.in/milarin/nu-api + +go 1.20 + +require ( + git.milar.in/milarin/slices v0.0.6 + github.com/PuerkitoBio/goquery v1.8.1 +) + +require ( + github.com/andybalholm/cascadia v1.3.1 // indirect + golang.org/x/net v0.7.0 // indirect +) diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..6fe43c5 --- /dev/null +++ b/go.sum @@ -0,0 +1,37 @@ +git.milar.in/milarin/slices v0.0.6 h1:AQoSarZ58WHYol9c6woWJSe8wFpPC2RC4cvIlZpfg9s= +git.milar.in/milarin/slices v0.0.6/go.mod h1:NOr53AOeur/qscu/FBj3lsFR262PNYBccLYSTCAXRk4= +github.com/PuerkitoBio/goquery v1.8.1 h1:uQxhNlArOIdbrH1tr0UXwdVFgDcZDrZVdcpygAcwmWM= +github.com/PuerkitoBio/goquery v1.8.1/go.mod h1:Q8ICL1kNUJ2sXGoAhPGUdYDJvgQgHzJsnnd3H7Ho5jQ= +github.com/andybalholm/cascadia v1.3.1 h1:nhxRkql1kdYCc8Snf7D5/D3spOX+dBgjA6u8x004T2c= +github.com/andybalholm/cascadia v1.3.1/go.mod h1:R4bJ1UQfqADjvDa4P6HZHLh/3OxWWEqc0Sk8XGwHqvA= +github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= +golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= +golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= +golang.org/x/net v0.0.0-20210916014120-12bc252f5db8/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= +golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= +golang.org/x/net v0.7.0 h1:rJrUqqhjsgNp7KqAIc25s9pZnjU7TUcSY7HcVZjdn1g= 
+golang.org/x/net v0.7.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= +golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= +golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= +golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= +golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= +golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.1.12/go.mod 
h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= +golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= diff --git a/novelupdates.go b/novelupdates.go new file mode 100644 index 0000000..cc442eb --- /dev/null +++ b/novelupdates.go @@ -0,0 +1,57 @@ +package nuapi + +import ( + "bytes" + "compress/gzip" + "context" + "errors" + "fmt" + "os/exec" + + "github.com/PuerkitoBio/goquery" +) + +func (api *Api) getWithCookie(ctx context.Context, url string) (*goquery.Document, error) { + if api.Cookie == "" { + return nil, errors.New("no API cookie set") + } + + curl := exec.CommandContext(ctx, "curl", + "-s", url, + "-H", fmt.Sprintf("User-Agent: %s", api.UserAgent), + "-H", fmt.Sprintf("Cookie: %s", api.Cookie), + "-H", fmt.Sprintf("Accept-Encoding: %s", "gzip"), + ) + + data, err := curl.Output() + if err != nil { + return nil, err + } + + r, err := gzip.NewReader(bytes.NewReader(data)) + if err != nil { + return nil, err + } + + return goquery.NewDocumentFromReader(r) +} + +func (api *Api) get(ctx context.Context, url string) (*goquery.Document, error) { + curl := exec.CommandContext(ctx, "curl", + "-s", url, + "-H", fmt.Sprintf("User-Agent: %s", api.UserAgent), + "-H", fmt.Sprintf("Accept-Encoding: %s", "gzip"), + ) + + data, err := curl.Output() + if err != nil { + return nil, err + } + + r, err := gzip.NewReader(bytes.NewReader(data)) + if err != nil { + return nil, err + } + + return goquery.NewDocumentFromReader(r) +} diff --git a/tag.go b/tag.go new file mode 100644 index 0000000..b7df9b7 --- /dev/null +++ b/tag.go @@ -0,0 +1,3 @@ +package nuapi + +type TagID string diff --git a/types.go b/types.go new file mode 100644 index 0000000..be2e02c --- /dev/null +++ b/types.go @@ -0,0 +1,84 @@ +package nuapi + +import ( + "encoding/json" + "time" +) + +type NovelID string +type ChapterID string +type ReadingListID string +type GroupID string + +type ReadingList struct { + ID ReadingListID `json:"id"` + Entries 
[]ReadingListEntry `json:"entries"` +} + +type ReadingListEntry struct { + Novel NovelEntry `json:"novel"` + CurrentChapter ReadingListChapterEntry `json:"current_chapter"` + LatestChapter ReadingListChapterEntry `json:"latest_chapter"` +} + +func (e ReadingListEntry) NewChapterAvailable() bool { + return e.CurrentChapter.ID != e.LatestChapter.ID +} + +type ReadingListChapterEntry struct { + NovelID NovelID `json:"novel_id"` + ID ChapterID `json:"id"` + Link string `json:"link"` +} + +type NovelEntry struct { + NovelID NovelID `json:"id"` + Name string `json:"name"` +} + +type Novel struct { + ID NovelID `json:"id"` + Name string `json:"name"` + AssociatedNames []string `json:"associated_names"` + Description string `json:"description"` + Cover string `json:"cover"` + Type NovelType `json:"type"` + OriginalLanguage Language `json:"original_language"` + Genres []GenreID `json:"genres"` + Tags []TagID `json:"tags"` +} + +type NovelType string + +const ( + TypeLightNovel NovelType = "Light Novel" + TypePublishedNovel NovelType = "Published Novel" + TypeWebNovel NovelType = "Web Novel" +) + +type Language string + +const ( + LanguageJapanese Language = "jp" + LanguageChinese Language = "cn" + LanguageMalaysian Language = "my" + LanguageFilipino Language = "fil" + LanguageKhmer Language = "khm" + LanguageThai Language = "th" + LanguageIndonesian Language = "id" + LanguageKorean Language = "kr" + LanguageVietnamese Language = "vn" +) + +type NovelChapterEntry struct { + ID ChapterID `json:"id"` + Link string `json:"link"` + NovelID NovelID `json:"novel_id"` + Date time.Time `json:"date"` + Group GroupID `json:"group"` +} + +func (nce NovelChapterEntry) String() string { + data, _ := json.MarshalIndent(nce, "", "\t") + return string(data) +}