Skip to content

Commit

Permalink
Hack in support for new khinsider layout
Browse files (browse the repository at this point in the history)
  • Loading branch information
marcus-crane committed Apr 23, 2022
1 parent 6c9289b commit dbfb76b
Show file tree
Hide file tree
Showing 2 changed files with 48 additions and 16 deletions.
28 changes: 25 additions & 3 deletions pkg/indexer/index.go
Original file line number | Diff line number | Diff line change
Expand Up @@ -4,8 +4,6 @@ import (
"encoding/json"
"errors"
"fmt"
"github.com/marcus-crane/khinsider/v2/pkg/update"
"github.com/pterm/pterm"
"io"
"net/http"
"os"
Expand All @@ -14,6 +12,9 @@ import (
"strconv"
"strings"

"github.com/marcus-crane/khinsider/v2/pkg/update"
"github.com/pterm/pterm"

"github.com/marcus-crane/khinsider/v2/pkg/scrape"
"github.com/marcus-crane/khinsider/v2/pkg/types"
"github.com/marcus-crane/khinsider/v2/pkg/util"
Expand Down Expand Up @@ -60,7 +61,28 @@ func BuildIndex() error {
p, _ := pterm.DefaultProgressbar.WithTotal(len(letters)).WithTitle("Building indexer").WithRemoveWhenDone(true).Start()
for _, letter := range letters {
p.UpdateTitle("Downloading results for " + letter)
letterResults, err := scrape.GetResultsForLetter(letter)
page := 1
letterResults, more, err := scrape.GetResultsForLetter(letter)
for {
if more {
page += 1
letterUrl := fmt.Sprintf("%s?page=%d", letter, page)
p.UpdateTitle(fmt.Sprintf("~ Downloading Page %d of %s", page, letter))
results, evenMore, err := scrape.GetResultsForLetter(letterUrl)
if err != nil {
panic(err)
}
for k, v := range results {
letterResults[k] = v
}
if !evenMore {
break
}
more = evenMore
} else {
break
}
}
if err != nil {
panic(err)
}
Expand Down
36 changes: 23 additions & 13 deletions pkg/scrape/scrape.go
Original file line number | Diff line number | Diff line change
Expand Up @@ -2,12 +2,13 @@ package scrape

import (
"fmt"
"github.com/marcus-crane/khinsider/v2/pkg/util"
"io"
"net/http"
"strconv"
"strings"

"github.com/marcus-crane/khinsider/v2/pkg/util"

"github.com/PuerkitoBio/goquery"
"github.com/pterm/pterm"

Expand All @@ -29,7 +30,7 @@ func DownloadPage(url string) (*http.Response, error) {
return res, err
}

func GetResultsForLetter(letter string) (types.SearchResults, error) {
func GetResultsForLetter(letter string) (types.SearchResults, bool, error) {
url := fmt.Sprintf("%s%s", LetterBase, letter)
res, err := DownloadPage(url)
defer func(Body io.ReadCloser) {
Expand All @@ -39,24 +40,33 @@ func GetResultsForLetter(letter string) (types.SearchResults, error) {
}
}(res.Body)
if err != nil {
return nil, err
return nil, false, err
}
doc, err := goquery.NewDocumentFromReader(res.Body)
if err != nil {
return nil, err
return nil, false, err
}
results := make(types.SearchResults)
doc.Find("#EchoTopic p[align='left'] a").Each(func(i int, s *goquery.Selection) {
title := s.Text()
results[title] = "#"
trackUrl, exists := s.Attr("href")
if exists {
results[title] = trackUrl
} else {
results[title] = "#"
doc.Find("table.albumList tr").Each(func(i int, s *goquery.Selection) {
if i == 0 {
return
}
s.Find("td a").Each(func(i int, t *goquery.Selection) {
if i == 1 {
title := strings.TrimSpace(t.Text())
results[title] = "#"
trackUrl, exists := t.Attr("href")
if exists {
results[title] = trackUrl
}
}
})
})
more := false
doc.Find(".pagination-next a").Each(func(i int, s *goquery.Selection) {
_, more = s.Attr("href")
})
return results, nil
return results, more, nil
}

func RetrieveAlbum(slug string) (types.Album, error) {
Expand Down

0 comments on commit dbfb76b

Please sign in to comment.