Skip to content

Commit

Permalink
Small fixes for Wattpad. (#1137)
Browse files Browse the repository at this point in the history
* adapter_wattpadcom: Various fixes and changes

* adapter_wattpadcom: Config update & category 0 not always present

---------

Co-authored-by: dbhmw <[email protected]>
  • Loading branch information
dbhmw and dbhmw authored Dec 31, 2024
1 parent a9944cd commit 816bbdf
Show file tree
Hide file tree
Showing 7 changed files with 391 additions and 32 deletions.
14 changes: 10 additions & 4 deletions calibre-plugin/plugin-defaults.ini
Original file line number Diff line number Diff line change
Expand Up @@ -3276,11 +3276,17 @@ readings_label:Readings

[wattpad.com]
use_basic_cache:true
#is_adult:true
extra_titlepage_entries: language, reads
extra_valid_entries: language, tags, reads
extra_titlepage_entries: reads
extra_valid_entries: reads
reads_label:Read Count
include_in_genre: tags
datechapter_format:%%Y-%%m-%%d %%H:%%M:%%S

## You can set the 'dateUpdated_method' to either:
## - 'modifyDate': This will keep the current behavior, where the
## update date corresponds to any modification made to the content.
## - 'lastPublishedPart': This will set the update date to
## the date of the last published chapter.
dateUpdated_method: modifyDate

# Add comma separators for numeric reads. Eg 10000 becomes 10,000
add_to_comma_entries:,reads
Expand Down
71 changes: 48 additions & 23 deletions fanficfare/adapters/adapter_wattpadcom.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
class WattpadComAdapter(BaseSiteAdapter):
# All the API discovery work done by github user de3sw2aq1
# Source: https://github.com/de3sw2aq1/wattpad-ebook-scraper/blob/master/scrape.py
API_GETCATEGORIES = 'https://www.wattpad.com/apiv2/getcategories'
API_GETCATEGORIES = 'https://www.wattpad.com/api/v3/categories'
API_STORYINFO = 'https://www.wattpad.com/api/v3/stories/%s' # stories?id=X is NOT the same
API_STORYTEXT = 'https://www.wattpad.com/apiv2/storytext?id=%s'
API_CHAPTERINFO = 'https://www.wattpad.com/v4/parts/%s?fields=group(id)&_=%s'
Expand All @@ -44,15 +44,6 @@ def __init__(self, config, url):
self._setURL('https://www.wattpad.com/story/%s' % self.storyId)
self.chapter_photoUrl = {}

# categoryDefs do not change all that often, if at all. Could be put in a constant, leaving it as a class var for now
# note: classvar may be useless because of del adapter
if WattpadComAdapter.CATEGORY_DEFs is None:
try:
WattpadComAdapter.CATEGORY_DEFs = json.loads(self.get_request(WattpadComAdapter.API_GETCATEGORIES))
except:
logger.warning('API_GETCATEGORIES failed.')
WattpadComAdapter.CATEGORY_DEFs = []

@staticmethod
def getSiteDomain():
return 'www.wattpad.com'
Expand Down Expand Up @@ -99,7 +90,17 @@ def getStoryId(self, url):
else:
return groupid

def doExtractChapterUrlsAndMetadata(self, get_cover=True):
def extractChapterUrlsAndMetadata(self, get_cover=True):
# categoryDefs do not change all that often, if at all. Could be put in a constant, leaving it as a class var for now
# note: classvar may be useless because of del adapter
if WattpadComAdapter.CATEGORY_DEFs is None:
try:
WattpadComAdapter.CATEGORY_DEFs = json.loads(self.get_request(WattpadComAdapter.API_GETCATEGORIES))
except Exception as e:
logger.warning('API_GETCATEGORIES failed: %s. Fallback to list from 2024-12'%e)
WattpadComAdapter.CATEGORY_DEFs = [{"id":4,"name":"Romance","name_english":"Romance","roles":["onboarding","writing","searching"]},{"id":5,"name":"Science Fiction","name_english":"Science Fiction","roles":["onboarding","writing","searching"]},{"id":3,"name":"Fantasy","name_english":"Fantasy","roles":["onboarding","writing","searching"]},{"id":7,"name":"Humor","name_english":"Humor","roles":["onboarding","writing","searching"]},{"id":12,"name":"Paranormal","name_english":"Paranormal","roles":["onboarding","writing","searching"]},{"id":8,"name":"Mystery Thriller","name_english":"Mystery Thriller","roles":["onboarding","writing","searching"]},{"id":9,"name":"Horror","name_english":"Horror","roles":["onboarding","writing","searching"]},{"id":11,"name":"Adventure","name_english":"Adventure","roles":["onboarding","writing","searching"]},{"id":23,"name":"Historical Fiction","name_english":"Historical Fiction","roles":["onboarding","writing","searching"]},{"id":1,"name":"Teen Fiction","name_english":"Teen Fiction","roles":["onboarding","writing","searching"]},{"id":6,"name":"Fanfiction","name_english":"Fanfiction","roles":["onboarding","writing","searching"]},{"id":2,"name":"Poetry","name_english":"Poetry","roles":["onboarding","writing","searching"]},{"id":17,"name":"Short Story","name_english":"Short Story","roles":["onboarding","writing","searching"]},{"id":21,"name":"General Fiction","name_english":"General 
Fiction","roles":["onboarding","writing","searching"]},{"id":24,"name":"ChickLit","name_english":"ChickLit","roles":["onboarding","writing","searching"]},{"id":14,"name":"Action","name_english":"Action","roles":["onboarding","writing","searching"]},{"id":18,"name":"Vampire","name_english":"Vampire","roles":["onboarding","writing","searching"]},{"id":22,"name":"Werewolf","name_english":"Werewolf","roles":["onboarding","writing","searching"]},{"id":13,"name":"Spiritual","name_english":"Spiritual","roles":["onboarding","writing","searching"]},{"id":16,"name":"Non-Fiction","name_english":"Non-Fiction","roles":["onboarding","writing","searching"]},{"id":10,"name":"Classics","name_english":"Classics","roles":["onboarding","searching"]},{"id":19,"name":"Random","name_english":"Random","roles":["writing","searching"]}]

logger.debug("URL: "+self.url)
try:
storyInfo = json.loads(self.get_request(WattpadComAdapter.API_STORYINFO % self.storyId))
# logger.debug('storyInfo: %s' % json.dumps(storyInfo, sort_keys=True,
Expand All @@ -113,6 +114,13 @@ def doExtractChapterUrlsAndMetadata(self, get_cover=True):
if not (self.is_adult or self.getConfig("is_adult")) and storyInfo['mature'] == True:
raise exceptions.AdultCheckRequired(self.url)

# Tags
self.story.extendList('genre', storyInfo['tags'])

# Rating
if storyInfo['mature']:
self.story.setMetadata('rating', 'Mature')

# title
self.story.setMetadata('title', storyInfo['title'])

Expand All @@ -131,26 +139,43 @@ def doExtractChapterUrlsAndMetadata(self, get_cover=True):
self.setDescription(storyInfo['url'], storyInfo['description'])

# DATES
self.story.setMetadata('dateUpdated', makeDate(storyInfo['modifyDate'].rstrip('Z'), "%Y-%m-%dT%H:%M:%S"))
if self.story.getConfig('dateUpdated_method') == "lastPublishedPart":
self.story.setMetadata('dateUpdated', makeDate(storyInfo['lastPublishedPart']['createDate'], self.getDateFormat()))
else:
self.story.setMetadata('dateUpdated', makeDate(storyInfo['modifyDate'], self.getDateFormat()))
self.story.setMetadata('datePublished', makeDate(storyInfo['createDate'].rstrip('Z'), "%Y-%m-%dT%H:%M:%S"))

# Chapters
for part in storyInfo['parts']:
self.add_chapter(part['title'], part['url'])
chapterDate = makeDate(part["createDate"], self.getDateFormat())
chaptermodifyDate = makeDate(part["modifyDate"], self.getDateFormat())
self.add_chapter(part["title"], part["url"], {
"date": chapterDate.strftime(self.getConfig("datechapter_format", self.getConfig("datePublished_format", self.getDateFormat()))),
"modifyDate": chaptermodifyDate.strftime(self.getConfig("datechapter_format", self.getConfig("datePublished_format", self.getDateFormat())))
},
)
self.chapter_photoUrl[part['url']] = part['photoUrl']
self.setCoverImage(storyInfo['url'], storyInfo['cover'].replace('-256-','-512-'))
self.story.setMetadata('language', storyInfo['language']['name'])

# CATEGORIES
try:
storyCategories = [WattpadComAdapter.CATEGORY_DEFs.get(unicode(c)) for c in storyInfo['categories'] if
unicode(c) in WattpadComAdapter.CATEGORY_DEFs]

self.story.setMetadata('category', storyCategories[0])
self.story.setMetadata('tags', storyInfo['tags'])
except:
pass

return self.extractChapterUrlsAndMetadata()
# The category '0' is almost always present but does not have an entry in the Wattpad API (https://www.wattpad.com/api/v3/categories).
logger.debug('Categories: %s'%str(storyInfo['categories']))
0 in storyInfo['categories'] and storyInfo['categories'].remove(0)
storyCategories = []
for category in WattpadComAdapter.CATEGORY_DEFs:
if category['id'] in storyInfo['categories']:
storyCategories.append(category['name'])
storyInfo['categories'].remove(category['id'])
if not storyInfo['categories']:
break
self.story.extendList('category', storyCategories)
#try:
#storyCategories = [WattpadComAdapter.CATEGORY_DEFs.get(unicode(c)) for c in storyInfo['categories'] if
# unicode(c) in WattpadComAdapter.CATEGORY_DEFs]
#self.story.setMetadata('category', storyCategories[0])
#except Exception as e:
#pass

def getChapterText(self, url):
logger.debug('%s' % url)
Expand Down
2 changes: 2 additions & 0 deletions fanficfare/configurable.py
Original file line number Diff line number Diff line change
Expand Up @@ -309,6 +309,7 @@ def get_valid_set_options():
'skip_sticky_first_posts':(base_xenforo2_list,None,boollist),
'include_dice_rolls':(base_xenforo2_list,None,boollist+['svg']),
'include_chapter_banner_images':(['wattpad.com'],None,boollist),
'dateUpdated_method':(['wattpad.com'],None,['modifyDate', 'lastPublishedPart']),
'fix_excess_space': (['novelonlinefull.com', 'novelall.com'], ['epub', 'html'], boollist),
'dedup_order_chapter_list': (['wuxiaworld.xyz', 'novelupdates.cc'], None, boollist),
'show_nsfw_cover_images': (['fiction.live'], None, boollist),
Expand Down Expand Up @@ -574,6 +575,7 @@ def get_valid_keywords():
'skip_sticky_first_posts',
'include_dice_rolls',
'include_chapter_banner_images',
'dateUpdated_method',
'datethreadmark_format',
'fix_pseudo_html',
'fix_excess_space',
Expand Down
14 changes: 10 additions & 4 deletions fanficfare/defaults.ini
Original file line number Diff line number Diff line change
Expand Up @@ -3271,11 +3271,17 @@ readings_label:Readings

[wattpad.com]
use_basic_cache:true
#is_adult:true
extra_titlepage_entries: language, reads
extra_valid_entries: language, tags, reads
extra_titlepage_entries: reads
extra_valid_entries: reads
reads_label:Read Count
include_in_genre: tags
datechapter_format:%%Y-%%m-%%d %%H:%%M:%%S

## You can set the 'dateUpdated_method' to either:
## - 'modifyDate': This will keep the current behavior, where the
## update date corresponds to any modification made to the content.
## - 'lastPublishedPart': This will set the update date to
## the date of the last published chapter.
dateUpdated_method: modifyDate

# Add comma separators for numeric reads. Eg 10000 becomes 10,000
add_to_comma_entries:,reads
Expand Down
176 changes: 176 additions & 0 deletions tests/adapters/test_adapter_wattpadcom.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,176 @@
import pytest
from unittest.mock import patch
from fanficfare.exceptions import HTTPErrorFFF

from fanficfare.adapters.adapter_wattpadcom import WattpadComAdapter as wattpadcom
from tests.adapters.generic_adapter_test import GenericAdapterTestExtractChapterUrlsAndMetadata, GenericAdapterTestGetChapterText
from tests.conftest import wattpadcom_api_story_return, wattpadcom_api_chapter_return, wattpadcom_api_getcategories_return

# Expected values and fixtures for the Wattpad adapter tests in this module.
# Both test classes below read this dict: they pass 'adapter', 'url',
# 'sections', 'specific_path_adapter' and one of the fixtures to the generic
# base-class setup_method(), and keep the whole dict as self.expected_data.
SPECIFIC_TEST_DATA = {
'adapter': wattpadcom,
'url': 'https://www.wattpad.com/story/173080052-the-kids-aren%27t-alright',
'sections': ["wattpad.com"],
# Dotted path below 'fanficfare.adapters.' used to build mock.patch targets.
'specific_path_adapter': 'adapter_wattpadcom.WattpadComAdapter',

'title': 'The Kids Aren\'t Alright',
# The adapter rewrites '-256-' to '-512-' in the cover URL before calling setCoverImage.
'cover_image': 'https://img.wattpad.com/cover/173080052-512-k768737.jpg',
'author': 'bee_mcd',
'authorId': 'bee_mcd',
'datePublished': '2019-01-02',
'dateUpdated': '2024-01-22',
'intro': "The year is 1988, and Finn, Ronan, Becca and Jasper are spending the summer at a reformatory camp located deep in the Alaskan wilderness. The camp, named Lightlake, is the last chance the teens have to get their lives back on track, but changing for the better isn't easy - and especially not at a place like Lightlake, where secrets outnumber the campers and myths have a way of coming to life.\n\nThis story is now free on Wattpad. \n\n[[word count: 200,000-250,000 words]]",
# Presumably keyed by zero-based chapter index and checked by the generic
# base-class tests -- TODO confirm against generic_adapter_test.
'expected_chapters': {
0: {'title': 'Chapter 1: Finn',
'url': 'https://www.wattpad.com/675342676-the-kids-aren%27t-alright-chapter-1-finn',
'date': '2019-01-02 03:02:00'},
10: {'title': 'Chapter 11: Jasper',
'url': 'https://www.wattpad.com/675347689-the-kids-aren%27t-alright-chapter-11-jasper'},
76: {'title': 'Sneak Peak of Book #2, "Kids These Days"',
'url': 'https://www.wattpad.com/807690860-the-kids-aren%27t-alright-sneak-peak-of-book-2-kids'},
},
# Mocked get_request() payload for the story-info API call.
'list_chapters_fixture': wattpadcom_api_story_return,

'chapter_url': 'https://www.wattpad.com/675344459-the-kids-aren%27t-alright-chapter-3-ronan',
# Each sentence must appear verbatim in the text returned by getChapterText().
'expected_sentences': [
"We end up stopping at a newspaper stand a few blocks away.",
"\"We can go somewhere else if it bothers you so much. I'll call a cab.\"",
"\"I'll see you tomorrow,\" I say to him as he climbs the stairs to the front door. \"We can catch a Mets game—\""
],
# Mocked get_request() payload for the chapter-text call.
'chapter_fixture': wattpadcom_api_chapter_return,

'status': 'Completed',
'category': 'Teen Fiction',
'genre': '80s, adventure, alaska, camps, comedy, drama, foundfamily, friends, humor, lake, lgbt, magic, mystery, myth, novel, psychic, retro, summer, summercamp, teen, teenfiction, texttospeech, wilderness, youngadult, yukon',
'language': 'English',
# Empty because the adapter only sets 'rating' (to 'Mature') for stories the
# API flags as mature; presumably the fixture story is not flagged.
'rating': '',
'reads': '1206132',
}

class TestExtractChapterUrlsAndMetadata(GenericAdapterTestExtractChapterUrlsAndMetadata):
    """Metadata-extraction tests for the Wattpad adapter.

    Every test runs ``extractChapterUrlsAndMetadata()`` against mocked API
    responses and compares one metadata entry with ``SPECIFIC_TEST_DATA``.
    """

    def setup_method(self):
        """Build the adapter under test through the generic base-class setup."""
        self.expected_data = SPECIFIC_TEST_DATA

        super().setup_method(
            SPECIFIC_TEST_DATA['adapter'],
            SPECIFIC_TEST_DATA['url'],
            SPECIFIC_TEST_DATA['sections'],
            SPECIFIC_TEST_DATA['specific_path_adapter'],
            SPECIFIC_TEST_DATA['list_chapters_fixture'])

        # 'reads' is a site-specific entry; presumably it has to be registered
        # as valid before getMetadata('reads') returns it -- TODO confirm.
        self.configuration.validEntries.extend(['reads'])

    @pytest.fixture(autouse=True)
    def setup_env(self):
        """Patch the adapter's network-facing methods for the duration of each test."""
        with patch(f'fanficfare.adapters.{self.path_adapter}.setDescription') as mock_setDescription, \
             patch(f'fanficfare.adapters.{self.path_adapter}.setCoverImage') as mock_setCoverImage, \
             patch(f'fanficfare.adapters.{self.path_adapter}.get_request') as mockget_request:

            self.mock_setCoverImage = mock_setCoverImage
            self.mock_setDescription = mock_setDescription
            self.mockget_request = mockget_request

            # CATEGORY_DEFs is cached on the adapter class. Only while it is
            # still unset does the adapter request the category list before
            # the story info, so the response queue must match that order.
            if wattpadcom.CATEGORY_DEFs is None:
                self.mockget_request.side_effect = [wattpadcom_api_getcategories_return, self.fixture]
            else:
                self.mockget_request.return_value = self.fixture

            yield

    def test_get_cover_image(self):
        # When
        self.adapter.extractChapterUrlsAndMetadata()

        # Then
        self.mock_setCoverImage.assert_called_with(self.url, self.expected_data['cover_image'])

    def test_get_published_date(self):
        # When
        self.adapter.extractChapterUrlsAndMetadata()

        # Then
        assert self.adapter.story.getMetadata('datePublished') == self.expected_data['datePublished']

    def test_get_status(self):
        # When
        self.adapter.extractChapterUrlsAndMetadata()

        # Then
        assert self.adapter.story.getMetadata('status') == self.expected_data['status']

    def test_get_genre(self):
        # When
        self.adapter.extractChapterUrlsAndMetadata()

        # Then
        assert self.adapter.story.getMetadata('genre') == self.expected_data['genre']

    def test_get_warnings(self):
        # NOTE: despite its name this test checks the 'reads' entry. The name
        # is kept in case it overrides a generic base-class test.
        # When
        self.adapter.extractChapterUrlsAndMetadata()

        # Then
        assert self.adapter.story.getMetadata('reads') == self.expected_data['reads']

    def test_get_language(self):
        # When
        self.adapter.extractChapterUrlsAndMetadata()

        # Then
        assert self.adapter.story.getMetadata('language') == self.expected_data['language']

    def test_get_agerating(self):
        # When
        self.adapter.extractChapterUrlsAndMetadata()

        # Then
        assert self.adapter.story.getMetadata('rating') == self.expected_data['rating']

    def test_get_category(self):
        # Previously also named test_get_agerating, which rebound the method
        # above and silently dropped the 'rating' assertion from the test run.
        # When
        self.adapter.extractChapterUrlsAndMetadata()

        # Then
        assert self.adapter.story.getMetadata('category') == self.expected_data['category']

    @patch('fanficfare.adapters.adapter_wattpadcom.WattpadComAdapter.get_request')
    def test_get_category_when_req_fails(self, mockget_request):
        """The built-in category list is used when the categories request fails."""
        # Given: the categories request fails and the story request succeeds.
        mockget_request.side_effect = [HTTPErrorFFF(self.expected_data['url'], 403, 'Client Error'), wattpadcom_api_story_return]
        # Clear the class-level cache so the adapter requests the categories again.
        wattpadcom.CATEGORY_DEFs = None

        # When
        self.adapter.extractChapterUrlsAndMetadata()

        # Then
        assert self.adapter.story.getMetadata('category') == self.expected_data['category']


class TestGetChapterText(GenericAdapterTestGetChapterText):
    """Chapter-text tests for the Wattpad adapter, using mocked API responses."""

    def setup_method(self):
        """Build the adapter under test through the generic base-class setup."""
        self.expected_data = SPECIFIC_TEST_DATA

        super().setup_method(
            SPECIFIC_TEST_DATA['adapter'],
            SPECIFIC_TEST_DATA['url'],
            SPECIFIC_TEST_DATA['sections'],
            SPECIFIC_TEST_DATA['specific_path_adapter'],
            SPECIFIC_TEST_DATA['chapter_fixture'])

    @pytest.fixture(autouse=True)
    def setup_env(self):
        """Patch the adapter's network-facing methods and queue the API responses."""
        with patch(f'fanficfare.adapters.{self.path_adapter}.setDescription'), \
             patch(f'fanficfare.adapters.{self.path_adapter}.setCoverImage'), \
             patch(f'fanficfare.adapters.{self.path_adapter}.get_request') as mockget_request:

            responses = [wattpadcom_api_story_return, self.fixture]
            # While the class-level category cache is unset, the adapter asks
            # for the category list before the story info. Without this extra
            # response the queue is consumed one step out of order whenever
            # this class runs before any test that filled the cache.
            if wattpadcom.CATEGORY_DEFs is None:
                responses.insert(0, wattpadcom_api_getcategories_return)
            mockget_request.side_effect = responses

            yield

    def test_get_metadata(self):
        # When
        self.adapter.extractChapterUrlsAndMetadata()
        response = self.adapter.getChapterText(self.expected_data['chapter_url'])

        # Then: every expected sentence appears in the returned chapter text.
        for p in self.expected_data['expected_sentences']:
            assert p in response
3 changes: 2 additions & 1 deletion tests/conftest.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
from tests.fixtures_chireads import *
from tests.fixtures_fanfictionsfr import *
from tests.fixtures_fanfictionsfr import *
from tests.fixtures_wattpadcom import *
143 changes: 143 additions & 0 deletions tests/fixtures_wattpadcom.py

Large diffs are not rendered by default.

0 comments on commit 816bbdf

Please sign in to comment.