Skip to content

Commit

Permalink
Solve some type issues.
Browse files: browse the repository at this point in the history.
  • Loading branch information
emmiegit committed Feb 23, 2024
1 parent f9ae54a commit c6a0429
Show file tree
Hide file tree
Showing 5 changed files with 24 additions and 13 deletions.
4 changes: 3 additions & 1 deletion yellowstone/request/forum_post_revision.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,4 +41,6 @@ def get(
"forum/sub/ForumPostRevisionModule",
{"revisionId": revision_id},
)
return html.strip()
_ = html
# TODO
raise NotImplementedError
1 change: 1 addition & 0 deletions yellowstone/request/forum_post_revisions.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,3 +45,4 @@ def get(
soup = make_soup(html)
# TODO
_ = soup
raise NotImplementedError
4 changes: 2 additions & 2 deletions yellowstone/request/forum_posts.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
regex_extract_int,
regex_extract_str,
)
from ..types import UserModuleData
from ..types import ForumPostUser
from ..wikidot import Wikidot
from .forum_categories import CATEGORY_ID_REGEX

Expand All @@ -33,7 +33,7 @@ class ForumPostData:
id: int
parent: Optional[int]
title: str
created_by: UserModuleData
created_by: ForumPostUser
created_at: datetime
html: str

Expand Down
17 changes: 9 additions & 8 deletions yellowstone/scraper.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
CustomUserData,
DeletedUserData,
ForumLastPostData,
ForumPostUser,
UserModuleData,
)

Expand Down Expand Up @@ -53,7 +54,9 @@ def regex_extract_str(source: str, body: str, regex: re.Pattern) -> str:
assert (
len(match.groups()) == 1
), "Extracting single value from regex with multiple groups"
return match[1]
string = match[1]
assert isinstance(string, str), "Group 1 from regex is not a string"
return string


def regex_extract_int(source: str, body: str, regex: re.Pattern) -> int:
Expand All @@ -65,7 +68,6 @@ def find_element(source: str, soup: Union[BeautifulSoup, Tag], *args, **kwargs)
element = soup.find(*args, **kwargs)
if element is None:
raise ScrapingError(f"No '{args} {kwargs}' found for {source}")

return element


Expand All @@ -74,7 +76,6 @@ def select_element(source: str, soup: Union[BeautifulSoup, Tag], selector: str)
element = soup.select_one(selector)
if element is None:
raise ScrapingError(f"No '{selector} found for {source}")

return element


Expand All @@ -97,10 +98,7 @@ def get_entity_date(source: str, tag: Tag) -> datetime:
raise ScrapingError(f"Could not find date timestamp from {source}")


def get_entity_user(
source: str,
tag: Tag,
) -> Union[UserModuleData, DeletedUserData, CustomUserData]:
def get_entity_user(source: str, tag: Tag) -> ForumPostUser:
"""
Parses out a user module entity, including unusual cases.
Requires being focused on .printuser
Expand All @@ -121,9 +119,12 @@ def get_entity_user(
# If there is a ".printuser a", it's either a regular user or a guest
entity = tag.find("a")
if entity is not None:
assert isinstance(entity, Tag), ".printuser a is not an HTML entity"

# Anonymous users have an IP address
ip_entity = entity.find(class_="ip")
ip_entity = entity.find("span", class_="ip")
if ip_entity is not None:
assert isinstance(ip_entity, Tag), "span.ip is not an HTML entity"
ip = ip_entity.text
if ip.startswith("(") and ip.endswith(")"):
ip = ip[1:-1]
Expand Down
11 changes: 9 additions & 2 deletions yellowstone/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,6 @@
from datetime import datetime
from typing import Union

Json = Union[None, int, float, str, list["Json"], dict[str, "Json"]]


@dataclass
class UserModuleData:
Expand Down Expand Up @@ -46,3 +44,12 @@ class ForumLastPostData:
posted_user: UserModuleData
thread_id: int
post_id: int


Json = Union[None, int, float, str, list["Json"], dict[str, "Json"]]
ForumPostUser = Union[
UserModuleData,
DeletedUserData,
AnonymousUserData,
CustomUserData,
]

0 comments on commit c6a0429

Please sign in to comment.