Skip to content

Commit

Permalink
frontend: add another regex to catch blog images in the first 5 lines
Browse files Browse the repository at this point in the history
  • Loading branch information
khancyr committed Oct 31, 2023
1 parent 7d3c751 commit 37ad38e
Showing 1 changed file with 3 additions and 0 deletions.
3 changes: 3 additions & 0 deletions frontend/scripts/get_discourse_posts.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,10 +87,13 @@ def get_first_youtube_link(request: str) -> str:
# Regular expressions to find YouTube URLs and image links
url_pattern = re.compile(r'href=[\'"]?(https?://www\.youtube[^\'" >]+)')
img_pattern = re.compile(r'(?:href|src)=[\'"]?(https?://[^\'" >]+\.(jpg|jpeg|png|gif|svg|bmp|webp))')
img_pattern2 = re.compile(r'(?:img src)=[\'"]?(https?://[^\'" >]+)') # catch google link and such

# Find all matches
youtube_links = url_pattern.findall(first_five_lines)
img_links = img_pattern.findall(first_five_lines_lower)[0] if img_pattern.findall(first_five_lines_lower) else None
if img_links is None:
img_links = img_pattern2.findall(first_five_lines_lower)[0] if img_pattern2.findall(first_five_lines_lower) else None

# If there are image links before YouTube links, return empty string
if img_links and (not youtube_links or
Expand Down

0 comments on commit 37ad38e

Please sign in to comment.