From 51cdb66f9c65c41fd1c5b0564001ec5724be1575 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tu=C4=9Fhan=20Belbek?= Date: Thu, 17 Oct 2024 14:17:48 +0200 Subject: [PATCH] [HarvardBusinessReviewBridge] Add bridge (#4293) --- bridges/HarvardBusinessReviewBridge.php | 88 +++++++++++++++++++++++++ 1 file changed, 88 insertions(+) create mode 100644 bridges/HarvardBusinessReviewBridge.php diff --git a/bridges/HarvardBusinessReviewBridge.php b/bridges/HarvardBusinessReviewBridge.php new file mode 100644 index 00000000000..cd99a1ba29f --- /dev/null +++ b/bridges/HarvardBusinessReviewBridge.php @@ -0,0 +1,88 @@ + [ + 'name' => 'Limit', + 'type' => 'number', + 'required' => true, + 'title' => 'Maximum number of items to return', + 'defaultValue' => 6, //More requires clicking button "Load more" + ], + ]]; + + public function collectData() + { + $url = self::URI . '/the-latest'; + $html = getSimpleHTMLDOM($url); + + foreach ($html->find('li.stream-entry') as $data) { + // Skip if $data is null + if ($data === null) { + continue; + } + + try { + // Skip entries containing the text 'stream-ad-container' + if ($data->innertext !== null && strpos($data->innertext, 'stream-ad-container') !== false) { + continue; + } + + // Skip entries with class 'sponsored' + if ($data->hasClass('sponsored')) { + continue; + } + + $item = []; + $linkElement = $data->find('a', 0); + $titleElement = $data->find('h3.hed a', 0); + $authorElement = $data->find('ul.byline-list li', 0); + $timestampElement = $data->find('li.pubdate time', 0); + $contentElement = $data->find('div.dek', 0); + + if ($linkElement) { + $item['uri'] = self::URI . $linkElement->getAttribute('href'); + } else { + continue; // Skip this entry if no link is found + } + if ($titleElement) { + $item['title'] = trim($titleElement->plaintext); + } else { + continue; // Skip this entry if no title is found + } + if ($authorElement) { + $item['author'] = trim($authorElement->plaintext); + } else { + $item['author'] = 'Unknown'; // Default value if author is missing + } + if ($timestampElement) { + $item['timestamp'] = strtotime($timestampElement->plaintext); + } else { + $item['timestamp'] = time(); // Default to current time if timestamp is missing + } + if ($contentElement) { + $item['content'] = trim($contentElement->plaintext); + } else { + $item['content'] = ''; // Default to empty string if content is missing + } + $item['uid'] = hash('sha256', $item['title']); + + $this->items[] = $item; + + if (count($this->items) >= $this->getInput('postcount')) { + break; + } + } catch (Exception $e) { + // Log the error if necessary + continue; // Skip to the next iteration on error + } + } + } +} \ No newline at end of file