From fbaa43748f863ea75fd738656be2dfdfc700d319 Mon Sep 17 00:00:00 2001
From: Mohamed jinas
Date: Sun, 7 Jan 2024 21:54:36 +0500
Subject: [PATCH] WIP

---
Review notes (commentary only; this area sits after the commit message and
before the first "diff --git" header, so it is not part of the commit):

* What the patch does: ThiladhunScraper::extract() now returns null for any
  URL containing the substring 'gallery', before issuing the HTTP request;
  ThiladhunService only appends a scrape result to $articlesitems when that
  result is non-null and non-empty.
* Line structure was restored from a copy in which every newline had been
  collapsed. Hunk line counts were re-checked against the @@ headers and the
  diffstat. Leading indentation was NOT recoverable and is assumed to be
  4 spaces per level with the method brace on its own line -- TODO confirm
  against the working tree, or apply with `git apply --ignore-whitespace`.
* NOTE(review): the "From:" header carries no e-mail address; `git am` may
  reject the mail for that reason, while `git apply` does not read it.
* NOTE(review): in ThiladhunService the `$scrapedArticle !== null &&` half of
  the new condition is redundant, because empty() is already true for null.
  Left untouched here so the recorded blob ids keep matching.
* NOTE(review): the removed "//Remove query strings" comment described
  behaviour that the removed line did not perform; nothing in this patch
  strips query strings from $article["link"].

 app/Services/Scrapers/ThiladhunScraper.php |  4 ++++
 app/Services/ThiladhunService.php          | 11 ++++++++---
 2 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/app/Services/Scrapers/ThiladhunScraper.php b/app/Services/Scrapers/ThiladhunScraper.php
index beef35a..8eb52c8 100644
--- a/app/Services/Scrapers/ThiladhunScraper.php
+++ b/app/Services/Scrapers/ThiladhunScraper.php
@@ -37,6 +37,10 @@ class ThiladhunScraper
 
     public function extract($url)
     {
+        if (strpos($url, 'gallery') !== false) {
+            return null;
+        }
+
         $crawler = $this->client->request('GET', $url);
 
         $crawler->filter('meta[property="og:title"]')->each(function ($node) {
diff --git a/app/Services/ThiladhunService.php b/app/Services/ThiladhunService.php
index 456060b..a9bdab4 100644
--- a/app/Services/ThiladhunService.php
+++ b/app/Services/ThiladhunService.php
@@ -19,10 +19,15 @@ class ThiladhunService extends Client
         $articles = (new ThiladhunFeed)->get();
 
         $articlesitems = [];
-        //Looping through the articles and scraping and while scraping it creates a new instance of the scraper.
+        // Looping through the articles and scraping and while scraping it creates a new instance of the scraper.
         foreach ($articles as $article) {
-            //Remove query strings
-            $articlesitems[] = (new ThiladhunScraper)->extract($article["link"]);
+            // Scrape the article
+            $scrapedArticle = (new ThiladhunScraper)->extract($article["link"]);
+
+            // Check if the scraped article is not null
+            if ($scrapedArticle !== null && !empty($scrapedArticle)) {
+                $articlesitems[] = $scrapedArticle;
+            }
         }
 
         return $articlesitems;