This commit is contained in:
Mohamed jinas
2024-01-07 21:54:36 +05:00
parent 38d14a812c
commit fbaa43748f
2 changed files with 12 additions and 3 deletions

View File

@@ -37,6 +37,10 @@ class ThiladhunScraper
public function extract($url) public function extract($url)
{ {
if (strpos($url, 'gallery') !== false) {
return null;
}
$crawler = $this->client->request('GET', $url); $crawler = $this->client->request('GET', $url);
$crawler->filter('meta[property="og:title"]')->each(function ($node) { $crawler->filter('meta[property="og:title"]')->each(function ($node) {

View File

@@ -19,10 +19,15 @@ class ThiladhunService extends Client
$articles = (new ThiladhunFeed)->get(); $articles = (new ThiladhunFeed)->get();
$articlesitems = []; $articlesitems = [];
//Looping through the articles and scraping and while scraping it creates a new instance of the scraper. // Looping through the articles and scraping and while scraping it creates a new instance of the scraper.
foreach ($articles as $article) { foreach ($articles as $article) {
//Remove query strings // Scrape the article
$articlesitems[] = (new ThiladhunScraper)->extract($article["link"]); $scrapedArticle = (new ThiladhunScraper)->extract($article["link"]);
// Check if the scraped article is not null
if ($scrapedArticle !== null && !empty($scrapedArticle)) {
$articlesitems[] = $scrapedArticle;
}
} }
return $articlesitems; return $articlesitems;