WIP
This commit is contained in:
@@ -37,6 +37,10 @@ class ThiladhunScraper
|
|||||||
public function extract($url)
|
public function extract($url)
|
||||||
{
|
{
|
||||||
|
|
||||||
|
if (strpos($url, 'gallery') !== false) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
$crawler = $this->client->request('GET', $url);
|
$crawler = $this->client->request('GET', $url);
|
||||||
|
|
||||||
$crawler->filter('meta[property="og:title"]')->each(function ($node) {
|
$crawler->filter('meta[property="og:title"]')->each(function ($node) {
|
||||||
|
@@ -19,10 +19,15 @@ class ThiladhunService extends Client
|
|||||||
$articles = (new ThiladhunFeed)->get();
|
$articles = (new ThiladhunFeed)->get();
|
||||||
|
|
||||||
$articlesitems = [];
|
$articlesitems = [];
|
||||||
//Looping through the articles and scraping and while scraping it creates a new instance of the scraper.
|
// Loop through the articles and scrape each one, creating a new scraper instance per article.
|
||||||
foreach ($articles as $article) {
|
foreach ($articles as $article) {
|
||||||
//Remove query strings
|
// Scrape the article
|
||||||
$articlesitems[] = (new ThiladhunScraper)->extract($article["link"]);
|
$scrapedArticle = (new ThiladhunScraper)->extract($article["link"]);
|
||||||
|
|
||||||
|
// Keep the scraped article only if it is neither null nor empty
|
||||||
|
if ($scrapedArticle !== null && !empty($scrapedArticle)) {
|
||||||
|
$articlesitems[] = $scrapedArticle;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return $articlesitems;
|
return $articlesitems;
|
||||||
|
Reference in New Issue
Block a user