Mihaaru scraper support
This commit is contained in:
@@ -2,7 +2,9 @@
|
|||||||
|
|
||||||
namespace App\Console\Commands;
|
namespace App\Console\Commands;
|
||||||
|
|
||||||
|
use App\Source;
|
||||||
use Illuminate\Console\Command;
|
use Illuminate\Console\Command;
|
||||||
|
use App\Services\MihaaruService;
|
||||||
|
|
||||||
class ScrapeMihaaruCommand extends Command
|
class ScrapeMihaaruCommand extends Command
|
||||||
{
|
{
|
||||||
@@ -37,6 +39,25 @@ class ScrapeMihaaruCommand extends Command
|
|||||||
*/
|
*/
|
||||||
public function handle()
{
    // Scrape the Mihaaru feed and store every article that is not saved yet.
    $source = Source::where('slug', 'mihaaru')->first();

    // first() yields null when no row matches; stop with a non-zero exit
    // code instead of failing on a null method call further down.
    if ($source === null) {
        $this->error('Source with slug "mihaaru" was not found.');

        return 1;
    }

    $articles = (new MihaaruService)->scrape();

    foreach ($articles as $article) {
        // Look the article up by its feed guid only. The remaining fields are
        // applied just when a new row is created, so an edited title or body
        // on a later run does not insert a duplicate record.
        $source->articles()->firstOrCreate(
            ['guid' => $article['guid']],
            [
                'title' => $article['title'],
                'url' => $article['url'],
                'author' => $article['author'],
                'featured_image' => $article['image'],
                'body' => $article['content'],
                'published_date' => $article['date'],
            ]
        );
    }

    // Explicit success exit code, matching the value the command returned before.
    return 0;
}
|
||||||
}
|
}
|
||||||
|
@@ -2,6 +2,7 @@
|
|||||||
|
|
||||||
namespace App\Console;
|
namespace App\Console;
|
||||||
|
|
||||||
|
use App\Console\Commands\ScrapeMihaaruCommand;
|
||||||
use Illuminate\Console\Scheduling\Schedule;
|
use Illuminate\Console\Scheduling\Schedule;
|
||||||
use Illuminate\Foundation\Console\Kernel as ConsoleKernel;
|
use Illuminate\Foundation\Console\Kernel as ConsoleKernel;
|
||||||
|
|
||||||
@@ -13,7 +14,7 @@ class Kernel extends ConsoleKernel
|
|||||||
* @var array
|
* @var array
|
||||||
*/
|
*/
|
||||||
protected $commands = [
|
protected $commands = [
|
||||||
//
|
ScrapeMihaaruCommand::class
|
||||||
];
|
];
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@@ -15,15 +15,15 @@ class MihaaruService extends Client
|
|||||||
{
|
{
|
||||||
|
|
||||||
$articles = $this->get("https://mihaaru.com/rss")["channel"]["item"];
|
$articles = $this->get("https://mihaaru.com/rss")["channel"]["item"];
|
||||||
|
|
||||||
$articlesitems = [];
|
$articlesitems = [];
|
||||||
$emihaaru = new MihaaruScraper();
|
$scraper = new MihaaruScraper();
|
||||||
|
|
||||||
foreach ($articles as $article) {
|
foreach ($articles as $article) {
|
||||||
$link = $article['link'];
|
$link = $article['link'];
|
||||||
$date = $article['pubDate'];
|
$date = $article['pubDate'];
|
||||||
$guid = $article['guid'];
|
$guid = $article['guid'];
|
||||||
$articlesitems[] = $emihaaru->extract($link, $date, $guid);
|
$articlesitems[] = $scraper->extract($link, $date, $guid);
|
||||||
}
|
}
|
||||||
|
|
||||||
return $articlesitems;
|
return $articlesitems;
|
||||||
|
@@ -19,7 +19,7 @@ class MihaaruScraper
|
|||||||
$this->client = new Client;
|
$this->client = new Client;
|
||||||
}
|
}
|
||||||
|
|
||||||
public function extract($url)
|
public function extract($url, $date = null, $guid = null)
|
||||||
{
|
{
|
||||||
|
|
||||||
$crawler = $this->client->request('GET', $url);
|
$crawler = $this->client->request('GET', $url);
|
||||||
@@ -66,6 +66,8 @@ class MihaaruScraper
|
|||||||
'image' => $this->image,
|
'image' => $this->image,
|
||||||
'content' => $this->content,
|
'content' => $this->content,
|
||||||
'url' => $url,
|
'url' => $url,
|
||||||
|
'date' => $date,
|
||||||
|
'guid' => $guid,
|
||||||
'author' => $this->author,
|
'author' => $this->author,
|
||||||
'topics' => $this->tags,
|
'topics' => $this->tags,
|
||||||
];
|
];
|
||||||
|
Reference in New Issue
Block a user