Mihaaru scraper support

This commit is contained in:
2020-08-10 03:21:56 +05:00
parent 5af5f75175
commit fd2fc06115
4 changed files with 30 additions and 6 deletions

View File

@@ -2,7 +2,9 @@
namespace App\Console\Commands; namespace App\Console\Commands;
use App\Source;
use Illuminate\Console\Command; use Illuminate\Console\Command;
use App\Services\MihaaruService;
class ScrapeMihaaruCommand extends Command class ScrapeMihaaruCommand extends Command
{ {
@@ -37,6 +39,25 @@ class ScrapeMihaaruCommand extends Command
*/ */
public function handle() public function handle()
{ {
return 0; $source = Source::where('slug', 'mihaaru')->first();
$articles = (new MihaaruService)->scrape();
foreach($articles as $article)
{
$source->articles()->firstOrCreate([
"title" => $article["title"],
"url" => $article["url"],
"author" => $article["author"],
"featured_image" => $article["image"],
"body" => $article["content"],
"guid" => $article["guid"],
"published_date" => $article["date"],
]);
}
} }
} }

View File

@@ -2,6 +2,7 @@
namespace App\Console; namespace App\Console;
use App\Console\Commands\ScrapeMihaaruCommand;
use Illuminate\Console\Scheduling\Schedule; use Illuminate\Console\Scheduling\Schedule;
use Illuminate\Foundation\Console\Kernel as ConsoleKernel; use Illuminate\Foundation\Console\Kernel as ConsoleKernel;
@@ -13,7 +14,7 @@ class Kernel extends ConsoleKernel
* @var array * @var array
*/ */
protected $commands = [ protected $commands = [
// ScrapeMihaaruCommand::class
]; ];
/** /**

View File

@@ -15,15 +15,15 @@ class MihaaruService extends Client
{ {
$articles = $this->get("https://mihaaru.com/rss")["channel"]["item"]; $articles = $this->get("https://mihaaru.com/rss")["channel"]["item"];
$articlesitems = []; $articlesitems = [];
$emihaaru = new MihaaruScraper(); $scraper = new MihaaruScraper();
foreach ($articles as $article) { foreach ($articles as $article) {
$link = $article['link']; $link = $article['link'];
$date = $article['pubDate']; $date = $article['pubDate'];
$guid = $article['guid']; $guid = $article['guid'];
$articlesitems[] = $emihaaru->extract($link, $date, $guid); $articlesitems[] = $scraper->extract($link, $date, $guid);
} }
return $articlesitems; return $articlesitems;

View File

@@ -19,7 +19,7 @@ class MihaaruScraper
$this->client = new Client; $this->client = new Client;
} }
public function extract($url) public function extract($url, $date = null, $guid = null)
{ {
$crawler = $this->client->request('GET', $url); $crawler = $this->client->request('GET', $url);
@@ -66,6 +66,8 @@ class MihaaruScraper
'image' => $this->image, 'image' => $this->image,
'content' => $this->content, 'content' => $this->content,
'url' => $url, 'url' => $url,
'date' => $date,
'guid' => $guid,
'author' => $this->author, 'author' => $this->author,
'topics' => $this->tags, 'topics' => $this->tags,
]; ];