Add adhadhu support
This commit is contained in:
		
							
								
								
									
										65
									
								
								app/Services/Scrapers/AdhadhuScraper.php
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										65
									
								
								app/Services/Scrapers/AdhadhuScraper.php
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,65 @@ | ||||
| <?php | ||||
|  | ||||
| namespace App\Services\Scrapers; | ||||
|  | ||||
| use Goutte\Client; | ||||
| use Illuminate\Support\Str; | ||||
|  | ||||
| /** | ||||
|  * Scrapes a single Adhadhu article page into a plain associative array. | ||||
|  * | ||||
|  * The page is fetched with a Goutte client and the fields are picked out of | ||||
|  * the returned markup with CSS selectors. | ||||
|  */ | ||||
| class AdhadhuScraper | ||||
| { | ||||
|     /** @var Client HTTP client used to fetch the article page. */ | ||||
|     protected $client; | ||||
|  | ||||
|     /** @var string|null Headline text of the most recently extracted article. */ | ||||
|     protected $title; | ||||
|  | ||||
|     /** @var array Paragraph texts of the most recently extracted article. */ | ||||
|     protected $content = []; | ||||
|  | ||||
|     /** @var string|null Hero image "src" of the most recently extracted article. */ | ||||
|     protected $image; | ||||
|  | ||||
|     /** @var array Tags of the most recently extracted article, each ['name' => ..., 'slug' => ...]. */ | ||||
|     protected $topics = []; | ||||
|  | ||||
|     /** @var string|null Author name, read from the avatar image's "alt" attribute. */ | ||||
|     protected $author; | ||||
|  | ||||
|     public function __construct() | ||||
|     { | ||||
|         $this->client = new Client; | ||||
|     } | ||||
|  | ||||
|     /** | ||||
|      * Fetch the article at $url and return its extracted fields. | ||||
|      * | ||||
|      * NOTE: title, image, author, og:title and each tag's ".tag" child are read | ||||
|      * through first()->text() / first()->attr() without a guard, so a page that | ||||
|      * lacks one of them is not handled here; the crawler's own error propagates | ||||
|      * to the caller. | ||||
|      * | ||||
|      * @param string     $url  Article URL to request. | ||||
|      * @param mixed|null $date Passed through to the result unchanged. | ||||
|      * | ||||
|      * @return array Keys: source, title, og_title, image, content, url, date, | ||||
|      *               guid, author, topics. | ||||
|      */ | ||||
|     public function extract($url, $date = null) | ||||
|     { | ||||
|         // The accumulators live on the instance, so they must be cleared on | ||||
|         // every call; otherwise a reused scraper would return the previous | ||||
|         // article's paragraphs and tags mixed in with the current one's. | ||||
|         $this->content = []; | ||||
|         $this->topics = []; | ||||
|  | ||||
|         $crawler = $this->client->request('GET', $url); | ||||
|  | ||||
|         // Extract title | ||||
|         $this->title = $crawler->filter('h1.font-52')->first()->text(); | ||||
|  | ||||
|         // Extract image URL | ||||
|         $this->image = $crawler->filter('img.img-fluid.hero-img')->first()->attr('src'); | ||||
|  | ||||
|         // Extract author name (taken from the avatar image's alt text) | ||||
|         $this->author = $crawler->filter('.MuiAvatar-circle img')->first()->attr('alt'); | ||||
|  | ||||
|         // Extract content: one entry per paragraph directly under ".body" | ||||
|         $crawler->filter('.body > p')->each(function ($node) { | ||||
|             $this->content[] = $node->text(); | ||||
|         }); | ||||
|  | ||||
|         // Extract topics (tags) | ||||
|         $crawler->filter('a[href^="/tags/"]')->each(function ($node) { | ||||
|             $href = $node->attr('href'); | ||||
|             $slug = basename($href); // Extracts the last segment of the URL | ||||
|  | ||||
|             $this->topics[] = [ | ||||
|                 "name" => trim($node->filter('.tag')->first()->text()), | ||||
|                 "slug" => Str::slug($slug) | ||||
|             ]; | ||||
|         }); | ||||
|  | ||||
|         return [ | ||||
|             'source'    => 'Adhadhu', | ||||
|             'title'     => $this->title, | ||||
|             'og_title'  => $crawler->filter('meta[property*="og:title"]')->first()->attr('content'), | ||||
|             'image'     => $this->image, | ||||
|             'content'   => $this->content, | ||||
|             'url'       => $url, | ||||
|             'date'      => $date, | ||||
|             // The URL with the literal news prefix removed; a URL that does | ||||
|             // not contain that exact string is used as the guid unchanged. | ||||
|             'guid'      => str_replace("https://adhadhu.com/news/", "", $url), | ||||
|             'author'    => $this->author, | ||||
|             'topics'    => $this->topics | ||||
|         ]; | ||||
|     } | ||||
| } | ||||
		Reference in New Issue
	
	Block a user