SERP

Preface

As of version 2.0, Scavenger can be used to scrape search engine results pages (SERPs). This is a common practice in Search Engine Optimization (SEO). This section demonstrates how to use Scavenger to scrape SERPs.


Prerequisites

First, you must create Eloquent model(s) to support your intended targets. In this case we have created the model, GoogleResult, with the following migration and model class:

Migration:

<?php

use Illuminate\Support\Facades\Schema;
use Illuminate\Database\Schema\Blueprint;
use Illuminate\Database\Migrations\Migration;

/**
 * Creates the `google_results` table used to store scraped Google results.
 *
 * The columns mirror the attributes listed under `markup` for the Google
 * target in the Scavenger configuration: title, description, link and position.
 */
class CreateGoogleResultsTable extends Migration
{
    /**
     * Run the migrations.
     *
     * @return void
     */
    public function up()
    {
        Schema::create('google_results', function (Blueprint $table) {
            $table->increments('id');
            // The target's markup scrapes a `title` as well; without this column
            // a scraped title has nowhere to be stored. Nullable because a result
            // block may not contain a matching heading.
            $table->text('title')->nullable();
            $table->text('link');
            $table->text('description');
            $table->integer('position')->nullable();
            $table->timestamps();
        });
    }

    /**
     * Reverse the migrations.
     *
     * @return void
     */
    public function down()
    {
        Schema::dropIfExists('google_results');
    }
}

Model class:

<?php

namespace App;

use Illuminate\Database\Eloquent\Model;

/**
 * Eloquent model representing a single scraped Google search result.
 *
 * Declares no members of its own; everything is inherited from Model.
 * NOTE(review): presumably backed by the `google_results` table via
 * Eloquent's table-naming convention; confirm if the table is renamed.
 */
class GoogleResult extends Model
{
    // Intentionally empty.
}

Scavenger Configuration

The following configuration can be used to scrape Google SERPs. This example works straight out of the box, as of Feb. 20, 2018.

<?php

// Scavenger configuration containing a single SERP target for Google.
return [
    'debug' => false,
    'log' => true,
    'verbosity' => 1,
    'database' => [
        // Table name is read from the environment, with a fallback default.
        'scraps_table' => env('SCAVENGER_SCRAPS_TABLE', 'scavenger_scraps'),
    ],
    'daemon' => [
        'model' => \App\User::class,
        // Property on the daemon model that holds the identifier below.
        'id_prop' => 'email',
        'id' => 'daemon@scavenger.reliqarts.com',
        'info' => [
            'name' => 'Scavenger Daemon',
            // Example value only; choose a strong password for real deployments.
            'password' => 'pass',
        ],
    ],
    'hash_algorithm' => 'sha512',
    'storage' => [
        // Storage directory is read from the environment, with a fallback default.
        'dir' => env('SCAVENGER_STORAGE_DIR', 'scavenger'),
    ],

    'targets' => [
        // Google SERP:
        'google' => [
            'example' => false,
            // Marks this target as a search engine results page.
            'serp' => true,
            'model' => \App\GoogleResult::class,
            'source' => 'https://www.google.com',
            'search' => [
                'keywords' => ['dog'],
                'form' => [
                    'selector' => 'form[name="f"]',
                    'keyword_input_name' => 'q',
                ],
            ],
            'pages' => 2,
            'pager' => [
                'selector' => '#foot > table > tr > td.b:last-child',
                'text' => 'Next',
            ],
            'markup' => [
                // NOTE(review): keys and values prefixed with `__` appear to be
                // Scavenger-specific tokens rather than CSS selectors; confirm
                // against the package documentation.
                '__result' => 'div.g',
                'title' => 'h3 > a',
                'description' => '.st',
                'link' => '__link',
                'position' => '__position',
            ],
        ],
    ],
];

Execution