Skip to main content

Puppeteer Integration

Puppeteer is a Node.js library for controlling Chrome or Chromium, which it runs in headless mode by default. This guide shows how to route Puppeteer traffic through WeProxies.

Installation

npm install puppeteer

Basic Usage

HTTP Proxy (No Authentication)

const puppeteer = require('puppeteer');

// Launch Chrome behind the unauthenticated HTTP proxy and print the page
// returned by the IP-echo endpoint.
(async () => {
  const browser = await puppeteer.launch({
    args: ['--proxy-server=http://proxy.weproxies.com:1080'],
  });

  try {
    const page = await browser.newPage();

    await page.goto('https://api.ipify.org?format=json');
    const content = await page.content();
    console.log(content);
  } finally {
    // Close the browser even when navigation fails so no Chrome process is left running.
    await browser.close();
  }
})();

With Authentication

const puppeteer = require('puppeteer');

// Example proxy settings — substitute your own account values.
const PROXY_USER = 'wp_user123';
const PROXY_PASS = 'password';
const PROXY_HOST = 'proxy.weproxies.com';
const PROXY_PORT = '1080';

// Launch Chrome behind the proxy, answer the proxy's auth challenge with
// page.authenticate(), and print the page returned by the IP-echo endpoint.
(async () => {
  const browser = await puppeteer.launch({
    args: [`--proxy-server=http://${PROXY_HOST}:${PROXY_PORT}`],
  });

  try {
    const page = await browser.newPage();

    // Authenticate with proxy
    await page.authenticate({
      username: PROXY_USER,
      password: PROXY_PASS,
    });

    await page.goto('https://api.ipify.org?format=json');
    const content = await page.content();
    console.log(content);
  } finally {
    // Close the browser even when authentication or navigation fails.
    await browser.close();
  }
})();

Country Targeting

const puppeteer = require('puppeteer');

/**
 * Load a URL through the proxy using an exit node in the given country.
 *
 * The country is selected by appending `-country-<code>` to the proxy username.
 *
 * @param {string} url - Page to load.
 * @param {string} country - Country code appended to the proxy username (e.g. 'US').
 * @returns {Promise<string>} The page's serialized HTML as returned by page.content().
 * @throws Rejects with the underlying Puppeteer error if launching,
 *   authenticating, or navigating fails; the browser is closed first.
 */
async function browseWithCountry(url, country) {
  const browser = await puppeteer.launch({
    args: ['--proxy-server=http://proxy.weproxies.com:1080'],
  });

  try {
    const page = await browser.newPage();

    // Authenticate with country in username
    await page.authenticate({
      username: `wp_user123-country-${country}`,
      password: 'password',
    });

    await page.goto(url);
    return await page.content();
  } finally {
    // Always release the browser, including when navigation throws.
    await browser.close();
  }
}

// Usage
// Usage: query the IP-echo endpoint through a US exit, then through a GB exit.
(async () => {
  const ipEchoUrl = 'https://api.ipify.org?format=json';
  const targets = [
    { country: 'US', label: 'US IP:' },
    { country: 'GB', label: 'GB IP:' },
  ];

  // Sequential on purpose: each lookup completes and is logged before the next starts.
  for (const { country, label } of targets) {
    const pageHtml = await browseWithCountry(ipEchoUrl, country);
    console.log(label, pageHtml);
  }
})();

Sticky Sessions

const puppeteer = require('puppeteer');
const { v4: uuidv4 } = require('uuid');

/**
 * Load a URL three times through the proxy using one sticky session.
 *
 * A fresh 12-character session id is derived from a UUID on every call and
 * appended to the proxy username (after the optional country suffix). The
 * body text of each response is logged.
 *
 * @param {string} url - Page to load on every iteration.
 * @param {?string} [country=null] - Optional country code added to the username.
 * @returns {Promise<void>}
 * @throws Rejects with the underlying Puppeteer error if any step fails;
 *   the browser is closed first.
 */
async function browseWithSession(url, country = null) {
  const sessionId = uuidv4().replace(/-/g, '').slice(0, 12);

  let username = 'wp_user123';
  if (country) {
    username += `-country-${country}`;
  }
  username += `-session-${sessionId}`;

  const browser = await puppeteer.launch({
    args: ['--proxy-server=http://proxy.weproxies.com:1080'],
  });

  try {
    const page = await browser.newPage();

    await page.authenticate({
      username,
      password: 'password',
    });

    // Multiple requests will use the same IP
    for (let i = 0; i < 3; i++) {
      await page.goto(url);
      const body = await page.evaluate(() => document.body.innerText);
      console.log(`Request ${i + 1}: ${body}`);
    }
  } finally {
    // Release the browser even if one of the requests fails part-way through.
    await browser.close();
  }
}

// Report failures explicitly instead of leaving the returned promise unhandled.
browseWithSession('https://api.ipify.org?format=json', 'US').catch((error) => {
  console.error('Sticky session run failed:', error);
  process.exitCode = 1;
});

Advanced Configuration

Headless Mode Options

// Chrome command-line switches for the launch; the first one points Chrome at the proxy.
const chromeFlags = [
  '--proxy-server=http://proxy.weproxies.com:1080',
  '--no-sandbox',
  '--disable-setuid-sandbox',
  '--disable-dev-shm-usage',
  '--disable-accelerated-2d-canvas',
  '--disable-gpu',
];

const browser = await puppeteer.launch({
  headless: 'new', // Use new headless mode
  args: chromeFlags,
});

Custom User Agent

// User-agent string and language preference sent with requests from this page.
const desktopChromeUserAgent =
  'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36';
const extraHeaders = { 'Accept-Language': 'en-US,en;q=0.9' };

const page = await browser.newPage();

await page.setUserAgent(desktopChromeUserAgent);
await page.setExtraHTTPHeaders(extraHeaders);

Viewport Settings

// 1920x1080 viewport with a device scale factor of 1.
const viewport = { width: 1920, height: 1080, deviceScaleFactor: 1 };

await page.setViewport(viewport);

Request Interception

// Resource types that are dropped to speed up scraping.
const blockedResourceTypes = new Set(['image', 'stylesheet', 'font']);

await page.setRequestInterception(true);

// Every intercepted request must be either aborted or continued.
page.on('request', (request) => {
  if (blockedResourceTypes.has(request.resourceType())) {
    request.abort();
    return;
  }
  request.continue();
});

Complete Scraping Example

const puppeteer = require('puppeteer');

/**
 * Scraper that drives one Puppeteer browser through the WeProxies gateway and
 * authenticates every page it opens with the same proxy credentials.
 *
 * Typical lifecycle: `new ProxyScraper(...)` -> `init()` -> `scrape()` (any
 * number of times) -> `close()`.
 */
class ProxyScraper {
  /**
   * @param {string} username - Base proxy username; targeting suffixes are added in init().
   * @param {string} password - Proxy password.
   */
  constructor(username, password) {
    this.username = username;
    this.password = password;
    this.browser = null;
    this.authCredentials = null;
  }

  /**
   * Launch the browser and build the proxy credentials.
   *
   * @param {?string} [country=null] - Optional country code appended as `-country-<code>`.
   * @param {?string} [session=null] - Optional session id appended as `-session-<id>`.
   * @returns {Promise<void>}
   */
  async init(country = null, session = null) {
    // Calling init() twice would otherwise orphan the previously launched browser.
    await this.close();

    this.browser = await puppeteer.launch({
      headless: 'new',
      args: [
        '--proxy-server=http://proxy.weproxies.com:1080',
        '--no-sandbox',
        '--disable-setuid-sandbox',
      ],
    });

    let user = this.username;
    if (country) {
      user += `-country-${country}`;
    }
    if (session) {
      user += `-session-${session}`;
    }

    this.authCredentials = {
      username: user,
      password: this.password,
    };
  }

  /**
   * Load a URL in a fresh page and return its content.
   *
   * Failures while preparing or loading the page are reported in the result
   * object rather than thrown.
   *
   * @param {string} url - Page to load.
   * @param {object} [options]
   * @param {string} [options.userAgent] - User-agent override for this page.
   * @param {boolean} [options.blockResources] - Abort image, stylesheet and font requests.
   * @param {number} [options.timeout=30000] - Navigation timeout in ms; an explicit 0 is passed through.
   * @param {string} [options.waitForSelector] - Selector to wait for (up to 10 s) after navigation.
   * @param {boolean} [options.getText] - Return document.body.innerText instead of the HTML.
   * @param {string} [options.screenshot] - File path to save a screenshot to.
   * @returns {Promise<{success: true, content: string} | {success: false, error: string}>}
   * @throws {Error} If called before init() (or after close()).
   */
  async scrape(url, options = {}) {
    if (!this.browser) {
      throw new Error('ProxyScraper.init() must be called before scrape()');
    }

    const page = await this.browser.newPage();

    try {
      await page.authenticate(this.authCredentials);

      if (options.userAgent) {
        await page.setUserAgent(options.userAgent);
      }

      await page.setViewport({ width: 1920, height: 1080 });

      if (options.blockResources) {
        await page.setRequestInterception(true);
        page.on('request', (req) => {
          if (['image', 'stylesheet', 'font'].includes(req.resourceType())) {
            req.abort();
          } else {
            req.continue();
          }
        });
      }

      await page.goto(url, {
        waitUntil: 'networkidle2',
        // `??` rather than `||`: an explicit 0 (Puppeteer's "no timeout") must not fall back to 30000.
        timeout: options.timeout ?? 30000,
      });

      if (options.waitForSelector) {
        await page.waitForSelector(options.waitForSelector, { timeout: 10000 });
      }

      const content = options.getText
        ? await page.evaluate(() => document.body.innerText)
        : await page.content();

      if (options.screenshot) {
        await page.screenshot({ path: options.screenshot });
      }

      return { success: true, content };
    } catch (error) {
      return { success: false, error: error.message };
    } finally {
      // Best-effort cleanup: a failure to close the page must not replace the
      // result that is already being returned.
      await page.close().catch(() => {});
    }
  }

  /**
   * Close the browser if one is open. Safe to call more than once.
   *
   * @returns {Promise<void>}
   */
  async close() {
    if (this.browser) {
      const browser = this.browser;
      // Clear the handle first so later calls never reuse a closed browser.
      this.browser = null;
      await browser.close();
    }
  }
}

// Usage
// Usage: scrape two IP-echo endpoints through one US sticky session.
(async () => {
  const scraper = new ProxyScraper('wp_user123', 'password');

  try {
    // Initialize with US country and sticky session
    await scraper.init('US', 'my-session-123');

    // Scrape multiple pages
    const urls = [
      'https://api.ipify.org?format=json',
      'https://httpbin.org/ip',
    ];

    for (const url of urls) {
      const result = await scraper.scrape(url, {
        getText: true,
        timeout: 30000,
        blockResources: true,
      });
      console.log(`${url}: ${JSON.stringify(result)}`);
    }
  } finally {
    // Shut the browser down even if init() or a scrape throws.
    await scraper.close();
  }
})();

Multi-Country Parallel Scraping

const puppeteer = require('puppeteer');

/**
 * Fetch a URL through a proxy exit in the given country and report the outcome.
 *
 * Every failure (launch, page creation, proxy authentication, navigation) is
 * returned as a failure record instead of rejecting, so one bad country does
 * not abort a surrounding Promise.all.
 *
 * @param {string} url - Page to load.
 * @param {string} country - Country code appended to the proxy username.
 * @returns {Promise<{country: string, success: true, content: string} |
 *   {country: string, success: false, error: string}>}
 */
async function scrapeFromCountry(url, country) {
  let browser = null;

  try {
    browser = await puppeteer.launch({
      headless: 'new',
      args: ['--proxy-server=http://proxy.weproxies.com:1080', '--no-sandbox'],
    });

    const page = await browser.newPage();

    await page.authenticate({
      username: `wp_user123-country-${country}`,
      password: 'password',
    });

    await page.goto(url, { waitUntil: 'networkidle2', timeout: 30000 });
    const content = await page.evaluate(() => document.body.innerText);
    return { country, content, success: true };
  } catch (error) {
    return { country, error: error.message, success: false };
  } finally {
    // `browser` is still null when the launch itself failed.
    if (browser) {
      await browser.close();
    }
  }
}

// Scrape from multiple countries in parallel
// Start one scrape per country at once, wait for all of them, then print one
// line per country in the original order.
(async () => {
  const targetUrl = 'https://api.ipify.org?format=json';
  const countryCodes = ['US', 'GB', 'DE', 'FR', 'JP'];

  const pending = countryCodes.map((code) => scrapeFromCountry(targetUrl, code));
  const outcomes = await Promise.all(pending);

  for (const outcome of outcomes) {
    const detail = outcome.success ? outcome.content : outcome.error;
    console.log(`${outcome.country}: ${detail}`);
  }
})();

Best Practices

1. Always Close Browsers

// Template: wrap the work in try/finally so the browser is closed both on
// success and when the scraping code throws.
try {
// Your scraping code
} finally {
// Runs whether or not the code above threw.
await browser.close();
}

2. Handle Navigation Errors

// Navigate with a 30 s limit; a timeout is reported, anything else propagates.
try {
  await page.goto(url, { timeout: 30000 });
} catch (error) {
  if (error.name !== 'TimeoutError') {
    // Only timeouts are expected here; other failures must not be swallowed.
    throw error;
  }
  console.log('Page load timed out');
}

3. Use Stealth Mode

npm install puppeteer-extra puppeteer-extra-plugin-stealth
const puppeteer = require('puppeteer-extra');
const StealthPlugin = require('puppeteer-extra-plugin-stealth');

// Register the stealth plugin, then launch the browser through the proxy.
const stealth = StealthPlugin();
puppeteer.use(stealth);

const proxyFlag = '--proxy-server=http://proxy.weproxies.com:1080';
const browser = await puppeteer.launch({ args: [proxyFlag] });