Puppeteer Integration
Puppeteer is a Node.js library for controlling headless Chrome. This guide shows how to use WeProxies with Puppeteer.
Installation
npm install puppeteer
Basic Usage
HTTP Proxy (No Authentication)
const puppeteer = require('puppeteer');
// Launches Chrome behind the unauthenticated HTTP proxy, loads the ipify
// endpoint and logs the resulting page HTML.
(async () => {
  const browser = await puppeteer.launch({
    args: ['--proxy-server=http://proxy.weproxies.com:1080'],
  });

  try {
    const page = await browser.newPage();
    await page.goto('https://api.ipify.org?format=json');

    const content = await page.content();
    console.log(content);
  } finally {
    // Runs even when navigation fails, so no Chrome process is left behind.
    await browser.close();
  }
})();
With Authentication
const puppeteer = require('puppeteer');
// Example proxy credentials and endpoint used by the script below.
const PROXY_USER = 'wp_user123';
const PROXY_PASS = 'password';
const PROXY_HOST = 'proxy.weproxies.com';
const PROXY_PORT = '1080';

// Launches Chrome behind the proxy, supplies the proxy credentials through
// page.authenticate(), then logs the HTML of the ipify response page.
(async () => {
  const browser = await puppeteer.launch({
    args: [`--proxy-server=http://${PROXY_HOST}:${PROXY_PORT}`],
  });

  try {
    const page = await browser.newPage();

    // Authenticate with proxy
    await page.authenticate({
      username: PROXY_USER,
      password: PROXY_PASS,
    });

    await page.goto('https://api.ipify.org?format=json');
    const content = await page.content();
    console.log(content);
  } finally {
    // Runs even when authentication or navigation fails, so the browser
    // is never leaked.
    await browser.close();
  }
})();
Country Targeting
const puppeteer = require('puppeteer');
/**
 * Loads `url` through the proxy, passing the requested country to the proxy
 * as a `-country-<code>` suffix on the username.
 *
 * @param {string} url - Page to load.
 * @param {string} country - Country code appended to the proxy username (e.g. 'US').
 * @returns {Promise<string>} The value of `page.content()` for the loaded page.
 * @throws Rejects with the underlying error if opening the page, authenticating
 *   or navigating fails; the browser is closed before the error propagates.
 */
async function browseWithCountry(url, country) {
  const browser = await puppeteer.launch({
    args: ['--proxy-server=http://proxy.weproxies.com:1080'],
  });

  try {
    const page = await browser.newPage();

    // Authenticate with country in username
    await page.authenticate({
      username: `wp_user123-country-${country}`,
      password: 'password',
    });

    await page.goto(url);
    return await page.content();
  } finally {
    // Guarantees the browser is released on both the success and error paths.
    await browser.close();
  }
}
// Usage: fetch the same endpoint once per country and log each result.
(async function demoCountryTargeting() {
  const ipEchoUrl = 'https://api.ipify.org?format=json';

  const fromUnitedStates = await browseWithCountry(ipEchoUrl, 'US');
  console.log('US IP:', fromUnitedStates);

  const fromGreatBritain = await browseWithCountry(ipEchoUrl, 'GB');
  console.log('GB IP:', fromGreatBritain);
})();
Sticky Sessions
const puppeteer = require('puppeteer');
const { v4: uuidv4 } = require('uuid');
/**
 * Loads `url` three times through the proxy under a single session id and
 * logs the body text of each response.
 *
 * The proxy username is built as
 * `wp_user123[-country-<country>]-session-<id>`, where `<id>` is the first
 * 12 hex characters of a freshly generated UUID.
 *
 * @param {string} url - Page to load on every request.
 * @param {?string} [country=null] - Optional country code added to the username.
 * @returns {Promise<void>} Resolves once all requests ran and the browser closed.
 * @throws Rejects with the underlying error if any step fails; the browser is
 *   closed before the error propagates.
 */
async function browseWithSession(url, country = null) {
  const sessionId = uuidv4().replace(/-/g, '').slice(0, 12);

  let username = 'wp_user123';
  if (country) {
    username += `-country-${country}`;
  }
  username += `-session-${sessionId}`;

  const browser = await puppeteer.launch({
    args: ['--proxy-server=http://proxy.weproxies.com:1080'],
  });

  try {
    const page = await browser.newPage();
    await page.authenticate({
      username,
      password: 'password',
    });

    // Multiple requests will use the same IP
    for (let i = 0; i < 3; i++) {
      await page.goto(url);
      const body = await page.evaluate(() => document.body.innerText);
      console.log(`Request ${i + 1}: ${body}`);
    }
  } finally {
    // A failure in any of the navigations must not leak the browser.
    await browser.close();
  }
}
// Demo run; the returned promise is intentionally not awaited.
void browseWithSession('https://api.ipify.org?format=json', 'US');
Advanced Configuration
Headless Mode Options
// Chrome command-line switches: the proxy endpoint first, followed by the
// switches that (per their names) turn off sandboxing, /dev/shm usage,
// accelerated 2D canvas and the GPU.
const chromeFlags = [
  '--proxy-server=http://proxy.weproxies.com:1080',
  '--no-sandbox',
  '--disable-setuid-sandbox',
  '--disable-dev-shm-usage',
  '--disable-accelerated-2d-canvas',
  '--disable-gpu',
];

// 'new' selects the new headless mode.
const browser = await puppeteer.launch({ headless: 'new', args: chromeFlags });
Custom User Agent
// Open a page, then override its User-Agent and Accept-Language header.
const page = await browser.newPage();

const desktopChromeUserAgent =
  'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36';
await page.setUserAgent(desktopChromeUserAgent);

const extraHeaders = { 'Accept-Language': 'en-US,en;q=0.9' };
await page.setExtraHTTPHeaders(extraHeaders);
Viewport Settings
// 1920x1080 viewport at a device scale factor of 1.
const viewport = { width: 1920, height: 1080, deviceScaleFactor: 1 };
await page.setViewport(viewport);
Request Interception
// Resource types that are aborted instead of downloaded (images,
// stylesheets and fonts) for faster scraping.
const blockedResourceTypes = new Set(['image', 'stylesheet', 'font']);

await page.setRequestInterception(true);
page.on('request', (request) => {
  if (blockedResourceTypes.has(request.resourceType())) {
    request.abort();
    return;
  }
  // Everything else is let through.
  request.continue();
});
Complete Scraping Example
const puppeteer = require('puppeteer');
/**
 * Scraper that keeps one proxied browser open and loads each URL in a
 * fresh page authenticated with the proxy credentials.
 *
 * Typical lifecycle: `new ProxyScraper(u, p)` -> `init()` -> `scrape()`
 * (any number of times) -> `close()`.
 */
class ProxyScraper {
  /**
   * @param {string} username - Base proxy username.
   * @param {string} password - Proxy password.
   */
  constructor(username, password) {
    this.username = username;
    this.password = password;
    this.browser = null;
    this.authCredentials = null;
  }

  /**
   * Launches the browser and prepares the proxy credentials.
   *
   * Calling `init()` again first closes the browser from the previous call,
   * so re-initialising (e.g. to switch country) does not leak a browser.
   *
   * @param {?string} [country=null] - Appended to the username as `-country-<country>` when truthy.
   * @param {?string} [session=null] - Appended to the username as `-session-<session>` when truthy.
   * @returns {Promise<void>}
   */
  async init(country = null, session = null) {
    await this.close();

    this.browser = await puppeteer.launch({
      headless: 'new',
      args: [
        '--proxy-server=http://proxy.weproxies.com:1080',
        '--no-sandbox',
        '--disable-setuid-sandbox',
      ],
    });

    let user = this.username;
    if (country) user += `-country-${country}`;
    if (session) user += `-session-${session}`;

    this.authCredentials = {
      username: user,
      password: this.password,
    };
  }

  /**
   * Loads `url` in a new page and returns its content.
   *
   * @param {string} url - Page to load.
   * @param {object} [options]
   * @param {string} [options.userAgent] - User-Agent override for this page.
   * @param {boolean} [options.blockResources] - Abort image, stylesheet and font requests.
   * @param {number} [options.timeout=30000] - Navigation timeout in ms; `0` is
   *   passed through unchanged (Puppeteer treats it as "no timeout").
   * @param {string} [options.waitForSelector] - Selector to wait for after navigation.
   * @param {number} [options.selectorTimeout=10000] - Timeout in ms for `waitForSelector`.
   * @param {boolean} [options.getText] - Return `document.body.innerText` instead of the page HTML.
   * @param {string} [options.screenshot] - File path to save a screenshot to.
   * @returns {Promise<{success: true, content: string} | {success: false, error: string}>}
   *   Errors raised while working with the page are reported in the result
   *   rather than thrown.
   * @throws {Error} If called before `init()` (or after `close()`).
   */
  async scrape(url, options = {}) {
    if (!this.browser) {
      throw new Error('ProxyScraper.init() must be called before scrape()');
    }

    const page = await this.browser.newPage();
    try {
      await page.authenticate(this.authCredentials);

      if (options.userAgent) {
        await page.setUserAgent(options.userAgent);
      }
      await page.setViewport({ width: 1920, height: 1080 });

      if (options.blockResources) {
        await page.setRequestInterception(true);
        page.on('request', (req) => {
          if (['image', 'stylesheet', 'font'].includes(req.resourceType())) {
            req.abort();
          } else {
            req.continue();
          }
        });
      }

      await page.goto(url, {
        waitUntil: 'networkidle2',
        // `??` rather than `||`: an explicit 0 must not be replaced by the default.
        timeout: options.timeout ?? 30000,
      });

      if (options.waitForSelector) {
        await page.waitForSelector(options.waitForSelector, {
          timeout: options.selectorTimeout ?? 10000,
        });
      }

      const content = options.getText
        ? await page.evaluate(() => document.body.innerText)
        : await page.content();

      if (options.screenshot) {
        await page.screenshot({ path: options.screenshot });
      }

      return { success: true, content };
    } catch (error) {
      return { success: false, error: error.message };
    } finally {
      await page.close();
    }
  }

  /**
   * Closes the browser if one is open. Safe to call repeatedly.
   *
   * @returns {Promise<void>}
   */
  async close() {
    if (!this.browser) {
      return;
    }
    // Drop the reference first so a repeated call cannot close it twice.
    const browser = this.browser;
    this.browser = null;
    await browser.close();
  }
}
// Usage: scrape two IP-echo endpoints through one scraper instance and
// always release the browser afterwards.
(async () => {
  const scraper = new ProxyScraper('wp_user123', 'password');

  try {
    // Initialize with US country and sticky session
    await scraper.init('US', 'my-session-123');

    // Scrape multiple pages
    const urls = [
      'https://api.ipify.org?format=json',
      'https://httpbin.org/ip',
    ];

    for (const url of urls) {
      const result = await scraper.scrape(url, {
        getText: true,
        timeout: 30000,
        blockResources: true,
      });
      console.log(`${url}: ${JSON.stringify(result)}`);
    }
  } finally {
    // Runs even if init() or scrape() rejects; close() does nothing when
    // no browser was launched.
    await scraper.close();
  }
})();
Multi-Country Parallel Scraping
const puppeteer = require('puppeteer');
/**
 * Loads `url` through the proxy with the given country in the proxy username
 * and reports the outcome as a result object.
 *
 * @param {string} url - Page to load.
 * @param {string} country - Country code appended to the username as `-country-<country>`.
 * @returns {Promise<{country: string, success: true, content: string} |
 *   {country: string, success: false, error: string}>}
 *   On success, `content` is `document.body.innerText` of the loaded page.
 *   Failures while opening the page, authenticating or navigating are
 *   reported with `success: false`.
 * @throws Rejects only if the browser itself cannot be launched.
 */
async function scrapeFromCountry(url, country) {
  const browser = await puppeteer.launch({
    headless: 'new',
    args: ['--proxy-server=http://proxy.weproxies.com:1080', '--no-sandbox'],
  });

  try {
    // Page setup lives inside the try block so that a failure here still
    // closes the browser and yields a result object instead of a rejection.
    const page = await browser.newPage();
    await page.authenticate({
      username: `wp_user123-country-${country}`,
      password: 'password',
    });

    await page.goto(url, { waitUntil: 'networkidle2', timeout: 30000 });
    const content = await page.evaluate(() => document.body.innerText);
    return { country, content, success: true };
  } catch (error) {
    return { country, error: error.message, success: false };
  } finally {
    await browser.close();
  }
}
// Start one scrape per country at once, wait for all of them, then print
// each country's content or error message.
(async () => {
  const targetUrl = 'https://api.ipify.org?format=json';
  const countryCodes = ['US', 'GB', 'DE', 'FR', 'JP'];

  const pending = countryCodes.map((code) => scrapeFromCountry(targetUrl, code));
  const outcomes = await Promise.all(pending);

  for (const outcome of outcomes) {
    const detail = outcome.success ? outcome.content : outcome.error;
    console.log(`${outcome.country}: ${detail}`);
  }
})();
Best Practices
1. Always Close Browsers
// Skeleton: put all work that uses the browser inside the try block so the
// browser.close() call in finally runs whether or not that work throws.
try {
// Your scraping code
} finally {
// Executed on both the success and the error path.
await browser.close();
}
2. Handle Navigation Errors
// Navigate with a 30 s limit. A timeout is treated as an expected outcome
// and only logged; every other error is rethrown so it is not silently lost.
try {
  await page.goto(url, { timeout: 30000 });
} catch (error) {
  if (error.name !== 'TimeoutError') {
    throw error;
  }
  console.log('Page load timed out');
}
3. Use Stealth Mode
npm install puppeteer-extra puppeteer-extra-plugin-stealth
const puppeteer = require('puppeteer-extra');
const StealthPlugin = require('puppeteer-extra-plugin-stealth');
// Register the stealth plugin on puppeteer-extra before launching.
// NOTE(review): what the plugin changes is not visible here; see its docs.
const stealth = StealthPlugin();
puppeteer.use(stealth);

// Launch through the proxy exactly as with plain Puppeteer.
const proxyFlag = '--proxy-server=http://proxy.weproxies.com:1080';
const browser = await puppeteer.launch({ args: [proxyFlag] });