forked from brightdata/brightdata-mcp
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathbrowser_tools.js
More file actions
501 lines (478 loc) · 18 KB
/
browser_tools.js
File metadata and controls
501 lines (478 loc) · 18 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
'use strict'; /*jslint node:true es9:true*/
import {UserError, imageContent as image_content} from 'fastmcp';
import {z} from 'zod';
import axios from 'axios';
import {Browser_session} from './browser_session.js';
// Browser zone used when building CDP endpoints; overridable via env var.
let browser_zone = process.env.BROWSER_ZONE || 'mcp_browser';
// Lazily-created shared Browser_session (see require_browser()).
let open_session;
// Lowercased 2-letter country code of the open session, or null when none.
let open_session_country = null;
// Return the shared Browser_session, creating a fresh one when none exists
// or when the requested country differs from the current session's country.
// country: optional 2-letter ISO code (any case); when omitted, the current
// session's country is reused.
const require_browser = async country=>{
    const normalized_country = country ? country.toLowerCase()
        : open_session_country;
    const needs_new_session = !open_session
        || normalized_country!==open_session_country;
    if (needs_new_session)
    {
        // Resolve the endpoint BEFORE mutating module state: previously
        // open_session_country was updated first, so a failure in
        // calculate_cdp_endpoint left the old session paired with the new
        // country and a later call could wrongly reuse the stale session.
        const cdp_endpoint =
            await calculate_cdp_endpoint(normalized_country || null);
        // NOTE(review): the previous session (if any) is replaced without
        // being closed here — confirm Browser_session cleanup elsewhere.
        open_session = new Browser_session({cdp_endpoint});
        open_session_country = normalized_country || null;
    }
    return open_session;
};
// Build the wss:// CDP endpoint for the configured browser zone.
// Looks up the customer id and the zone password via the Bright Data API
// (the two requests are independent, so they run in parallel), then
// assembles the superproxy connection string.
// country: optional lowercased 2-letter code appended as a "-country-XX"
// suffix to the proxy username; falsy omits the suffix.
// Throws a descriptive Error when the zone does not exist (HTTP 422), when
// the zone has no password, or when any request fails.
const calculate_cdp_endpoint = async country=>{
    try {
        const headers = {authorization: `Bearer ${process.env.API_TOKEN}`};
        const [status_response, password_response] = await Promise.all([
            axios({
                url: 'https://api.brightdata.com/status',
                method: 'GET',
                headers,
            }),
            axios({
                url: 'https://api.brightdata.com/zone/passwords'
                    +`?zone=${browser_zone}`,
                method: 'GET',
                headers,
            }),
        ]);
        const customer = status_response.data.customer;
        const password = password_response.data.passwords?.[0];
        // Guard: previously a missing password silently produced the string
        // "undefined" inside the endpoint URL.
        if (!password)
            throw new Error(`no password configured for zone`);
        const country_suffix = country ? `-country-${country}` : '';
        return `wss://brd-customer-${customer}-zone-${browser_zone}`
            +`${country_suffix}:${password}@brd.superproxy.io:9222`;
    } catch(e){
        if (e.response?.status===422)
            throw new Error(`Browser zone '${browser_zone}' does not exist`);
        throw new Error(`Error retrieving browser credentials: ${e.message}`);
    }
};
let scraping_browser_navigate = {
    name: 'scraping_browser_navigate',
    description: 'Navigate a scraping browser session to a new URL',
    annotations: {
        title: 'Browser Navigate',
        destructiveHint: true,
        openWorldHint: true,
    },
    parameters: z.object({
        url: z.string().describe('The URL to navigate to'),
        country: z.string().regex(/^[A-Za-z]{2}$/)
            .optional()
            .describe('Optional 2-letter ISO country code to route the '
                +'browser session (e.g., "US", "GB")'),
    }),
    // Opens (or reuses) a browser session, optionally pinned to a country,
    // clears the recorded network requests, then navigates to the URL and
    // reports the resulting title and URL.
    execute: async({url, country})=>{
        const session = await require_browser(country?.toLowerCase());
        const page = await session.get_page({url});
        await session.clear_requests();
        try {
            await page.goto(url,
                {timeout: 120000, waitUntil: 'domcontentloaded'});
            const title = await page.title();
            return `Successfully navigated to ${url}\n`
                +`Title: ${title}\n`
                +`URL: ${page.url()}`;
        } catch(e){
            throw new UserError(`Error navigating to ${url}: ${e}`);
        }
    },
};
let scraping_browser_go_back = {
    name: 'scraping_browser_go_back',
    description: 'Go back to the previous page',
    annotations: {
        title: 'Browser Go Back',
        destructiveHint: true,
    },
    parameters: z.object({}),
    // Navigates the session back in browser history and reports the
    // resulting title and URL.
    execute: async()=>{
        const session = await require_browser();
        const page = await session.get_page();
        try {
            await page.goBack();
            const lines = [
                'Successfully navigated back',
                `Title: ${await page.title()}`,
                `URL: ${page.url()}`,
            ];
            return lines.join('\n');
        } catch(e){
            throw new UserError(`Error navigating back: ${e}`);
        }
    },
};
const scraping_browser_go_forward = {
    name: 'scraping_browser_go_forward',
    description: 'Go forward to the next page',
    annotations: {
        title: 'Browser Go Forward',
        destructiveHint: true,
    },
    parameters: z.object({}),
    // Navigates the session forward in browser history and reports the
    // resulting title and URL.
    execute: async()=>{
        const session = await require_browser();
        const page = await session.get_page();
        try {
            await page.goForward();
            const lines = [
                'Successfully navigated forward',
                `Title: ${await page.title()}`,
                `URL: ${page.url()}`,
            ];
            return lines.join('\n');
        } catch(e){
            throw new UserError(`Error navigating forward: ${e}`);
        }
    },
};
let scraping_browser_snapshot = {
    name: 'scraping_browser_snapshot',
    description: [
        'Capture an ARIA snapshot of the current page showing all interactive '
        +'elements with their refs.',
        'This provides accurate element references that can be used with '
        +'ref-based tools.',
        'Use this before interacting with elements to get proper refs instead '
        +'of guessing selectors.'
    ].join('\n'),
    annotations: {
        title: 'Browser Snapshot',
        readOnlyHint: true,
    },
    parameters: z.object({
        filtered: z.boolean().optional().describe(
            'Whether to apply filtering/compaction (default: false). '
            +'Set to true to get a compacted version of the snapshot.'),
    }),
    // Captures the ARIA snapshot (and DOM snapshot when present) and
    // formats them as a readable report.
    execute: async({filtered=false})=>{
        const session = await require_browser();
        // Ensure a page is open before capturing.
        await session.get_page();
        try {
            const snapshot = await session.capture_snapshot({filtered});
            let report = `Page: ${snapshot.url}\n`
                +`Title: ${snapshot.title}\n`
                +'\nInteractive Elements:\n'
                +snapshot.aria_snapshot;
            if (snapshot.dom_snapshot)
            {
                report += '\n\nDOM Interactive Elements:\n'
                    +snapshot.dom_snapshot;
            }
            return report;
        } catch(e){
            throw new UserError(`Error capturing snapshot: ${e}`);
        }
    },
};
let scraping_browser_click_ref = {
    name: 'scraping_browser_click_ref',
    description: [
        'Click on an element using its ref from the ARIA snapshot.',
        'Use scraping_browser_snapshot first to get the correct ref values.',
        'This is more reliable than CSS selectors.'
    ].join('\n'),
    annotations: {
        title: 'Browser Click Element',
        destructiveHint: true,
    },
    parameters: z.object({
        ref: z.string().describe('The ref attribute from the ARIA snapshot (e.g., "23")'),
        element: z.string().describe('Description of the element being clicked for context'),
    }),
    // Resolves the ref to a locator and clicks it with a short timeout.
    execute: async({ref, element})=>{
        const session = await require_browser();
        try {
            const target = await session.ref_locator({element, ref});
            await target.click({timeout: 5000});
        } catch(e){
            throw new UserError(
                `Error clicking element ${element} with ref ${ref}: ${e}`);
        }
        return `Successfully clicked element: ${element} (ref=${ref})`;
    },
};
let scraping_browser_type_ref = {
    name: 'scraping_browser_type_ref',
    description: [
        'Type text into an element using its ref from the ARIA snapshot.',
        'Use scraping_browser_snapshot first to get the correct ref values.',
        'This is more reliable than CSS selectors.'
    ].join('\n'),
    annotations: {
        title: 'Browser Type Text',
        destructiveHint: true,
    },
    parameters: z.object({
        ref: z.string().describe('The ref attribute from the ARIA snapshot (e.g., "23")'),
        element: z.string().describe('Description of the element being typed into for context'),
        text: z.string().describe('Text to type'),
        submit: z.boolean().optional()
            .describe('Whether to submit the form after typing (press Enter)'),
    }),
    // Fills the target field with the given text and optionally presses
    // Enter to submit the enclosing form.
    execute: async({ref, element, text, submit})=>{
        const session = await require_browser();
        try {
            const target = await session.ref_locator({element, ref});
            await target.fill(text);
            if (submit)
                await target.press('Enter');
        } catch(e){
            throw new UserError(
                `Error typing into element ${element} with ref ${ref}: ${e}`);
        }
        return `Successfully typed "${text}" into element: ${element}`
            +` (ref=${ref})${submit ? ' and submitted the form' : ''}`;
    },
};
let scraping_browser_screenshot = {
    name: 'scraping_browser_screenshot',
    description: 'Take a screenshot of the current page',
    annotations: {
        title: 'Browser Screenshot',
        readOnlyHint: true,
    },
    parameters: z.object({
        full_page: z.boolean().optional().describe([
            'Whether to screenshot the full page (default: false)',
            'You should avoid fullscreen if it\'s not important, since the '
            +'images can be quite large',
        ].join('\n')),
    }),
    // Captures the current page (viewport by default, full page on request)
    // and returns it as MCP image content.
    execute: async({full_page=false})=>{
        const session = await require_browser();
        const page = await session.get_page();
        try {
            const buffer = await page.screenshot({fullPage: full_page});
            return image_content({buffer});
        } catch(e){
            throw new UserError(`Error taking screenshot: ${e}`);
        }
    },
};
let scraping_browser_get_html = {
    name: 'scraping_browser_get_html',
    description: 'Get the HTML content of the current page. Avoid using this '
        +'tool and if used, use full_page option unless it is important to see '
        +'things like script tags since this can be large',
    annotations: {
        title: 'Browser Get HTML',
        readOnlyHint: true,
    },
    parameters: z.object({
        full_page: z.boolean().optional().describe([
            'Whether to get the full page HTML including head and script tags',
            'Avoid this if you only need the extra HTML, since it can be '
            +'quite large',
        ].join('\n')),
    }),
    // Returns body.innerHTML by default, or the complete document source
    // (including <head> and scripts) when full_page is true.
    execute: async({full_page=false})=>{
        const page = await (await require_browser()).get_page();
        try {
            if (!full_page)
                return await page.$eval('body', body=>body.innerHTML);
            // The old `if (!full_page && html)` body-extraction branch after
            // this point was unreachable (the !full_page case returned
            // above) and has been removed as dead code.
            return await page.content();
        } catch(e){
            throw new UserError(`Error getting HTML content: ${e}`);
        }
    },
};
let scraping_browser_get_text = {
    name: 'scraping_browser_get_text',
    description: 'Get the text content of the current page',
    annotations: {
        title: 'Browser Get Text',
        readOnlyHint: true,
    },
    parameters: z.object({}),
    // Returns document.body.innerText of the current page.
    execute: async()=>{
        const session = await require_browser();
        const page = await session.get_page();
        try {
            return await page.$eval('body', body=>body.innerText);
        } catch(e){
            throw new UserError(`Error getting text content: ${e}`);
        }
    },
};
let scraping_browser_scroll = {
    name: 'scraping_browser_scroll',
    description: 'Scroll to the bottom of the current page',
    annotations: {
        title: 'Browser Scroll',
        destructiveHint: true,
    },
    parameters: z.object({}),
    // Scrolls the window to document.body.scrollHeight in page context.
    execute: async()=>{
        const session = await require_browser();
        const page = await session.get_page();
        try {
            await page.evaluate(
                ()=>window.scrollTo(0, document.body.scrollHeight));
        } catch(e){
            throw new UserError(`Error scrolling page: ${e}`);
        }
        return 'Successfully scrolled to the bottom of the page';
    },
};
let scraping_browser_scroll_to_ref = {
    name: 'scraping_browser_scroll_to_ref',
    description: [
        'Scroll to a specific element using its ref from the ARIA snapshot.',
        'Use scraping_browser_snapshot first to get the correct ref values.',
        'This is more reliable than CSS selectors.'
    ].join('\n'),
    annotations: {
        title: 'Browser Scroll to Element',
        destructiveHint: true,
    },
    parameters: z.object({
        ref: z.string().describe('The ref attribute from the ARIA snapshot (e.g., "23")'),
        element: z.string().describe('Description of the element to scroll to'),
    }),
    // Resolves the ref to a locator and brings it into the viewport.
    execute: async({ref, element})=>{
        const session = await require_browser();
        try {
            const target = await session.ref_locator({element, ref});
            await target.scrollIntoViewIfNeeded();
        } catch(e){
            throw new UserError(`Error scrolling to element ${element} with `
                +`ref ${ref}: ${e}`);
        }
        return `Successfully scrolled to element: ${element} (ref=${ref})`;
    },
};
let scraping_browser_network_requests = {
    name: 'scraping_browser_network_requests',
    description: [
        'Get all network requests made since loading the current page.',
        'Shows HTTP method, URL, status code and status text for each request.',
        'Useful for debugging API calls, tracking data fetching, and '
        +'understanding page behavior.'
    ].join('\n'),
    annotations: {
        title: 'Browser Network Requests',
        readOnlyHint: true,
    },
    parameters: z.object({}),
    // Formats the session's recorded request→response map as one line per
    // request; responses may be absent for in-flight/failed requests.
    execute: async()=>{
        const session = await require_browser();
        try {
            const requests = await session.get_requests();
            if (requests.size==0)
                return 'No network requests recorded for the current page.';
            const results = [];
            for (const [request, response] of requests)
            {
                let line =
                    `[${request.method().toUpperCase()}] ${request.url()}`;
                if (response)
                {
                    line += ` => [${response.status()}] `
                        +response.statusText();
                }
                results.push(line);
            }
            return [
                `Network Requests (${results.length} total):`,
                '',
                ...results
            ].join('\n');
        } catch(e){
            throw new UserError(`Error getting network requests: ${e}`);
        }
    },
};
let scraping_browser_wait_for_ref = {
    name: 'scraping_browser_wait_for_ref',
    description: [
        'Wait for an element to be visible using its ref from the ARIA snapshot.',
        'Use scraping_browser_snapshot first to get the correct ref values.',
        'This is more reliable than CSS selectors.'
    ].join('\n'),
    annotations: {
        title: 'Browser Wait for Element',
        readOnlyHint: true,
    },
    parameters: z.object({
        ref: z.string().describe('The ref attribute from the ARIA snapshot (e.g., "23")'),
        element: z.string().describe('Description of the element being waited for'),
        timeout: z.number().optional()
            .describe('Maximum time to wait in milliseconds (default: 30000)'),
    }),
    // Resolves the ref to a locator and waits until the element is present.
    execute: async({ref, element, timeout})=>{
        const browser_session = await require_browser();
        try {
            const locator = await browser_session.ref_locator({element, ref});
            // ?? instead of ||: `timeout || 30000` silently replaced an
            // explicit timeout of 0 (Playwright's "no timeout") with 30000.
            await locator.waitFor({timeout: timeout ?? 30000});
            return `Successfully waited for element: ${element} (ref=${ref})`;
        } catch(e){
            throw new UserError(`Error waiting for element ${element} with ref ${ref}: ${e}`);
        }
    },
};
let scraping_browser_fill_form = {
    name: 'scraping_browser_fill_form',
    description: [
        'Fill multiple form fields in one operation.',
        'Use scraping_browser_snapshot first to get the correct ref values.',
        'This is more efficient than filling fields one by one.'
    ].join('\n'),
    // Added for consistency: every other tool in this module declares
    // annotations; filling a form mutates page state, hence destructiveHint.
    annotations: {
        title: 'Browser Fill Form',
        destructiveHint: true,
    },
    parameters: z.object({
        fields: z.array(z.object({
            name: z.string().describe('Human-readable field name'),
            type: z.enum(['textbox', 'checkbox', 'radio', 'combobox',
                'slider']).describe('Type of the field'),
            ref: z.string().describe(
                'Exact target field reference from the page snapshot'),
            value: z.string().describe([
                'Value to fill in the field.',
                'For checkbox: use "true" or "false".',
                'For combobox: use the text of the option to select.'
            ].join(' ')),
        })).describe('Fields to fill in'),
    }),
    // Fields are processed sequentially so earlier fills (which may trigger
    // page updates) complete before later ones run.
    execute: async({fields})=>{
        const browser_session = await require_browser();
        try {
            const results = [];
            for (const field of fields)
            {
                const locator = await browser_session.ref_locator({
                    element: field.name,
                    ref: field.ref,
                });
                if (field.type=='textbox' || field.type=='slider')
                {
                    await locator.fill(field.value);
                    results.push(`Filled ${field.name} with "${field.value}"`);
                }
                else if (field.type=='checkbox' || field.type=='radio')
                {
                    const checked = field.value=='true';
                    await locator.setChecked(checked);
                    results.push(`Set ${field.name} to ${checked ? 'checked'
                        : 'unchecked'}`);
                }
                else if (field.type=='combobox')
                {
                    await locator.selectOption({label: field.value});
                    results.push(`Selected "${field.value}" in ${field.name}`);
                }
            }
            return 'Successfully filled form:\n'+results.join('\n');
        } catch(e){
            throw new UserError(`Error filling form: ${e}`);
        }
    },
};
// All browser tools exposed by this module, in presentation order.
export const tools = [
    scraping_browser_navigate,
    scraping_browser_go_back,
    scraping_browser_go_forward,
    scraping_browser_snapshot,
    scraping_browser_click_ref,
    scraping_browser_type_ref,
    scraping_browser_screenshot,
    scraping_browser_network_requests,
    scraping_browser_wait_for_ref,
    scraping_browser_fill_form,
    scraping_browser_get_text,
    scraping_browser_get_html,
    scraping_browser_scroll,
    scraping_browser_scroll_to_ref,
];