3
3
import re
4
4
from dataclasses import dataclass , field
5
5
from datetime import datetime
6
- from typing import Any , Dict , List , Optional , Protocol , Tuple
6
+ from typing import Any , Dict , List , Optional , Protocol , Tuple , Union
7
7
from urllib .parse import urlparse
8
8
9
9
from ollama import Client
@@ -30,7 +30,7 @@ class BrowserStateData:
30
30
class WebSearchResult :
31
31
title : str
32
32
url : str
33
- content : Dict [str , str ] # {"fullText": str}
33
+ content : Dict [str , str ]
34
34
35
35
36
36
class SearchClient (Protocol ):
@@ -94,7 +94,6 @@ def __init__(
94
94
self .state = BrowserState (initial_state )
95
95
self ._client : Optional [Client ] = client
96
96
97
- # parity with TS: one setter that accepts both
98
97
def set_client (self , client : Client ) -> None :
99
98
self ._client = client
100
99
@@ -160,10 +159,9 @@ def _process_markdown_links(self, text: str) -> Tuple[str, Dict[int, str]]:
160
159
links : Dict [int , str ] = {}
161
160
link_id = 0
162
161
163
- # collapse [text]\n(url) -> [text](url)
164
162
multiline_pattern = re .compile (r'\[([^\]]+)\]\s*\n\s*\(([^)]+)\)' )
165
163
text = multiline_pattern .sub (lambda m : f'[{ m .group (1 )} ]({ m .group (2 )} )' , text )
166
- text = re .sub (r'\s+' , ' ' , text ) # mild cleanup from the above
164
+ text = re .sub (r'\s+' , ' ' , text )
167
165
168
166
link_pattern = re .compile (r'\[([^\]]+)\]\(([^)]+)\)' )
169
167
@@ -185,7 +183,6 @@ def _get_end_loc(self, loc: int, num_lines: int, total_lines: int, lines: List[s
185
183
txt = self ._join_lines_with_numbers (lines [loc :])
186
184
data = self .state .get_data ()
187
185
if len (txt ) > data .view_tokens :
188
- # approximate char-per-token heuristic (keep identical to TS flow)
189
186
max_chars_per_token = 128
190
187
upper_bound = min ((data .view_tokens + 1 ) * max_chars_per_token , len (txt ))
191
188
segment = txt [:upper_bound ]
@@ -242,10 +239,10 @@ def _build_search_results_page_collection(self, query: str, results: Dict[str, A
242
239
)
243
240
244
241
tb = []
245
- tb .append ('' ) # L0 blank
246
- tb .append ('URL: ' ) # L1 "URL: "
247
- tb .append ('# Search Results' ) # L2
248
- tb .append ('' ) # L3 blank
242
+ tb .append ('' )
243
+ tb .append ('URL: ' )
244
+ tb .append ('# Search Results' )
245
+ tb .append ('' )
249
246
250
247
link_idx = 0
251
248
for query_results in results .get ('results' , {}).values ():
@@ -276,7 +273,6 @@ def _build_search_result_page(self, result: WebSearchResult, link_idx: int) -> P
276
273
fetched_at = datetime .utcnow (),
277
274
)
278
275
279
- # preview block (when no full text)
280
276
link_fmt = f'【{ link_idx } †{ result .title } 】\n '
281
277
preview = link_fmt + f'URL: { result .url } \n '
282
278
full_text = result .content .get ('fullText' , '' ) if result .content else ''
@@ -296,7 +292,7 @@ def _build_search_result_page(self, result: WebSearchResult, link_idx: int) -> P
296
292
page .lines = self ._wrap_lines (page .text , 80 )
297
293
return page
298
294
299
- def _build_page_from_crawl (self , requested_url : str , crawl_response : Dict [str , Any ]) -> Page :
295
+ def _build_page_from_fetch (self , requested_url : str , fetch_response : Dict [str , Any ]) -> Page :
300
296
page = Page (
301
297
url = requested_url ,
302
298
title = requested_url ,
@@ -306,7 +302,7 @@ def _build_page_from_crawl(self, requested_url: str, crawl_response: Dict[str, A
306
302
fetched_at = datetime .utcnow (),
307
303
)
308
304
309
- for url , url_results in crawl_response .get ('results' , {}).items ():
305
+ for url , url_results in fetch_response .get ('results' , {}).items ():
310
306
if url_results :
311
307
r0 = url_results [0 ]
312
308
if r0 .get ('content' ):
@@ -372,22 +368,20 @@ def search(self, *, query: str, topn: int = 5) -> Dict[str, Any]:
372
368
if not self ._client :
373
369
raise RuntimeError ('Client not provided' )
374
370
375
- resp = self ._client .web_search ([ query ] , max_results = topn )
371
+ resp = self ._client .web_search (query , max_results = topn )
376
372
377
- # Normalize to dict shape used by page builders
378
373
normalized : Dict [str , Any ] = {'results' : {}}
379
- for q , items in resp .results .items ():
380
- rows : List [Dict [str , str ]] = []
381
- for item in items :
382
- content = item .content or ''
383
- rows .append (
384
- {
385
- 'title' : item .title ,
386
- 'url' : item .url ,
387
- 'content' : content ,
388
- }
389
- )
390
- normalized ['results' ][q ] = rows
374
+ rows : List [Dict [str , str ]] = []
375
+ for item in resp .results :
376
+ content = item .content or ''
377
+ rows .append (
378
+ {
379
+ 'title' : item .title ,
380
+ 'url' : item .url ,
381
+ 'content' : content ,
382
+ }
383
+ )
384
+ normalized ['results' ][query ] = rows
391
385
392
386
search_page = self ._build_search_results_page_collection (query , normalized )
393
387
self ._save_page (search_page )
@@ -430,7 +424,6 @@ def open(
430
424
if state .page_stack :
431
425
page = self ._page_from_stack (state .page_stack [- 1 ])
432
426
433
- # Open by URL (string id)
434
427
if isinstance (id , str ):
435
428
url = id
436
429
if url in state .url_to_page :
@@ -439,35 +432,30 @@ def open(
439
432
page_text = self ._display_page (state .url_to_page [url ], cursor , loc , num_lines )
440
433
return {'state' : self .get_state (), 'pageText' : cap_tool_content (page_text )}
441
434
442
- crawl_response = self ._client .web_crawl ([url ])
443
- # Normalize to dict shape used by page builders
444
- normalized : Dict [str , Any ] = {'results' : {}}
445
- for u , items in crawl_response .results .items ():
446
- rows : List [Dict [str , str ]] = []
447
- for item in items :
448
- content = item .content or ''
449
- rows .append (
435
+ fetch_response = self ._client .web_fetch (url )
436
+ normalized : Dict [str , Any ] = {
437
+ 'results' : {
438
+ url : [
450
439
{
451
- 'title' : item .title ,
452
- 'url' : item . url ,
453
- 'content' : content ,
440
+ 'title' : fetch_response .title or url ,
441
+ 'url' : url ,
442
+ 'content' : fetch_response . content or '' ,
454
443
}
455
- )
456
- normalized ['results' ][u ] = rows
457
- new_page = self ._build_page_from_crawl (url , normalized )
444
+ ]
445
+ }
446
+ }
447
+ new_page = self ._build_page_from_fetch (url , normalized )
458
448
self ._save_page (new_page )
459
449
cursor = len (self .get_state ().page_stack ) - 1
460
450
page_text = self ._display_page (new_page , cursor , loc , num_lines )
461
451
return {'state' : self .get_state (), 'pageText' : cap_tool_content (page_text )}
462
452
463
- # Open by link id (int) from current page
464
453
if isinstance (id , int ):
465
454
if not page :
466
455
raise RuntimeError ('No current page to resolve link from' )
467
456
468
457
link_url = page .links .get (id )
469
458
if not link_url :
470
- # build an error page like TS
471
459
err = Page (
472
460
url = f'invalid_link_{ id } ' ,
473
461
title = f'No link with id { id } on `{ page .title } `' ,
@@ -497,28 +485,25 @@ def open(
497
485
498
486
new_page = state .url_to_page .get (link_url )
499
487
if not new_page :
500
- crawl_response = self ._client .web_crawl ([link_url ])
501
- normalized : Dict [str , Any ] = {'results' : {}}
502
- for u , items in crawl_response .results .items ():
503
- rows : List [Dict [str , str ]] = []
504
- for item in items :
505
- content = item .content or ''
506
- rows .append (
488
+ fetch_response = self ._client .web_fetch (link_url )
489
+ normalized : Dict [str , Any ] = {
490
+ 'results' : {
491
+ link_url : [
507
492
{
508
- 'title' : item .title ,
509
- 'url' : item . url ,
510
- 'content' : content ,
493
+ 'title' : fetch_response .title or link_url ,
494
+ 'url' : link_url ,
495
+ 'content' : fetch_response . content or '' ,
511
496
}
512
- )
513
- normalized ['results' ][u ] = rows
514
- new_page = self ._build_page_from_crawl (link_url , normalized )
497
+ ]
498
+ }
499
+ }
500
+ new_page = self ._build_page_from_fetch (link_url , normalized )
515
501
516
502
self ._save_page (new_page )
517
503
cursor = len (self .get_state ().page_stack ) - 1
518
504
page_text = self ._display_page (new_page , cursor , loc , num_lines )
519
505
return {'state' : self .get_state (), 'pageText' : cap_tool_content (page_text )}
520
506
521
- # No id: just re-display the current page and advance stack
522
507
if not page :
523
508
raise RuntimeError ('No current page to display' )
524
509
@@ -547,3 +532,5 @@ def find(self, *, pattern: str, cursor: int = -1) -> Dict[str, Any]:
547
532
548
533
page_text = self ._display_page (find_page , new_cursor , 0 , - 1 )
549
534
return {'state' : self .get_state (), 'pageText' : cap_tool_content (page_text )}
535
+
536
+
0 commit comments