不重复地访问网站,采用递归的方法:

from bs4 import BeautifulSoup
import urllib.request

def spider(url):
    global urls
    if url not in urls:
        ____________________
        try:
            data=urllib.request.urlopen(url)
            data=data.read()
            data=data.decode()
            soup=BeautifulSoup(data,"lxml")
            print(soup.find("h3").text)
            links=soup.select("a")
            for link in links:
                href=link["href"]
                _________________________
                spider(url)
        except Exception as err:
            print(err)

start_url="http://127.0.0.1:5000"
urls=[]
spider(start_url)
print("The End")
A、urls.append(url);url=start_url+"/"+href;
B、urls.append(url);url=start_url+href;
C、urls.insert(url,0);url=start_url+"/"+href;
D、urls.insert(url,0);url=start_url+href
发布时间:2024-09-22 02:43:37