Out of Scope domain in Spider feature for ZAP Python

74 views
Skip to first unread message

Ely Reyes

unread,
May 24, 2023, 12:09:17 PM5/24/23
to OWASP ZAP User Group
Hello,

I'm trying to create a python script that allows me to spider a website with the ZAP spider feature, but I'm not receiving any out of scope domains as I receive when I use the ZAP GUI. Is there anything that I should be doing different or add to my code so I may receive such domains. Below is an example script of what I been working on. 

version brings : from zapv2 import ZAPv2 import time import subprocess from urllib.parse import urlparse # Start ZAP in daemon mode zap_process = subprocess.Popen(['zap.sh', '-daemon', '-host', '127.0.0.1', '-port', '8080']) # Wait for ZAP to start time.sleep(20) # List of subdomains to crawl subdomains = ['www.example.com', 'sub.example.com'] # Create a dictionary to store the crawled links for each subdomain crawled_links = {subdomain: [] for subdomain in subdomains} # Create a set to store the out-of-scope domains out_of_scope_domains = set() # Create an instance of the ZAP API client zap = ZAPv2(apikey='') # Set the configuration options for the ZAP proxy if needed zap.core.set_option_default_user_agent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.0.0 Safari/537.36") zap.spider.set_option_max_depth(5) # Replace 5 with the desired maximum depth value # Perform spidering on the main site main_site = 'http://www.example.com' zap.urlopen(main_site) scanid = zap.spider.scan(main_site) # Wait for the spidering to complete while int(zap.spider.status(scanid)) < 100: time.sleep(2) # Get the URLs discovered by the spider on the main site urls = zap.spider.results(scanid) print(urls) main_site_links = [url for url in urls if urlparse(url).netloc in subdomains] # Classify the sites found on the main site into subdomains for link in main_site_links: for subdomain in subdomains: if urlparse(link).netloc == subdomain: crawled_links[subdomain].append(link) break else: out_of_scope_domains.add(urlparse(link).netloc) # Spider the subdomains and add the links found for subdomain in subdomains: target_url = f'http://{subdomain}' zap.urlopen(target_url) # Spider the subdomain scanid = zap.spider.scan(target_url) while int(zap.spider.status(scanid)) < 100: time.sleep(2) # Get the URLs discovered by the spider on the subdomain urls = zap.spider.results(scanid) # Add the links to the respective subdomain in the crawled_links dictionary crawled_links[subdomain].extend([url for url in urls if urlparse(url).netloc == subdomain]) out_of_scope_domains.update(urlparse(url).netloc for url in urls if urlparse(url).netloc not in subdomains) # Print the crawled links for each subdomain output_file = 'crawled_links1.txt' with open(output_file, 'w') as file: for subdomain, links in crawled_links.items(): file.write(f'Crawled links for {subdomain}:\n') for link in links: file.write(link + '\n') # Write the out-of-scope domains to the file file.write('\nOut-of-scope domains:\n') for domain in out_of_scope_domains: file.write(domain + '\n') # Terminate the ZAP process zap_process.terminate()
Reply all
Reply to author
Forward
0 new messages