Content Discovery

Resources

# Fuzzing Wordlists
https://github.com/fuzzdb-project/fuzzdb

# Fuzzing and Content Discovery
https://github.com/kaimi-io/web-fuzz-wordlists

Tips

# Fuzz non-printable characters in any user input
# Could result in regex bypass, account takeover...
0x00, 0x2F, 0x3A, 0x40, 0x5B, 0x60, 0x7B, 0xFF
%00, %2F, %3A, %40, %5B, %60, %7B, %FF

Scraping from JS

# You can parse and scrape javascript content in a target website to look for hidden subdomains or interesting paths
# Often, endpoints are not public but users can still interact with them
# Tools like dirscraper automate this (https://github.com/Cillian-Collins/dirscraper)

# Classic
python dirscraper.py -u <url>

# Output mode
python dirscraper.py -u <url> -o <output>

# Silent mode (you won't see results in the terminal)
python dirscraper.py -u <url> -s -o <output>

# Relative URL Extractor is another good tool to scrape from JS files (https://github.com/jobertabma/relative-url-extractor)
ruby extract.rb https://hackerone.com/some-file.js
# Extract all API endpoints from AngularJS & Angular javascript files
curl -s URL | grep -Po "(\/)((?:[a-zA-Z\-_\:\.0-9\{\}]+))(\/)*((?:[a-zA-Z\-_\:\.0-9\{\}]+))(\/)((?:[a-zA-Z\-_\/\:\.0-9\{\}]+))" | sort -u
# https://github.com/incogbyte/jsearch
# Simple script that greps information from JavaScript files
python3 jsearch.py -u https://google.com -n google
https://github.com/GerbenJavado/LinkFinder

# Analyzing one file and HTML output
python linkfinder.py -i https://example.com/1.js -o results.html

# CLI/STDOUT output
python linkfinder.py -i https://example.com/1.js -o cli

# Analyzing entire domain
python linkfinder.py -i https://example.com -d
https://github.com/m4ll0k/SecretFinder
# Based on LinkFinder
# Uses regular expressions to search for data like API keys, tokens...

python3 SecretFinder.py -i https://example.com/1.js -o results.html
python3 SecretFinder.py -i https://example.com/1.js -o cli
python3 SecretFinder.py -i https://example.com/ -e
python3 SecretFinder.py -i https://example.com/ -e -g 'jquery;bootstrap;api.google.com'
https://github.com/lc/subjs
# subjs fetches JavaScript files from a list of URLs or subdomains.

$ cat urls.txt | subjs 
$ subjs -i urls.txt
$ cat hosts.txt | gau | subjs
# Check for broken links and domain takeover
# For twitter, TwitterBFTD is great
https://github.com/stevenvachon/broken-link-checker
$ blc -rof --filter-level 3 https://example.com/
$ blc -rfoi --exclude linkedin.com --exclude youtube.com --filter-level 3 https://example.com/

Dirsearch

$ python3 dirsearch.py -u https://www.target.fr -f -e php,xml,txt -t 10 -w wordpress.fuzz.txt

gau

https://github.com/lc/gau
# getallurls (gau) fetches known URLs from AlienVault's Open Threat Exchange,
# the Wayback Machine, and Common Crawl for any given domain

# It can be used to map and discover new targets (endpoints, domains, subdomains...)

$ printf example.com | gau
$ cat domains.txt | gau
$ gau example.com

hakrawler

https://github.com/hakluke/hakrawler

# Usage
cat urls.txt | hakrawler

# Example tool chain
echo google.com | haktrails subdomains | httpx | hakrawler