7 Python Scripts for Technical SEO
Free scripts to automate your SEO audits

🧰 1. Website Crawler (Find Broken Links Fast)
This script scans your website and shows you which pages are working—and which ones are broken.
👉 What it helps with:
Finding 404 errors
Catching broken internal links
Spotting crawl issues
💡 Why it matters:
Broken links hurt both SEO and user experience.
###BEGIN SEO CODE###
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin, urlparse
from collections import deque
def is_same_domain(base_url, test_url):
    """Return True if test_url is on the same host as base_url.

    Hostnames are compared case-insensitively, since DNS names are
    case-insensitive ("Example.COM" and "example.com" are the same host).
    """
    return urlparse(base_url).netloc.lower() == urlparse(test_url).netloc.lower()
def crawl_site(start_url, max_pages=50):
    """Breadth-first crawl of start_url's domain, collecting problem URLs.

    Args:
        start_url: Absolute URL where the crawl begins.
        max_pages: Upper bound on pages fetched (the crawl budget).

    Returns:
        List of (url, status) tuples, where status is either an HTTP
        status code >= 400 or the string "Request failed".
    """
    visited = set()
    queue = deque([start_url])
    broken_links = []
    headers = {
        "User-Agent": "Mozilla/5.0 (compatible; SimpleSEOCrawler/1.0)"
    }
    while queue and len(visited) < max_pages:
        current_url = queue.popleft()
        if current_url in visited:
            continue
        visited.add(current_url)
        print(f"Crawling: {current_url}")
        try:
            response = requests.get(current_url, headers=headers, timeout=10)
            status_code = response.status_code
            print(f"Status: {status_code}")
            if status_code >= 400:
                broken_links.append((current_url, status_code))
                continue
            soup = BeautifulSoup(response.text, "html.parser")
            for link in soup.find_all("a", href=True):
                href = link["href"].strip()
                # Skip in-page anchors and non-HTTP schemes.
                if href.startswith(("#", "mailto:", "tel:")):
                    continue
                # Drop the fragment so /page and /page#section are not
                # treated (and fetched) as two separate pages.
                full_url = urljoin(current_url, href).split("#", 1)[0]
                if is_same_domain(start_url, full_url) and full_url not in visited:
                    queue.append(full_url)
        except requests.RequestException as e:
            print(f"Error crawling {current_url}: {e}")
            broken_links.append((current_url, "Request failed"))
    return broken_links
if __name__ == "__main__":
    # Entry point: crawl the target site and report any problem URLs.
    target = "https://example.com"
    problem_pages = crawl_site(target, max_pages=50)
    print("\nBroken Links / Problem URLs:")
    for page_url, page_status in problem_pages:
        print(f"{page_url} -> {page_status}")
###END CODE###
🔍 2. Title & Meta Description Checker
This one pulls page titles and meta descriptions so you can review them quickly.
👉 What it helps with:
Finding missing meta descriptions
Fixing duplicate titles
Improving click-through rates
💡 Why it matters:
Your title + description = what people see in Google.
###BEGIN SEO CODE###
import requests
from bs4 import BeautifulSoup
def get_meta_data(url):
    """Fetch url and print its <title> and meta description.

    Prints "No Title Found" / "No Meta Description Found" when the tags
    are absent or empty, and an error line when the request fails or the
    server answers with a 4xx/5xx status.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (compatible; MetaChecker/1.0)"
    }
    try:
        response = requests.get(url, headers=headers, timeout=10)
        # Treat 4xx/5xx responses as errors instead of scraping their
        # error-page markup (HTTPError is a RequestException subclass,
        # so the existing except clause handles it).
        response.raise_for_status()
        soup = BeautifulSoup(response.text, "html.parser")
        # Get title
        title = soup.title.string.strip() if soup.title and soup.title.string else "No Title Found"
        # Get meta description
        meta_desc_tag = soup.find("meta", attrs={"name": "description"})
        meta_desc = meta_desc_tag["content"].strip() if meta_desc_tag and meta_desc_tag.get("content") else "No Meta Description Found"
        print(f"\nURL: {url}")
        print(f"Title: {title}")
        print(f"Meta Description: {meta_desc}")
    except requests.RequestException as e:
        print(f"Error fetching {url}: {e}")
# Example usage
if __name__ == "__main__":
    pages_to_check = [
        "https://example.com",
        "https://example.com/about",
    ]
    for target in pages_to_check:
        get_meta_data(target)
###END CODE###
🔗 3. Internal Link Analyzer
This script shows how your pages are linked together internally.
👉 What it helps with:
Improving site structure
Finding weak or missing internal links
Identifying orphan pages
💡 Why it matters:
Internal links help Google understand your site.
###BEGIN SEO CODE###
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin, urlparse
from collections import defaultdict
def is_internal_link(base_url, link):
    """Return True if link points to the same host as base_url.

    Hostnames are compared case-insensitively, since DNS names are
    case-insensitive ("Example.COM" and "example.com" are the same host).
    """
    return urlparse(base_url).netloc.lower() == urlparse(link).netloc.lower()
def analyze_internal_links(start_url, max_pages=50):
    """Crawl up to max_pages pages and map each page to its internal links.

    Args:
        start_url: Absolute URL where the crawl begins.
        max_pages: Upper bound on pages fetched.

    Returns:
        dict mapping page URL -> list of internal link targets found on
        that page (duplicates preserved, so a link repeated on a page is
        counted more than once).
    """
    visited = set()
    to_visit = [start_url]
    internal_links_map = defaultdict(list)
    headers = {
        "User-Agent": "Mozilla/5.0 (compatible; InternalLinkAnalyzer/1.0)"
    }
    while to_visit and len(visited) < max_pages:
        current_url = to_visit.pop()
        if current_url in visited:
            continue
        visited.add(current_url)
        print(f"Analyzing: {current_url}")
        try:
            response = requests.get(current_url, headers=headers, timeout=10)
            soup = BeautifulSoup(response.text, "html.parser")
            for link in soup.find_all("a", href=True):
                href = link["href"].strip()
                # Skip in-page anchors and non-HTTP schemes, matching the
                # broken-link crawler's behavior.
                if href.startswith(("#", "mailto:", "tel:")):
                    continue
                # Strip fragments so /page and /page#top are one URL and
                # anchors don't re-queue an already-visited page.
                full_url = urljoin(current_url, href).split("#", 1)[0]
                if is_internal_link(start_url, full_url):
                    internal_links_map[current_url].append(full_url)
                    if full_url not in visited:
                        to_visit.append(full_url)
        except requests.RequestException as e:
            print(f"Error: {e}")
    return internal_links_map
if __name__ == "__main__":
    # Entry point: build and print the site's internal-link map.
    start_page = "https://example.com"
    link_map = analyze_internal_links(start_page, max_pages=50)
    print("\nInternal Link Structure:")
    for source_page, outgoing in link_map.items():
        print(f"\n{source_page}")
        for destination in outgoing:
            print(f" -> {destination}")
###END CODE###
📊 4. Sitemap Extractor
Quickly pulls all URLs from your sitemap.
👉 What it helps with:
Seeing which URLs you’ve submitted for indexing
Comparing sitemap vs actual pages
Finding gaps
💡 Why it matters:
Your sitemap should reflect your best content.
###BEGIN SEO CODE###
import requests
import xml.etree.ElementTree as ET
def parse_sitemap_urls(xml_content):
    """Parse sitemap XML (bytes or str) and return the <loc> URLs.

    Only plain <urlset> sitemaps yield URLs; a <sitemapindex> (a sitemap
    of sitemaps) contains <sitemap> entries instead of <url> entries and
    so yields an empty list. Raises ET.ParseError on malformed XML.
    """
    namespace = {"ns": "http://www.sitemaps.org/schemas/sitemap/0.9"}
    root = ET.fromstring(xml_content)
    urls = []
    for url in root.findall("ns:url", namespace):
        loc = url.find("ns:loc", namespace)
        if loc is not None and loc.text:
            urls.append(loc.text.strip())
    return urls


def extract_sitemap_urls(sitemap_url):
    """Download a sitemap and return the page URLs it lists.

    Args:
        sitemap_url: Full URL of the sitemap.xml file.

    Returns:
        List of URL strings, or [] on any fetch or parse error (the
        error is printed, not raised).
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (compatible; SitemapExtractor/1.0)"
    }
    try:
        response = requests.get(sitemap_url, headers=headers, timeout=10)
        response.raise_for_status()
        return parse_sitemap_urls(response.content)
    except requests.RequestException as e:
        print(f"Error fetching sitemap: {e}")
        return []
    except ET.ParseError as e:
        print(f"Error parsing sitemap XML: {e}")
        return []
if __name__ == "__main__":
    # Entry point: list every URL the sitemap declares.
    sitemap_location = "https://example.com/sitemap.xml"
    found_urls = extract_sitemap_urls(sitemap_location)
    print("\nURLs found in sitemap:")
    for found in found_urls:
        print(found)
###END CODE###
🤖 5. Content Quality Checker
This one gives you a quick idea of whether your content is too thin.
👉 What it helps with:
Finding weak pages
Prioritizing updates
Improving overall quality
💡 Why it matters:
Thin content = lower rankings.
###BEGIN SEO CODE###
def check_content_quality(text):
    """Print a word count and a simple quality label for text.

    Thresholds: fewer than 300 words is "Thin Content", 300-799 is
    "Average Content", 800 or more is "Strong Content".

    Args:
        text: Page content as a plain string.

    Returns:
        The quality label, so callers can aggregate results as well as
        read the printed report.
    """
    word_count = len(text.split())
    if word_count < 300:
        quality = "Thin Content"
    elif word_count < 800:
        # The lower bound is implied by the preceding branch.
        quality = "Average Content"
    else:
        quality = "Strong Content"
    print(f"Word Count: {word_count}")
    print(f"Content Quality: {quality}")
    return quality
if __name__ == "__main__":
    # Demo run on placeholder copy; swap in real page text.
    sample_text = """
    Add your page content here. This script checks the word count
    and gives you a simple quality label based on how much text is on the page.
    """
    check_content_quality(sample_text)
###END CODE###
📁 6. Log File Analyzer
Looks at your server logs to see how search engines are crawling your site.
👉 What it helps with:
Understanding Googlebot behavior
Finding crawl budget issues
Spotting ignored pages
💡 Why it matters:
This is one of the most underused SEO insights.
###BEGIN SEO CODE###
def analyze_log_file(file_path):
    """Summarize Googlebot activity found in a web server access log.

    Counts total entries, lines whose text contains "Googlebot", and
    per-URL hits for those lines. The request path is taken from
    whitespace token 7 of each line, which matches the common/combined
    log formats — verify this index against your server's log format.

    Args:
        file_path: Path to the access log file.

    Returns:
        dict with keys "total_lines", "googlebot_hits" and "urls"
        (URL -> hit count for Googlebot requests), or None if the file
        cannot be read.
    """
    googlebot_hits = 0
    total_lines = 0
    urls = {}
    try:
        # errors="ignore": logs often contain stray non-UTF-8 bytes;
        # dropping them is preferable to aborting the whole analysis.
        with open(file_path, "r", encoding="utf-8", errors="ignore") as file:
            for line in file:
                total_lines += 1
                if "Googlebot" in line:
                    googlebot_hits += 1
                    parts = line.split()
                    if len(parts) > 6:
                        url = parts[6]
                        urls[url] = urls.get(url, 0) + 1
        print(f"\nTotal Log Entries: {total_lines}")
        print(f"Googlebot Visits: {googlebot_hits}")
        print("\nTop Crawled URLs:")
        sorted_urls = sorted(urls.items(), key=lambda x: x[1], reverse=True)
        for url, count in sorted_urls[:10]:
            print(f"{url} -> {count} hits")
        # Return the stats so callers can reuse them (e.g. to compare
        # against a sitemap), not just read the printed report.
        return {
            "total_lines": total_lines,
            "googlebot_hits": googlebot_hits,
            "urls": urls,
        }
    except FileNotFoundError:
        print("Log file not found.")
        return None
    except Exception as e:
        print(f"Error: {e}")
        return None
if __name__ == "__main__":
    # Entry point: summarize Googlebot hits in the local access log.
    analyze_log_file("access.log")
###END CODE###
⚡ 7. Simple Page Speed Checker
Measures how long pages take to load.
👉 What it helps with:
Finding slow pages
Improving performance
Boosting user experience
💡 Why it matters:
Speed is a ranking factor.
###BEGIN SEO CODE###
import requests
import time
def check_page_speed(url):
    """Fetch url once and print its status code and wall-clock load time.

    Buckets: under 1s "Fast", under 3s "Average", otherwise "Slow".
    This measures one full-response download from this machine — a rough
    signal, not a lab metric like Core Web Vitals.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (compatible; SpeedChecker/1.0)"
    }
    try:
        # perf_counter is monotonic, so the measurement cannot be skewed
        # by system clock adjustments the way time.time() can.
        start_time = time.perf_counter()
        response = requests.get(url, headers=headers, timeout=10)
        load_time = round(time.perf_counter() - start_time, 2)
        print(f"\nURL: {url}")
        print(f"Status Code: {response.status_code}")
        print(f"Load Time: {load_time} seconds")
        if load_time < 1:
            print("Speed: Fast ⚡")
        elif load_time < 3:
            # Fixed: this label was previously printed in Arabic
            # ("متوسط"), breaking the otherwise-English report.
            print("Speed: Average (OK)")
        else:
            print("Speed: Slow 🐢")
    except requests.RequestException as e:
        print(f"Error checking {url}: {e}")
if __name__ == "__main__":
    # Entry point: time a fetch of each page in the list.
    pages = [
        "https://example.com",
        "https://example.com/about",
    ]
    for page in pages:
        check_page_speed(page)
###END CODE###
📸 (Insert screenshot here)
🧠 A Few Simple Tips
Start with just one script — don’t try everything at once
Combine data (crawl + logs = powerful insights)
Always double-check results
🔗 Where to Go Next
If you want to take this further:
Turn these into a small toolkit
Automate them to run weekly
Build simple dashboards
💡 Final Thoughts
You don’t need expensive tools to do solid technical SEO.
A few simple scripts can save you hours and give you insights most people miss.
Start small, experiment, and build from there.
About the Author
Sandy Rowley is a Technical SEO expert and creator of The Rowley SEO Method, sharing insights on AI SEO, automation, and practical search optimization.
⚠️ AI Disclosure
This article was created with AI assistance and reviewed and edited by the author.
About the Creator
Sandy Rowley
AI SEO Expert Sandy Rowley helps businesses grow with cutting-edge search strategies, AI-driven content, technical SEO, and conversion-focused web design. 25+ years experience delivering high-ranking, revenue-generating digital solutions.



Comments
There are no comments for this story
Be the first to respond and start the conversation.