browser-history-generator/main.py
2024-01-12 22:12:34 -05:00

40 lines
1.7 KiB
Python

import csv
import random
def generate_random_title(domain, ip=False):
    """Build a fake page title of the form ``"<site> - <page>"``.

    Args:
        domain: Host string the title is derived from.
        ip: When true, ``domain`` is used verbatim as the site part;
            otherwise only the text before the first ``'.'`` is used.

    Returns:
        The site part joined by ``" - "`` to one randomly chosen entry
        from a fixed list of common page names.
    """
    page_names = (
        "home", "about", "contact", "login", "register", "dashboard",
        "profile", "settings", "help", "faq", "blog", "news", "events",
        "gallery", "products", "services", "testimonials", "careers",
        "terms", "privacy", "sitemap", "search", "404",
    )
    # An IP address is full of dots, so it is kept whole instead of being
    # cut at the first one.
    site_label = domain if ip else domain.partition('.')[0]
    page = random.choice(page_names)
    return "{} - {}".format(site_label, page)
def _read_nonblank_lines(path):
    """Return the whitespace-stripped, non-empty lines of the text file at *path*."""
    with open(path, 'r', encoding='utf-8') as handle:
        return [entry for entry in (line.strip() for line in handle) if entry]


def create_edge_history_csv(domain_file, ip_file, output_csv, ip_chance=0.15,
                            num_entries=10000):
    """Write a CSV of randomly generated browser-history rows.

    Each row has a ``URL`` column (a random entry from one of the two input
    lists) and a ``Page Title`` column produced by ``generate_random_title``.

    Args:
        domain_file: Path to a text file with one domain per line.
        ip_file: Path to a text file with one IP address per line.
        output_csv: Path of the CSV file to create (overwritten if present).
        ip_chance: Probability, per row, of picking an IP address instead of
            a domain.
        num_entries: Number of data rows to write after the header.

    Raises:
        OSError: If an input file cannot be read or the output cannot be
            written.
        ValueError: If rows are requested but a list that may be drawn from
            contains no entries.
    """
    # Load both inputs before touching the output so a missing or unreadable
    # input file cannot leave behind a truncated, header-less CSV.
    domains = _read_nonblank_lines(domain_file)
    ips = _read_nonblank_lines(ip_file)

    # Fail fast with a clear message instead of an IndexError from
    # random.choice halfway through writing the file.
    if num_entries > 0:
        if ip_chance < 1 and not domains:
            raise ValueError(f"no domains found in {domain_file!r}")
        if ip_chance > 0 and not ips:
            raise ValueError(f"no IP addresses found in {ip_file!r}")

    with open(output_csv, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=['URL', 'Page Title'])
        writer.writeheader()
        for _ in range(num_entries):
            # Decide per row whether this visit targets a raw IP or a domain.
            use_ip = random.random() < ip_chance
            url = random.choice(ips if use_ip else domains)
            title = generate_random_title(url, ip=use_ip)
            writer.writerow({'URL': url, 'Page Title': title})
# The paths below are examples: point domain_file and ip_file at your own
# line-per-entry lists. The generated history is written to output_csv.
create_edge_history_csv(
    domain_file='top10kdomains.csv',
    ip_file='ips.txt',
    output_csv='edge_history.csv',
)