import csv
import random

# Page names appended to generated titles. Kept at module level so the
# sequence is built once instead of on every generate_random_title() call.
_TYPICAL_WEB_PAGES = (
    "home", "about", "contact", "login", "register", "dashboard",
    "profile", "settings", "help", "faq", "blog", "news", "events",
    "gallery", "products", "services", "testimonials", "careers",
    "terms", "privacy", "sitemap", "search", "404",
)


def generate_random_title(domain: str, ip: bool = False) -> str:
    """Return a random page title of the form ``"<name> - <page>"``.

    Args:
        domain: A domain name or IP address string.
        ip: When true, ``domain`` is used verbatim as the name. Otherwise
            only the text before the first ``'.'`` is used, so
            ``"example.com"`` yields ``"example"``.

    Returns:
        The name followed by ``" - "`` and one randomly chosen entry from
        the built-in list of typical page names.
    """
    title = domain if ip else domain.split('.')[0]
    return f"{title} - {random.choice(_TYPICAL_WEB_PAGES)}"


def _read_entries(path: str) -> list:
    """Return the stripped, non-empty lines of the text file at ``path``."""
    with open(path, 'r', encoding='utf-8') as file:
        stripped = (line.strip() for line in file)
        # Drop blank lines so they can never be picked as a URL.
        return [entry for entry in stripped if entry]


def create_edge_history_csv(domain_file, ip_file, output_csv, ip_chance=0.15,
                            num_rows=10000):
    """Write a CSV of randomly generated browsing-history rows.

    Each row has a ``URL`` column (an entry picked at random from the IP
    file or the domain file) and a ``Page Title`` column produced by
    ``generate_random_title``.

    Args:
        domain_file: Path to a text file with one domain per line.
        ip_file: Path to a text file with one IP address per line.
        output_csv: Path of the CSV file to create (overwritten if present).
        ip_chance: Probability in ``[0, 1]`` that a row uses an IP address
            instead of a domain.
        num_rows: Number of data rows to write after the header.

    Raises:
        OSError: If an input file cannot be read. This is raised before the
            output file is touched.
        ValueError: If a pool that may be drawn from has no usable entries.
    """
    # Load and validate both inputs *before* opening the output, so a bad
    # input never truncates an existing output file.
    domains = _read_entries(domain_file)
    ips = _read_entries(ip_file)

    if num_rows > 0:
        # random.random() is in [0, 1): IPs are drawn only when
        # ip_chance > 0, domains only when ip_chance < 1.
        if ip_chance > 0 and not ips:
            raise ValueError(f"no IP addresses found in {ip_file!r}")
        if ip_chance < 1 and not domains:
            raise ValueError(f"no domains found in {domain_file!r}")

    fieldnames = ['URL', 'Page Title']
    with open(output_csv, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()

        for _ in range(num_rows):
            use_ip = random.random() < ip_chance
            url = random.choice(ips if use_ip else domains)
            title = generate_random_title(url, ip=use_ip)
            writer.writerow({'URL': url, 'Page Title': title})


if __name__ == "__main__":
    # Replace 'top10kdomains.csv' and 'ips.txt' with the paths to your files.
    # The output will be saved in 'edge_history.csv'.
    create_edge_history_csv('top10kdomains.csv', 'ips.txt', 'edge_history.csv')