Init commit

2024-01-12 22:12:34 -05:00 · 2024-01-12 22:12:34 -05:00 · f23be658a0
commit f23be658a0
5 changed files with 20057 additions and 0 deletions
--- a/csvshortner.py
+++ b/csvshortner.py
@ -0,0 +1,12 @@
 # Trim a large ranked-domain CSV down to its first `amount` domain names,
 # writing one bare domain per line to `output`.
 import csv
 csvname = 'top10milliondomains.csv'
 output = 'top10kdomains.csv'
 amount = 10000
 counter = 0
 # newline='' lets the csv module handle line endings and quoted fields itself.
 with open(csvname, 'r', newline='') as file, open(output, 'w+') as fileout:
    # Iterating the reader stops cleanly at end of file instead of raising
    # IndexError when the source has fewer than `amount` usable rows.
    for row in csv.reader(file):
        # Strictly-less-than semantics: write exactly `amount` domains.
        if counter >= amount:
            break
        # Skip blank or malformed rows that have no second column.
        if len(row) < 2:
            continue
        domain = row[1].strip('"')
        # Skip the header row.
        if domain == "Domain":
            continue
        fileout.write(f"{domain}\n")
        counter += 1
--- a/edge_history.csv
+++ b/edge_history.csv
--- a/ips.txt
+++ b/ips.txt
@ -0,0 +1,4 @@
 10.0.0.1
 10.0.0.2
 10.0.0.3
 10.0.0.4
--- a/main.py
+++ b/main.py
@ -0,0 +1,39 @@
 import csv
 import random
 def generate_random_title(domain, ip=False):
    """Return a fake page title of the form "<label> - <page>".
    The label is `domain` unchanged when `ip` is true; otherwise it is the
    text before the first '.' in `domain`. The page name is picked at random
    from a fixed list of common page names.
    """
    page_names = ["home", "about", "contact", "login", "register", "dashboard", "profile", "settings", "help", "faq", "blog", "news", "events", "gallery", "products", "services", "testimonials", "careers", "terms", "privacy", "sitemap", "search", "404"]
    # IP addresses are kept whole; domains are reduced to their first label.
    label = domain if ip else domain.split('.')[0]
    page = random.choice(page_names)
    return f"{label} - {page}"
 def create_edge_history_csv(domain_file, ip_file, output_csv, ip_chance=0.15, num_entries=10000):
    """Write a CSV of randomly generated browsing-history rows.
    Each row has a 'URL' column (a random entry from `domain_file` or
    `ip_file`) and a 'Page Title' column built by generate_random_title.
    Args:
        domain_file: Path to a text file with one domain per line.
        ip_file: Path to a text file with one IP address per line.
        output_csv: Path of the CSV file to create (overwritten if present).
        ip_chance: Probability, per row, of using an IP instead of a domain.
        num_entries: Number of data rows to write (default 10000).
    Raises:
        IndexError: If rows are requested but both input files are empty.
    """
    # Read both inputs before opening the output, so a missing input file
    # does not leave behind a truncated output. Blank lines are dropped so
    # they cannot become empty URLs.
    with open(domain_file, 'r') as file:
        domains = [line.strip() for line in file if line.strip()]
    with open(ip_file, 'r') as file:
        ips = [line.strip() for line in file if line.strip()]
    if num_entries > 0 and not domains and not ips:
        raise IndexError("Cannot choose a URL: both input files are empty")
    with open(output_csv, 'w', newline='', encoding='utf-8') as csvfile:
        fieldnames = ['URL', 'Page Title']
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        for _ in range(num_entries):
            use_ip = random.random() < ip_chance
            # Fall back to the other list when the preferred one is empty.
            if (use_ip and ips) or not domains:
                url = random.choice(ips)
                title = generate_random_title(url, ip=True)
            else:
                url = random.choice(domains)
                title = generate_random_title(url, ip=False)
            writer.writerow({'URL': url, 'Page Title': title})
 # Script entry point: swap the domain and IP list paths below for your own
 # files. The generated history is saved to 'edge_history.csv'.
 create_edge_history_csv(
    domain_file='top10kdomains.csv',
    ip_file='ips.txt',
    output_csv='edge_history.csv',
 )
--- a/top10kdomains.csv
+++ b/top10kdomains.csv
+.0.0.1
+.0.0.2
+.0.0.3
+.0.0.4