Init commit
This commit is contained in:
commit
f23be658a0
12
csvshortner.py
Normal file
12
csvshortner.py
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
import csv

# Default paths and row limit used when this file is run as a script.
csvname = 'top10milliondomains.csv'
output = 'top10kdomains.csv'
amount = 10000


def shorten_csv(source_path=csvname, dest_path=output, limit=amount):
    """Copy the first *limit* domains from a ranked CSV into a plain list file.

    The second column of each row in *source_path* is taken as the domain.
    The header row (whose second column is the literal ``Domain``) is
    skipped and does not count towards the limit. Each kept domain is
    written to *dest_path* on its own line.

    Args:
        source_path: Path of the CSV file to read.
        dest_path: Path of the file to (over)write, one domain per line.
        limit: Maximum number of domains to write.

    Returns:
        The number of domains actually written. This is less than *limit*
        when the source file runs out of usable rows first.
    """
    written = 0
    # newline='' lets the csv module handle line endings and quoting itself.
    with open(source_path, 'r', newline='', encoding='utf-8') as source:
        with open(dest_path, 'w', encoding='utf-8') as dest:
            for row in csv.reader(source):
                # Stop at exactly `limit` domains (not limit + 1).
                if written >= limit:
                    break
                # Blank or malformed rows have no second column; skip them
                # instead of failing with an IndexError.
                if len(row) < 2:
                    continue
                domain = row[1]
                if domain == "Domain":
                    continue
                dest.write(f"{domain}\n")
                written += 1
    return written


if __name__ == "__main__":
    shorten_csv()
|
||||||
10001
edge_history.csv
Normal file
10001
edge_history.csv
Normal file
File diff suppressed because it is too large
Load Diff
39
main.py
Normal file
39
main.py
Normal file
@ -0,0 +1,39 @@
|
|||||||
|
import csv
|
||||||
|
import random
|
||||||
|
|
||||||
|
def generate_random_title(domain, ip=False):
    """Return a fake page title of the form ``"<label> - <page>"``.

    Args:
        domain: Host name or IP address the title is built from.
        ip: When true, *domain* is used unchanged as the label; otherwise
            only the text before the first ``.`` is used.

    Returns:
        The label joined with a randomly chosen common page name.
    """
    page_names = [
        "home", "about", "contact", "login", "register", "dashboard",
        "profile", "settings", "help", "faq", "blog", "news", "events",
        "gallery", "products", "services", "testimonials", "careers",
        "terms", "privacy", "sitemap", "search", "404",
    ]
    # An IP address has no meaningful "first label", so it is kept whole.
    label = domain if ip else domain.split('.')[0]
    page = random.choice(page_names)
    return f"{label} - {page}"
|
||||||
|
|
||||||
|
def _read_nonblank_lines(path):
    """Return the stripped, non-empty lines of the text file at *path*."""
    with open(path, 'r', encoding='utf-8') as handle:
        return [text for text in (line.strip() for line in handle) if text]


def create_edge_history_csv(domain_file, ip_file, output_csv, ip_chance=0.15, num_rows=10000):
    """Write a CSV of randomly generated browsing-history rows.

    Each row has a ``URL`` column (a random entry from *ip_file* or
    *domain_file*) and a ``Page Title`` column produced by
    ``generate_random_title``.

    Args:
        domain_file: Text file with one domain per line.
        ip_file: Text file with one IP address per line.
        output_csv: Path of the CSV file to (over)write.
        ip_chance: Probability that a row uses an IP address instead of a
            domain.
        num_rows: Number of data rows to generate (header not included).

    Raises:
        ValueError: If rows are requested and a pool that may be drawn from
            (IPs when ``ip_chance > 0``, domains when ``ip_chance < 1``)
            contains no entries.
    """
    # Read both inputs before touching the output, so a missing or unusable
    # input file does not leave a truncated output behind.
    domains = _read_nonblank_lines(domain_file)
    ips = _read_nonblank_lines(ip_file)

    # random.random() is in [0, 1): IPs can only be picked when ip_chance > 0
    # and domains only when ip_chance < 1.
    if num_rows > 0:
        if ip_chance > 0 and not ips:
            raise ValueError(f"no IP addresses found in {ip_file!r}")
        if ip_chance < 1 and not domains:
            raise ValueError(f"no domains found in {domain_file!r}")

    with open(output_csv, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=['URL', 'Page Title'])
        writer.writeheader()

        for _ in range(num_rows):
            # Based on the chance, use an IP address; otherwise a domain.
            use_ip = random.random() < ip_chance
            url = random.choice(ips if use_ip else domains)
            title = generate_random_title(url, ip=use_ip)
            writer.writerow({'URL': url, 'Page Title': title})
|
||||||
|
|
||||||
|
# Domains are read from 'top10kdomains.csv' and IP addresses from 'ips.txt';
# replace these with the paths to your own files.
# The output will be saved in 'edge_history.csv'
# Guarded so that importing this module does not generate the file.
if __name__ == "__main__":
    create_edge_history_csv('top10kdomains.csv', 'ips.txt', 'edge_history.csv')
|
||||||
10001
top10kdomains.csv
Normal file
10001
top10kdomains.csv
Normal file
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user