# nmgn.py
import os
import random
import string
import sys
from datetime import datetime, timedelta

# Path to the local file for storing names
local_file_path = "first-names.txt"


def load_names(file_path):
    """Return the non-blank names stored one per line in *file_path*.

    Surrounding whitespace is stripped and blank lines are dropped, so every
    returned name has at least one character.

    If the file does not exist, download instructions are printed and the
    process is terminated with exit status 1 (``SystemExit``).
    """
    if not os.path.exists(file_path):
        print(f"Error: The file '{file_path}' does not exist.")
        print("Please download the file from the following URL and place it in the same directory as this script:")
        print("https://gist.githubusercontent.com/elifiner/cc90fdd387449158829515782936a9a4/raw/fea1da1a3c4ce5c8e470f679a8e1bc741281a609/first-names.txt")
        sys.exit(1)
    # Explicit encoding: the platform default is not UTF-8 everywhere.
    with open(file_path, "r", encoding="utf-8") as file:
        stripped = (line.strip() for line in file.read().splitlines())
        return [name for name in stripped if name]


# Name pools. They start empty and are filled from ``local_file_path`` the
# first time a record is generated, so importing this module neither reads
# files nor terminates the interpreter.
first_names = []
last_names = first_names  # Assuming the same file contains both first and last names


def _ensure_names_loaded():
    """Fill the shared name pool on first use; exit if it ends up empty."""
    if first_names:
        return
    # Extend in place so ``last_names`` (the same list object) is filled too.
    first_names.extend(load_names(local_file_path))
    if not first_names:
        print(f"Error: The file '{local_file_path}' does not contain any names.")
        sys.exit(1)


# List of U.S. states for state of birth (SOB)
states = [
    "AL", "AK", "AZ", "AR", "CA", "CO", "CT", "DE", "FL", "GA",
    "HI", "ID", "IL", "IN", "IA", "KS", "KY", "LA", "ME", "MD",
    "MA", "MI", "MN", "MS", "MO", "MT", "NE", "NV", "NH", "NJ",
    "NM", "NY", "NC", "ND", "OH", "OK", "OR", "PA", "RI", "SC",
    "SD", "TN", "TX", "UT", "VT", "VA", "WA", "WV", "WI", "WY"
]

domains = [
    "earthlink.net", "verizon.net", "msn.com", "yahoo.com", "aol.com",
    "protonmail.com", "icloud.com", "outlook.com", "zoho.com", "mail.com",
    "yandex.com", "fastmail.com", "rocketmail.com", "gmx.com", "tutanota.com",
    "mail.ru", "hushmail.com", "guerrillamail.com", "inbox.com", "sendinblue.com",
    "lavabit.com", "lycos.com", "aol.co.uk", "myway.com", "bluebottle.com",
    "unseen.is", "chime.com", "walla.com", "mindspring.com", "gawab.com",
    "openmailbox.org", "rediffmail.com", "epix.net", "gmail.com", "hotmail.com",
    "live.com", "yahoo.co.uk", "icloud.co.uk", "outlook.co.uk", "mailchimp.com",
    "zoho.co.uk", "aol.com.au", "msn.co.uk", "seznam.cz", "t-online.de",
    "orange.fr", "bellsouth.net", "comcast.net", "btinternet.com", "charter.net",
    "cox.net", "shaw.ca", "telus.net", "sbcglobal.net", "att.net", "roadrunner.com",

    # Government Domains
    "usa.gov", "gov.uk", "canada.ca", "gov.au", "gov.in", "gov.sg",
    "gov.za", "gov.ph", "gov.br", "gov.de", "gov.fr", "gov.it", "gov.jp",

    # Military Domains
    "army.mil", "navy.mil", "af.mil", "usmc.mil", "uscg.mil", "spaceforce.mil",

    # Educational Domains
    "harvard.edu", "stanford.edu", "mit.edu", "ox.ac.uk", "cam.ac.uk",
    "berkeley.edu", "umich.edu", "columbia.edu", "yale.edu", "princeton.edu",
    "ucla.edu", "nyu.edu", "utexas.edu", "uchicago.edu", "upenn.edu",
    "caltech.edu", "duke.edu", "northwestern.edu", "uw.edu", "gatech.edu",
]


def generate_email(first_name, last_name):
    """Return a lower-cased e-mail address built from the two names.

    The local part is one of ``first.last``, ``firstlast`` or ``flast``
    (first initial + last name); the domain is drawn from ``domains``.
    """
    domain = random.choice(domains)
    local_part = random.choice([
        f"{first_name}.{last_name}",
        f"{first_name}{last_name}",
        # Slice instead of index so an empty first name cannot raise IndexError.
        f"{first_name[:1]}{last_name}",
    ])
    return f"{local_part}@{domain}".lower()


def generate_password():
    """Return a random 8-16 character string of letters, digits and punctuation."""
    chars = string.ascii_letters + string.digits + string.punctuation
    password_length = random.randint(8, 16)
    return "".join(random.choices(chars, k=password_length))


def _years_before(moment, years):
    """Return *moment* shifted back by *years* calendar years."""
    try:
        return moment.replace(year=moment.year - years)
    except ValueError:
        # Feb 29 has no counterpart in a non-leap target year; use Feb 28.
        return moment.replace(year=moment.year - years, month=2, day=28)


def generate_dob():
    """Return a random date of birth as ``YYYY-MM-DD``, between 18 and 70 years ago.

    The bounds are computed in calendar years rather than 365-day years, so
    leap days cannot push the result to an age just under 18.
    """
    today = datetime.today()
    start_date = _years_before(today, 70)  # 70 years ago
    end_date = _years_before(today, 18)  # 18 years ago
    span_seconds = (end_date - start_date).total_seconds()
    random_date = start_date + timedelta(seconds=span_seconds * random.random())
    return random_date.strftime("%Y-%m-%d")


def generate_sob():
    """Return a random state of birth (SOB) with a 50% chance, else ``"N/A"``."""
    if random.random() < 0.5:  # 50% chance
        return random.choice(states)
    return "N/A"  # Return "N/A" if no SOB is generated


def generate_phone_number():
    """Return a random ``NNN-NNN-NNNN`` phone number with a 70% chance, else ``"N/A"``."""
    if random.random() < 0.7:  # 70% chance
        return f"{random.randint(200, 999)}-{random.randint(100, 999)}-{random.randint(1000, 9999)}"
    return "N/A"  # Return "N/A" if no phone number is generated


def generate_9_digit_number():
    """Return a 9-digit number with 6 digits masked by ``*`` (50% chance), else ``"N/A"``."""
    if random.random() < 0.5:  # 50% chance
        digits = list(f"{random.randint(100000000, 999999999)}")
        # Replace 6 out of 9 digits with *, at randomly selected positions
        for index in random.sample(range(9), 6):
            digits[index] = "*"
        return "".join(digits)
    return "N/A"  # Return "N/A" if no 9-digit number is generated


def generate_line():
    """Return one newline-terminated record of fake data.

    Fields are separated by ``" | "``: full name, e-mail, password, date of
    birth, state of birth, phone number and masked 9-digit number.
    """
    _ensure_names_loaded()
    first_name = random.choice(first_names)
    last_name = random.choice(last_names)
    email = generate_email(first_name, last_name)
    password = generate_password()
    dob = generate_dob()
    sob = generate_sob()
    phone_number = generate_phone_number()
    nine_digit_number = generate_9_digit_number()
    return f"{first_name} {last_name} | {email} | {password} | {dob} | {sob} | {phone_number} | {nine_digit_number}\n"


def generate_file(filename, size_bytes):
    """Write fake records to *filename* until at least *size_bytes* bytes are written.

    Returns the number of bytes written. The file is opened as UTF-8 with
    ``"\\n"`` line endings so the running byte count matches the size on disk
    on every platform.
    """
    progress_step = 1024 * 1024  # report roughly once per MiB, not once per line
    next_report = progress_step
    written_size = 0
    with open(filename, "w", encoding="utf-8", newline="\n") as file:
        while written_size < size_bytes:
            line = generate_line()
            file.write(line)
            written_size += len(line.encode("utf-8"))  # Account for byte size
            if written_size >= next_report:
                print(f"Written: {written_size / (1024 * 1024):.2f} MB", end="\r")
                next_report = written_size + progress_step
    # Always show the final total, even when less than one step was written.
    print(f"Written: {written_size / (1024 * 1024):.2f} MB", end="\r")
    return written_size


def resolve_size(size_input):
    """Translate the user's size figure into ``(size_bytes, size_label)``.

    Values of 1 or more are gigabytes (``1`` -> 1 GB). Values below 1 are a
    fraction of a gigabyte expressed in megabytes (``0.5`` -> 500 MB), which
    is what the prompt and the file-name label promise.
    """
    mebibyte = 1024 * 1024
    if size_input >= 1:
        amount, unit = size_input, "gb"
        size_bytes = int(size_input * 1024 * mebibyte)
    else:
        # Round away float noise so the label stays clean.
        amount, unit = round(size_input * 1000, 6), "mb"
        size_bytes = int(amount * mebibyte)
    label_amount = int(amount) if float(amount).is_integer() else amount
    return size_bytes, f"{label_amount}{unit}"


def main():
    """Prompt for a size and generate the corresponding fake-data file."""
    # Fail fast on a missing or empty names file, before prompting.
    _ensure_names_loaded()

    # Ask the user for the desired file size
    raw_size = input("Enter the desired file size (e.g., 1 for 1GB, 0.5 for 500MB, 0.1 for 100MB): ")
    try:
        size_input = float(raw_size)
    except ValueError:
        print(f"Error: '{raw_size}' is not a valid number.")
        sys.exit(1)
    # The chained comparison also rejects NaN, which compares false to everything.
    if not 0 < size_input < float("inf"):
        print("Error: The file size must be a positive, finite number.")
        sys.exit(1)

    size_bytes, size_label = resolve_size(size_input)

    # Adjust the filename based on the size
    filename = f"fake_data_{size_label}.txt"

    print(f"Generating {size_label.upper()} of fake data in '{filename}'...")
    generate_file(filename, size_bytes)
    print("\nDone! File generated successfully.")


# Run the script
if __name__ == "__main__":
    main()