Changed GitDorker with Internon changes #42

Open · wants to merge 1 commit into master
147 changes: 76 additions & 71 deletions GitDorker.py
@@ -20,20 +20,26 @@
purpose of this tool is to enumerate interesting users, repos, and files to provide an
easy-to-read overview of where a potential sensitive information exposure may reside.

HELP: python3 GitDorker.py -h
Modified by: Internon
""")

# IMPORTS
import sys
import json
import time
import datetime
import argparse
import random
import requests
import csv
from functools import partial
from itertools import zip_longest
from termcolor import colored
from multiprocessing.dummy import Pool
import multiprocessing
import threading
import tqdm
import signal

# API CONFIG
GITHUB_API_URL = 'https://api.github.com'
@@ -53,7 +59,6 @@
help="organization's GitHub name (required or -org if query not specified)")
parser.add_argument("-t", "--token", help="your github token (required if token file not specififed)")
parser.add_argument("-tf", "--tokenfile", help="file containing new line separated github tokens ")
parser.add_argument("-e", "--threads", help="maximum n threads, default 1")
parser.add_argument("-p", "--positiveresults", action='store_true', help="display positive results only")
parser.add_argument("-o", "--output", help="output to file name (required or -o)")

@@ -67,7 +72,6 @@
organizations_list = []
users_list = []
keywords_list = []

# TOKEN ARGUMENT LOGIC
if args.token:
    tokens_list = args.token.split(',')
@@ -106,11 +110,6 @@
if args.organization:
    organizations_list = args.organization.split(',')


# if not args.query and not args.queryfile and not args.organization and not args.users and not args.userfile:
# parser.error('query or organization missing or users missing')

@@ -128,67 +127,35 @@
# NUMBER OF REQUESTS PER MINUTE (TOKENS MUST BE UNIQUE)
requests_per_minute = (len(tokens_list) * 30) - 1
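# Illustrative arithmetic: with 3 unique tokens this allows
# (3 * 30) - 1 = 89 requests per minute across all workers.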

# TOKEN ROUND ROBIN
n = -1


def token_round_robin():
    global n
    n = n + 1
    if n == len(tokens_list):
        n = 0
    current_token = tokens_list[n]
    return current_token
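# Illustrative behaviour, assuming tokens_list = ['tokenA', 'tokenB']:
#   token_round_robin()  # -> 'tokenA'
#   token_round_robin()  # -> 'tokenB'
#   token_round_robin()  # -> 'tokenA' (wraps around)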


mylock = threading.Lock()   # guards url_results_dict
mylock2 = threading.Lock()  # guards url_errors_dict
# API SEARCH FUNCTION
def api_search(url):
    if args.dorks:
        if args.keyword:
            sys.stdout.write(colored(
                '\r[#] $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$ Dorking with Keyword In Progress $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$ %d/%d\r' % (stats_dict['n_current'], stats_dict['n_total_urls']),
                "green"))
            sys.stdout.flush()
        else:
            sys.stdout.write(
                colored('\r[#] $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$ Dorking In Progress $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$ %d/%d\r' % (stats_dict['n_current'], stats_dict['n_total_urls']), "green"))
            sys.stdout.flush()

    elif args.keyword and not args.dorks:
        sys.stdout.write(
            colored('\r[#] $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$ Keyword Search In Progress $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$ %d/%d\r' % (stats_dict['n_current'], stats_dict['n_total_urls']),
                    "green"))
        sys.stdout.flush()

    stats_dict['n_current'] = stats_dict['n_current'] + 1
headers = {"Authorization": "token " + token_round_robin()}

created = multiprocessing.Process()
current = multiprocessing.current_process()
thread_number = created._identity[0]
token = tokens_list[thread_number-3]
headers = {"Authorization": "token " + token}
    try:
        time.sleep(random.uniform(0.5, 1.5))
        r = requests.get(url, headers=headers)
        json = r.json()  # note: shadows the imported json module within this function
        if args.limitbypass:
            if stats_dict['n_current'] % requests_per_minute == 0:
                for remaining in range(63, 0, -1):
                    sys.stdout.write("\r")
                    sys.stdout.write(colored(
                        "\r[#] (-_-)zzZZzzZZzzZZzzZZ sleeping to avoid rate limits. GitDorker will resume soon (-_-)zzZZzzZZzzZZzzZZ | {:2d} seconds remaining.\r".format(
                            remaining), "blue"))
                    sys.stdout.flush()
                    time.sleep(1)
        else:
            if stats_dict['n_current'] % 29 == 0:
                for remaining in range(63, 0, -1):
                    sys.stdout.write("\r")
                    sys.stdout.write(colored(
                        "\r[#] (-_-)zzZZzzZZzzZZzzZZ sleeping to avoid rate limits. GitDorker will resume soon (-_-)zzZZzzZZzzZZzzZZ | {:2d} seconds remaining.\r".format(
                            remaining), "blue"))
                    sys.stdout.flush()
                    time.sleep(1)

        # If this token is nearly exhausted, sleep until its rate-limit window resets.
        if int(r.headers["X-RateLimit-Remaining"]) <= 1:
            seconds = r.headers["X-RateLimit-Reset"]
            end_datetime = datetime.datetime.fromtimestamp(int(seconds))
            if (end_datetime - datetime.datetime.now()).total_seconds() <= 0:
                time.sleep(2)
            else:
                time.sleep((end_datetime - datetime.datetime.now()).total_seconds())
        if 'documentation_url' in json:
            print(colored("[-] error occurred: %s" % json['documentation_url'], 'red'))
            # GitDorker seems to work better without a delay here; failed URLs are
            # saved and retried by the error-processing loop below.
            # time.sleep(15)
            with mylock2:
                url_errors_dict[url] = json['documentation_url']
        else:
            with mylock:
                url_results_dict[url] = json['total_count']

    except Exception as e:
        print(colored("[-] error occurred: %s" % e, 'red'))
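
# A minimal alternative sketch (not part of this PR) for pinning one token to
# each worker without relying on Process()._identity offsets: hand the tokens
# out through the pool initializer. The names WORKER_TOKEN, _init_token, and
# _make_pool are hypothetical.
#
# WORKER_TOKEN = None
#
# def _init_token(token_queue):
#     global WORKER_TOKEN
#     WORKER_TOKEN = token_queue.get()  # each worker pops exactly one token
#     signal.signal(signal.SIGINT, signal.SIG_IGN)
#
# def _make_pool(tokens):
#     q = multiprocessing.Manager().Queue()
#     for t in tokens:
#         q.put(t)
#     return multiprocessing.Pool(len(tokens), _init_token, (q,))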
@@ -206,7 +173,9 @@ def __urlencode(str):
# DECLARE DICTIONARIES
url_dict = {}
results_dict = {}
url_results_dict = multiprocessing.Manager().dict()
url_errors_dict = multiprocessing.Manager().dict()
stats_dict = {
    'l_tokens': len(tokens_list),
    'n_current': 0,
@@ -311,21 +280,58 @@ def __urlencode(str):
sys.stdout.write(colored('[#] %d queries ran.\n' % len(queries_list), 'cyan'))
sys.stdout.write(colored('[#] %d urls generated.\n' % len(url_dict), 'cyan'))
sys.stdout.write(colored('[#] %d tokens being used.\n' % len(tokens_list), 'cyan'))
# GitHub's API rate limit is applied per account, so each token gets its own
# worker; by controlling timing against the rate-limit reset we can improve
# throughput while still catching errors.
sys.stdout.write(colored('[#] %d threads (add more tokens to run more threads and finish faster).\n' % len(tokens_list), 'cyan'))
if args.limitbypass:
    sys.stdout.write(colored('[#] %d requests per minute allowed\n' % requests_per_minute, 'cyan'))
else:
    sys.stdout.write(colored('[#] 29 requests per minute allowed\n', 'cyan'))
print("")
# SLEEP
time.sleep(1)

def init_worker():
    # workers ignore SIGINT so that Ctrl+C is handled once, in the parent
    signal.signal(signal.SIGINT, signal.SIG_IGN)
# POOL FUNCTION TO RUN API SEARCH
threads = len(tokens_list)
pool = multiprocessing.Pool(threads, init_worker)
whilecount = 1
startloop = time.time()
for _ in tqdm.tqdm(pool.imap(api_search, url_dict), total=len(url_dict)):
    pass
endloop = time.time()
print(colored("We are going to process the errors until no errors found", 'cyan'))
print(colored("PRESS CONTROL + C to stop the loops", 'cyan'))
print("")
try:
    while len(url_errors_dict) != 0:
        timeinloop = endloop - startloop
        whilecount = whilecount + 1
        print(colored("Time elapsed since starting loops: %s seconds" % int(float(timeinloop)), 'green'))
        print("")
        url_dict = url_errors_dict.copy()
        s = set()
        for val in url_errors_dict.values():
            s.add(val)
        print(colored("Error reasons on loop %s (a secondary-rate-limit error is normal):" % whilecount, 'cyan'))
        for error in s:
            if "secondary-rate-limits" not in error:
                print(colored(error, 'red'))
            else:
                print(colored(error, 'cyan'))
        print("")
        url_errors_dict.clear()
        time.sleep(10)
        for _ in tqdm.tqdm(pool.imap(api_search, url_dict), total=len(url_dict)):
            pass
        endloop = time.time()
    pool.close()
    pool.join()
except KeyboardInterrupt:
    print(colored("Stopped error processing; check the remaining errors manually.", 'red'))
    timeinloop = endloop - startloop
    print(colored("Time elapsed since starting loops: %s seconds" % timeinloop, 'green'))
    pool.terminate()
    pool.join()
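# Note: terminate() in the CTRL+C path stops workers immediately without letting
# in-flight requests finish, while close()/join() in the success path drains
# them cleanly before continuing to the results below.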
# SET COUNT
count = 0
keyword_count = 0
@@ -362,7 +368,6 @@ def __urlencode(str):
print("")

for url in results_dict[query]:
    if url in url_results_dict:
        if args.recentlyindexed:
            new_url = url.replace('https://api.github.com/search/code',
@@ -401,7 +406,7 @@

        else:
            failure = sys.stdout.write(colored('[-] ', 'red'))
            sys.stdout.write(colored('%s' % url, 'white'))
            count = count + 1
            print('')
