Commit

update parsers
JMousqueton committed Sep 19, 2024
1 parent 0e721cb commit 247b5b8
Showing 15 changed files with 159 additions and 28 deletions.
3 changes: 2 additions & 1 deletion parsers/8base.py
@@ -35,7 +35,8 @@ def main():
                    except:
                        published = datetime.strptime(date_string, "%d.%m.%y").strftime("%Y-%m-%d %H:%M:%S.%f")
                    description = div.find('div', class_='small opacity-50').text.strip()
                    #appender(name, '8base', description.replace('\n',' '),"",published,link)
                    appender(name, '8base', description,"",published,link)
                file.close()
    except:
        errlog('8base : ' + 'parsing fail')
24 changes: 9 additions & 15 deletions parsers/ElDorado.py
@@ -42,20 +42,14 @@ def main():
                html_doc='source/'+filename
                file=open(html_doc,'r')
                soup=BeautifulSoup(file,'html.parser')
                articles = soup.find_all('article')
                for article in articles:
                    title_tag = article.find('h1')
                    link_tag = article.find('a', href=True)
                    summary_tag = article.find('p', class_='opacity-70')
                    tags = article.find_all('span', class_='inline-flex')
                    if title_tag and link_tag:
                        title = title_tag.get_text(strip=True)
                        link = link_tag['href']
                        summary = summary_tag.get_text(strip=True) if summary_tag else ""
                        tags_list = [tag.get_text(strip=True) for tag in tags]
                        tags_summary = ' '.join(tags_list)
                        full_summary = f"{summary} Tags: {tags_summary}" if tags_summary else summary

                        appender(title, group_name, full_summary,"","",link )
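                # Victim cards are generic 'u-container-style' <div>s; the description,
                # image and data-href link are extracted, but only the title is appended.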
                containers = soup.find_all('div', class_='u-container-style')
                for container in containers:
                    title = container.find('h5', class_='u-text-default') # Find the title
                    description = container.find('p', class_='u-text') # Find the description
                    image = container.find('img') # Find the image
                    href = container.get('data-href') # Get the href attribute
                    if title:

                        appender(title.text, group_name, "","","","")
        except Exception as e:
            errlog(group_name + ' - parsing fail with error: ' + str(e) + ' in file: ' + filename)
59 changes: 59 additions & 0 deletions parsers/ValenciaLeaks.py
@@ -0,0 +1,59 @@
"""
From Template v3 - 20240807
+----------------------------------------------+
| Description | Website | published | post URL |
+-----------------------+-----------+----------+
| X | | | X |
+-----------------------+-----------+----------+
Rappel : def appender(post_title, group_name, description="", website="", published="", post_url="")
"""

import os,datetime,sys,re
from bs4 import BeautifulSoup
from datetime import datetime

## Import Ransomware.live libs
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), 'libs')))
from ransomwarelive import stdlog, errlog, extract_md5_from_filename, find_slug_by_md5, appender

def main():

# Define the date format to convert to
date_format = "%Y-%m-%d %H:%M:%S.%f"

## Get the ransomware group name from the script name
script_path = os.path.abspath(__file__)
# If it's a symbolic link find the link source
if os.path.islink(script_path):
original_path = os.readlink(script_path)
if not os.path.isabs(original_path):
original_path = os.path.join(os.path.dirname(script_path), original_path)
original_path = os.path.abspath(original_path)
original_name = os.path.basename(original_path)
group_name = original_name.replace('.py','')
# else get the script name
else:
script_name = os.path.basename(script_path)
group_name = script_name.replace('.py','')

for filename in os.listdir('source'):
try:
if filename.startswith(group_name+'-'):
html_doc='source/'+filename
file=open(html_doc,'r')
soup=BeautifulSoup(file,'html.parser')
thread_boxes = soup.find_all('div', class_='thread-box')
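                # Each victim is a <div class="thread-box"> card; the leak deadline and
                # leak flag are exposed as data attributes (data-finaldate, data-leak).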
                for i, box in enumerate(thread_boxes, start=1):
                    card_title = box.find('h5', class_='card-title').string if box.find('h5', class_='card-title') else "N/A"
                    time_left = box.find('p', class_='time-left').string if box.find('p', class_='time-left') else "N/A"
                    data_finaldate = box.get('data-finaldate', 'N/A')
                    data_leak = box.get('data-leak', 'N/A')
                    size = box.find('span', class_='price').string if box.find('span', class_='price') else "N/A"
                    view_link = box.find('a', class_='btn').get('href') if box.find('a', class_='btn') else "N/A"
                    link = find_slug_by_md5(group_name, extract_md5_from_filename(html_doc)) + view_link
                    description = f"Data Exfiltrated : {size} - Leak Date : {data_finaldate}"

                    appender(card_title, group_name, description,"","",link)
        except Exception as e:
            errlog(group_name + ' - parsing fail with error: ' + str(e) + ' in file: ' + filename)
3 changes: 2 additions & 1 deletion parsers/abyss.py
@@ -28,7 +28,8 @@ def main():
                for div in divs_name:
                    title = div.find('h5',{"class": "card-title"}).text.strip()
                    description = div.find('p',{"class" : "card-text"}).text.strip()
                    # appender(title, 'abyss', description.replace('\n',' '))
                    appender(title, 'abyss', description)
                file.close()
    except:
        errlog('abyss: ' + 'parsing fail')
3 changes: 2 additions & 1 deletion parsers/akira.py
@@ -37,7 +37,8 @@ def main():
                    combined_datetime = datetime.datetime.combine(dt_object.date(), current_time)
                    published = combined_datetime.strftime("%Y-%m-%d %H:%M:%S.%f")
                    #published = dt_object.strftime("%Y-%m-%d %H:%M:%S.%f")
                    #appender(title.replace('\n',''), 'akira', description.replace('\n',' '),'',published)
                    appender(title.replace('\n',''), 'akira', description,'',published)
                file.close()
    except Exception as e:
        errlog(f"Akira parsing failed with error: {e}")
3 changes: 2 additions & 1 deletion parsers/alphv-api.py
@@ -92,4 +92,5 @@ def main():
    +------------------------------+------------------+----------+
    Reminder: def appender(post_title, group_name, description="", website="", published="", post_url=""):
    """
    appender(title.rstrip('.'), 'alphv', description.replace('\n',' '),url,convert_date(created_dt)+'.123456','http://alphvuzxyxv6ylumd2ngp46xzq3pw6zflomrghvxeuks6kklberrbmyd.onion/' + id)
    #appender(title.rstrip('.'), 'alphv', description.replace('\n',' '),url,convert_date(created_dt)+'.123456','http://alphvuzxyxv6ylumd2ngp46xzq3pw6zflomrghvxeuks6kklberrbmyd.onion/' + id)
    appender(title.rstrip('.'), 'alphv', description,url,convert_date(created_dt)+'.123456','http://alphvuzxyxv6ylumd2ngp46xzq3pw6zflomrghvxeuks6kklberrbmyd.onion/' + id)
5 changes: 3 additions & 2 deletions parsers/cactus.py
@@ -54,8 +54,9 @@ def main():
                published_at = datetime.strptime(post['attributes']['publishedAt'], "%Y-%m-%dT%H:%M:%S.%fZ")
                postdate = published_at.strftime("%Y-%m-%d %H:%M:%S.%f")
                #victim = post['attributes']['slug']
                content = strip_html_tags(post['attributes']['content'])
                content = cut_content_before_link(content).replace('6wuivqgrv2g7brcwhjw5co3vligiqowpumzkcyebku7i2busrvlxnzid','***************')
                content = post['attributes']['content']
                # content = strip_html_tags(post['attributes']['content'])
                # content = cut_content_before_link(content) # .replace('6wuivqgrv2g7brcwhjw5co3vligiqowpumzkcyebku7i2busrvlxnzid','***************')
                post_url = post['attributes']['slug'] # Replace 'link_field' with actual field name
                post_url = "https://cactusbloguuodvqjmnzlwetjlpj6aggc6iocwhuupb47laukux7ckid.onion/posts/" + post_url
                appender(victim,'cactus',content,website,postdate,post_url)
2 changes: 1 addition & 1 deletion parsers/ciphbit.py
@@ -50,7 +50,7 @@ def main():
                published = ''
                p_elements = h2.find_parent().find_all('p')
                description = ' '.join(p.get_text() for p in p_elements)
                description = description.replace('\n',' ')
                # description = description.replace('\n',' ')
                appender(victim, 'ciphbit', description,website,published)
            file.close()
    #except:
3 changes: 2 additions & 1 deletion parsers/clop.py
@@ -29,4 +29,5 @@ def main():
        if item in blacklist:
            continue
        post_url = "http://santat7kpllt6iyvqbr7q4amdv6dzrh6paatvyrzl7ry3zm72zigf4ad.onion/" + str.lower(item.replace(".","-"))
        appender(item, 'clop','_URL_','','',post_url)
        if "ARCHIVE" not in item:
            appender(item, 'clop','_URL_','','',post_url)
2 changes: 1 addition & 1 deletion parsers/dragonforce-api.py
@@ -51,7 +51,7 @@ def fetch_json_from_onion_url(onion_url):

    # Assuming the response contains JSON data, parse it
    #json_data = response.json()
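    # Read a locally captured snapshot of the API response rather than querying
    # the onion service here; /tmp/dragon.json is assumed to be fetched out of band.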
    json_data = openjson('./source/dragonforce.json')
    json_data = openjson('/tmp/dragon.json')
    return json_data

def main():
4 changes: 3 additions & 1 deletion parsers/killsec.py
@@ -38,6 +38,8 @@ def main():

                    # Extract the description from the post block's body
                    description = post.select_one('.post-block-text').text.strip() if post.select_one('.post-block-text') else 'No Description'
                    #appender(title,'killsec',description.replace('\n',' '),'','',url)
                    appender(title,'killsec',description,'','',url)

        except Exception as e:
            errlog(group_name + ' - parsing fail with error: ' + str(e) + ' in file: ' + filename)
3 changes: 2 additions & 1 deletion parsers/medusa-api.py
@@ -46,8 +46,9 @@ def main():
    json_onion_url= 'http://cx5u7zxbvrfyoj6ughw76oa264ucuuizmmzypwum6ear7pct4yc723qd.onion/api/search?company=&page=0'
    site_onion_url= 'http://cx5u7zxbvrfyoj6ughw76oa264ucuuizmmzypwum6ear7pct4yc723qd.onion/detail?id='

    json_data = fetch_json_from_onion_url(json_onion_url)
    # json_data = fetch_json_from_onion_url(json_onion_url)
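    # Parse a pre-fetched local copy of the API response; /tmp/medusa.json is
    # assumed to be captured by a separate fetch step.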
    try:
        json_data = openjson('/tmp/medusa.json')
        if json_data is not None:
            for item in json_data['list']:
                victim = item['company_name']
3 changes: 2 additions & 1 deletion parsers/medusa.py
@@ -36,7 +36,8 @@ def main():
                    title = title.lstrip()
                    description = div.find("div", {"class": "card-body"}).text.strip()
                    published = div.find("div", {"class": "date-updated"}).text.strip() + '.000000'
                    #appender(title.rstrip(), 'medusa', description.replace('\n',' '),'',published,post_url)
                    appender(title.rstrip(), 'medusa', description,'',published,post_url)
                except:
                    pass
            file.close()
68 changes: 68 additions & 0 deletions parsers/orca.py
@@ -0,0 +1,68 @@
"""
From Template v3 - 20240807
+----------------------------------------------+
| Description | Website | published | post URL |
+-----------------------+-----------+----------+
| X | | | X |
+-----------------------+-----------+----------+
Rappel : def appender(post_title, group_name, description="", website="", published="", post_url="")
"""

import os,datetime,sys,re
from bs4 import BeautifulSoup
from datetime import datetime

## Import Ransomware.live libs
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), 'libs')))
from ransomwarelive import stdlog, errlog, extract_md5_from_filename, find_slug_by_md5, appender

def main():

# Define the date format to convert to
date_format = "%Y-%m-%d %H:%M:%S.%f"

## Get the ransomware group name from the script name
script_path = os.path.abspath(__file__)
# If it's a symbolic link find the link source
if os.path.islink(script_path):
original_path = os.readlink(script_path)
if not os.path.isabs(original_path):
original_path = os.path.join(os.path.dirname(script_path), original_path)
original_path = os.path.abspath(original_path)
original_name = os.path.basename(original_path)
group_name = original_name.replace('.py','')
# else get the script name
else:
script_name = os.path.basename(script_path)
group_name = script_name.replace('.py','')

for filename in os.listdir('source'):
try:
if filename.startswith(group_name+'-'):
html_doc='source/'+filename
file=open(html_doc,'r')
soup=BeautifulSoup(file,'html.parser')
blog_cards = soup.find_all('div', class_='blog__card')
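                # Each victim post is a <div class="blog__card"> with title, date,
                # description and links nested inside; the introductory card is skipped.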
                for card in blog_cards:
                    # Extract title of the blog post
                    title = card.find('h2', class_='blog__card-top-info-title').get_text(strip=True)

                    # Extract date of publication
                    publication_date = card.find('p', class_='blog__card-top-date').find('span').get_text(strip=True)
                    # print(f'Date of Publication: {publication_date}')

                    # Extract description of the publication
                    description = card.find('div', class_='blog__card-description').find('p', class_='blog__card-description-text').get_text(strip=True)

                    # Extract company URL if it exists
                    url_element = card.find('a', class_='blog__card-details-item-text --small-title --blog__card-details-item-text=;oml')
                    company_url = url_element['href'].replace('https://','') if url_element else ''

                    # Extract the main link
                    main_link = card.find('a', class_='blog__card-btn --button')['href']

                    if title != 'INTRODUCTION':
                        appender(title, group_name, description,company_url,"",main_link)

        except Exception as e:
            errlog(group_name + ' - parsing fail with error: ' + str(e) + ' in file: ' + filename)
2 changes: 1 addition & 1 deletion parsers/threeam.py
@@ -26,7 +26,7 @@ def main():
            soup=BeautifulSoup(file,'html.parser')
            post_divs = soup.find_all('div', class_='post')
            for post_div in post_divs:
                post_title = post_div.find('div', class_='post-title-block').text.strip().replace('published files','').replace('...','')
                post_title = post_div.find('div', class_='post-title-block').text.strip().replace('published files','').replace('...','').replace('NEW','')
                victim = post_title.split('\n')[0].strip()
                description = post_div.find('div', class_='post-text').text.strip()
                link = post_div.find('a', class_='post-more-link')