Commit

update parsers
JMousqueton committed Sep 19, 2024
1 parent 0e721cb commit 247b5b8
Showing 15 changed files with 159 additions and 28 deletions.
3 changes: 2 additions & 1 deletion parsers/8base.py
@@ -35,7 +35,8 @@ def main():
                    except:
                        published = datetime.strptime(date_string, "%d.%m.%y").strftime("%Y-%m-%d %H:%M:%S.%f")
                    description = div.find('div', class_='small opacity-50').text.strip()
                    #appender(name, '8base', description.replace('\n',' '),"",published,link)
                    appender(name, '8base', description,"",published,link)
                file.close()
    except:
        errlog('8base : ' + 'parsing fail')
24 changes: 9 additions & 15 deletions parsers/ElDorado.py
@@ -42,20 +42,14 @@ def main():
                html_doc='source/'+filename
                file=open(html_doc,'r')
                soup=BeautifulSoup(file,'html.parser')
                articles = soup.find_all('article')
                for article in articles:
                    title_tag = article.find('h1')
                    link_tag = article.find('a', href=True)
                    summary_tag = article.find('p', class_='opacity-70')
                    tags = article.find_all('span', class_='inline-flex')
                    if title_tag and link_tag:
                        title = title_tag.get_text(strip=True)
                        link = link_tag['href']
                        summary = summary_tag.get_text(strip=True) if summary_tag else ""
                        tags_list = [tag.get_text(strip=True) for tag in tags]
                        tags_summary = ' '.join(tags_list)
                        full_summary = f"{summary} Tags: {tags_summary}" if tags_summary else summary

                        appender(title, group_name, full_summary,"","",link )
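                # Victim cards are generic 'u-container-style' <div>s; the description,
                # image and data-href link are extracted, but only the title is appended.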
                containers = soup.find_all('div', class_='u-container-style')
                for container in containers:
                    title = container.find('h5', class_='u-text-default') # Find the title
                    description = container.find('p', class_='u-text') # Find the description
                    image = container.find('img') # Find the image
                    href = container.get('data-href') # Get the href attribute
                    if title:

                        appender(title.text, group_name, "","","","")
        except Exception as e:
            errlog(group_name + ' - parsing fail with error: ' + str(e) + ' in file: ' + filename)
59 changes: 59 additions & 0 deletions parsers/ValenciaLeaks.py
@@ -0,0 +1,59 @@
"""
From Template v3 - 20240807
+----------------------------------------------+
| Description | Website | published | post URL |
+-----------------------+-----------+----------+
| X | | | X |
+-----------------------+-----------+----------+
Rappel : def appender(post_title, group_name, description="", website="", published="", post_url="")
"""

import os,datetime,sys,re
from bs4 import BeautifulSoup
from datetime import datetime

## Import Ransomware.live libs
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), 'libs')))
from ransomwarelive import stdlog, errlog, extract_md5_from_filename, find_slug_by_md5, appender

def main():

# Define the date format to convert to
date_format = "%Y-%m-%d %H:%M:%S.%f"

## Get the ransomware group name from the script name
script_path = os.path.abspath(__file__)
# If it's a symbolic link find the link source
if os.path.islink(script_path):
original_path = os.readlink(script_path)
if not os.path.isabs(original_path):
original_path = os.path.join(os.path.dirname(script_path), original_path)
original_path = os.path.abspath(original_path)
original_name = os.path.basename(original_path)
group_name = original_name.replace('.py','')
# else get the script name
else:
script_name = os.path.basename(script_path)
group_name = script_name.replace('.py','')

for filename in os.listdir('source'):
try:
if filename.startswith(group_name+'-'):
html_doc='source/'+filename
file=open(html_doc,'r')
soup=BeautifulSoup(file,'html.parser')
thread_boxes = soup.find_all('div', class_='thread-box')
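                # Each victim is a <div class="thread-box"> card; the leak deadline and
                # leak flag are exposed as data attributes (data-finaldate, data-leak).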
                for i, box in enumerate(thread_boxes, start=1):
                    card_title = box.find('h5', class_='card-title').string if box.find('h5', class_='card-title') else "N/A"
                    time_left = box.find('p', class_='time-left').string if box.find('p', class_='time-left') else "N/A"
                    data_finaldate = box.get('data-finaldate', 'N/A')
                    data_leak = box.get('data-leak', 'N/A')
                    size = box.find('span', class_='price').string if box.find('span', class_='price') else "N/A"
                    view_link = box.find('a', class_='btn').get('href') if box.find('a', class_='btn') else "N/A"
                    link = find_slug_by_md5(group_name, extract_md5_from_filename(html_doc)) + view_link
                    description = f"Data Exfiltrated : {size} - Leak Date : {data_finaldate}"

                    appender(card_title, group_name, description,"","",link)
        except Exception as e:
            errlog(group_name + ' - parsing fail with error: ' + str(e) + ' in file: ' + filename)
3 changes: 2 additions & 1 deletion parsers/abyss.py
@@ -28,7 +28,8 @@ def main():
                for div in divs_name:
                    title = div.find('h5',{"class": "card-title"}).text.strip()
                    description = div.find('p',{"class" : "card-text"}).text.strip()
                    # appender(title, 'abyss', description.replace('\n',' '))
                    appender(title, 'abyss', description)
                file.close()
    except:
        errlog('abyss: ' + 'parsing fail')
3 changes: 2 additions & 1 deletion parsers/akira.py
@@ -37,7 +37,8 @@ def main():
                    combined_datetime = datetime.datetime.combine(dt_object.date(), current_time)
                    published = combined_datetime.strftime("%Y-%m-%d %H:%M:%S.%f")
                    #published = dt_object.strftime("%Y-%m-%d %H:%M:%S.%f")
                    #appender(title.replace('\n',''), 'akira', description.replace('\n',' '),'',published)
                    appender(title.replace('\n',''), 'akira', description,'',published)
                file.close()
    except Exception as e:
        errlog(f"Akira parsing failed with error: {e}")
3 changes: 2 additions & 1 deletion parsers/alphv-api.py
@@ -92,4 +92,5 @@ def main():
    +------------------------------+------------------+----------+
    Reminder: def appender(post_title, group_name, description="", website="", published="", post_url=""):
    """
    appender(title.rstrip('.'), 'alphv', description.replace('\n',' '),url,convert_date(created_dt)+'.123456','http://alphvuzxyxv6ylumd2ngp46xzq3pw6zflomrghvxeuks6kklberrbmyd.onion/' + id)
    #appender(title.rstrip('.'), 'alphv', description.replace('\n',' '),url,convert_date(created_dt)+'.123456','http://alphvuzxyxv6ylumd2ngp46xzq3pw6zflomrghvxeuks6kklberrbmyd.onion/' + id)
    appender(title.rstrip('.'), 'alphv', description,url,convert_date(created_dt)+'.123456','http://alphvuzxyxv6ylumd2ngp46xzq3pw6zflomrghvxeuks6kklberrbmyd.onion/' + id)
5 changes: 3 additions & 2 deletions parsers/cactus.py
@@ -54,8 +54,9 @@ def main():
                published_at = datetime.strptime(post['attributes']['publishedAt'], "%Y-%m-%dT%H:%M:%S.%fZ")
                postdate = published_at.strftime("%Y-%m-%d %H:%M:%S.%f")
                #victim = post['attributes']['slug']
                content = strip_html_tags(post['attributes']['content'])
                content = cut_content_before_link(content).replace('6wuivqgrv2g7brcwhjw5co3vligiqowpumzkcyebku7i2busrvlxnzid','***************')
                content = post['attributes']['content']
                # content = strip_html_tags(post['attributes']['content'])
                # content = cut_content_before_link(content) # .replace('6wuivqgrv2g7brcwhjw5co3vligiqowpumzkcyebku7i2busrvlxnzid','***************')
                post_url = post['attributes']['slug'] # Replace 'link_field' with actual field name
                post_url = "https://cactusbloguuodvqjmnzlwetjlpj6aggc6iocwhuupb47laukux7ckid.onion/posts/" + post_url
                appender(victim,'cactus',content,website,postdate,post_url)
2 changes: 1 addition & 1 deletion parsers/ciphbit.py
@@ -50,7 +50,7 @@ def main():
                published = ''
                p_elements = h2.find_parent().find_all('p')
                description = ' '.join(p.get_text() for p in p_elements)
                description = description.replace('\n',' ')
                # description = description.replace('\n',' ')
                appender(victim, 'ciphbit', description,website,published)
            file.close()
    #except:
3 changes: 2 additions & 1 deletion parsers/clop.py
@@ -29,4 +29,5 @@ def main():
        if item in blacklist:
            continue
        post_url = "http://santat7kpllt6iyvqbr7q4amdv6dzrh6paatvyrzl7ry3zm72zigf4ad.onion/" + str.lower(item.replace(".","-"))
        appender(item, 'clop','_URL_','','',post_url)
        if "ARCHIVE" not in item:
            appender(item, 'clop','_URL_','','',post_url)
2 changes: 1 addition & 1 deletion parsers/dragonforce-api.py
@@ -51,7 +51,7 @@ def fetch_json_from_onion_url(onion_url):

    # Assuming the response contains JSON data, parse it
    #json_data = response.json()
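    # Read a locally captured snapshot of the API response rather than querying
    # the onion service here; /tmp/dragon.json is assumed to be fetched out of band.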
    json_data = openjson('./source/dragonforce.json')
    json_data = openjson('/tmp/dragon.json')
    return json_data

def main():
4 changes: 3 additions & 1 deletion parsers/killsec.py
@@ -38,6 +38,8 @@ def main():

                    # Extract the description from the post block's body
                    description = post.select_one('.post-block-text').text.strip() if post.select_one('.post-block-text') else 'No Description'
                    #appender(title,'killsec',description.replace('\n',' '),'','',url)
                    appender(title,'killsec',description,'','',url)

        except Exception as e:
            errlog(group_name + ' - parsing fail with error: ' + str(e) + ' in file: ' + filename)
3 changes: 2 additions & 1 deletion parsers/medusa-api.py
@@ -46,8 +46,9 @@ def main():
    json_onion_url= 'http://cx5u7zxbvrfyoj6ughw76oa264ucuuizmmzypwum6ear7pct4yc723qd.onion/api/search?company=&page=0'
    site_onion_url= 'http://cx5u7zxbvrfyoj6ughw76oa264ucuuizmmzypwum6ear7pct4yc723qd.onion/detail?id='

    json_data = fetch_json_from_onion_url(json_onion_url)
    # json_data = fetch_json_from_onion_url(json_onion_url)
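    # Parse a pre-fetched local copy of the API response; /tmp/medusa.json is
    # assumed to be captured by a separate fetch step.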
    try:
        json_data = openjson('/tmp/medusa.json')
        if json_data is not None:
            for item in json_data['list']:
                victim = item['company_name']
3 changes: 2 additions & 1 deletion parsers/medusa.py
@@ -36,7 +36,8 @@ def main():
                    title = title.lstrip()
                    description = div.find("div", {"class": "card-body"}).text.strip()
                    published = div.find("div", {"class": "date-updated"}).text.strip() + '.000000'
                    #appender(title.rstrip(), 'medusa', description.replace('\n',' '),'',published,post_url)
                    appender(title.rstrip(), 'medusa', description,'',published,post_url)
                except:
                    pass
            file.close()
68 changes: 68 additions & 0 deletions parsers/orca.py
@@ -0,0 +1,68 @@
"""
From Template v3 - 20240807
+----------------------------------------------+
| Description | Website | published | post URL |
+-----------------------+-----------+----------+
| X | | | X |
+-----------------------+-----------+----------+
Rappel : def appender(post_title, group_name, description="", website="", published="", post_url="")
"""

import os,datetime,sys,re
from bs4 import BeautifulSoup
from datetime import datetime

## Import Ransomware.live libs
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), 'libs')))
from ransomwarelive import stdlog, errlog, extract_md5_from_filename, find_slug_by_md5, appender

def main():

# Define the date format to convert to
date_format = "%Y-%m-%d %H:%M:%S.%f"

## Get the ransomware group name from the script name
script_path = os.path.abspath(__file__)
# If it's a symbolic link find the link source
if os.path.islink(script_path):
original_path = os.readlink(script_path)
if not os.path.isabs(original_path):
original_path = os.path.join(os.path.dirname(script_path), original_path)
original_path = os.path.abspath(original_path)
original_name = os.path.basename(original_path)
group_name = original_name.replace('.py','')
# else get the script name
else:
script_name = os.path.basename(script_path)
group_name = script_name.replace('.py','')

for filename in os.listdir('source'):
try:
if filename.startswith(group_name+'-'):
html_doc='source/'+filename
file=open(html_doc,'r')
soup=BeautifulSoup(file,'html.parser')
blog_cards = soup.find_all('div', class_='blog__card')
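                # Each victim post is a <div class="blog__card"> with title, date,
                # description and links nested inside; the introductory card is skipped.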
                for card in blog_cards:
                    # Extract title of the blog post
                    title = card.find('h2', class_='blog__card-top-info-title').get_text(strip=True)

                    # Extract date of publication
                    publication_date = card.find('p', class_='blog__card-top-date').find('span').get_text(strip=True)
                    # print(f'Date of Publication: {publication_date}')

                    # Extract description of the publication
                    description = card.find('div', class_='blog__card-description').find('p', class_='blog__card-description-text').get_text(strip=True)

                    # Extract company URL if it exists
                    url_element = card.find('a', class_='blog__card-details-item-text --small-title --blog__card-details-item-text=;oml')
                    company_url = url_element['href'].replace('https://','') if url_element else ''

                    # Extract the main link
                    main_link = card.find('a', class_='blog__card-btn --button')['href']

                    if title != 'INTRODUCTION':
                        appender(title, group_name, description,company_url,"",main_link)

        except Exception as e:
            errlog(group_name + ' - parsing fail with error: ' + str(e) + ' in file: ' + filename)
2 changes: 1 addition & 1 deletion parsers/threeam.py
@@ -26,7 +26,7 @@ def main():
            soup=BeautifulSoup(file,'html.parser')
            post_divs = soup.find_all('div', class_='post')
            for post_div in post_divs:
                post_title = post_div.find('div', class_='post-title-block').text.strip().replace('published files','').replace('...','')
                post_title = post_div.find('div', class_='post-title-block').text.strip().replace('published files','').replace('...','').replace('NEW','')
                victim = post_title.split('\n')[0].strip()
                description = post_div.find('div', class_='post-text').text.strip()
                link = post_div.find('a', class_='post-more-link')