Skip to content

Commit

Permalink
test: Rewrite automatch tests in a cleaner style and add an async test
Browse files Browse the repository at this point in the history
  • Loading branch information
D4Vinci committed Dec 16, 2024
1 parent 69e3161 commit 20ef453
Showing 1 changed file with 62 additions and 7 deletions.
69 changes: 62 additions & 7 deletions tests/parser/test_automatch.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
import unittest
import asyncio

from scrapling import Adaptor
import pytest

from scrapling import Adaptor

class TestParserAutoMatch(unittest.TestCase):

class TestParserAutoMatch:
def test_element_relocation(self):
"""Test relocating element after structure change"""
original_html = '''
Expand Down Expand Up @@ -50,7 +51,61 @@ def test_element_relocation(self):
_ = old_page.css('#p1, #p2', auto_save=True)[0]
relocated = new_page.css('#p1', auto_match=True)

self.assertIsNotNone(relocated)
self.assertEqual(relocated[0].attrib['data-id'], 'p1')
self.assertTrue(relocated[0].has_class('new-class'))
self.assertEqual(relocated[0].css('.new-description')[0].text, 'Description 1')
assert relocated is not None
assert relocated[0].attrib['data-id'] == 'p1'
assert relocated[0].has_class('new-class')
assert relocated[0].css('.new-description')[0].text == 'Description 1'

@pytest.mark.asyncio
async def test_element_relocation_async(self):
    """Test relocating an element after a structure change, from a coroutine.

    The element is saved from the original markup with the combined selector
    ``#p1, #p2`` and is then requested with ``#p1`` from markup in which the
    ``id`` attributes became ``data-id`` and the surrounding structure changed.
    """
    original_html = '''
<div class="container">
<section class="products">
<article class="product" id="p1">
<h3>Product 1</h3>
<p class="description">Description 1</p>
</article>
<article class="product" id="p2">
<h3>Product 2</h3>
<p class="description">Description 2</p>
</article>
</section>
</div>
'''
    changed_html = '''
<div class="new-container">
<div class="product-wrapper">
<section class="products">
<article class="product new-class" data-id="p1">
<div class="product-info">
<h3>Product 1</h3>
<p class="new-description">Description 1</p>
</div>
</article>
<article class="product new-class" data-id="p2">
<div class="product-info">
<h3>Product 2</h3>
<p class="new-description">Description 2</p>
</div>
</article>
</section>
</div>
</div>
'''

    # Yield to the event loop once so the test really runs as a coroutine,
    # without adding a fixed delay to the test suite.
    await asyncio.sleep(0)

    old_page = Adaptor(original_html, url='example.com', auto_match=True)
    new_page = Adaptor(changed_html, url='example.com', auto_match=True)

    # 'p1' used to be an ID and no longer is, and every element on the path
    # to it has changed. This also exercises auto-match against a combined
    # selector at the same time.
    _ = old_page.css('#p1, #p2', auto_save=True)[0]
    relocated = new_page.css('#p1', auto_match=True)

    assert relocated is not None
    # Guard the indexing below so that an empty result is reported as a
    # failed assertion instead of an error raised by `relocated[0]`.
    assert len(relocated) > 0, "auto-match did not relocate any element"

    first_match = relocated[0]
    assert first_match.attrib['data-id'] == 'p1'
    assert first_match.has_class('new-class')
    assert first_match.css('.new-description')[0].text == 'Description 1'

0 comments on commit 20ef453

Please sign in to comment.