From 6222a16a2fec477e7a6e610e0fdd4960e7ccd1b5 Mon Sep 17 00:00:00 2001
From: semantic-release-bot <semantic-release-bot@martynus.net>
Date: Thu, 5 Dec 2024 18:35:41 +0000
Subject: [PATCH 1/4] ci(release): 1.1.0-beta.1 [skip ci]

## [1.1.0-beta.1](https://github.com/ScrapeGraphAI/langchain-scrapegraph/compare/v1.0.0...v1.1.0-beta.1) (2024-12-05)

### Features

* added markdownify and localscraper tools ([03e49dc](https://github.com/ScrapeGraphAI/langchain-scrapegraph/commit/03e49dce84ef5a1b7a59b6dfd046eb563c14d283))

### CI

* **release:** 1.0.0-beta.1 [skip ci] ([fc06960](https://github.com/ScrapeGraphAI/langchain-scrapegraph/commit/fc06960340c68ff325656074d47b0b793a3b25fe))
---
 CHANGELOG.md   | 12 ++++++++++++
 pyproject.toml |  2 +-
 2 files changed, 13 insertions(+), 1 deletion(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 14d999c..aa5cdbc 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,15 @@
+## [1.1.0-beta.1](https://github.com/ScrapeGraphAI/langchain-scrapegraph/compare/v1.0.0...v1.1.0-beta.1) (2024-12-05)
+
+
+### Features
+
+* added markdownify and localscraper tools ([03e49dc](https://github.com/ScrapeGraphAI/langchain-scrapegraph/commit/03e49dce84ef5a1b7a59b6dfd046eb563c14d283))
+
+
+### CI
+
+* **release:** 1.0.0-beta.1 [skip ci] ([fc06960](https://github.com/ScrapeGraphAI/langchain-scrapegraph/commit/fc06960340c68ff325656074d47b0b793a3b25fe))
+
 ## 1.0.0 (2024-12-05)
 
 ## 1.0.0-beta.1 (2024-12-05)
diff --git a/pyproject.toml b/pyproject.toml
index 46b49c3..4ca4f7b 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "langchain-scrapegraph"
-version = "1.0.0b1"
+version = "1.1.0b1"
 description = "Library for extracting structured data from websites using ScrapeGraphAI"
 authors = ["Marco Perini <marco.perini@scrapegraphai.com>", "Marco Vinciguerra <marco.vinciguerra@scrapegraphai.com>", "Lorenzo Padoan <lorenzo.padoan@scrapegraphai.com>"]
 license = "MIT"

From 34b5f1089059daa25c756b44da593a7c0db97aa9 Mon Sep 17 00:00:00 2001
From: PeriniM <perinim.98@gmail.com>
Date: Wed, 18 Dec 2024 17:31:53 +0100
Subject: [PATCH 2/4] =?UTF-8?q?feat:=20added=20pydantic=20output=20schema?=
 =?UTF-8?q?=20=F0=9F=94=8D?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 README.md                                   | 81 +++++++++++++++++++++
 examples/localscraper_tool_schema.py        | 38 ++++++++++
 examples/smartscraper_tool_schema.py        | 26 +++++++
 langchain_scrapegraph/tools/localscraper.py | 50 ++++++++++---
 langchain_scrapegraph/tools/smartscraper.py | 40 +++++++++-
 5 files changed, 219 insertions(+), 16 deletions(-)
 create mode 100644 examples/localscraper_tool_schema.py
 create mode 100644 examples/smartscraper_tool_schema.py

diff --git a/README.md b/README.md
index 510ea82..d672f6e 100644
--- a/README.md
+++ b/README.md
@@ -44,6 +44,39 @@ result = tool.invoke({
 print(result)
 ```
 
+<details>
+<summary>🔍 Using Output Schemas with SmartscraperTool</summary>
+
+You can define the structure of the output using Pydantic models:
+
+```python
+from typing import List
+from pydantic import BaseModel, Field
+from langchain_scrapegraph.tools import SmartScraperTool
+
+class WebsiteInfo(BaseModel):
+    title: str = Field(description="The main title of the webpage")
+    description: str = Field(description="The main description or first paragraph")
+    urls: List[str] = Field(description="The URLs inside the webpage")
+
+# Initialize with schema
+tool = SmartScraperTool(llm_output_schema=WebsiteInfo)
+
+# The output will conform to the WebsiteInfo schema
+result = tool.invoke({
+    "website_url": "https://www.example.com",
+    "user_prompt": "Extract the website information"
+})
+
+print(result)
+# {
+#     "title": "Example Domain",
+#     "description": "This domain is for use in illustrative examples...",
+#     "urls": ["https://www.iana.org/domains/example"]
+# }
+```
+</details>
+
 ### 💻 LocalscraperTool
 Extract information from HTML content using AI.
 
@@ -59,6 +92,54 @@ result = tool.invoke({
 print(result)
 ```
 
+<details>
+<summary>🔍 Using Output Schemas with LocalscraperTool</summary>
+
+You can define the structure of the output using Pydantic models:
+
+```python
+from typing import Optional
+from pydantic import BaseModel, Field
+from langchain_scrapegraph.tools import LocalScraperTool
+
+class CompanyInfo(BaseModel):
+    name: str = Field(description="The company name")
+    description: str = Field(description="The company description")
+    email: Optional[str] = Field(default=None, description="Contact email if available")
+    phone: Optional[str] = Field(default=None, description="Contact phone if available")
+
+# Initialize with schema
+tool = LocalScraperTool(llm_output_schema=CompanyInfo)
+
+html_content = """
+<html>
+    <body>
+        <h1>TechCorp Solutions</h1>
+        <p>We are a leading AI technology company.</p>
+        <div class="contact">
+            <p>Email: contact@techcorp.com</p>
+            <p>Phone: (555) 123-4567</p>
+        </div>
+    </body>
+</html>
+"""
+
+# The output will conform to the CompanyInfo schema
+result = tool.invoke({
+    "website_html": html_content,
+    "user_prompt": "Extract the company information"
+})
+
+print(result)
+# {
+#     "name": "TechCorp Solutions",
+#     "description": "We are a leading AI technology company.",
+#     "email": "contact@techcorp.com",
+#     "phone": "(555) 123-4567"
+# }
+```
+</details>
+
 ## 🌟 Key Features
 
 - 🐦 **LangChain Integration**: Seamlessly works with LangChain agents and chains
diff --git a/examples/localscraper_tool_schema.py b/examples/localscraper_tool_schema.py
new file mode 100644
index 0000000..85f3ab9
--- /dev/null
+++ b/examples/localscraper_tool_schema.py
@@ -0,0 +1,38 @@
+from typing import List
+
+from pydantic import BaseModel, Field
+from scrapegraph_py.logger import sgai_logger
+
+from langchain_scrapegraph.tools import LocalScraperTool
+
+
+class WebsiteInfo(BaseModel):
+    title: str = Field(description="The main title of the webpage")
+    description: str = Field(description="The main description or first paragraph")
+    urls: List[str] = Field(description="The URLs inside the webpage")
+
+
+sgai_logger.set_logging(level="INFO")
+
+# Initialize with Pydantic model class
+tool = LocalScraperTool(llm_output_schema=WebsiteInfo)
+
+# Example website and prompt
+html_content = """
+<html>
+    <body>
+        <h1>Company Name</h1>
+        <p>We are a technology company focused on AI solutions.</p>
+        <div class="contact">
+            <p>Email: contact@example.com</p>
+            <p>Phone: (555) 123-4567</p>
+        </div>
+    </body>
+</html>
+"""
+user_prompt = "Make a summary of the webpage and extract the email and phone number"
+
+# Use the tool
+result = tool.invoke({"website_html": html_content, "user_prompt": user_prompt})
+
+print(result)
diff --git a/examples/smartscraper_tool_schema.py b/examples/smartscraper_tool_schema.py
new file mode 100644
index 0000000..bded746
--- /dev/null
+++ b/examples/smartscraper_tool_schema.py
@@ -0,0 +1,26 @@
+from typing import List
+
+from pydantic import BaseModel, Field
+from scrapegraph_py.logger import sgai_logger
+
+from langchain_scrapegraph.tools import SmartScraperTool
+
+
+class WebsiteInfo(BaseModel):
+    title: str = Field(description="The main title of the webpage")
+    description: str = Field(description="The main description or first paragraph")
+    urls: List[str] = Field(description="The URLs inside the webpage")
+
+
+sgai_logger.set_logging(level="INFO")
+
+# Initialize with Pydantic model class
+tool = SmartScraperTool(llm_output_schema=WebsiteInfo)
+
+# Example website and prompt
+website_url = "https://www.example.com"
+user_prompt = "Extract info about the website"
+
+# Use the tool - output will conform to WebsiteInfo schema
+result = tool.invoke({"website_url": website_url, "user_prompt": user_prompt})
+print(result)
diff --git a/langchain_scrapegraph/tools/localscraper.py b/langchain_scrapegraph/tools/localscraper.py
index 0b7382c..926d6fd 100644
--- a/langchain_scrapegraph/tools/localscraper.py
+++ b/langchain_scrapegraph/tools/localscraper.py
@@ -37,6 +37,8 @@ class LocalScraperTool(BaseTool):
     Key init args:
         api_key: Your ScrapeGraph AI API key. If not provided, will look for SGAI_API_KEY env var.
         client: Optional pre-configured ScrapeGraph client instance.
+        llm_output_schema: Optional Pydantic model class (a BaseModel subclass) used to structure the output.
+                      If provided, the tool will ensure the output conforms to this schema.
 
     Instantiate:
         .. code-block:: python
@@ -49,6 +51,16 @@ class LocalScraperTool(BaseTool):
             # Or provide API key directly
             tool = LocalScraperTool(api_key="your-api-key")
 
+            # Optionally, you can provide an output schema:
+            from pydantic import BaseModel, Field
+
+            class CompanyInfo(BaseModel):
+                name: str = Field(description="Company name")
+                description: str = Field(description="Company description")
+                email: str = Field(description="Contact email")
+
+            tool_with_schema = LocalScraperTool(llm_output_schema=CompanyInfo)
+
     Use the tool:
         .. code-block:: python
 
@@ -71,6 +83,7 @@ class LocalScraperTool(BaseTool):
             })
 
             print(result)
+            # Without schema:
             # {
             #     "description": "We are a technology company focused on AI solutions",
             #     "contact": {
@@ -78,14 +91,13 @@ class LocalScraperTool(BaseTool):
             #         "phone": "(555) 123-4567"
             #     }
             # }
-
-    Async usage:
-        .. code-block:: python
-
-            result = await tool.ainvoke({
-                "user_prompt": "Extract contact information",
-                "website_html": html_content
-            })
+            #
+            # With CompanyInfo schema:
+            # {
+            #     "name": "Company Name",
+            #     "description": "We are a technology company focused on AI solutions",
+            #     "email": "contact@example.com"
+            # }
     """
 
     name: str = "LocalScraper"
@@ -96,6 +108,7 @@ class LocalScraperTool(BaseTool):
     return_direct: bool = True
     client: Optional[Client] = None
     api_key: str
+    llm_output_schema: Optional[Type[BaseModel]] = None
 
     @model_validator(mode="before")
     @classmethod
@@ -117,10 +130,23 @@ def _run(
         """Use the tool to extract data from a website."""
         if not self.client:
             raise ValueError("Client not initialized")
-        response = self.client.localscraper(
-            website_html=website_html,
-            user_prompt=user_prompt,
-        )
+
+        if self.llm_output_schema is None:
+            response = self.client.localscraper(
+                website_html=website_html,
+                user_prompt=user_prompt,
+            )
+        elif isinstance(self.llm_output_schema, type) and issubclass(
+            self.llm_output_schema, BaseModel
+        ):
+            response = self.client.localscraper(
+                website_html=website_html,
+                user_prompt=user_prompt,
+                output_schema=self.llm_output_schema,
+            )
+        else:
+            raise ValueError("llm_output_schema must be a Pydantic model class")
+
         return response["result"]
 
     async def _arun(
diff --git a/langchain_scrapegraph/tools/smartscraper.py b/langchain_scrapegraph/tools/smartscraper.py
index a48030e..7b07915 100644
--- a/langchain_scrapegraph/tools/smartscraper.py
+++ b/langchain_scrapegraph/tools/smartscraper.py
@@ -37,6 +37,8 @@ class SmartScraperTool(BaseTool):
     Key init args:
         api_key: Your ScrapeGraph AI API key. If not provided, will look for SGAI_API_KEY env var.
         client: Optional pre-configured ScrapeGraph client instance.
+        llm_output_schema: Optional Pydantic model class (a BaseModel subclass) used to structure the output.
+                      If provided, the tool will ensure the output conforms to this schema.
 
     Instantiate:
         .. code-block:: python
@@ -49,6 +51,15 @@ class SmartScraperTool(BaseTool):
             # Or provide API key directly
             tool = SmartScraperTool(api_key="your-api-key")
 
+            # Optionally, you can provide an output schema:
+            from pydantic import BaseModel, Field
+
+            class WebsiteInfo(BaseModel):
+                title: str = Field(description="The main title")
+                description: str = Field(description="The main description")
+
+            tool_with_schema = SmartScraperTool(llm_output_schema=WebsiteInfo)
+
     Use the tool:
         .. code-block:: python
 
@@ -58,10 +69,17 @@ class SmartScraperTool(BaseTool):
             })
 
             print(result)
+            # Without schema:
             # {
             #     "main_heading": "Example Domain",
             #     "first_paragraph": "This domain is for use in illustrative examples..."
             # }
+            #
+            # With WebsiteInfo schema:
+            # {
+            #     "title": "Example Domain",
+            #     "description": "This domain is for use in illustrative examples..."
+            # }
 
     Async usage:
         .. code-block:: python
@@ -80,6 +98,7 @@ class SmartScraperTool(BaseTool):
     return_direct: bool = True
     client: Optional[Client] = None
     api_key: str
+    llm_output_schema: Optional[Type[BaseModel]] = None
 
     @model_validator(mode="before")
     @classmethod
@@ -101,10 +120,23 @@ def _run(
         """Use the tool to extract data from a website."""
         if not self.client:
             raise ValueError("Client not initialized")
-        response = self.client.smartscraper(
-            website_url=website_url,
-            user_prompt=user_prompt,
-        )
+
+        if self.llm_output_schema is None:
+            response = self.client.smartscraper(
+                website_url=website_url,
+                user_prompt=user_prompt,
+            )
+        elif isinstance(self.llm_output_schema, type) and issubclass(
+            self.llm_output_schema, BaseModel
+        ):
+            response = self.client.smartscraper(
+                website_url=website_url,
+                user_prompt=user_prompt,
+                output_schema=self.llm_output_schema,
+            )
+        else:
+            raise ValueError("llm_output_schema must be a Pydantic model class")
+
         return response["result"]
 
     async def _arun(

From d5dae575921cfa14daa4ceb887b0d7d037d3773d Mon Sep 17 00:00:00 2001
From: semantic-release-bot <semantic-release-bot@martynus.net>
Date: Wed, 18 Dec 2024 16:34:05 +0000
Subject: [PATCH 3/4] ci(release): 1.1.0-beta.2 [skip ci]
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

## [1.1.0-beta.2](https://github.com/ScrapeGraphAI/langchain-scrapegraph/compare/v1.1.0-beta.1...v1.1.0-beta.2) (2024-12-18)

### Features

* added pydantic output schema 🔍 ([34b5f10](https://github.com/ScrapeGraphAI/langchain-scrapegraph/commit/34b5f1089059daa25c756b44da593a7c0db97aa9))
---
 CHANGELOG.md   | 7 +++++++
 pyproject.toml | 2 +-
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index aa5cdbc..d0dfd1e 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,10 @@
+## [1.1.0-beta.2](https://github.com/ScrapeGraphAI/langchain-scrapegraph/compare/v1.1.0-beta.1...v1.1.0-beta.2) (2024-12-18)
+
+
+### Features
+
+* added pydantic output schema 🔍 ([34b5f10](https://github.com/ScrapeGraphAI/langchain-scrapegraph/commit/34b5f1089059daa25c756b44da593a7c0db97aa9))
+
 ## [1.1.0-beta.1](https://github.com/ScrapeGraphAI/langchain-scrapegraph/compare/v1.0.0...v1.1.0-beta.1) (2024-12-05)
 
 
diff --git a/pyproject.toml b/pyproject.toml
index 4ca4f7b..9548722 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "langchain-scrapegraph"
-version = "1.1.0b1"
+version = "1.1.0b2"
 description = "Library for extracting structured data from websites using ScrapeGraphAI"
 authors = ["Marco Perini <marco.perini@scrapegraphai.com>", "Marco Vinciguerra <marco.vinciguerra@scrapegraphai.com>", "Lorenzo Padoan <lorenzo.padoan@scrapegraphai.com>"]
 license = "MIT"

From 9da0f957fa438ec6062ee3fb0cda2023ee262373 Mon Sep 17 00:00:00 2001
From: semantic-release-bot <semantic-release-bot@martynus.net>
Date: Wed, 18 Dec 2024 16:48:13 +0000
Subject: [PATCH 4/4] ci(release): 1.2.0-beta.1 [skip ci]
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

## [1.2.0-beta.1](https://github.com/ScrapeGraphAI/langchain-scrapegraph/compare/v1.1.0...v1.2.0-beta.1) (2024-12-18)

### Features

* added pydantic output schema 🔍 ([34b5f10](https://github.com/ScrapeGraphAI/langchain-scrapegraph/commit/34b5f1089059daa25c756b44da593a7c0db97aa9))

### Docs

* added API reference ([d3ce047](https://github.com/ScrapeGraphAI/langchain-scrapegraph/commit/d3ce0470f5c89da910540e42d71afdddd80e8c15))

### CI

* **release:** 1.1.0-beta.1 [skip ci] ([6222a16](https://github.com/ScrapeGraphAI/langchain-scrapegraph/commit/6222a16a2fec477e7a6e610e0fdd4960e7ccd1b5))
* **release:** 1.1.0-beta.2 [skip ci] ([d5dae57](https://github.com/ScrapeGraphAI/langchain-scrapegraph/commit/d5dae575921cfa14daa4ceb887b0d7d037d3773d))
---
 CHANGELOG.md   | 17 +++++++++++++++++
 pyproject.toml |  2 +-
 2 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index cc8a37a..9e8e15d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,20 @@
+## [1.2.0-beta.1](https://github.com/ScrapeGraphAI/langchain-scrapegraph/compare/v1.1.0...v1.2.0-beta.1) (2024-12-18)
+
+
+### Features
+
+* added pydantic output schema 🔍 ([34b5f10](https://github.com/ScrapeGraphAI/langchain-scrapegraph/commit/34b5f1089059daa25c756b44da593a7c0db97aa9))
+
+
+### Docs
+
+* added API reference ([d3ce047](https://github.com/ScrapeGraphAI/langchain-scrapegraph/commit/d3ce0470f5c89da910540e42d71afdddd80e8c15))
+
+
+### CI
+
+* **release:** 1.1.0-beta.1 [skip ci] ([6222a16](https://github.com/ScrapeGraphAI/langchain-scrapegraph/commit/6222a16a2fec477e7a6e610e0fdd4960e7ccd1b5))
+* **release:** 1.1.0-beta.2 [skip ci] ([d5dae57](https://github.com/ScrapeGraphAI/langchain-scrapegraph/commit/d5dae575921cfa14daa4ceb887b0d7d037d3773d))
 
 ## [1.1.0-beta.2](https://github.com/ScrapeGraphAI/langchain-scrapegraph/compare/v1.1.0-beta.1...v1.1.0-beta.2) (2024-12-18)
 
diff --git a/pyproject.toml b/pyproject.toml
index a903766..c655c3c 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "langchain-scrapegraph"
-version = "1.2.0b2"
+version = "1.2.0b1"
 description = "Library for extracting structured data from websites using ScrapeGraphAI"
 authors = ["Marco Perini <marco.perini@scrapegraphai.com>", "Marco Vinciguerra <marco.vinciguerra@scrapegraphai.com>", "Lorenzo Padoan <lorenzo.padoan@scrapegraphai.com>"]
 license = "MIT"