diff --git a/libs/megaparse/src/megaparse/megaparse.py b/libs/megaparse/src/megaparse/megaparse.py index 3a829cf..191edec 100644 --- a/libs/megaparse/src/megaparse/megaparse.py +++ b/libs/megaparse/src/megaparse/megaparse.py @@ -67,11 +67,6 @@ def validate_input( raise ValueError( f"Format Checker : Unsupported file extension: {file_extension}" ) - # TODO(@chloedia): each parser should have a list of supported extensions - if not isinstance(self.parser, UnstructuredParser): - raise ValueError( - f" Unsupported file extension : Parser {self.parser} do not support {file_extension}" - ) return file_extension async def aload( diff --git a/libs/megaparse/src/megaparse/parser/base.py b/libs/megaparse/src/megaparse/parser/base.py index 30757cc..ab378d8 100644 --- a/libs/megaparse/src/megaparse/parser/base.py +++ b/libs/megaparse/src/megaparse/parser/base.py @@ -8,6 +8,27 @@ class BaseParser(ABC): """Mother Class for all the parsers [Unstructured, LlamaParse, MegaParseVision]""" + supported_extensions = [] + + def check_supported_extension( + self, file_extension: FileExtension | None, file_path: str | Path | None = None + ): + """Raise ValueError unless this parser supports the file's extension. + + Uses file_extension when given; otherwise derives it from the suffix of file_path. + """ + if not file_extension and not file_path: + raise ValueError( + f"Either file_path or file_extension must be provided for {self.__class__.__name__}" + ) + if file_path and not file_extension: + file_path = Path(file_path) if isinstance(file_path, str) else file_path + file_extension = FileExtension(file_path.suffix) + if file_extension not in self.supported_extensions: + raise ValueError( + f"Unsupported file extension {file_extension.value} for {self.__class__.__name__}" + ) + @abstractmethod async def aconvert( self, diff --git a/libs/megaparse/src/megaparse/parser/doctr_parser.py b/libs/megaparse/src/megaparse/parser/doctr_parser.py index e51bb62..ce2b6a6 100644 --- a/libs/megaparse/src/megaparse/parser/doctr_parser.py +++ b/libs/megaparse/src/megaparse/parser/doctr_parser.py @@ -14,6 +14,8 @@ class DoctrParser(BaseParser): + supported_extensions = 
[FileExtension.PDF] + def __init__( self, det_predictor_model: str = "db_resnet50", @@ -74,6 +76,9 @@ def convert( pdf = file_path # type: ignore else: raise ValueError("Can't convert if file and file_path are None") + + self.check_supported_extension(file_extension, file_path) + doc = DocumentFile.from_pdf(pdf) # Analyze result = self.predictor(doc) diff --git a/libs/megaparse/src/megaparse/parser/llama.py b/libs/megaparse/src/megaparse/parser/llama.py index d098ec1..9cb0d8c 100644 --- a/libs/megaparse/src/megaparse/parser/llama.py +++ b/libs/megaparse/src/megaparse/parser/llama.py @@ -11,6 +11,8 @@ class LlamaParser(BaseParser): + supported_extensions = [FileExtension.PDF] + def __init__( self, api_key: str, @@ -37,6 +39,7 @@ async def aconvert( ) -> str: if not file_path: raise ValueError("File_path should be provided to run LlamaParser") + self.check_supported_extension(file_extension, file_path) llama_parser = _LlamaParse( api_key=self.api_key, @@ -64,6 +67,7 @@ def convert( ) -> str: if not file_path: raise ValueError("File_path should be provided to run LlamaParser") + self.check_supported_extension(file_extension, file_path) llama_parser = _LlamaParse( api_key=self.api_key, diff --git a/libs/megaparse/src/megaparse/parser/megaparse_vision.py b/libs/megaparse/src/megaparse/parser/megaparse_vision.py index 55e244b..0b05e73 100644 --- a/libs/megaparse/src/megaparse/parser/megaparse_vision.py +++ b/libs/megaparse/src/megaparse/parser/megaparse_vision.py @@ -53,6 +53,8 @@ class MegaParseVision(BaseParser): + supported_extensions = [FileExtension.PDF] + def __init__(self, model: BaseChatModel, **kwargs): if hasattr(model, "model_name"): if not SupportedModel.is_supported(model.model_name): @@ -158,6 +160,9 @@ async def aconvert( if isinstance(file_path, Path): file_path = str(file_path) + + self.check_supported_extension(file_extension, file_path) + pdf_base64 = self.process_file(file_path) tasks = [ self.asend_to_mlm(pdf_base64[i : i + batch_size]) @@ -187,6 
+192,9 @@ def convert( if isinstance(file_path, Path): file_path = str(file_path) + + self.check_supported_extension(file_extension, file_path) + pdf_base64 = self.process_file(file_path) chunks = [ pdf_base64[i : i + batch_size] diff --git a/libs/megaparse/src/megaparse/parser/unstructured_parser.py b/libs/megaparse/src/megaparse/parser/unstructured_parser.py index 5e1d7dc..b5cca09 100644 --- a/libs/megaparse/src/megaparse/parser/unstructured_parser.py +++ b/libs/megaparse/src/megaparse/parser/unstructured_parser.py @@ -15,6 +15,21 @@ class UnstructuredParser(BaseParser): load_dotenv() + supported_extensions = [ + FileExtension.PDF, + FileExtension.DOCX, + FileExtension.TXT, + FileExtension.OTF, + FileExtension.EPUB, + FileExtension.HTML, + FileExtension.XML, + FileExtension.CSV, + FileExtension.XLSX, + FileExtension.XLS, + FileExtension.PPTX, + FileExtension.MD, + FileExtension.MARKDOWN, + ] def __init__( self, strategy=StrategyEnum.AUTO, model: BaseChatModel | None = None, **kwargs @@ -107,6 +122,7 @@ async def aconvert( file_extension: FileExtension | None = None, **kwargs, ) -> str: + self.check_supported_extension(file_extension, file_path) warnings.warn( "The UnstructuredParser is a sync parser, please use the sync convert method", UserWarning, @@ -121,6 +137,8 @@ def convert( file_extension: FileExtension | None = None, **kwargs, ) -> str: + self.check_supported_extension(file_extension, file_path) + elements = partition( filename=str(file_path) if file_path else None, file=file, diff --git a/libs/megaparse/tests/docx/test_docx_processing.py b/libs/megaparse/tests/docx/test_docx_processing.py deleted file mode 100644 index a1a13a0..0000000 --- a/libs/megaparse/tests/docx/test_docx_processing.py +++ /dev/null @@ -1,32 +0,0 @@ -from pathlib import Path -from uuid import uuid4 - -import pytest -from langchain_core.language_models import FakeListChatModel -from megaparse.megaparse import MegaParse -from megaparse.parser.llama import LlamaParser -from 
megaparse.parser.megaparse_vision import MegaParseVision - - -@pytest.mark.asyncio -async def test_megaparse_docx_processor(): - p = Path("./tests/docx/sample.docx") - processor = MegaParse() - result = await processor.aload(file_path=p) - assert len(result) > 0 - - -@pytest.mark.asyncio -async def test_megaparse_docx_processor_fail(): - p = Path("./tests/docx/sample.docx") - parser = LlamaParser(api_key=str(uuid4())) - processor = MegaParse(parser=parser) - with pytest.raises(ValueError): - await processor.aload(file_path=p) - - parser = MegaParseVision( - model=FakeListChatModel(responses=["good"]), - ) # type: ignore - processor = MegaParse(parser=parser) - with pytest.raises(ValueError): - await processor.aload(file_path=p) diff --git a/libs/megaparse/tests/epub/test_epub_processing.py b/libs/megaparse/tests/epub/test_epub_processing.py deleted file mode 100644 index d5aa128..0000000 --- a/libs/megaparse/tests/epub/test_epub_processing.py +++ /dev/null @@ -1,30 +0,0 @@ -from pathlib import Path -from uuid import uuid4 - -import pytest -from langchain_core.language_models import FakeListChatModel -from megaparse.megaparse import MegaParse -from megaparse.parser.llama import LlamaParser -from megaparse.parser.megaparse_vision import MegaParseVision - - -@pytest.mark.asyncio -async def test_megaparse_epub_processor(): - p = Path("./tests/epub/Sway.epub") - processor = MegaParse() - result = await processor.aload(file_path=p) - assert len(result) > 0 - - -@pytest.mark.asyncio -async def test_megaparse_epub_processor_fail(): - p = Path("./tests/epub/Sway.epub") - parser = LlamaParser(api_key=str(uuid4())) - processor = MegaParse(parser=parser) - with pytest.raises(ValueError): - await processor.aload(file_path=p) - - parser = MegaParseVision(model=FakeListChatModel(responses=["good"])) # type: ignore - processor = MegaParse(parser=parser) - with pytest.raises(ValueError): - await processor.aload(file_path=p) diff --git a/libs/megaparse/tests/fixtures/__init__.py 
b/libs/megaparse/tests/fixtures/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/libs/megaparse/tests/html/test_html_processing.py b/libs/megaparse/tests/html/test_html_processing.py deleted file mode 100644 index f905713..0000000 --- a/libs/megaparse/tests/html/test_html_processing.py +++ /dev/null @@ -1,30 +0,0 @@ -from pathlib import Path -from uuid import uuid4 - -import pytest -from langchain_core.language_models import FakeListChatModel -from megaparse.megaparse import MegaParse -from megaparse.parser.llama import LlamaParser -from megaparse.parser.megaparse_vision import MegaParseVision - - -@pytest.mark.asyncio -async def test_megaparse_html_processor(): - p = Path("./tests/html/sample_complexe.html") - processor = MegaParse() - result = await processor.aload(file_path=p) - assert len(result) > 0 - - -@pytest.mark.asyncio -async def test_megaparse_html_processor_fail(): - p = Path("./tests/html/sample_complexe.html") - parser = LlamaParser(api_key=str(uuid4())) - processor = MegaParse(parser=parser) - with pytest.raises(ValueError): - await processor.aload(file_path=p) - - parser = MegaParseVision(model=FakeListChatModel(responses=["good"])) # type: ignore - processor = MegaParse(parser=parser) - with pytest.raises(ValueError): - await processor.aload(file_path=p) diff --git a/libs/megaparse/tests/odt/test_odt_processing.py b/libs/megaparse/tests/odt/test_odt_processing.py deleted file mode 100644 index 9a14133..0000000 --- a/libs/megaparse/tests/odt/test_odt_processing.py +++ /dev/null @@ -1,30 +0,0 @@ -from pathlib import Path -from uuid import uuid4 - -import pytest -from langchain_core.language_models import FakeListChatModel -from megaparse.megaparse import MegaParse -from megaparse.parser.llama import LlamaParser -from megaparse.parser.megaparse_vision import MegaParseVision - - -@pytest.mark.asyncio -async def test_megaparse_odt_processor(): - p = Path("./tests/odt/file-sample_500kB.odt") - processor = MegaParse() - result = 
await processor.aload(file_path=p) - assert len(result) > 0 - - -@pytest.mark.asyncio -async def test_megaparse_odt_processor_fail(): - p = Path("./tests/odt/file-sample_500kB.odt") - parser = LlamaParser(api_key=str(uuid4())) - processor = MegaParse(parser=parser) - with pytest.raises(ValueError): - await processor.aload(file_path=p) - - parser = MegaParseVision(model=FakeListChatModel(responses=["good"])) # type: ignore - processor = MegaParse(parser=parser) - with pytest.raises(ValueError): - await processor.aload(file_path=p) diff --git a/libs/megaparse/tests/pdf/test_unstructured_parser.py b/libs/megaparse/tests/pdf/test_pdfium_parser.py similarity index 100% rename from libs/megaparse/tests/pdf/test_unstructured_parser.py rename to libs/megaparse/tests/pdf/test_pdfium_parser.py diff --git a/libs/megaparse/tests/pptx/test_pptx_processing.py b/libs/megaparse/tests/pptx/test_pptx_processing.py deleted file mode 100644 index a1f226d..0000000 --- a/libs/megaparse/tests/pptx/test_pptx_processing.py +++ /dev/null @@ -1,30 +0,0 @@ -from pathlib import Path -from uuid import uuid4 - -import pytest -from langchain_core.language_models import FakeListChatModel -from megaparse.megaparse import MegaParse -from megaparse.parser.llama import LlamaParser -from megaparse.parser.megaparse_vision import MegaParseVision - - -@pytest.mark.asyncio -async def test_megaparse_pptx_processor(): - p = Path("./tests/pptx/sample.pptx") - processor = MegaParse() - result = await processor.aload(file_path=p) - assert len(result) > 0 - - -@pytest.mark.asyncio -async def test_megaparse_pptx_processor_fail(): - p = Path("./tests/pptx/sample.pptx") - parser = LlamaParser(api_key=str(uuid4())) - processor = MegaParse(parser=parser) - with pytest.raises(ValueError): - await processor.aload(file_path=p) - - parser = MegaParseVision(model=FakeListChatModel(responses=["good"])) # type: ignore - processor = MegaParse(parser=parser) - with pytest.raises(ValueError): - await 
processor.aload(file_path=p) diff --git a/libs/megaparse/tests/epub/Sway.epub b/libs/megaparse/tests/supported_docs/Sway.epub similarity index 100% rename from libs/megaparse/tests/epub/Sway.epub rename to libs/megaparse/tests/supported_docs/Sway.epub diff --git a/libs/megaparse/tests/odt/file-sample_500kB.odt b/libs/megaparse/tests/supported_docs/file-sample_500kB.odt similarity index 100% rename from libs/megaparse/tests/odt/file-sample_500kB.odt rename to libs/megaparse/tests/supported_docs/file-sample_500kB.odt diff --git a/libs/megaparse/tests/xls/file_example_XLSX_50.xlsx b/libs/megaparse/tests/supported_docs/file_example_XLSX_50.xlsx similarity index 100% rename from libs/megaparse/tests/xls/file_example_XLSX_50.xlsx rename to libs/megaparse/tests/supported_docs/file_example_XLSX_50.xlsx diff --git a/libs/megaparse/tests/xls/file_example_XLS_50.xls b/libs/megaparse/tests/supported_docs/file_example_XLS_50.xls similarity index 100% rename from libs/megaparse/tests/xls/file_example_XLS_50.xls rename to libs/megaparse/tests/supported_docs/file_example_XLS_50.xls diff --git a/libs/megaparse/tests/supported_docs/sample.csv b/libs/megaparse/tests/supported_docs/sample.csv new file mode 100644 index 0000000..9b3f73c --- /dev/null +++ b/libs/megaparse/tests/supported_docs/sample.csv @@ -0,0 +1,4 @@ +Name,Description +MegaParse,"MegaParse is the best parser, even with accents like é, è, and ñ." +OtherParse,"OtherParse is a decent parser, but it struggles with accents." +RandomParse,"RandomParse is another parser, but it often fails with special characters." 
\ No newline at end of file diff --git a/libs/megaparse/tests/docx/sample.docx b/libs/megaparse/tests/supported_docs/sample.docx similarity index 100% rename from libs/megaparse/tests/docx/sample.docx rename to libs/megaparse/tests/supported_docs/sample.docx diff --git a/libs/megaparse/tests/supported_docs/sample.markdown b/libs/megaparse/tests/supported_docs/sample.markdown new file mode 100644 index 0000000..58d7267 --- /dev/null +++ b/libs/megaparse/tests/supported_docs/sample.markdown @@ -0,0 +1,21 @@ +# The Difficulty of Parsing Files + +Parsing files can be a challenging task due to several factors: + +## 1. File Format Variability +Different file formats (e.g., JSON, XML, CSV) require different parsing techniques. Each format has its own structure and rules, making it necessary to handle each one uniquely. + +## 2. Inconsistent Data +Files often contain inconsistent or malformed data. Handling these inconsistencies requires robust error-checking and validation mechanisms. + +## 3. Large File Sizes +Parsing large files can be resource-intensive and time-consuming. Efficient algorithms and memory management techniques are essential to handle large datasets. + +## 4. Encoding Issues +Files may use different character encodings (e.g., UTF-8, ASCII). Properly detecting and handling these encodings is crucial to avoid data corruption. + +## 5. Nested Structures +Some file formats, like JSON and XML, can have deeply nested structures. Parsing these nested structures requires recursive algorithms and careful handling of hierarchical data. + +## Conclusion +Despite these challenges, effective file parsing is essential for data processing and analysis. By understanding and addressing these difficulties, developers can create robust parsers that handle a wide variety of file formats and data inconsistencies. 
diff --git a/libs/megaparse/tests/supported_docs/sample.md b/libs/megaparse/tests/supported_docs/sample.md new file mode 100644 index 0000000..58d7267 --- /dev/null +++ b/libs/megaparse/tests/supported_docs/sample.md @@ -0,0 +1,21 @@ +# The Difficulty of Parsing Files + +Parsing files can be a challenging task due to several factors: + +## 1. File Format Variability +Different file formats (e.g., JSON, XML, CSV) require different parsing techniques. Each format has its own structure and rules, making it necessary to handle each one uniquely. + +## 2. Inconsistent Data +Files often contain inconsistent or malformed data. Handling these inconsistencies requires robust error-checking and validation mechanisms. + +## 3. Large File Sizes +Parsing large files can be resource-intensive and time-consuming. Efficient algorithms and memory management techniques are essential to handle large datasets. + +## 4. Encoding Issues +Files may use different character encodings (e.g., UTF-8, ASCII). Properly detecting and handling these encodings is crucial to avoid data corruption. + +## 5. Nested Structures +Some file formats, like JSON and XML, can have deeply nested structures. Parsing these nested structures requires recursive algorithms and careful handling of hierarchical data. + +## Conclusion +Despite these challenges, effective file parsing is essential for data processing and analysis. By understanding and addressing these difficulties, developers can create robust parsers that handle a wide variety of file formats and data inconsistencies. 
diff --git a/libs/megaparse/tests/supported_docs/sample.otf b/libs/megaparse/tests/supported_docs/sample.otf new file mode 100644 index 0000000..2fbddd0 Binary files /dev/null and b/libs/megaparse/tests/supported_docs/sample.otf differ diff --git a/libs/megaparse/tests/pptx/sample.pptx b/libs/megaparse/tests/supported_docs/sample.pptx similarity index 100% rename from libs/megaparse/tests/pptx/sample.pptx rename to libs/megaparse/tests/supported_docs/sample.pptx diff --git a/libs/megaparse/tests/supported_docs/sample.txt b/libs/megaparse/tests/supported_docs/sample.txt new file mode 100644 index 0000000..22d314f --- /dev/null +++ b/libs/megaparse/tests/supported_docs/sample.txt @@ -0,0 +1,13 @@ +Lorem ipsum + +Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nunc ac faucibus odio. + +Vestibulum neque massa, scelerisque sit amet ligula eu, congue molestie mi. Praesent ut varius sem. Nullam at porttitor arcu, nec lacinia nisi. Ut ac dolor vitae odio interdum condimentum. Vivamus dapibus sodales ex, vitae malesuada ipsum cursus convallis. Maecenas sed egestas nulla, ac condimentum orci. Mauris diam felis, vulputate ac suscipit et, iaculis non est. Curabitur semper arcu ac ligula semper, nec luctus nisl blandit. Integer lacinia ante ac libero lobortis imperdiet. Nullam mollis convallis ipsum, ac accumsan nunc vehicula vitae. Nulla eget justo in felis tristique fringilla. Morbi sit amet tortor quis risus auctor condimentum. Morbi in ullamcorper elit. Nulla iaculis tellus sit amet mauris tempus fringilla. +Maecenas mauris lectus, lobortis et purus mattis, blandit dictum tellus. +Maecenas non lorem quis tellus placerat varius. +Nulla facilisi. +Aenean congue fringilla justo ut aliquam. +Mauris id ex erat. Nunc vulputate neque vitae justo facilisis, non condimentum ante sagittis. +Morbi viverra semper lorem nec molestie. +Maecenas tincidunt est efficitur ligula euismod, sit amet ornare est vulputate. 
+https://github.com/QuivrHQ/MegaParse \ No newline at end of file diff --git a/libs/megaparse/tests/supported_docs/sample.xml b/libs/megaparse/tests/supported_docs/sample.xml new file mode 100644 index 0000000..b252409 --- /dev/null +++ b/libs/megaparse/tests/supported_docs/sample.xml @@ -0,0 +1,23 @@ + + + + Charter Group +
+ 100 Main + Framingham + MA + 01701 +
+
+ 720 Prospect + Framingham + MA + 01701 +
+
+ 120 Ridge + MA + 01760 +
+
+
\ No newline at end of file diff --git a/libs/megaparse/tests/html/sample_complexe.html b/libs/megaparse/tests/supported_docs/sample_complexe.html similarity index 100% rename from libs/megaparse/tests/html/sample_complexe.html rename to libs/megaparse/tests/supported_docs/sample_complexe.html diff --git a/libs/megaparse/tests/supported_docs/sample_native.pdf b/libs/megaparse/tests/supported_docs/sample_native.pdf new file mode 100644 index 0000000..774c2ea Binary files /dev/null and b/libs/megaparse/tests/supported_docs/sample_native.pdf differ diff --git a/libs/megaparse/tests/test_parsers.py b/libs/megaparse/tests/test_parsers.py new file mode 100644 index 0000000..ae081dd --- /dev/null +++ b/libs/megaparse/tests/test_parsers.py @@ -0,0 +1,40 @@ +import os + +import pytest +from megaparse.parser.doctr_parser import DoctrParser +from megaparse.parser.llama import LlamaParser +from megaparse.parser.megaparse_vision import MegaParseVision +from megaparse.parser.unstructured_parser import UnstructuredParser +from megaparse_sdk.schema.extensions import FileExtension + +PARSER_LIST = [ + UnstructuredParser, + DoctrParser, +] + + +@pytest.mark.parametrize("parser", PARSER_LIST) +@pytest.mark.parametrize("extension", list(FileExtension)) +def test_sync_parser(parser, extension):  # Runs once per (parser class in PARSER_LIST, FileExtension member) pair. + directory = "./tests/supported_docs" + file_path = next(  # First file under directory whose name ends with the extension value, or None. + ( + os.path.join(root, file) + for root, _, files in os.walk(directory) + for file in files + if file.endswith(extension.value) + ), + None, + ) + if file_path is None: + pytest.fail(f"No file with extension {extension.value} found in {directory}") + + myparser = parser() + if extension in myparser.supported_extensions:  # Supported: convert must return a non-empty result. + response = myparser.convert(file_path) + + assert response + assert len(response) > 0 + else:  # Unsupported: convert must raise ValueError. + with pytest.raises(ValueError): + myparser.convert(file_path) diff --git a/libs/megaparse/tests/xls/test_xls_processing.py b/libs/megaparse/tests/xls/test_xls_processing.py deleted file mode 100644 index ccf1d12..0000000 
--- a/libs/megaparse/tests/xls/test_xls_processing.py +++ /dev/null @@ -1,38 +0,0 @@ -from pathlib import Path -from uuid import uuid4 - -import pytest -from langchain_core.language_models import FakeListChatModel -from megaparse.megaparse import MegaParse -from megaparse.parser.llama import LlamaParser -from megaparse.parser.megaparse_vision import MegaParseVision - - -@pytest.mark.asyncio -async def test_megaparse_xls_processor(): - p = Path("./tests/xls/file_example_XLS_50.xls") - processor = MegaParse() - result = await processor.aload(file_path=p) - assert len(result) > 0 - - -@pytest.mark.asyncio -async def test_megaparse_xlsx_processor(): - p = Path("./tests/xls/file_example_XLSX_50.xlsx") - processor = MegaParse() - result = await processor.aload(file_path=p) - assert len(result) > 0 - - -@pytest.mark.asyncio -async def test_megaparse_xls_processor_fail(): - p = Path("./tests/xls/file_example_XLS_50.xls") - parser = LlamaParser(api_key=str(uuid4())) - processor = MegaParse(parser=parser) - with pytest.raises(ValueError): - await processor.aload(file_path=p) - - parser = MegaParseVision(model=FakeListChatModel(responses=["good"])) # type: ignore - processor = MegaParse(parser=parser) - with pytest.raises(ValueError): - await processor.aload(file_path=p) diff --git a/libs/megaparse_sdk/megaparse_sdk/schema/extensions.py b/libs/megaparse_sdk/megaparse_sdk/schema/extensions.py index 6ef1683..dc6b4d2 100644 --- a/libs/megaparse_sdk/megaparse_sdk/schema/extensions.py +++ b/libs/megaparse_sdk/megaparse_sdk/schema/extensions.py @@ -13,7 +13,6 @@ def __new__(cls, value: str, mimetype: str): return obj PDF = (".pdf", "application/pdf") - DOC = (".doc", "application/msword") DOCX = ( ".docx", "application/vnd.openxmlformats-officedocument.wordprocessingml.document", @@ -29,12 +28,10 @@ def __new__(cls, value: str, mimetype: str): "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", ) XLS = (".xls", "application/vnd.ms-excel") - PPT = (".ppt", 
"application/vnd.ms-powerpoint") PPTX = ( ".pptx", "application/vnd.openxmlformats-officedocument.presentationml.presentation", ) - JSON = (".json", "application/json") MD = (".md", "text/markdown") MARKDOWN = (".markdown", "text/markdown") diff --git a/libs/megaparse_sdk/pyproject.toml b/libs/megaparse_sdk/pyproject.toml index b561f10..c54c0d4 100644 --- a/libs/megaparse_sdk/pyproject.toml +++ b/libs/megaparse_sdk/pyproject.toml @@ -12,7 +12,7 @@ dependencies = [ ] readme = "README.md" -requires-python = "< 3.12" +requires-python = ">= 3.11" [build-system] requires = ["hatchling==1.26.3"]