Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Kyle/fsspec integration #60

Closed
Show file tree
Hide file tree
Changes from 2 commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
05c1f5d
Streaming list results (#35)
kylebarron Oct 23, 2024
ba3e83c
Update package description (#36)
kylebarron Oct 23, 2024
f5a5c29
Optionally return list result as arrow (#38)
kylebarron Oct 23, 2024
b581ce4
Return buffer protocol object from `get_range` and `get_ranges` (#39)
kylebarron Oct 23, 2024
6b6f666
Support range in GetOptions (#40)
kylebarron Oct 23, 2024
1d403ea
Rename package to object-store-py (#41)
kylebarron Oct 23, 2024
70a0de9
Fix typing and docs website (#42)
kylebarron Oct 24, 2024
5f50fda
Rename to obstore (#45)
kylebarron Oct 24, 2024
f3644a6
Update docs website CSS (#46)
kylebarron Oct 24, 2024
ae2de0e
Add custom exceptions (#48)
kylebarron Oct 25, 2024
6e6dc10
Add put options (#50)
kylebarron Oct 25, 2024
245bf50
remove `range` from `GetOptions` for now (#51)
kylebarron Oct 25, 2024
ff99c42
bump to 0.2.0-beta.1 (#52)
kylebarron Oct 25, 2024
cd50634
Fix python package name (#53)
kylebarron Oct 25, 2024
eba5019
Bump to 0.2 (#56)
kylebarron Oct 25, 2024
c318a53
boto3 region name can be None (#59)
kylebarron Oct 29, 2024
bf4f5e0
Add test fixtures
martindurant Oct 30, 2024
fdd2a79
Merge branch 'kyle/fsspec-integration' of https://github.com/developm…
martindurant Oct 30, 2024
9779d3b
Add __repr__ to store classes (#61)
kylebarron Oct 30, 2024
e1ed765
Use `moto` for S3 tests (#62)
kylebarron Oct 30, 2024
6495836
Merge branch 'main' into kyle/fsspec-integration
martindurant Oct 31, 2024
68a2d1f
Simple file override
martindurant Oct 31, 2024
ec9c559
slightly more
martindurant Oct 31, 2024
956b1c0
lint
martindurant Oct 31, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 1 addition & 5 deletions object-store-rs/python/object_store_rs/fsspec.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,16 +26,12 @@

import object_store_rs as obs

if TYPE_CHECKING:
from object_store_rs.store import ObjectStore


class AsyncFsspecStore(fsspec.asyn.AsyncFileSystem):
store: ObjectStore

def __init__(
self,
store: ObjectStore,
store,
Copy link
Member

@kylebarron kylebarron Oct 30, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why remove the ObjectStore typing here?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It wasn't importing, and this is only typing. Perhaps something with my environment.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, it's a type-only object. Perhaps it should be changed in the future to be an actual Python object that can be imported at runtime. The `from __future__ import annotations` import should allow this annotation to not be evaluated at runtime.

*args,
asynchronous=False,
loop=None,
Expand Down
69 changes: 63 additions & 6 deletions tests/test_fsspec.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,68 @@
import boto3
import os

import pytest
pytest.importorskip("moto")
from moto.moto_server.threaded_moto_server import ThreadedMotoServer

from object_store_rs.store import S3Store
import object_store_rs as obs
import pyarrow.parquet as pq
from object_store_rs.fsspec import AsyncFsspecStore

# session = boto3.Session()

# Build an HTTP store from the github.com URL and wrap it as an fsspec filesystem.
store = obs.store.HTTPStore.from_url("https://github.com")
fs = AsyncFsspecStore(store)
# Path of an example Parquet file, given relative to the store URL above.
url = "opengeospatial/geoparquet/raw/refs/heads/main/examples/example.parquet"
# Read only the Parquet metadata, going through the fsspec wrapper.
# NOTE(review): these statements run at import time, and the name `fs` is
# defined again as a fixture further down; they look like the pre-change side
# of the diff (the same steps appear inside test_remote_parquet) - confirm.
test = pq.read_metadata(url, filesystem=fs)
# Host and port handed to ThreadedMotoServer by the s3_base fixture.
ip = "localhost"
port = 5555
# Endpoint URL built from ip/port; s3_base exports it as AWS_ENDPOINT_URL and
# the s3 / reset_s3_fixture fixtures send their requests to it.
endpoint_uri = f"http://{ip}:{port}"
# Bucket created by the s3 fixture and opened by the fs fixture.
test_bucket_name = "test"


@pytest.fixture(scope="module")
def s3_base():
    """Run a moto server for the duration of the module.

    Starts a ``ThreadedMotoServer`` on ``ip``/``port`` and points the AWS
    environment variables at it. On teardown the server is stopped and the
    environment variables are put back to their previous values so they do
    not leak into other test modules running in the same process.
    """
    server = ThreadedMotoServer(ip_address=ip, port=port)
    server.start()

    overrides = {
        "AWS_SECRET_ACCESS_KEY": "foo",
        "AWS_ACCESS_KEY_ID": "foo",
        "AWS_ENDPOINT_URL": endpoint_uri,
    }
    # Remember what was set before (None means "was not set at all").
    previous = {name: os.environ.get(name) for name in overrides}
    os.environ.update(overrides)

    print("server up")
    yield
    print("moto done")
    server.stop()

    # Restore the environment exactly as it was before this fixture ran.
    for name, value in previous.items():
        if value is None:
            os.environ.pop(name, None)
        else:
            os.environ[name] = value


@pytest.fixture()
def s3(s3_base):
    """Seed the moto server with the test bucket and one object.

    Creates ``test_bucket_name`` with a public-read ACL and uploads the key
    ``"afile"`` containing ``b"hello world"``. The fixture yields no value.
    """
    from botocore.session import Session

    s3_client = Session().create_client("s3", endpoint_url=endpoint_uri)
    s3_client.create_bucket(Bucket=test_bucket_name, ACL="public-read")
    s3_client.put_object(
        Bucket=test_bucket_name,
        Key="afile",
        Body=b"hello world",
    )


@pytest.fixture(autouse=True)
def reset_s3_fixture():
    """Reset the moto server state before every test (best effort).

    The moto server is shared by all tests in the module, but each test
    should start from a clean state, so the moto reset endpoint is posted to
    before the test runs.
    """
    import requests

    try:
        # A timeout keeps a wedged server from hanging the whole test run.
        requests.post(f"{endpoint_uri}/moto-api/reset", timeout=5)
    except requests.exceptions.RequestException:
        # This fixture is autouse, so it also runs for tests that never
        # request s3_base; the server may simply not be listening then.
        # Only request failures are ignored, so KeyboardInterrupt and
        # programming errors still propagate.
        pass


@pytest.fixture()
def fs(s3):
    """Return an ``AsyncFsspecStore`` wrapping an ``S3Store`` for the test bucket.

    The store is built with ``S3Store.from_env``; depending on ``s3`` ensures
    the bucket has been created and seeded first.
    """
    bucket_store = S3Store.from_env(test_bucket_name)
    return AsyncFsspecStore(bucket_store)


def test_list(fs):
    """List the bucket root and check that the seeded object is reported."""
    out = fs.ls("", detail=False)
    # The s3 fixture uploads exactly one object, keyed "afile".
    # NOTE(review): assumes ls(detail=False) returns bare keys relative to
    # the bucket - confirm against AsyncFsspecStore._ls.
    assert out == ["afile"]


def test_remote_parquet():
    """Read Parquet metadata over HTTP through the fsspec wrapper.

    Builds an ``HTTPStore`` for github.com, wraps it in ``AsyncFsspecStore``
    and asks pyarrow to read the metadata of an example Parquet file through
    that filesystem. The test passes if no exception is raised.
    """
    parquet_path = (
        "opengeospatial/geoparquet/raw/refs/heads/main/examples/example.parquet"
    )
    http_fs = AsyncFsspecStore(obs.store.HTTPStore.from_url("https://github.com"))
    pq.read_metadata(parquet_path, filesystem=http_fs)
Loading