diff --git a/README.md b/README.md index 4c0ca95..9e2534f 100644 --- a/README.md +++ b/README.md @@ -50,6 +50,7 @@ properties = scrape_property( property_younger_than=30, # sold in last 30 days - listed in last x days if (for_sale, for_rent) # pending_or_contingent=True # use on for_sale listings to find pending / contingent listings # mls_only=True, # only fetch MLS listings + # proxy="http://user:pass@host:port" # use a proxy to change your IP address ) print(f"Number of properties: {len(properties)}") @@ -61,7 +62,7 @@ print(properties.head()) ### CLI ``` -usage: homeharvest [-l {for_sale,for_rent,sold}] [-o {excel,csv}] [-f FILENAME] [-p PROXY] [-d DAYS] [-r RADIUS] [-m] location +usage: homeharvest [-l {for_sale,for_rent,sold}] [-o {excel,csv}] [-f FILENAME] [-p PROXY] [-d DAYS] [-r RADIUS] [-m] [-c] location Home Harvest Property Scraper @@ -79,8 +80,11 @@ options: Proxy to use for scraping -d DAYS, --days DAYS Sold/listed in last _ days filter. -r RADIUS, --radius RADIUS - Get comparable properties within _ (eg. 0.0) miles. Only applicable for individual addresses. - -m, --mls_only If set, fetches only MLS listings. + Get comparable properties within _ (e.g., 0.0) miles. Only applicable for individual addresses. + -m, --mls_only If set, fetches only MLS listings. + -c, --pending_or_contingent + If set, fetches only pending or contingent listings. Only applicable for for_sale listings from general area searches. + ``` ```bash > homeharvest "San Francisco, CA" -l for_rent -o excel -f HomeHarvest diff --git a/examples/HomeHarvest_Demo.py b/examples/HomeHarvest_Demo.py index a78ddeb..6ab64d9 100644 --- a/examples/HomeHarvest_Demo.py +++ b/examples/HomeHarvest_Demo.py @@ -7,9 +7,11 @@ properties = scrape_property( location="San Diego, CA", - listing_type="sold", # for_sale, for_rent - property_younger_than=30, # sold/listed in last 30 days - mls_only=True, # only fetch MLS listings + listing_type="sold", # or (for_sale, for_rent) + property_younger_than=30, # sold in last 30 days - listed in last x days if (for_sale, for_rent) + # pending_or_contingent=True # use on for_sale listings to find pending / contingent listings + # mls_only=True, # only fetch MLS listings + # proxy="http://user:pass@host:port" # use a proxy to change your IP address ) print(f"Number of properties: {len(properties)}") diff --git a/homeharvest/cli.py b/homeharvest/cli.py index 198de12..13ed44d 100644 --- a/homeharvest/cli.py +++ b/homeharvest/cli.py @@ -60,6 +60,13 @@ def main(): help="If set, fetches only MLS listings.", ) + parser.add_argument( + "-c", + "--pending_or_contingent", + action="store_true", + help="If set, fetches only pending or contingent listings. Only applicable for for_sale listings from general area searches.", + ) + args = parser.parse_args() result = scrape_property( @@ -69,6 +76,7 @@ def main(): proxy=args.proxy, mls_only=args.mls_only, property_younger_than=args.days, + pending_or_contingent=args.pending_or_contingent, ) if not args.filename: diff --git a/homeharvest/core/scrapers/realtor/__init__.py b/homeharvest/core/scrapers/realtor/__init__.py index fcd96b2..532efbb 100644 --- a/homeharvest/core/scrapers/realtor/__init__.py +++ b/homeharvest/core/scrapers/realtor/__init__.py @@ -105,10 +105,10 @@ def handle_listing(self, listing_id: str) -> list[Property]: ) able_to_get_lat_long = ( - property_info - and property_info.get("address") - and property_info["address"].get("location") - and property_info["address"]["location"].get("coordinate") + property_info + and property_info.get("address") + and property_info["address"].get("location") + and property_info["address"]["location"].get("coordinate") ) listing = Property( @@ -122,8 +122,10 @@ def handle_listing(self, listing_id: str) -> list[Property]: list_date=property_info["basic"]["list_date"].split("T")[0] if property_info["basic"].get("list_date") else None, - prc_sqft=property_info["basic"].get("price") / property_info["basic"].get("sqft") - if property_info["basic"].get("price") and property_info["basic"].get("sqft") + prc_sqft=property_info["basic"].get("price") + / property_info["basic"].get("sqft") + if property_info["basic"].get("price") + and property_info["basic"].get("sqft") else None, last_sold_date=property_info["basic"]["sold_date"].split("T")[0] if property_info["basic"].get("sold_date") @@ -146,7 +148,7 @@ def handle_listing(self, listing_id: str) -> list[Property]: year_built=property_info["details"].get("year_built"), garage=property_info["details"].get("garage"), stories=property_info["details"].get("stories"), - ) + ), ) return [listing] @@ -175,7 +177,10 @@ def get_latest_listing_id(self, property_id: str) -> str | None: if property_info["listings"] is None: return None - primary_listing = next((listing for listing in property_info["listings"] if listing["primary"]), None) + primary_listing = next( + (listing for listing in property_info["listings"] if listing["primary"]), + None, + ) if primary_listing: return primary_listing["listing_id"] else: @@ -328,7 +333,11 @@ def general_search( else "sort: [{ field: list_date, direction: desc }]" ) - pending_or_contingent_param = "or_filters: { contingent: true, pending: true }" if self.pending_or_contingent else "" + pending_or_contingent_param = ( + "or_filters: { contingent: true, pending: true }" + if self.pending_or_contingent + else "" + ) if search_type == "comps": #: comps search, came from an address query = """query Property_search( @@ -384,7 +393,7 @@ def general_search( ) else: #: general search, came from an address query = ( - """query Property_search( + """query Property_search( $property_id: [ID]! $offset: Int!, ) { @@ -394,7 +403,9 @@ def general_search( } limit: 1 offset: $offset - ) %s""" % results_query) + ) %s""" + % results_query + ) payload = { "query": query, @@ -477,13 +488,21 @@ def search(self): "offset": 0, } - search_type = "comps" if self.radius and location_type == "address" else "address" if location_type == "address" and not self.radius else "area" + search_type = ( + "comps" + if self.radius and location_type == "address" + else "address" + if location_type == "address" and not self.radius + else "area" + ) if location_type == "address": if not self.radius: #: single address search, non comps property_id = location_info["mpr_id"] search_variables |= {"property_id": property_id} - gql_results = self.general_search(search_variables, search_type=search_type) + gql_results = self.general_search( + search_variables, search_type=search_type + ) if gql_results["total"] == 0: listing_id = self.get_latest_listing_id(property_id) if listing_id is None: @@ -561,8 +580,17 @@ def _parse_address(result: dict, search_type): @staticmethod def _parse_description(result: dict) -> Description: description_data = result.get("description", {}) + + if description_data is None or not isinstance(description_data, dict): + print("Warning: description_data is invalid!") + description_data = {} + + style = description_data.get("type", "") + if style is not None: + style = style.upper() + return Description( - style=description_data.get("type", "").upper(), + style=style, beds=description_data.get("beds"), baths_full=description_data.get("baths_full"), baths_half=description_data.get("baths_half"), diff --git a/pyproject.toml b/pyproject.toml index c79f04e..71b9421 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "homeharvest" -version = "0.3.0" +version = "0.3.1" description = "Real estate scraping library supporting Zillow, Realtor.com & Redfin." authors = ["Zachary Hampton ", "Cullen Watson "] homepage = "https://github.com/ZacharyHampton/HomeHarvest" diff --git a/tests/test_realtor.py b/tests/test_realtor.py index a498112..fd16cf0 100644 --- a/tests/test_realtor.py +++ b/tests/test_realtor.py @@ -16,7 +16,12 @@ def test_realtor_pending_or_contingent(): pending_or_contingent=False, ) - assert all([result is not None for result in [pending_or_contingent_result, regular_result]]) + assert all( + [ + result is not None + for result in [pending_or_contingent_result, regular_result] + ] + ) assert len(pending_or_contingent_result) != len(regular_result)