Spaces:
Running
Running
Update custom_utils.py
Browse files
- custom_utils.py +0 -79
custom_utils.py
CHANGED
@@ -1,6 +1,4 @@
|
|
1 |
import os
|
2 |
-
from typing import List, Optional
|
3 |
-
from pydantic import BaseModel, ValidationError
|
4 |
from datetime import datetime
|
5 |
import pandas as pd
|
6 |
import openai
|
@@ -13,83 +11,6 @@ import time
|
|
13 |
DB_NAME = "airbnb_dataset"
|
14 |
COLLECTION_NAME = "listings_reviews"
|
15 |
|
16 |
-
class Host(BaseModel):
|
17 |
-
host_id: str
|
18 |
-
host_url: str
|
19 |
-
host_name: str
|
20 |
-
host_location: str
|
21 |
-
host_about: str
|
22 |
-
host_response_time: Optional[str] = None
|
23 |
-
host_thumbnail_url: str
|
24 |
-
host_picture_url: str
|
25 |
-
host_response_rate: Optional[int] = None
|
26 |
-
host_is_superhost: bool
|
27 |
-
host_has_profile_pic: bool
|
28 |
-
host_identity_verified: bool
|
29 |
-
|
30 |
-
class Location(BaseModel):
|
31 |
-
type: str
|
32 |
-
coordinates: List[float]
|
33 |
-
is_location_exact: bool
|
34 |
-
|
35 |
-
class Address(BaseModel):
|
36 |
-
street: str
|
37 |
-
government_area: str
|
38 |
-
market: str
|
39 |
-
country: str
|
40 |
-
country_code: str
|
41 |
-
location: Location
|
42 |
-
|
43 |
-
class Review(BaseModel):
|
44 |
-
_id: str
|
45 |
-
date: Optional[datetime] = None
|
46 |
-
listing_id: str
|
47 |
-
reviewer_id: str
|
48 |
-
reviewer_name: Optional[str] = None
|
49 |
-
comments: Optional[str] = None
|
50 |
-
|
51 |
-
class Listing(BaseModel):
|
52 |
-
_id: int
|
53 |
-
listing_url: str
|
54 |
-
name: str
|
55 |
-
summary: str
|
56 |
-
space: str
|
57 |
-
description: str
|
58 |
-
neighborhood_overview: Optional[str] = None
|
59 |
-
notes: Optional[str] = None
|
60 |
-
transit: Optional[str] = None
|
61 |
-
access: str
|
62 |
-
interaction: Optional[str] = None
|
63 |
-
house_rules: str
|
64 |
-
property_type: str
|
65 |
-
room_type: str
|
66 |
-
bed_type: str
|
67 |
-
minimum_nights: int
|
68 |
-
maximum_nights: int
|
69 |
-
cancellation_policy: str
|
70 |
-
last_scraped: Optional[datetime] = None
|
71 |
-
calendar_last_scraped: Optional[datetime] = None
|
72 |
-
first_review: Optional[datetime] = None
|
73 |
-
last_review: Optional[datetime] = None
|
74 |
-
accommodates: int
|
75 |
-
bedrooms: Optional[float] = 0
|
76 |
-
beds: Optional[float] = 0
|
77 |
-
number_of_reviews: int
|
78 |
-
bathrooms: Optional[float] = 0
|
79 |
-
amenities: List[str]
|
80 |
-
price: int
|
81 |
-
security_deposit: Optional[float] = None
|
82 |
-
cleaning_fee: Optional[float] = None
|
83 |
-
extra_people: int
|
84 |
-
guests_included: int
|
85 |
-
images: dict
|
86 |
-
host: Host
|
87 |
-
address: Address
|
88 |
-
availability: dict
|
89 |
-
review_scores: dict
|
90 |
-
reviews: List[Review]
|
91 |
-
text_embeddings: List[float]
|
92 |
-
|
93 |
def process_records(data_frame):
|
94 |
records = data_frame.to_dict(orient='records')
|
95 |
# Handle potential `NaT` values
|
|
|
1 |
import os
|
|
|
|
|
2 |
from datetime import datetime
|
3 |
import pandas as pd
|
4 |
import openai
|
|
|
11 |
DB_NAME = "airbnb_dataset"
|
12 |
COLLECTION_NAME = "listings_reviews"
|
13 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
14 |
def process_records(data_frame):
|
15 |
records = data_frame.to_dict(orient='records')
|
16 |
# Handle potential `NaT` values
|