In [11]:
import pandas as pd

# Read the listings (the first CSV column becomes the row index) and keep a
# seeded, reproducible random sample of 10,000 rows.
data = pd.read_csv("house_prices.csv", index_col=0).sample(10000, random_state=0)
data

Unnamed: 0,property_type,price,location,city,baths,purpose,bedrooms,Area_in_Marla
108839,House,13800000,Pak Arab Housing Society,Lahore,3,For Sale,3,5.0
97355,House,17500000,Marghzar Officers Colony,Lahore,6,For Sale,6,10.0
125129,House,12500000,Adiala Road,Rawalpindi,5,For Sale,5,10.0
155467,Lower Portion,47000,Satellite Town,Rawalpindi,3,For Rent,3,7.0
81132,House,7800000,Shalimar Housing Scheme,Lahore,4,For Sale,3,4.0
...,...,...,...,...,...,...,...,...
122491,House,19000000,Lake City,Lahore,5,For Sale,4,10.0
44101,Upper Portion,40000,Korang Town,Islamabad,5,For Rent,4,20.0
99634,House,42500000,DHA Defence,Lahore,5,For Sale,4,10.0
147606,Flat,6800000,Bahria Town Karachi,Karachi,2,For Sale,2,4.2


In [12]:
!pip install pandas


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.1.2[0m[39;49m -> [0m[32;49m23.3.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [13]:
# Keep only listings with a strictly positive area (rows where the comparison
# is False, including missing areas, are dropped).
has_positive_area = data["Area_in_Marla"].gt(0)
data = data.loc[has_positive_area]
data

Unnamed: 0,property_type,price,location,city,baths,purpose,bedrooms,Area_in_Marla
108839,House,13800000,Pak Arab Housing Society,Lahore,3,For Sale,3,5.0
97355,House,17500000,Marghzar Officers Colony,Lahore,6,For Sale,6,10.0
125129,House,12500000,Adiala Road,Rawalpindi,5,For Sale,5,10.0
155467,Lower Portion,47000,Satellite Town,Rawalpindi,3,For Rent,3,7.0
81132,House,7800000,Shalimar Housing Scheme,Lahore,4,For Sale,3,4.0
...,...,...,...,...,...,...,...,...
122491,House,19000000,Lake City,Lahore,5,For Sale,4,10.0
44101,Upper Portion,40000,Korang Town,Islamabad,5,For Rent,4,20.0
99634,House,42500000,DHA Defence,Lahore,5,For Sale,4,10.0
147606,Flat,6800000,Bahria Town Karachi,Karachi,2,For Sale,2,4.2


In [14]:
!pip install scikit-learn


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.1.2[0m[39;49m -> [0m[32;49m23.3.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [15]:
from sklearn.model_selection import train_test_split

# 70 % of the rows go to training; the remaining 30 % is halved into a
# validation split and a test split. Both splits are seeded.
train_data, holdout_data = train_test_split(data, test_size=0.3, random_state=0)
valid_data, test_data = train_test_split(holdout_data, test_size=0.5, random_state=0)
tuple(len(split) for split in (train_data, valid_data, test_data))

(6999, 1500, 1500)

In [16]:
# Name of the target column; later cells transform it and fit models on it.
label = "price"
train_data[label]  # preview the raw target values of the training split

57800     29000000
166018     6000000
150291      100000
133319    10500000
119191     9800000
            ...   
101191    14700000
48738     43500000
69691     33500000
160108       53000
124749       30000
Name: price, Length: 6999, dtype: int64

In [17]:
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import FunctionTransformer, StandardScaler

import numpy as np

# Target transform: log2 of the price followed by standardisation.
# inverse_func=np.exp2 lets label_pipeline.inverse_transform() undo the log.
log_transform = FunctionTransformer(np.log2, inverse_func=np.exp2)
label_pipeline = make_pipeline(log_transform, StandardScaler())

# Fit on the training labels only; the other splits reuse the fitted pipeline.
# NOTE: this overwrites the label column in place, so re-running the cell
# without re-running the split would transform already-transformed values.
train_data[label] = label_pipeline.fit_transform(train_data[[label]])
for split in (valid_data, test_data):
    split[label] = label_pipeline.transform(split[[label]])

train_data[label]

57800     0.983117
166018    0.356093
150291   -1.273352
133319    0.578806
119191    0.551349
            ...   
101191    0.712714
48738     1.144482
69691     1.040525
160108   -1.526018
124749   -1.752503
Name: price, Length: 6999, dtype: float64

In [18]:
# Columns fed to the models: two text columns followed by three numeric ones.
features = ["city", "location", "Area_in_Marla", "bedrooms", "baths"]

train_data[features]

Unnamed: 0,city,location,Area_in_Marla,bedrooms,baths
57800,Karachi,Cantt,11.4,3,3
166018,Lahore,Green Cap Housing Society,3.0,4,4
150291,Karachi,DHA Defence,20.0,2,2
133319,Rawalpindi,Bahria Town Rawalpindi,5.0,3,4
119191,Lahore,Canal Garden,5.0,3,3
...,...,...,...,...,...
101191,Rawalpindi,Bahria Town Rawalpindi,7.0,5,5
48738,Lahore,DHA Defence,20.0,5,6
69691,Karachi,Gulshan-e-Iqbal Town,12.0,3,3
160108,Karachi,Cantt,11.4,3,3


In [19]:
from sklearn.dummy import DummyRegressor

# Reference model that always predicts the mean of the training labels
# (DummyClassifier() for classification).
baseline = DummyRegressor(strategy="mean").fit(train_data[features], train_data[label])

# Score the constant predictor on the test split for later comparison.
baseline_score = baseline.score(test_data[features], test_data[label])
print(f"{baseline_score:,.3f}")

-0.002


In [20]:
# Distinct city names found in the training split, as a plain Python list
# (saved later and used as the dropdown choices in app.py).
cities = list(train_data["city"].unique())
cities

['Karachi', 'Lahore', 'Rawalpindi', 'Islamabad', 'Faisalabad']

In [21]:
from sklearn.compose import make_column_transformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.feature_extraction.text import TfidfVectorizer as TfidfVec
from sklearn.linear_model import SGDRegressor

def create_pipeline():
    """Build a fresh, unfitted preprocessing + regression pipeline.

    Preprocessing (column-wise):
      * ``city``: one-hot encoded. Categories not seen during ``fit`` are
        encoded as all zeros instead of raising, so cross-validation folds
        that lack a rare city and unexpected inputs at inference still work.
      * ``location``: TF-IDF over word 1- to 3-grams, keeping terms that
        occur in at least 5 documents and in at most 50 % of them.
      * ``Area_in_Marla``, ``bedrooms``, ``baths``: standardised.
      * any other column is passed through unchanged.

    The transformed features feed an ``SGDRegressor`` with a fixed seed.

    Returns:
        sklearn.pipeline.Pipeline: a new unfitted pipeline on every call.
    """
    return make_pipeline(
        make_column_transformer(
            (OneHotEncoder(handle_unknown="ignore"), ["city"]),
            # A bare string (not a list) hands the vectorizer a 1-D column of text.
            (TfidfVec(ngram_range=(1, 3), min_df=5, max_df=0.5), "location"),
            (StandardScaler(), ["Area_in_Marla"]),
            (StandardScaler(), ["bedrooms"]),
            (StandardScaler(), ["baths"]),
            remainder="passthrough",
        ),
        SGDRegressor(random_state=0),
    )

# Instantiate one unfitted pipeline; the bare name below renders its diagram.
pipeline = create_pipeline()
pipeline

In [22]:
# Fit on the training split, then score on that same split
# (an optimistic number, useful only as a sanity check).
train_inputs, train_target = train_data[features], train_data[label]
train_score = pipeline.fit(train_inputs, train_target).score(train_inputs, train_target)
print(f"{train_score:,.3f}")

0.372


In [23]:
# Number of input columns the final estimator saw after preprocessing.
pipeline[-1].n_features_in_

458

In [24]:
from sklearn.model_selection import RandomizedSearchCV

# Search space over the regressor's regularisation settings; keys use the
# "<step name>__<parameter>" convention of scikit-learn pipelines.
params = {
    "sgdregressor__penalty": ["l1", "l2", "elasticnet"],
    "sgdregressor__alpha": [0.00001, 0.0001, 0.001],
    "sgdregressor__l1_ratio": [0.01, 0.1, 1],
}

# Try 10 random combinations with 5-fold cross-validation on the training split.
search = RandomizedSearchCV(
    estimator=create_pipeline(),
    param_distributions=params,
    n_iter=10,
    cv=5,
    n_jobs=2,
    random_state=0,
)
search.fit(train_data[features], train_data[label])

print(f"{search.best_score_:,.3f}", search.best_params_)

0.363 {'sgdregressor__penalty': 'elasticnet', 'sgdregressor__l1_ratio': 0.01, 'sgdregressor__alpha': 1e-05}


In [25]:
# Score the best pipeline found by the search on the untouched test split.
best_pipeline = search.best_estimator_
test_score = best_pipeline.score(test_data[features], test_data[label])
print(f"{test_score:,.3f}")

0.365


In [26]:
import joblib

# "Artifacts": the fitted model pipeline, the label transform and the city list.
for artifact, filename in (
    (search.best_estimator_, "pipeline.joblib"),
    (label_pipeline, "label_pipeline.joblib"),
):
    joblib.dump(artifact, filename)

joblib.dump(cities, "cities.joblib")

['cities.joblib']

In [27]:
# Read the three artifacts back from disk to check that they round-trip.
pipeline, label_pipeline, cities = map(
    joblib.load,
    ("pipeline.joblib", "label_pipeline.joblib", "cities.joblib"),
)

pipeline

In [28]:
%%writefile app.py
# !pip install gradio ipywidgets
import pandas as pd
import gradio as gr
import joblib

# "Artifacts"
# Loaded once at import time, relative to the working directory.
# joblib.load unpickles its input, so only load files from a trusted source.
pipeline = joblib.load("pipeline.joblib")  # fitted preprocessing + regressor
label_pipeline = joblib.load("label_pipeline.joblib")  # label transform, used to invert predictions
cities = joblib.load("cities.joblib")  # choices for the city dropdown

def predict(city, location, area, bedrooms, baths):
    """Predict a price for a single listing described by the form inputs.

    The inputs are packed into a one-row DataFrame whose column names match
    the feature names, passed through the module-level ``pipeline``, and the
    prediction is mapped back with ``label_pipeline.inverse_transform``.

    Returns:
        int: the predicted price, truncated to an integer.
    """
    row = {
        "city": city,
        "location": location,
        "Area_in_Marla": area,  # Column names matching feature names
        "bedrooms": bedrooms,
        "baths": baths,
    }

    scaled_prediction = pipeline.predict(pd.DataFrame([row]))
    # inverse_transform needs a 2-D input: one row holding the single prediction.
    restored = label_pipeline.inverse_transform(scaled_prediction.reshape(1, -1))

    return int(restored[0][0])

# https://www.gradio.app/guides
# UI definition: components are created inside the Blocks context in the
# order written here.
with gr.Blocks() as blocks:
    # Input widgets; the city choices come from the saved training cities.
    city = gr.Dropdown(cities, value=cities[0], label="City")
    location = gr.Textbox(label="Location")
    area = gr.Number(label="Area", value=1, minimum=0.5, step=0.5)
    bedrooms = gr.Slider(label="Bedrooms", minimum=0, maximum=10, step=1)
    baths = gr.Slider(label="Baths", minimum=0, maximum=10, step=1)

    # Output widget that receives predict()'s return value.
    price = gr.Number(label="Price")

    # Order must match predict()'s positional parameters.
    inputs = [city, location, area, bedrooms, baths]
    outputs = [price]

    # Clicking the button calls predict(*inputs) and writes the result to outputs.
    predict_btn = gr.Button("Predict")
    predict_btn.click(predict, inputs=inputs, outputs=outputs)

# Start the web server only when the file is executed as a script; importing
# app.py just builds `blocks`. Swap in one of the commented calls to change
# who can reach the app.
if __name__ == "__main__":
    blocks.launch() # Local machine only
    # blocks.launch(server_name="0.0.0.0") # LAN access to local machine
    # blocks.launch(share=True) # Public access to local machine

Writing app.py


In [29]:
!pip install gradio

Collecting gradio
  Using cached gradio-4.16.0-py3-none-any.whl (16.7 MB)
Collecting aiofiles<24.0,>=22.0 (from gradio)
  Using cached aiofiles-23.2.1-py3-none-any.whl (15 kB)
Collecting altair<6.0,>=4.2.0 (from gradio)
  Using cached altair-5.2.0-py3-none-any.whl (996 kB)
Collecting fastapi (from gradio)
  Using cached fastapi-0.109.0-py3-none-any.whl (92 kB)
Collecting ffmpy (from gradio)
  Using cached ffmpy-0.3.1.tar.gz (5.5 kB)
  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h  Preparing metadata (pyproject.toml) ... [?25ldone
[?25hCollecting gradio-client==0.8.1 (from gradio)
  Using cached gradio_client-0.8.1-py3-none-any.whl (305 kB)
Collecting httpx (from gradio)
  Using cached httpx-0.26.0-py3-none-any.whl (75 kB)
Collecting huggingface-hub>=0.19.3 (from gradio)
  Using cached huggingface_hub-0.20.3-py3-none-any.whl (330 kB)
Collecting importlib-resources<7.0,>=1.3 (from gradio)
  Using cached importlib_resource

In [30]:
!pip install pandas joblib


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.1.2[0m[39;49m -> [0m[32;49m23.3.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [31]:
# Execute app.py as a script inside this kernel (its __name__ is "__main__",
# so the Gradio app is launched).
%run app.py

  from .autonotebook import tqdm as notebook_tqdm


Running on local URL:  http://127.0.0.1:7861

To create a public link, set `share=True` in `launch()`.


In [32]:
# List every installed package with its exact version, for inspection.
!pip freeze

aiofiles==23.2.1
altair==5.2.0
annotated-types==0.6.0
anyio==4.2.0
appnope==0.1.3
asttokens==2.4.1
attrs==23.2.0
certifi==2023.11.17
charset-normalizer==3.3.2
click==8.1.7
colorama==0.4.6
comm==0.2.1
contourpy==1.2.0
cycler==0.12.1
debugpy==1.8.0
decorator==5.1.1
executing==2.0.1
fastapi==0.109.0
ffmpy==0.3.1
filelock==3.13.1
fonttools==4.47.2
fsspec==2023.12.2
gradio==4.16.0
gradio_client==0.8.1
h11==0.14.0
httpcore==1.0.2
httpx==0.26.0
huggingface-hub==0.20.3
idna==3.6
importlib-resources==6.1.1
ipykernel==6.29.0
ipython==8.20.0
jedi==0.19.1
Jinja2==3.1.3
joblib==1.3.2
jsonschema==4.21.1
jsonschema-specifications==2023.12.1
jupyter_client==8.6.0
jupyter_core==5.7.1
kiwisolver==1.4.5
markdown-it-py==3.0.0
MarkupSafe==2.1.4
matplotlib==3.8.2
matplotlib-inline==0.1.6
mdurl==0.1.2
nest-asyncio==1.6.0
numpy==1.26.3
orjson==3.9.12
packaging==23.2
pandas==2.2.0
parso==0.8.3
pexpect==4.9.0
pillow==10.2.0
platformdirs==4.1.0
prompt-toolkit==3.0.43
psutil==5.9.8
ptyprocess==0.7.0
pure-eval==0.

In [33]:
# Recreate virtual environment and install required modules if needed

In [34]:
# Write the pinned package list to requirements.txt. This captures the whole
# environment, including notebook tooling that app.py does not import.
!pip freeze > requirements.txt

In [61]:
# Clone the Hugging Face Space repository into ./eco.
!git clone https://huggingface.co/spaces/Beerth21624/eco

Cloning into 'eco'...
remote: Enumerating objects: 4, done.[K
remote: Total 4 (delta 0), reused 0 (delta 0), pack-reused 4[K
Unpacking objects: 100% (4/4), 1.27 KiB | 433.00 KiB/s, done.


In [1]:
# !git clone xxx
# OR
# !git init
# !git remote add origin https://huggingface.co/spaces/Beerth21624/eco
# !git remote get-url origin
# !git clone https://huggingface.co/spaces/Beerth21624/eco
# OPTIONAL
# !git lfs install
# !git lfs track "*.joblib"
# !git add .gitattributes



# Each `!` line runs in its own subshell, starting from the notebook's working
# directory. NOTE(review): confirm that directory is the repository to publish
# (the clone above created ./eco).
!git add .
# Identity recorded on the commit (stored in this repository's git config).
!git config user.name "beerth21624"
!git config user.email "beerzii.4321@gmail.com"
!git commit -m "Add artifacts"
# -f force-pushes: the remote `main` branch is overwritten with local history.
!git push origin -f main