Spaces:
Sleeping
Sleeping
cyberosa
committed on
Commit
·
11a5d2b
1
Parent(s):
37fdecc
new price weighted distribution
Browse files
- app.py +32 -27
- live_data/markets_live_data.parquet +2 -2
- live_data/{markets_live_data_sample.parquet → markets_live_data_with_votes.parquet} +2 -2
- notebooks/analysis_of_markets_data.ipynb +0 -0
- notebooks/analysis_of_markets_data_with_votes.ipynb +0 -0
- scripts/live_markets_data.py +6 -3
- scripts/live_traders_data.py +31 -15
- tabs/dist_gap.py +29 -0
- tabs/{tokens_votes_dist.py → tokens_pwc_dist.py} +0 -0
app.py
CHANGED
|
@@ -4,16 +4,14 @@ import pandas as pd
|
|
| 4 |
import duckdb
|
| 5 |
import logging
|
| 6 |
|
| 7 |
-
from tabs.
|
| 8 |
-
get_based_tokens_distribution,
|
| 9 |
-
get_based_votes_distribution,
|
| 10 |
get_extreme_cases,
|
| 11 |
)
|
| 12 |
from tabs.dist_gap import (
|
| 13 |
get_distribution_plot,
|
| 14 |
get_correlation_map,
|
| 15 |
get_kde_with_trades,
|
| 16 |
-
|
| 17 |
)
|
| 18 |
|
| 19 |
|
|
@@ -69,7 +67,7 @@ with demo:
|
|
| 69 |
live_markets_data
|
| 70 |
)
|
| 71 |
with gr.Tabs():
|
| 72 |
-
with gr.TabItem("💹 Probability distributions of
|
| 73 |
with gr.Row():
|
| 74 |
gr.Markdown("Best case: a market with a low gap between distributions")
|
| 75 |
with gr.Row():
|
|
@@ -77,16 +75,19 @@ with demo:
|
|
| 77 |
f"Market id = {best_market_id} Dist gap = {round(best_gap,2)}"
|
| 78 |
)
|
| 79 |
with gr.Row():
|
| 80 |
-
with gr.Column(min_width=350):
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
with gr.Column(min_width=350):
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
|
|
|
|
|
|
|
|
|
| 90 |
|
| 91 |
with gr.Row():
|
| 92 |
gr.Markdown("Worst case: a market with a high distribution gap metric")
|
|
@@ -96,21 +97,25 @@ with demo:
|
|
| 96 |
)
|
| 97 |
|
| 98 |
with gr.Row():
|
| 99 |
-
with gr.Column(min_width=350):
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
with gr.Column(min_width=350):
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 110 |
# remove samples with no trades
|
| 111 |
with gr.Row():
|
| 112 |
gr.Markdown(
|
| 113 |
-
"This metric measures the difference between the probability distribution based on the tokens distribution and the one based on the
|
| 114 |
)
|
| 115 |
with gr.Row():
|
| 116 |
gr.Markdown("# Density distribution")
|
|
|
|
| 4 |
import duckdb
|
| 5 |
import logging
|
| 6 |
|
| 7 |
+
from olas_predict_live_markets.tabs.tokens_pwc_dist import (
|
|
|
|
|
|
|
| 8 |
get_extreme_cases,
|
| 9 |
)
|
| 10 |
from tabs.dist_gap import (
|
| 11 |
get_distribution_plot,
|
| 12 |
get_correlation_map,
|
| 13 |
get_kde_with_trades,
|
| 14 |
+
get_dist_gap_time_evolution,
|
| 15 |
)
|
| 16 |
|
| 17 |
|
|
|
|
| 67 |
live_markets_data
|
| 68 |
)
|
| 69 |
with gr.Tabs():
|
| 70 |
+
with gr.TabItem("💹 Probability distributions of some markets"):
|
| 71 |
with gr.Row():
|
| 72 |
gr.Markdown("Best case: a market with a low gap between distributions")
|
| 73 |
with gr.Row():
|
|
|
|
| 75 |
f"Market id = {best_market_id} Dist gap = {round(best_gap,2)}"
|
| 76 |
)
|
| 77 |
with gr.Row():
|
| 78 |
+
# with gr.Column(min_width=350):
|
| 79 |
+
# gr.Markdown("# Evolution of outcomes probability based on tokens")
|
| 80 |
+
# best_market_tokens_dist = get_based_tokens_distribution(
|
| 81 |
+
# best_market_id, live_markets_data
|
| 82 |
+
# )
|
| 83 |
+
# with gr.Column(min_width=350):
|
| 84 |
+
# gr.Markdown("# Evolution of outcomes probability based on votes")
|
| 85 |
+
# best_market_votes_dist = get_based_votes_distribution(
|
| 86 |
+
# best_market_id, live_markets_data
|
| 87 |
+
# )
|
| 88 |
+
best_case = get_dist_gap_time_evolution(
|
| 89 |
+
best_market_id, live_markets_data
|
| 90 |
+
)
|
| 91 |
|
| 92 |
with gr.Row():
|
| 93 |
gr.Markdown("Worst case: a market with a high distribution gap metric")
|
|
|
|
| 97 |
)
|
| 98 |
|
| 99 |
with gr.Row():
|
| 100 |
+
# with gr.Column(min_width=350):
|
| 101 |
+
# # gr.Markdown("# Evolution of outcomes probability based on tokens")
|
| 102 |
+
# worst_market_tokens_dist = get_based_tokens_distribution(
|
| 103 |
+
# worst_market_id, live_markets_data
|
| 104 |
+
# )
|
| 105 |
+
# with gr.Column(min_width=350):
|
| 106 |
+
# worst_market_votes_dist = get_based_votes_distribution(
|
| 107 |
+
# worst_market_id, live_markets_data
|
| 108 |
+
# )
|
| 109 |
+
|
| 110 |
+
worst_case = get_dist_gap_time_evolution(
|
| 111 |
+
worst_market_id, live_markets_data
|
| 112 |
+
)
|
| 113 |
+
|
| 114 |
+
with gr.TabItem("📏 Distribution gap metric for all markets"):
|
| 115 |
# remove samples with no trades
|
| 116 |
with gr.Row():
|
| 117 |
gr.Markdown(
|
| 118 |
+
"This metric measures the difference between the probability distribution based on the tokens distribution and the one based on the price weighted distribution"
|
| 119 |
)
|
| 120 |
with gr.Row():
|
| 121 |
gr.Markdown("# Density distribution")
|
live_data/markets_live_data.parquet
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f3283dcea94638dee3b949a1cbdb90b7acdf8c50aa80c398888ed57525c07158
|
| 3 |
+
size 27358
|
live_data/{markets_live_data_sample.parquet → markets_live_data_with_votes.parquet}
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3827de48ab0e0bbc2b0ab8b141e2b815e6d7e28bfb183ed7c4dc3b52b5fe07d4
|
| 3 |
+
size 78693
|
notebooks/analysis_of_markets_data.ipynb
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
notebooks/analysis_of_markets_data_with_votes.ipynb
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
scripts/live_markets_data.py
CHANGED
|
@@ -204,12 +204,15 @@ def get_answer(fpmm: pd.Series) -> str:
|
|
| 204 |
|
| 205 |
|
| 206 |
def get_first_token_perc(row):
|
|
|
|
| 207 |
if row["total_tokens"] == 0.0:
|
| 208 |
return 0
|
|
|
|
| 209 |
return 100.0 - round((row["token_first_amount"] / row["total_tokens"]) * 100, 2)
|
| 210 |
|
| 211 |
|
| 212 |
def get_second_token_perc(row):
|
|
|
|
| 213 |
if row["total_tokens"] == 0.0:
|
| 214 |
return 0
|
| 215 |
return 100.0 - round((row["token_second_amount"] / row["total_tokens"]) * 100, 2)
|
|
@@ -286,13 +289,13 @@ def compute_distributions(filename: Optional[str]) -> pd.DataFrame:
|
|
| 286 |
|
| 287 |
logger.info("transforming and updating previous data")
|
| 288 |
fpmms = transform_fpmms(fpmms, filename, current_timestamp)
|
| 289 |
-
logger.debug(fpmms.info())
|
| 290 |
|
| 291 |
logger.info("Adding trading information")
|
| 292 |
-
add_trading_info(fpmms, current_timestamp)
|
| 293 |
|
| 294 |
logger.info("saving the data")
|
| 295 |
-
logger.debug(fpmms.info())
|
| 296 |
if filename:
|
| 297 |
fpmms.to_parquet(DATA_DIR / filename, index=False)
|
| 298 |
|
|
|
|
| 204 |
|
| 205 |
|
| 206 |
def get_first_token_perc(row):
|
| 207 |
+
"""To compute from the total amount of tokens bought how many are for first outcome"""
|
| 208 |
if row["total_tokens"] == 0.0:
|
| 209 |
return 0
|
| 210 |
+
|
| 211 |
return 100.0 - round((row["token_first_amount"] / row["total_tokens"]) * 100, 2)
|
| 212 |
|
| 213 |
|
| 214 |
def get_second_token_perc(row):
|
| 215 |
+
"""To compute from the total amount of tokens bought how many are for second outcome"""
|
| 216 |
if row["total_tokens"] == 0.0:
|
| 217 |
return 0
|
| 218 |
return 100.0 - round((row["token_second_amount"] / row["total_tokens"]) * 100, 2)
|
|
|
|
| 289 |
|
| 290 |
logger.info("transforming and updating previous data")
|
| 291 |
fpmms = transform_fpmms(fpmms, filename, current_timestamp)
|
| 292 |
+
# logger.debug(fpmms.info())
|
| 293 |
|
| 294 |
logger.info("Adding trading information")
|
| 295 |
+
add_trading_info(fpmms, current_timestamp, logger)
|
| 296 |
|
| 297 |
logger.info("saving the data")
|
| 298 |
+
# logger.debug(fpmms.info())
|
| 299 |
if filename:
|
| 300 |
fpmms.to_parquet(DATA_DIR / filename, index=False)
|
| 301 |
|
scripts/live_traders_data.py
CHANGED
|
@@ -13,7 +13,7 @@ from utils import (
|
|
| 13 |
from utils import SUBGRAPH_API_KEY, _to_content
|
| 14 |
from queries import omen_trader_votes_query
|
| 15 |
|
| 16 |
-
|
| 17 |
|
| 18 |
headers = {
|
| 19 |
"Accept": "application/json, multipart/mixed",
|
|
@@ -22,7 +22,7 @@ headers = {
|
|
| 22 |
|
| 23 |
|
| 24 |
def _query_omen_xdai_subgraph(
|
| 25 |
-
fpmm_id: str,
|
| 26 |
) -> dict[str, Any]:
|
| 27 |
"""Query the subgraph."""
|
| 28 |
omen_subgraph = OMEN_SUBGRAPH_URL.substitute(subgraph_api_key=SUBGRAPH_API_KEY)
|
|
@@ -66,7 +66,7 @@ def _query_omen_xdai_subgraph(
|
|
| 66 |
return all_results
|
| 67 |
|
| 68 |
|
| 69 |
-
def transform_trades(trades_json: dict) -> pd.DataFrame:
|
| 70 |
# convert to dataframe
|
| 71 |
logger.info("transforming trades")
|
| 72 |
df = pd.DataFrame(trades_json["data"]["fpmmTrades"])
|
|
@@ -91,7 +91,7 @@ def transform_trades(trades_json: dict) -> pd.DataFrame:
|
|
| 91 |
|
| 92 |
|
| 93 |
|
| 94 |
-
def compute_votes_distribution(market_trades: pd.DataFrame):
|
| 95 |
"""Function to compute the distribution of votes for the trades of a market"""
|
| 96 |
logger.info("Computing the votes distribution")
|
| 97 |
total_trades = len(market_trades)
|
|
@@ -103,7 +103,24 @@ def compute_votes_distribution(market_trades: pd.DataFrame):
|
|
| 103 |
return (100 - percentage_index_1), percentage_index_1
|
| 104 |
|
| 105 |
|
| 106 |
-
def
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 107 |
"""Function to update only the information related with the current timestamp"""
|
| 108 |
|
| 109 |
logger.info("Adding votes distribution per market")
|
|
@@ -120,33 +137,32 @@ def add_trading_info(fpmms: pd.DataFrame, current_timestamp: int) -> None:
|
|
| 120 |
|
| 121 |
logger.info(f"Adding trades information for the market {market_id}")
|
| 122 |
market_trades_json = _query_omen_xdai_subgraph(
|
| 123 |
-
fpmm_id=market_id,
|
| 124 |
)
|
| 125 |
-
market_trades = transform_trades(market_trades_json)
|
| 126 |
fpmms.at[i,"total_trades"] = len(market_trades)
|
| 127 |
|
| 128 |
if len(market_trades) > 0:
|
| 129 |
# adding average trade size
|
| 130 |
market_trades["collateralAmountUSD"] = market_trades.collateralAmountUSD.apply(lambda x: round(float(x),3))
|
| 131 |
mean_trade_size = market_trades.collateralAmountUSD.mean()
|
| 132 |
-
|
|
|
|
| 133 |
else:
|
| 134 |
logger.info("No trades for this market")
|
| 135 |
mean_trade_size = 0.0
|
|
|
|
| 136 |
first_outcome, second_outcome = 50.0, 50.0
|
| 137 |
fpmms.at[i,"mean_trade_size"] = mean_trade_size
|
|
|
|
| 138 |
logger.info(
|
| 139 |
-
f"first outcome
|
| 140 |
)
|
| 141 |
-
fpmms.at[i,"
|
| 142 |
-
fpmms.at[i,"
|
| 143 |
metric = abs(fpmm["first_token_perc"] - first_outcome)
|
| 144 |
logger.info(f"metric for this market {metric}")
|
| 145 |
fpmms.at[i,"dist_gap_perc"] = metric
|
| 146 |
logger.debug("Dataset after adding trading info")
|
| 147 |
logger.debug(fpmms.head())
|
| 148 |
return
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
if __name__ == "__main__":
|
| 152 |
-
print("collecting votes distribution")
|
|
|
|
| 13 |
from utils import SUBGRAPH_API_KEY, _to_content
|
| 14 |
from queries import omen_trader_votes_query
|
| 15 |
|
| 16 |
+
|
| 17 |
|
| 18 |
headers = {
|
| 19 |
"Accept": "application/json, multipart/mixed",
|
|
|
|
| 22 |
|
| 23 |
|
| 24 |
def _query_omen_xdai_subgraph(
|
| 25 |
+
fpmm_id: str, logger
|
| 26 |
) -> dict[str, Any]:
|
| 27 |
"""Query the subgraph."""
|
| 28 |
omen_subgraph = OMEN_SUBGRAPH_URL.substitute(subgraph_api_key=SUBGRAPH_API_KEY)
|
|
|
|
| 66 |
return all_results
|
| 67 |
|
| 68 |
|
| 69 |
+
def transform_trades(trades_json: dict, logger) -> pd.DataFrame:
|
| 70 |
# convert to dataframe
|
| 71 |
logger.info("transforming trades")
|
| 72 |
df = pd.DataFrame(trades_json["data"]["fpmmTrades"])
|
|
|
|
| 91 |
|
| 92 |
|
| 93 |
|
| 94 |
+
def compute_votes_distribution(market_trades: pd.DataFrame, logger):
|
| 95 |
"""Function to compute the distribution of votes for the trades of a market"""
|
| 96 |
logger.info("Computing the votes distribution")
|
| 97 |
total_trades = len(market_trades)
|
|
|
|
| 103 |
return (100 - percentage_index_1), percentage_index_1
|
| 104 |
|
| 105 |
|
| 106 |
+
def compute_price_weighted_perc(market_trades: pd.DataFrame, logger):
|
| 107 |
+
"""It computes the price weighted distribution with the percentages of each outcome"""
|
| 108 |
+
logger.info("Computing the price weighted distribution")
|
| 109 |
+
# trades for outcome 0
|
| 110 |
+
trades_outcome_0 = market_trades.loc[market_trades["outcomeIndex"]==0]
|
| 111 |
+
logger.debug(f"Total trades for outcome 0 = {len(trades_outcome_0)}")
|
| 112 |
+
# trades for outcome 1
|
| 113 |
+
trades_outcome_1 = market_trades.loc[market_trades["outcomeIndex"]==1]
|
| 114 |
+
logger.debug(f"Total trades for outcome 1 = {len(trades_outcome_1)}")
|
| 115 |
+
total_usd_outcome_0 = sum(trades_outcome_0.collateralAmountUSD)
|
| 116 |
+
total_usd_outcome_1 = sum(trades_outcome_1.collateralAmountUSD)
|
| 117 |
+
total_usd = total_usd_outcome_0 + total_usd_outcome_1
|
| 118 |
+
percentage_pwc_outcome_0 = round((total_usd_outcome_0/total_usd)*100, 2)
|
| 119 |
+
logger.debug(f"total amount for outcome 0 = {total_usd_outcome_0}")
|
| 120 |
+
logger.debug(f"total usd = {total_usd}")
|
| 121 |
+
return percentage_pwc_outcome_0, 100 - percentage_pwc_outcome_0
|
| 122 |
+
|
| 123 |
+
def add_trading_info(fpmms: pd.DataFrame, current_timestamp: int, logger) -> None:
|
| 124 |
"""Function to update only the information related with the current timestamp"""
|
| 125 |
|
| 126 |
logger.info("Adding votes distribution per market")
|
|
|
|
| 137 |
|
| 138 |
logger.info(f"Adding trades information for the market {market_id}")
|
| 139 |
market_trades_json = _query_omen_xdai_subgraph(
|
| 140 |
+
fpmm_id=market_id, logger=logger
|
| 141 |
)
|
| 142 |
+
market_trades = transform_trades(market_trades_json, logger)
|
| 143 |
fpmms.at[i,"total_trades"] = len(market_trades)
|
| 144 |
|
| 145 |
if len(market_trades) > 0:
|
| 146 |
# adding average trade size
|
| 147 |
market_trades["collateralAmountUSD"] = market_trades.collateralAmountUSD.apply(lambda x: round(float(x),3))
|
| 148 |
mean_trade_size = market_trades.collateralAmountUSD.mean()
|
| 149 |
+
total_bet_amount = sum(market_trades.collateralAmountUSD)
|
| 150 |
+
first_outcome, second_outcome = compute_price_weighted_perc(market_trades, logger)
|
| 151 |
else:
|
| 152 |
logger.info("No trades for this market")
|
| 153 |
mean_trade_size = 0.0
|
| 154 |
+
total_bet_amount = 0.0
|
| 155 |
first_outcome, second_outcome = 50.0, 50.0
|
| 156 |
fpmms.at[i,"mean_trade_size"] = mean_trade_size
|
| 157 |
+
fpmms.at[i,"total_bet_amount"] = total_bet_amount
|
| 158 |
logger.info(
|
| 159 |
+
f"first outcome pwc ={first_outcome}, second outcome pwc = {second_outcome}"
|
| 160 |
)
|
| 161 |
+
fpmms.at[i,"price_weighted_first_outcome_perc"] = first_outcome
|
| 162 |
+
fpmms.at[i,"price_weighted_second_outcome_perc"] = second_outcome
|
| 163 |
metric = abs(fpmm["first_token_perc"] - first_outcome)
|
| 164 |
logger.info(f"metric for this market {metric}")
|
| 165 |
fpmms.at[i,"dist_gap_perc"] = metric
|
| 166 |
logger.debug("Dataset after adding trading info")
|
| 167 |
logger.debug(fpmms.head())
|
| 168 |
return
|
|
|
|
|
|
|
|
|
|
|
|
tabs/dist_gap.py
CHANGED
|
@@ -5,6 +5,34 @@ import seaborn as sns
|
|
| 5 |
from seaborn import FacetGrid
|
| 6 |
import plotly.express as px
|
| 7 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
|
| 9 |
def get_top_best_behaviour_markets(markets_data: pd.DataFrame):
|
| 10 |
"""Function to paint the top markets with the lowest metric of distribution gap"""
|
|
@@ -51,6 +79,7 @@ def get_correlation_map(markets_data: pd.DataFrame):
|
|
| 51 |
"dist_gap_perc",
|
| 52 |
"liquidityMeasure",
|
| 53 |
"mean_trade_size",
|
|
|
|
| 54 |
]
|
| 55 |
data = markets_data[columns_of_interest]
|
| 56 |
|
|
|
|
| 5 |
from seaborn import FacetGrid
|
| 6 |
import plotly.express as px
|
| 7 |
|
| 8 |
+
HEIGHT = 600
|
| 9 |
+
WIDTH = 1000
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
def get_dist_gap_time_evolution(market_id: str, all_markets: pd.DataFrame):
|
| 13 |
+
"""Function to paint the evolution in time of the distance gap between the tokens and the price weighted distributions"""
|
| 14 |
+
sns.set_style("darkgrid")
|
| 15 |
+
selected_market = all_markets.loc[all_markets["id"] == market_id]
|
| 16 |
+
selected_market["sample_datetime"] = selected_market["sample_datetime"].astype(str)
|
| 17 |
+
selected_market.columns = selected_market.columns.astype(str)
|
| 18 |
+
|
| 19 |
+
return gr.LinePlot(
|
| 20 |
+
value=selected_market,
|
| 21 |
+
x="sample_datetime",
|
| 22 |
+
y="dist_gap_perc",
|
| 23 |
+
y_title="Distribution gap in %",
|
| 24 |
+
interactive=True,
|
| 25 |
+
show_actions_button=True,
|
| 26 |
+
tooltip=[
|
| 27 |
+
"sample_datetime",
|
| 28 |
+
"dist_gap_perc",
|
| 29 |
+
"total_trades",
|
| 30 |
+
"total_bet_amount",
|
| 31 |
+
],
|
| 32 |
+
height=HEIGHT,
|
| 33 |
+
width=WIDTH,
|
| 34 |
+
)
|
| 35 |
+
|
| 36 |
|
| 37 |
def get_top_best_behaviour_markets(markets_data: pd.DataFrame):
|
| 38 |
"""Function to paint the top markets with the lowest metric of distribution gap"""
|
|
|
|
| 79 |
"dist_gap_perc",
|
| 80 |
"liquidityMeasure",
|
| 81 |
"mean_trade_size",
|
| 82 |
+
"total_bet_amount",
|
| 83 |
]
|
| 84 |
data = markets_data[columns_of_interest]
|
| 85 |
|
tabs/{tokens_votes_dist.py → tokens_pwc_dist.py}
RENAMED
|
File without changes
|