import sys
import ast

import pandas as pd

# How this was run:
#
# Step 1) Download the full history from MongoDB (a hedged sketch follows
#   below this comment block).
#
# Step 2) Prepare the data: run this Python script, which
#   - filters to apy >= 0 and tvlUsd >= 1000
#   - casts dtype: tvlUsd to integer
#   - rounds the apy columns to 5 decimals
#   - ...
#   - stores 3 outputs: the full hourly history (for yield), the last value of
#     each day (for stat), and the last value overall (for config)
#
# Step 3) Based on the output from Step 2), run the bootstrapUUID.js file,
#   which creates a unique uuid for each unique pool and stores it locally.
#
# Step 4) Create the Postgres tables: run the create scripts starting with
#   config, then the others (order doesn't matter for the rest).
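
# A minimal sketch of Step 1, assuming pymongo is installed; the uri, database,
# and collection names here are placeholders, not the project's actual config.
def download_full_history(uri: str, out_csv: str = "mongo_export.csv") -> None:
    # lazy import so the rest of the script runs without pymongo installed
    from pymongo import MongoClient

    client = MongoClient(uri)
    # drop the _id field so the CSV matches the columns this script expects
    docs = client["defillama"]["yield"].find({}, {"_id": 0})
    pd.DataFrame(list(docs)).to_csv(out_csv, index=False)
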
def replaceFunc(x: str) -> str:
    """Strip stray nulls out of a stringified token array."""
    if x == "[null]":
        return "[]"
    elif x == "[null,null]":
        return "[]"
    elif "null," in x:
        return x.replace("null,", "")
    elif ",null" in x:
        return x.replace(",null", "")
    else:
        return x

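
# Sanity checks of replaceFunc on the array shapes seen in the raw export
# (the token address is illustrative, not taken from the dataset):
assert replaceFunc("[null]") == "[]"
assert replaceFunc('["0xabc",null]') == '["0xabc"]'
assert replaceFunc('[null,"0xabc"]') == '["0xabc"]'
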
def prepare_snapshot(filename: str) -> None:
    df = pd.read_csv(filename)

    # correct None/null values inside the stringified token arrays
    df.loc[df["underlyingTokens"].notnull(), "underlyingTokens"] = df.loc[
        df["underlyingTokens"].notnull(), "underlyingTokens"
    ].apply(lambda x: replaceFunc(x) if "null" in x else x)
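    # e.g. a raw value of '["0xTokenA",null]' becomes '["0xTokenA"]', so the
    # later ast.literal_eval call can parse it (the address is illustrative)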

    # remove rows where all 3 apy fields are null
    df = df[
        ~((df["apy"].isnull()) & (df["apyReward"].isnull()) & (df["apyBase"].isnull()))
    ]

    # keep non-negative apy values only, capped at a sanity bound
    df = df[(df["apy"] >= 0) & (df["apy"] <= 1e6)]
    # keep tvlUsd between boundary values
    df = df[(df["tvlUsd"] >= 1000) & (df["tvlUsd"] <= 2e10)]

    # remove pools and projects on the exclusion list
    exclude_pools = [
        "0xf4bfe9b4ef01f27920e490cea87fe2642a8da18d",
        "DWmAv5wMun4AHxigbwuJygfmXBBe9WofXAtrMCRJExfb",
        "ripae-seth-weth-42161",
        "ripae-peth-weth-42161",
        "0x3eed430cd45c5e2b45aa1adc609cc77c6728d45b",
        "0x3c42B0f384D2912661C940d46cfFE1CD10F1c66F-ethereum",
        "0x165ab553871b1a6b3c706e15b6a7bb29a244b2f3",
    ]
    df = df[~df["pool"].isin(exclude_pools)]
    df = df[df["project"] != "koyo-finance"]

    # cast dtypes and round
    df["tvlUsd"] = df["tvlUsd"].astype(int)
    apy_columns = ["apy", "apyBase", "apyReward"]
    df[apy_columns] = df[apy_columns].round(5)

    # 1. full hourly history (for the yield table)
    df["timestamp"] = pd.to_datetime(df["timestamp"])
    df = df.sort_values(["pool", "timestamp"], ascending=True).reset_index(drop=True)
    f = "yield_snapshot"
    df.to_csv(f"{f}_hourly.csv", index=False)

    # 2. last value of each day per pool (for the stat table)
    df_daily = (
        df.groupby(["pool", pd.Grouper(key="timestamp", freq="1D")])
        .last()
        .reset_index()
    )
    df_daily.to_json(f"{f}_daily.json", orient="records")
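    # For intuition (hypothetical values): if pool "abc" has hourly rows at
    # 2022-01-01 00:00, 12:00, and 23:00, the 1-day Grouper above keeps only
    # the 23:00 row, i.e. the last snapshot of each calendar day.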

    # 3. last value overall per pool (for the config table)
    df_last = (
        df_daily.sort_values(["pool", "timestamp"], ascending=True)
        .groupby("pool")
        .last()
        .reset_index()
    )

    # cast stringified arrays back into Python lists so they serialize as
    # JSON arrays instead of strings
    def to_list(x):
        return ast.literal_eval(x) if isinstance(x, str) else x

    df_last["underlyingTokens"] = df_last["underlyingTokens"].apply(to_list)
    df_last["rewardTokens"] = df_last["rewardTokens"].apply(to_list)
    df_last.to_json(f"{f}_last.json", orient="records")


if __name__ == "__main__":
    prepare_snapshot(sys.argv[1])
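
# Example invocation, where snapshot.csv is the Step 1 export (both file
# names here are placeholders):
#   python prepare_snapshot.py snapshot.csv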