mirror of
https://github.com/uniwhale-io/DefiLlama-yield-server.git
synced 2026-04-30 12:52:06 +08:00
migration (#295)
* migration * remove balancer pools from exclusion list * bug fix serverless * remove timestamp from median, update for tests * update get distinct id controller * update distinctID controller, remove dep * move confirm into scripts * testing url * remove Pg suffix from lambda names * change to existing api host * add comments * revert service name, bucketsg * put pack fantom rpc
This commit is contained in:
@@ -1,21 +1,101 @@
|
||||
import sys
|
||||
import ast
|
||||
|
||||
import pandas as pd
|
||||
|
||||
# the way i ran this:
|
||||
# Step 1) DOWNLOAD FULL HISTORY from mongodb
|
||||
|
||||
def trim(filename: str) -> None:
    """Reduce hourly data to daily data (latest value per pool based on timestamp).

    NOTE(review): the body is empty in this revision, which was a syntax error
    (a function needs at least one statement); the daily reduction actually
    happens in prepare_snapshot. Kept as a no-op for CLI compatibility.
    """
    # note: i didn't manage to run this directly on db server, hence the script
||||
# Step 2) PREPARE DATA
|
||||
# run this python script, which does:
|
||||
# filter apy >= 0, tvlUsd >= 1000
|
||||
# cast dtype: tvlUsd to integer
|
||||
# round apy columns to 5 decimals
|
||||
# ...
|
||||
# stores 3 outputs: the full hourly history (for yield), last value for each day (for stat), last value (for config)
|
||||
|
||||
# Step 3) CREATE UUIDS
|
||||
# based on output from Step 2) run the bootstrapUUID.js file which creates a unique uuid for each unique pool and stores that locally
|
||||
|
||||
# Step 4) CREATE THE POSTGRES TABLES
|
||||
# run the create scripts starting with config, then the others (order doesn't matter for the rest)
|
||||
|
||||
def replaceFunc(x: str) -> str:
|
||||
if x == "[null]":
|
||||
return "[]"
|
||||
elif x == "[null,null]":
|
||||
return "[]"
|
||||
elif "null," in x:
|
||||
return x.replace("null,", "")
|
||||
elif ",null" in x:
|
||||
return x.replace(",null", "")
|
||||
else:
|
||||
return x
|
||||
|
||||
|
||||
def prepare_snapshot(filename: str) -> None:
    """Clean the raw mongodb CSV dump and write three snapshot outputs.

    Outputs: the full hourly history (for the yield table), the last value
    per pool per day (for the stat table), and the latest value per pool
    (for the config table).

    Fixes vs. the original: reads *filename* instead of a garbled
    placeholder; removes a stray paren / duplicated sort line; assigns
    ``df_daily`` as a DataFrame instead of chaining ``.to_json`` (which
    returned ``None`` and broke the later groupby).
    """
    df = pd.read_csv(filename)

    # correct none/null values inside the stringified token arrays
    has_tokens = df["underlyingTokens"].notnull()
    df.loc[has_tokens, "underlyingTokens"] = df.loc[
        has_tokens, "underlyingTokens"
    ].apply(lambda x: replaceFunc(x) if "null" in x else x)

    # remove rows where all 3 apy fields are null
    df = df[
        ~(df["apy"].isnull() & df["apyReward"].isnull() & df["apyBase"].isnull())
    ]

    # keep positive apy values only (upper bound drops broken outliers)
    df = df[(df["apy"] >= 0) & (df["apy"] <= 1e6)]
    # tvl between boundary values
    df = df[(df["tvlUsd"] >= 1000) & (df["tvlUsd"] <= 2e10)]

    # remove pools and projects from the exclusion list
    exclude_pools = [
        "0xf4bfe9b4ef01f27920e490cea87fe2642a8da18d",
        "DWmAv5wMun4AHxigbwuJygfmXBBe9WofXAtrMCRJExfb",
        "ripae-seth-weth-42161",
        "ripae-peth-weth-42161",
        "0x3eed430cd45c5e2b45aa1adc609cc77c6728d45b",
        "0x3c42B0f384D2912661C940d46cfFE1CD10F1c66F-ethereum",
        "0x165ab553871b1a6b3c706e15b6a7bb29a244b2f3",
    ]
    df = df[~df["pool"].isin(exclude_pools)]
    df = df[df["project"] != "koyo-finance"]

    # cast dtypes and round apy columns to 5 decimals
    df["tvlUsd"] = df["tvlUsd"].astype(int)
    apy_columns = ["apy", "apyBase", "apyReward"]
    df[apy_columns] = df[apy_columns].round(5)

    # 1. full hourly history (for the yield table)
    df["timestamp"] = pd.to_datetime(df["timestamp"])
    df = df.sort_values(["pool", "timestamp"]).reset_index(drop=True)
    f = "yield_snapshot"
    df.to_csv(f"{f}_hourly.csv", index=False)

    # 2. last value per pool per day (for the stat table)
    df_daily = (
        df.groupby(["pool", pd.Grouper(key="timestamp", freq="1D")])
        .last()
        .reset_index()
    )
    df_daily.to_json(f"{f}_daily.json", orient="records")

    # 3. latest value per pool (for the config table)
    df_last = (
        df_daily.sort_values(["pool", "timestamp"], ascending=True)
        .groupby("pool")
        .last()
        .reset_index()
    )
    # cast stringified arrays back to python lists (non-strings pass through)
    to_list = lambda x: ast.literal_eval(x) if isinstance(x, str) else x
    df_last["underlyingTokens"] = df_last["underlyingTokens"].apply(to_list)
    df_last["rewardTokens"] = df_last["rewardTokens"].apply(to_list)
    df_last.to_json(f"{f}_last.json", orient="records")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
trim(sys.argv[1])
|
||||
prepare_snapshot(sys.argv[1])
|
||||
|
||||
Reference in New Issue
Block a user