binance-p2p-market-history/p2p-collector/validator.py
Gabriel Ramos 2c41a7a6b3 feat: implement binance p2p collector daemon
Set up continuous P2P VES/USDT market history data collection, normalization, validation, and date-partitioned Parquet storage.
2026-06-05 14:40:05 -04:00

147 lines
5.7 KiB
Python

import logging
from datetime import datetime, timezone
import numpy as np
logger = logging.getLogger(__name__)
def validate_row(row: dict, config: dict, seen_adv_nos: set) -> bool:
    """
    Validates a single normalized ad row.

    Checks run in order; the first one that fails rejects the row:

    1. ``adv_no`` is missing or empty.
    2. ``adv_no`` is already in ``seen_adv_nos`` (duplicate in this snapshot).
    3. ``price`` is None, NaN, or <= 0.
    4. ``price`` is outside the configured ``[price_min, price_max]`` range.
    5. ``surplus_amount`` is None, NaN, or <= 0
       (only when ``reject_zero_surplus`` is enabled).
    6. ``month_finish_rate`` is 0.0 while ``month_order_count`` is > 0
       (only when ``reject_zero_finish_rate`` is enabled).

    Args:
        row: Normalized ad. Keys read: ``adv_no``, ``price``,
            ``surplus_amount``, ``month_finish_rate``, ``month_order_count``.
            Numeric fields are assumed to already be numbers (not strings).
        config: Collector configuration. The optional ``"validation"``
            section may define ``price_min`` (default 1.0), ``price_max``
            (default 500.0), ``reject_zero_finish_rate`` (default True) and
            ``reject_zero_surplus`` (default True).
        seen_adv_nos: ``adv_no`` values already accepted for this snapshot.
            Mutated in place: the ``adv_no`` of an accepted row is added.

    Returns:
        True if the row is valid, or False if it should be rejected.
    """
    # `or {}` also covers a "validation" key that is present but empty/None.
    val_config = config.get("validation") or {}
    price_min = val_config.get("price_min", 1.0)
    price_max = val_config.get("price_max", 500.0)
    reject_zero_finish = val_config.get("reject_zero_finish_rate", True)
    reject_zero_surplus = val_config.get("reject_zero_surplus", True)

    # 1. Empty adv_no
    adv_no = row.get("adv_no")
    if not adv_no:
        logger.error("Rejecting ad: Missing adv_no.")
        return False

    # 2. Duplicate adv_no within same snapshot
    if adv_no in seen_adv_nos:
        logger.warning(f"Rejecting ad {adv_no}: Duplicate within the same snapshot.")
        return False

    # 3. Price is None or <= 0.
    # `not price > 0` is used instead of `price <= 0` so that NaN (for which
    # every ordering comparison is False) is rejected instead of slipping by.
    price = row.get("price")
    if price is None or not price > 0:
        logger.warning(f"Rejecting ad {adv_no}: Price is None or <= 0 ({price}).")
        return False

    # 4. Price outside expected range
    if price < price_min or price > price_max:
        logger.warning(
            f"Rejecting ad {adv_no}: Price {price} is outside configured range [{price_min}, {price_max}]."
        )
        return False

    # 5. Surplus amount None or <= 0 (NaN-safe, same reasoning as the price).
    surplus = row.get("surplus_amount")
    if reject_zero_surplus and (surplus is None or not surplus > 0):
        logger.debug(f"Rejecting ad {adv_no}: Surplus amount is None or <= 0 ({surplus}).")
        return False

    # 6. Suspicious advertiser stats: monthFinishRate is 0.0 and monthOrderCount > 0.
    # A missing order count is not evidence of anything suspicious, so it must
    # neither reject the row nor crash the comparison.
    finish_rate = row.get("month_finish_rate")
    order_count = row.get("month_order_count")
    if (
        reject_zero_finish
        and finish_rate == 0.0
        and order_count is not None
        and order_count > 0
    ):
        logger.warning(
            f"Rejecting ad {adv_no}: Advertiser finished 0.0% of {order_count} orders."
        )
        return False

    seen_adv_nos.add(adv_no)
    return True
def validate_snapshot(flat_ads: list, fetched_at: datetime) -> dict:
    """
    Validates a list of all normalized and filtered ads in a single snapshot.

    Besides the hard failure on an empty snapshot, this only emits warnings
    (unusual ad counts, BUY/SELL price overlap, stale ads); it never drops ads.

    Args:
        flat_ads: Ad dicts. Keys read: ``trade_type`` ("BUY" / "SELL"),
            ``price`` (required on every BUY/SELL ad), ``ad_created_at``
            (optional) and ``payment_methods`` (optional iterable of names).
        fetched_at: Time the snapshot was fetched; used as the reference
            point for ad age.
            NOTE(review): ``fetched_at - ad_created_at`` needs both values to
            be datetimes that are either both naive or both timezone-aware --
            confirm against the normalizer.

    Returns:
        A dictionary of summary statistics for logging/checking:
        ``buy_count``, ``sell_count``, ``buy_min``, ``buy_max``, ``sell_min``,
        ``sell_max``, ``buy_median``, ``sell_median``, ``spread``,
        ``stale_count`` and ``methods`` (sorted unique payment methods).
        Price statistics for a side with no ads are 0.0, and ``spread`` is 0.0
        unless both sides have ads.

    Raises:
        ValueError: On a completely empty snapshot (no BUY and no SELL ads).
    """
    buy_ads = [ad for ad in flat_ads if ad.get("trade_type") == "BUY"]
    sell_ads = [ad for ad in flat_ads if ad.get("trade_type") == "SELL"]

    # 1. Empty snapshot validation
    if not buy_ads and not sell_ads:
        raise ValueError("CRITICAL: Empty snapshot! Both BUY and SELL ad counts are 0.")

    buy_count = len(buy_ads)
    sell_count = len(sell_ads)

    # Warnings for low/high counts
    if buy_count < 20 or buy_count > 200:
        logger.warning(f"Unusual BUY ad count: {buy_count} (expected 20-200).")
    if sell_count < 20 or sell_count > 200:
        logger.warning(f"Unusual SELL ad count: {sell_count} (expected 20-200).")

    # Extract prices
    buy_prices = [ad["price"] for ad in buy_ads]
    sell_prices = [ad["price"] for ad in sell_ads]

    buy_min = min(buy_prices) if buy_prices else 0.0
    buy_max = max(buy_prices) if buy_prices else 0.0
    sell_min = min(sell_prices) if sell_prices else 0.0
    sell_max = max(sell_prices) if sell_prices else 0.0

    # Calculate medians
    buy_median = float(np.median(buy_prices)) if buy_prices else 0.0
    sell_median = float(np.median(sell_prices)) if sell_prices else 0.0

    # Spread, as defined by the spec: SELL_min - BUY_max.
    both_sides = bool(sell_prices and buy_prices)
    spread = sell_min - buy_max if both_sides else 0.0

    # Trade types are from the advertiser's point of view:
    #   BUY  = the advertiser buys USDT from you and pays VES, so they want
    #          to pay as little VES as possible.
    #   SELL = the advertiser sells USDT to you for VES, so they want to
    #          receive as much VES as possible.
    # Every SELL price is therefore expected to sit above every BUY price;
    # BUY_max > SELL_min means the two books overlap (negative spread).
    if both_sides and buy_max > sell_min:
        logger.warning(
            f"BUY/SELL price overlap detected! Max BUY price ({buy_max:.2f}) > Min SELL price ({sell_min:.2f})."
        )

    # Check for stale ads (createTime > 7 days old)
    stale_count = 0
    for ad in flat_ads:
        created_at = ad.get("ad_created_at")
        if created_at:
            age_days = (fetched_at - created_at).total_seconds() / (24 * 3600)
            if age_days > 7.0:
                stale_count += 1
    if stale_count > 0:
        logger.warning(f"Stale ads detected: {stale_count} ads were created > 7 days ago.")

    # Get unique payment methods.
    # `or ()` covers ads where the key exists but holds None; a plain
    # `.get(key, [])` default would hand None to set.update() and raise.
    all_methods = set()
    for ad in flat_ads:
        all_methods.update(ad.get("payment_methods") or ())

    summary = {
        "buy_count": buy_count,
        "sell_count": sell_count,
        "buy_min": buy_min,
        "buy_max": buy_max,
        "sell_min": sell_min,
        "sell_max": sell_max,
        "buy_median": buy_median,
        "sell_median": sell_median,
        "spread": spread,
        "stale_count": stale_count,
        "methods": sorted(all_methods),
    }
    return summary