"""Validation helpers for normalized ad rows and for whole ad snapshots."""

import logging
from datetime import datetime, timezone

import numpy as np

logger = logging.getLogger(__name__)

# Per-side ad counts outside this inclusive range only trigger a warning.
_EXPECTED_AD_COUNT_MIN = 20
_EXPECTED_AD_COUNT_MAX = 200

# Ads created more than this many days before the fetch time count as stale.
_STALE_AFTER_DAYS = 7.0
_SECONDS_PER_DAY = 24 * 3600


def validate_row(row: dict, config: dict, seen_adv_nos: set) -> bool:
    """Validate a single normalized ad row.

    Checks are applied in order and the first failing one rejects the row:

    1. ``adv_no`` is missing or empty.
    2. ``adv_no`` was already accepted earlier in the same snapshot.
    3. ``price`` is ``None`` or not positive.
    4. ``price`` is outside ``[price_min, price_max]`` (NaN is rejected too).
    5. ``surplus_amount`` is ``None`` or not positive (only when
       ``reject_zero_surplus`` is enabled).
    6. ``month_finish_rate`` is exactly 0.0 while ``month_order_count`` is
       positive (only when ``reject_zero_finish_rate`` is enabled).

    Args:
        row: The ad row; read via the keys ``adv_no``, ``price``,
            ``surplus_amount``, ``month_finish_rate``, ``month_order_count``.
        config: Configuration mapping. Thresholds are read from its optional
            ``"validation"`` sub-mapping (``price_min`` default 1.0,
            ``price_max`` default 500.0, ``reject_zero_finish_rate`` and
            ``reject_zero_surplus`` both default ``True``).
        seen_adv_nos: Set of ``adv_no`` values already accepted in this
            snapshot. It is mutated: the row's ``adv_no`` is added when, and
            only when, the row is accepted.

    Returns:
        ``True`` if the row is valid, ``False`` if it should be rejected.
    """
    val_config = config.get("validation", {})
    price_min = val_config.get("price_min", 1.0)
    price_max = val_config.get("price_max", 500.0)
    reject_zero_finish = val_config.get("reject_zero_finish_rate", True)
    reject_zero_surplus = val_config.get("reject_zero_surplus", True)

    # 1. Empty adv_no
    adv_no = row.get("adv_no")
    if not adv_no:
        logger.error("Rejecting ad: Missing adv_no.")
        return False

    # 2. Duplicate adv_no within same snapshot
    if adv_no in seen_adv_nos:
        logger.warning(f"Rejecting ad {adv_no}: Duplicate within the same snapshot.")
        return False

    # 3. Price is None or <= 0
    price = row.get("price")
    if price is None or price <= 0:
        logger.warning(f"Rejecting ad {adv_no}: Price is None or <= 0 ({price}).")
        return False

    # 4. Price outside expected range. The chained comparison is negated
    # (instead of testing `<` / `>` separately) so that NaN, for which every
    # comparison is False, is rejected rather than silently accepted.
    if not (price_min <= price <= price_max):
        logger.warning(
            f"Rejecting ad {adv_no}: Price {price} is outside configured range [{price_min}, {price_max}]."
        )
        return False

    # 5. Surplus amount None or <= 0 (`not surplus > 0` also catches NaN)
    surplus = row.get("surplus_amount")
    if reject_zero_surplus and (surplus is None or not surplus > 0):
        logger.debug(f"Rejecting ad {adv_no}: Surplus amount is None or <= 0 ({surplus}).")
        return False

    # 6. Suspicious advertiser stats: finish rate is 0.0 although orders exist.
    # A missing order count cannot be judged suspicious, so it must not be
    # compared (None > 0 raises TypeError).
    finish_rate = row.get("month_finish_rate")
    order_count = row.get("month_order_count")
    if (
        reject_zero_finish
        and finish_rate == 0.0
        and order_count is not None
        and order_count > 0
    ):
        logger.warning(
            f"Rejecting ad {adv_no}: Advertiser finished 0.0% of {order_count} orders."
        )
        return False

    seen_adv_nos.add(adv_no)
    return True


def _price_stats(prices: list) -> tuple:
    """Return ``(min, max, median)`` of *prices*, or all 0.0 when empty."""
    if not prices:
        return 0.0, 0.0, 0.0
    return min(prices), max(prices), float(np.median(prices))


def _count_stale_ads(flat_ads: list, fetched_at: datetime) -> int:
    """Count ads whose ``ad_created_at`` is more than 7 days before *fetched_at*."""
    stale_count = 0
    for ad in flat_ads:
        created_at = ad.get("ad_created_at")
        if not created_at:
            # Ads without a creation timestamp are not considered stale.
            continue
        age_days = (fetched_at - created_at).total_seconds() / _SECONDS_PER_DAY
        if age_days > _STALE_AFTER_DAYS:
            stale_count += 1
    return stale_count


def validate_snapshot(flat_ads: list, fetched_at: datetime) -> dict:
    """Validate all normalized and filtered ads of a single snapshot.

    Only an entirely empty snapshot is treated as critical; unusual ad
    counts, BUY/SELL price overlap and stale ads are logged as warnings.

    Args:
        flat_ads: Ad dicts. Each ad is read via ``trade_type`` (``"BUY"`` or
            ``"SELL"``; other values are ignored for the price statistics),
            ``price`` (required for BUY/SELL ads), and the optional
            ``ad_created_at`` and ``payment_methods`` keys.
        fetched_at: Time the snapshot was fetched. It is subtracted from each
            ad's ``ad_created_at``, so both must be consistently naive or
            consistently timezone-aware (Python raises ``TypeError`` when
            the two kinds are mixed).

    Returns:
        Summary statistics with the keys ``buy_count``, ``sell_count``,
        ``buy_min``, ``buy_max``, ``sell_min``, ``sell_max``, ``buy_median``,
        ``sell_median``, ``spread``, ``stale_count`` and ``methods`` (sorted
        unique payment methods). Price statistics of a side without ads, and
        the spread when either side is missing, are reported as 0.0.

    Raises:
        ValueError: If the snapshot contains neither BUY nor SELL ads.
    """
    buy_ads = [ad for ad in flat_ads if ad.get("trade_type") == "BUY"]
    sell_ads = [ad for ad in flat_ads if ad.get("trade_type") == "SELL"]

    # 1. Empty snapshot validation
    if not buy_ads and not sell_ads:
        raise ValueError("CRITICAL: Empty snapshot! Both BUY and SELL ad counts are 0.")

    buy_count = len(buy_ads)
    sell_count = len(sell_ads)

    # Warnings for low/high counts
    if not (_EXPECTED_AD_COUNT_MIN <= buy_count <= _EXPECTED_AD_COUNT_MAX):
        logger.warning(f"Unusual BUY ad count: {buy_count} (expected 20-200).")
    if not (_EXPECTED_AD_COUNT_MIN <= sell_count <= _EXPECTED_AD_COUNT_MAX):
        logger.warning(f"Unusual SELL ad count: {sell_count} (expected 20-200).")

    buy_prices = [ad["price"] for ad in buy_ads]
    sell_prices = [ad["price"] for ad in sell_ads]
    buy_min, buy_max, buy_median = _price_stats(buy_prices)
    sell_min, sell_max, sell_median = _price_stats(sell_prices)

    # Trade types are from the advertiser's point of view: a BUY advertiser
    # buys USDT (and wants to pay as little as possible), a SELL advertiser
    # sells USDT (and wants to receive as much as possible). The cheapest
    # SELL price is therefore expected to sit above the highest BUY price:
    #     spread = SELL_min - BUY_max
    # A negative spread means the two sides of the book overlap.
    both_sides_present = bool(sell_prices and buy_prices)
    spread = sell_min - buy_max if both_sides_present else 0.0
    if both_sides_present and buy_max > sell_min:
        logger.warning(
            f"BUY/SELL price overlap detected! Max BUY price ({buy_max:.2f}) > Min SELL price ({sell_min:.2f})."
        )

    # Check for stale ads (created > 7 days before the fetch time)
    stale_count = _count_stale_ads(flat_ads, fetched_at)
    if stale_count > 0:
        logger.warning(f"Stale ads detected: {stale_count} ads were created > 7 days ago.")

    # Unique payment methods. `or ()` also covers an explicit None value,
    # which `.get(key, [])` would pass straight through to `set.update`.
    all_methods = set()
    for ad in flat_ads:
        all_methods.update(ad.get("payment_methods") or ())

    return {
        "buy_count": buy_count,
        "sell_count": sell_count,
        "buy_min": buy_min,
        "buy_max": buy_max,
        "sell_min": sell_min,
        "sell_max": sell_max,
        "buy_median": buy_median,
        "sell_median": sell_median,
        "spread": spread,
        "stale_count": stale_count,
        "methods": sorted(all_methods),
    }