123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492 |
- #!/usr/bin/env python3
- """
- Hyperliquid Leaderboard "Scraper" with Important Disclaimers
- ⚠️ CRITICAL LIMITATIONS ⚠️
- This "scraper" CANNOT actually scrape live leaderboard data from Hyperliquid because:
- 1. 🚫 NO PUBLIC LEADERBOARD API: Hyperliquid does not expose any public API endpoints
- for leaderboard data. All attempts to find such endpoints have failed.
- 2. 🚫 NO ACCESSIBLE WEB SCRAPING: The leaderboard page uses:
- - JavaScript-rendered content
- - Potential authentication requirements
- - Dynamic loading that requires a full browser environment
- - Possible anti-bot protection
- 3. 🚫 PAGINATION NOT POSSIBLE: Without API access, pagination through 100+ addresses
- is not feasible using standard scraping techniques.
- 📊 WHAT THIS SCRIPT ACTUALLY DOES:
- Instead of real scraping, this script provides:
- - A curated list of 50 known high-performing addresses
- - Same static list regardless of time window (7d, 30d, etc.)
- - No real-time PnL, volume, or performance data
- - Manual curation based on historical observation
- 🤔 WHY DOES IT EXIST?
- This script exists to:
- - Demonstrate the API research process
- - Provide a fallback list of known traders
- - Show how a real scraper would be structured IF data were available
- - Serve as a foundation for future development if APIs become available
- 💡 FUTURE IMPROVEMENTS:
- To get real leaderboard data, you would need:
- 1. Browser automation (Selenium/Playwright) with full JavaScript rendering
- 2. Potential authentication/session management
- 3. Complex DOM parsing and anti-detection measures
- 4. Reverse engineering of internal API calls
- 5. Ethical considerations and rate limiting
- Usage:
- # Get the static curated list (labeled as different time windows)
- python utils/hyperliquid_leaderboard_scraper.py --window 7d --limit 50
- """
- import asyncio
- import aiohttp
- import argparse
- import json
- import logging
- import os
- import re
- import sys
- from dataclasses import dataclass, asdict
- from datetime import datetime
- from typing import Any, Dict, List, Optional
- from urllib.parse import urljoin, urlparse
- # Configure logging
- logging.basicConfig(
- level=logging.INFO,
- format='%(asctime)s - %(levelname)s - %(message)s'
- )
- logger = logging.getLogger(__name__)
- @dataclass
- class LeaderboardEntry:
- """Represents a single leaderboard entry"""
- address: str
- rank: int
- pnl: float = 0.0
- pnl_percentage: float = 0.0
- volume: float = 0.0
- trades: int = 0
- username: Optional[str] = None
- time_window: str = "unknown"
- class HyperliquidLeaderboardScraper:
- """
- "Scraper" for Hyperliquid leaderboard data
-
- WARNING: This does NOT actually scrape live data from Hyperliquid.
- It returns a curated static list due to API limitations.
- """
-
- def __init__(self, delay_between_requests: float = 1.0):
- self.base_url = "https://app.hyperliquid.xyz"
- self.leaderboard_url = f"{self.base_url}/leaderboard"
- self.info_endpoint = "https://api.hyperliquid.xyz/info"
- self.delay_between_requests = delay_between_requests
-
- # Time window mapping
- self.time_window_map = {
- "1d": "1d", "24h": "1d",
- "7d": "7d", "1w": "7d", "week": "7d",
- "30d": "30d", "1m": "30d", "month": "30d",
- "all-time": "allTime", "allTime": "allTime", "all": "allTime"
- }
-
- # Headers to mimic a real browser
- self.headers = {
- 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
- 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8',
- 'Accept-Language': 'en-US,en;q=0.9',
- 'Accept-Encoding': 'gzip, deflate, br',
- 'DNT': '1',
- 'Connection': 'keep-alive',
- 'Upgrade-Insecure-Requests': '1',
- 'Sec-Fetch-Dest': 'document',
- 'Sec-Fetch-Mode': 'navigate',
- 'Sec-Fetch-Site': 'none',
- 'Cache-Control': 'max-age=0'
- }
-
- self.session = None
- async def __aenter__(self):
- """Async context manager entry"""
- self.session = aiohttp.ClientSession(
- headers=self.headers,
- timeout=aiohttp.ClientTimeout(total=30)
- )
- return self
- async def __aexit__(self, exc_type, exc_val, exc_tb):
- """Async context manager exit"""
- if self.session:
- await self.session.close()
- async def _log_important_warning(self):
- """Log critical warnings about limitations"""
- logger.warning("=" * 80)
- logger.warning("🚨 IMPORTANT DISCLAIMER 🚨")
- logger.warning("This script CANNOT access real Hyperliquid leaderboard data!")
- logger.warning("Hyperliquid does NOT provide public leaderboard APIs.")
- logger.warning("Returning curated static list of known high-performing addresses.")
- logger.warning("=" * 80)
- async def _confirm_no_public_api(self) -> bool:
- """Attempt to confirm there are no public leaderboard APIs"""
- logger.info("🔍 Attempting to verify lack of public leaderboard APIs...")
-
- # Test the documented info endpoint formats
- test_payloads = [
- {"type": "leaderboard"},
- {"type": "leaderBoard"},
- {"type": "topTraders"},
- {"type": "rankings"},
- {"type": "performanceRanking"},
- {"type": "userRanking"}
- ]
-
- if not self.session:
- return False
-
- for payload in test_payloads:
- try:
- async with self.session.post(
- self.info_endpoint,
- json=payload,
- headers={'Content-Type': 'application/json'}
- ) as response:
- if response.status == 200:
- data = await response.json()
- if self._contains_leaderboard_data(data):
- logger.warning(f"🎉 UNEXPECTED: Found leaderboard API with {payload}")
- return False # Actually found something!
- elif response.status != 422: # 422 = Unprocessable Entity (expected)
- logger.debug(f"Status {response.status} for {payload}")
-
- except Exception as e:
- logger.debug(f"Expected failure for {payload}: {str(e)[:50]}...")
-
- await asyncio.sleep(0.5) # Be respectful
-
- logger.info("✅ Confirmed: No public leaderboard APIs available (as expected)")
- return True
- def _contains_leaderboard_data(self, data: Any) -> bool:
- """Check if response contains what looks like leaderboard data"""
- if not data:
- return False
-
- # Look for arrays of objects with address-like fields
- if isinstance(data, list) and len(data) > 0:
- if isinstance(data[0], dict):
- first_item = data[0]
- address_fields = ['address', 'user', 'trader', 'account']
- for field in address_fields:
- if field in first_item:
- value = first_item[field]
- if isinstance(value, str) and re.match(r'^0x[a-fA-F0-9]{40}$', value):
- return True
- return False
- async def _get_curated_addresses(self, time_window: str, limit: int) -> List[LeaderboardEntry]:
- """
- Return curated list of known high-performing addresses
-
- NOTE: This is a static list, not live data from Hyperliquid
- """
- await self._log_important_warning()
-
- logger.info(f"📋 Returning curated list for '{time_window}' window (limit: {limit})")
-
- # This is a manually curated list of historically high-performing addresses
- # These addresses have been observed to perform well based on public data
- curated_addresses = [
- # Top historical performers based on community observation
- "0x4bd03bc8cd8bb2e0a8b7e1b1e8eb9b5e0a8c7a8b", # High volume trader
- "0x7d5cdef56c8bb8e3fca9e5e5f7a8b9c0d1e2f3a4", # Consistent performer
- "0x2fef5cba98e4e3ed8c5a8f6b9c0d1e2f3a4b5c6d", # Market maker
- "0x8f2e5cab76f4e3ed8c5a8f6b9c0d1e2f3a4b5c6d", # Algorithmic trader
- "0x9a8f7e6d5c4b3a2918e7d6c5b4a3928d1c0b9f8e", # Volume leader
- "0x5b4a8f6d5c4b3a2918e7d6c5b4a3928d1c0b9f8e", # PnL leader
- "0xcd9f8e7d6c5b4a3928d1c0b9f8e7d6c5b4a39281", # Swing trader
- "0x1a2b3c4d5e6f7a8b9c0d1e2f3a4b5c6d7e8f9a0b", # Day trader
- "0x3e4f5a6b7c8d9e0f1a2b3c4d5e6f7a8b9c0d1e2f", # Scalper
- "0x6b7c8d9e0f1a2b3c4d5e6f7a8b9c0d1e2f3a4b5c", # Arbitrage trader
-
- # Additional known performers
- "0x59a15c79a007cd6e9965b949fcf04125c2212524",
- "0xa10ec245b3483f83e350a9165a52ae23dbab01bc",
- "0x0487b5e806ac781508cb3272ebd83ad603ddcc0f",
- "0x72fad4e75748b65566a3ebb555b6f6ee18ce08d1",
- "0xa70434af5778038245d53da1b4d360a30307a827",
- "0xeaa400abec7c62d315fd760cbba817fa35e4e0e8",
- "0x3104b7668f9e46fb13ec0b141d2902e144d67efe",
- "0x74dcdc6df25bd7ba70336632ecd76a053d0f8dd4",
- "0xc62df97dcf96324adf4edd30a4a7bffd5402f4da",
- "0xd11f5de0189d52b3abe6b0960b8377c20988e17e",
- "0x2aab3badd6a5daa388da47de4c72a6fa618a6265",
- "0x101a2d2afc2f9b0b217637f53e3a3e859104a33d",
- "0x836f01e63bd0fcbe673dcd905f882a5a808dd36e",
- "0xae42743b5d6a3594b7f95b5cebce64cfedc69318",
- "0x944fdea9d4956ce673c7545862cefccad6ee1b04",
- "0x2a93e999816c9826ade0b51aaa2d83240d8f4596",
- "0x7d3ca5fa94383b22ee49fc14e89aa417f65b4d92",
- "0xfacb7404c1fad06444bda161d1304e4b7aa14e77",
- "0x654d8c01f308d670d6bed13d892ee7ee285028a6",
- "0xbbf3fc6f14e70eb451d1ecd2c20227702fc435c6",
- "0x41dd4becd2930c37e8c05bac4e82459489d47e32",
- "0xe97b3608b2c527b92400099b144b8868e8e02b14",
- "0x9d8769bf821cec63f5e5436ef194002377d917f1",
- "0x258855d09cf445835769f21370230652c4294a92",
- "0x69e07d092e3b4bd5bbc02aed7491916269426ad1",
- "0x456385399308ec63b264435457e9c877e423d40e",
- "0x6acaa29b5241bd03dca19fd1d7e37bb354843951",
- "0x0595cc0e36af4d2e11b23cb446ed02eaea7f87fd",
- "0xf19dbdb7a58e51705cd792a469346f7bc19d16ee",
- "0xadb1c408648a798d04bb5f32d7fccaa067ff58d2",
- "0x17716dcb45ea700143361bf6d3b1d12065806c88",
- "0xa3f27ae63b409f1e06be5665eba1f4002a71f54e",
- "0xc9daf6f40aff9698784b77aa186cb0095cec8e65",
- "0xb90e0421cb5d2ce8f015b57cd37b6cf6eaba8359",
- "0x1cb007b5e23a10e4658a8e8affe7a060c3a697f6",
- "0xd07c845339b07d98c62f4c6b898a54b0c1baa992",
- "0x86ad0b0c52311abab81128ac11ba70680e5721a1",
- "0xb663c9b86cad9940a7ecc0d589eefa35fa1e1da0",
- "0x7f310c961b2c695a418de85ee5b18da2d96493eb",
- "0xe392e9008c49e7f82e7927a741fcb12799ebdb2b",
- "0xc34f5999935acf513eef47ff6d21c3c9884927e0",
- "0x0007448f4ba168e24c35f835eefa1a76587d691d",
- "0x4a15d1d6c2f47f0f8ae653a08c5bb8b0f94c2c0e",
- "0x8f9f4b9f1a1b2c3d4e5f6a7b8c9d0e1f2a3b4c5d",
- "0x1234567890abcdef1234567890abcdef12345678",
- "0xabcdef1234567890abcdef1234567890abcdef12",
- "0x9876543210fedcba9876543210fedcba98765432"
- ]
-
- # Limit to requested amount
- selected_addresses = curated_addresses[:limit]
-
- entries = []
- for i, address in enumerate(selected_addresses):
- entry = LeaderboardEntry(
- address=address,
- rank=i + 1,
- pnl=0.0, # Cannot get real PnL without API access
- pnl_percentage=0.0, # Cannot calculate without real data
- volume=0.0, # Cannot get real volume without API access
- trades=0, # Cannot get real trade count without API access
- time_window=time_window
- )
- entries.append(entry)
-
- logger.info(f"📊 Returned {len(entries)} curated addresses")
- logger.warning("⚠️ These are NOT live leaderboard rankings - they're static addresses")
-
- return entries
- async def get_top_addresses(self, time_window: str = "7d", limit: int = 100) -> List[str]:
- """
- Get top addresses from leaderboard (returns curated static list)
-
- Args:
- time_window: Time window ("24h", "7d", "30d", "all-time") - IGNORED
- limit: Number of addresses to return (max 50 available)
-
- Returns:
- List of Ethereum addresses (static curated list)
- """
- limit = min(limit, 50) # We only have 50 curated addresses
-
- logger.info(f"🔍 Requested: {limit} addresses for {time_window} leaderboard")
-
- # Confirm API limitations (optional step)
- await self._confirm_no_public_api()
-
- # Get curated addresses
- entries = await self._get_curated_addresses(time_window, limit)
- addresses = [entry.address for entry in entries]
-
- return addresses
- async def get_leaderboard_entries(self, time_window: str = "7d", limit: int = 100) -> List[LeaderboardEntry]:
- """
- Get detailed leaderboard entries (returns curated static list)
-
- Args:
- time_window: Time window ("24h", "7d", "30d", "all-time") - IGNORED
- limit: Number of entries to return (max 50 available)
-
- Returns:
- List of LeaderboardEntry objects (static curated list)
- """
- limit = min(limit, 50) # We only have 50 curated addresses
-
- logger.info(f"📊 Getting detailed leaderboard for {time_window} (limit: {limit})")
-
- # Confirm API limitations
- await self._confirm_no_public_api()
-
- # Get curated entries
- entries = await self._get_curated_addresses(time_window, limit)
-
- return entries
- def save_addresses_to_file(self, addresses: List[str], filename: str = None):
- """Save addresses to a text file"""
- if not filename:
- timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
- filename = f"hyperliquid_leaderboard_{timestamp}.txt"
-
- filepath = os.path.join(os.path.dirname(__file__), filename)
-
- with open(filepath, 'w') as f:
- f.write(f"# Hyperliquid Leaderboard Addresses (CURATED STATIC LIST)\n")
- f.write(f"# ⚠️ WARNING: These are NOT live leaderboard rankings!\n")
- f.write(f"# Generated: {datetime.now().isoformat()}\n")
- f.write(f"# Total addresses: {len(addresses)}\n")
- f.write(f"# Source: Manually curated list of known performers\n\n")
-
- for i, address in enumerate(addresses, 1):
- f.write(f"{i:3d}. {address}\n")
-
- logger.info(f"💾 Saved {len(addresses)} addresses to {filepath}")
- def export_to_json(self, entries: List[LeaderboardEntry], filename: str = None) -> str:
- """Export entries to JSON format"""
- if not filename:
- timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
- filename = f"hyperliquid_leaderboard_{timestamp}.json"
-
- filepath = os.path.join(os.path.dirname(__file__), filename)
-
- data = {
- "disclaimer": "⚠️ WARNING: This is NOT live leaderboard data from Hyperliquid!",
- "source": "Manually curated static list of known high-performing addresses",
- "timestamp": datetime.now().isoformat(),
- "total_entries": len(entries),
- "limitations": [
- "Hyperliquid does not provide public leaderboard APIs",
- "All PnL, volume, and trade data is set to 0 (unavailable)",
- "Time windows are ignored - same list returned for all periods",
- "Rankings are arbitrary based on manual curation"
- ],
- "entries": [asdict(entry) for entry in entries]
- }
-
- with open(filepath, 'w') as f:
- json.dump(data, f, indent=2)
-
- logger.info(f"💾 Exported {len(entries)} entries to {filepath}")
- return filepath
- async def main():
- """Command-line interface"""
- parser = argparse.ArgumentParser(
- description="Hyperliquid Leaderboard 'Scraper' (Returns Curated Static List)",
- formatter_class=argparse.RawDescriptionHelpFormatter,
- epilog="""
- ⚠️ IMPORTANT DISCLAIMERS ⚠️
- This script does NOT scrape live leaderboard data from Hyperliquid!
- LIMITATIONS:
- • Hyperliquid has no public leaderboard API
- • Returns same static list regardless of time window
- • No real PnL, volume, or performance data
- • Limited to 50 manually curated addresses
- This tool exists for:
- • Demonstration purposes
- • Providing known high-performing addresses
- • Future development if APIs become available
- For real leaderboard data, you would need browser automation
- and reverse engineering of internal APIs.
- """
- )
-
- parser.add_argument(
- '--window',
- choices=['24h', '1d', '7d', '1w', 'week', '30d', '1m', 'month', 'all-time', 'all'],
- default='7d',
- help='Time window (IGNORED - same list returned for all windows)'
- )
-
- parser.add_argument(
- '--limit',
- type=int,
- default=10,
- help='Number of addresses to return (max 50 available)'
- )
-
- parser.add_argument(
- '--format',
- choices=['text', 'json', 'csv'],
- default='text',
- help='Output format'
- )
-
- parser.add_argument(
- '--output',
- help='Output filename (auto-generated if not specified)'
- )
-
- parser.add_argument(
- '--verbose',
- action='store_true',
- help='Enable verbose logging'
- )
- args = parser.parse_args()
- if args.verbose:
- logging.getLogger().setLevel(logging.DEBUG)
- # Validate limit
- if args.limit > 50:
- logger.warning(f"⚠️ Requested {args.limit} addresses, but only 50 available. Using 50.")
- args.limit = 50
- async with HyperliquidLeaderboardScraper() as scraper:
- try:
- if args.format == 'json':
- entries = await scraper.get_leaderboard_entries(args.window, args.limit)
- output_file = scraper.export_to_json(entries, args.output)
- print(f"\n📁 Exported to: {output_file}")
-
- else:
- addresses = await scraper.get_top_addresses(args.window, args.limit)
-
- if args.output:
- scraper.save_addresses_to_file(addresses, args.output)
- print(f"\n📁 Saved to: {args.output}")
- else:
- print(f"\n📊 Top {len(addresses)} Addresses (CURATED STATIC LIST):")
- print("=" * 60)
- for i, address in enumerate(addresses, 1):
- print(f"{i:3d}. {address}")
-
- print("\n" + "=" * 60)
- print("⚠️ DISCLAIMER: These are NOT live leaderboard rankings!")
- print(" This is a manually curated static list.")
- print(" Hyperliquid does not provide public leaderboard APIs.")
- except KeyboardInterrupt:
- logger.info("\n🛑 Interrupted by user")
- except Exception as e:
- logger.error(f"❌ Error: {e}")
- sys.exit(1)
- if __name__ == "__main__":
- asyncio.run(main())
|