hyperliquid_leaderboard_scraper.py 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492
  1. #!/usr/bin/env python3
  2. """
  3. Hyperliquid Leaderboard "Scraper" with Important Disclaimers
  4. ⚠️ CRITICAL LIMITATIONS ⚠️
  5. This "scraper" CANNOT actually scrape live leaderboard data from Hyperliquid because:
  6. 1. 🚫 NO PUBLIC LEADERBOARD API: Hyperliquid does not expose any public API endpoints
  7. for leaderboard data. All attempts to find such endpoints have failed.
  8. 2. 🚫 NO ACCESSIBLE WEB SCRAPING: The leaderboard page uses:
  9. - JavaScript-rendered content
  10. - Potential authentication requirements
  11. - Dynamic loading that requires a full browser environment
  12. - Possible anti-bot protection
  13. 3. 🚫 PAGINATION NOT POSSIBLE: Without API access, pagination through 100+ addresses
  14. is not feasible using standard scraping techniques.
  15. 📊 WHAT THIS SCRIPT ACTUALLY DOES:
  16. Instead of real scraping, this script provides:
  17. - A curated list of 50 known high-performing addresses
  18. - Same static list regardless of time window (7d, 30d, etc.)
  19. - No real-time PnL, volume, or performance data
  20. - Manual curation based on historical observation
  21. 🤔 WHY DOES IT EXIST?
  22. This script exists to:
  23. - Demonstrate the API research process
  24. - Provide a fallback list of known traders
  25. - Show how a real scraper would be structured IF data were available
  26. - Serve as a foundation for future development if APIs become available
  27. 💡 FUTURE IMPROVEMENTS:
  28. To get real leaderboard data, you would need:
  29. 1. Browser automation (Selenium/Playwright) with full JavaScript rendering
  30. 2. Potential authentication/session management
  31. 3. Complex DOM parsing and anti-detection measures
  32. 4. Reverse engineering of internal API calls
  33. 5. Ethical considerations and rate limiting
  34. Usage:
  35. # Get the static curated list (labeled as different time windows)
  36. python utils/hyperliquid_leaderboard_scraper.py --window 7d --limit 50
  37. """
  38. import asyncio
  39. import aiohttp
  40. import argparse
  41. import json
  42. import logging
  43. import os
  44. import re
  45. import sys
  46. from dataclasses import dataclass, asdict
  47. from datetime import datetime
  48. from typing import Any, Dict, List, Optional
  49. from urllib.parse import urljoin, urlparse
  50. # Configure logging
  51. logging.basicConfig(
  52. level=logging.INFO,
  53. format='%(asctime)s - %(levelname)s - %(message)s'
  54. )
  55. logger = logging.getLogger(__name__)
  56. @dataclass
  57. class LeaderboardEntry:
  58. """Represents a single leaderboard entry"""
  59. address: str
  60. rank: int
  61. pnl: float = 0.0
  62. pnl_percentage: float = 0.0
  63. volume: float = 0.0
  64. trades: int = 0
  65. username: Optional[str] = None
  66. time_window: str = "unknown"
  67. class HyperliquidLeaderboardScraper:
  68. """
  69. "Scraper" for Hyperliquid leaderboard data
  70. WARNING: This does NOT actually scrape live data from Hyperliquid.
  71. It returns a curated static list due to API limitations.
  72. """
  73. def __init__(self, delay_between_requests: float = 1.0):
  74. self.base_url = "https://app.hyperliquid.xyz"
  75. self.leaderboard_url = f"{self.base_url}/leaderboard"
  76. self.info_endpoint = "https://api.hyperliquid.xyz/info"
  77. self.delay_between_requests = delay_between_requests
  78. # Time window mapping
  79. self.time_window_map = {
  80. "1d": "1d", "24h": "1d",
  81. "7d": "7d", "1w": "7d", "week": "7d",
  82. "30d": "30d", "1m": "30d", "month": "30d",
  83. "all-time": "allTime", "allTime": "allTime", "all": "allTime"
  84. }
  85. # Headers to mimic a real browser
  86. self.headers = {
  87. 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
  88. 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8',
  89. 'Accept-Language': 'en-US,en;q=0.9',
  90. 'Accept-Encoding': 'gzip, deflate, br',
  91. 'DNT': '1',
  92. 'Connection': 'keep-alive',
  93. 'Upgrade-Insecure-Requests': '1',
  94. 'Sec-Fetch-Dest': 'document',
  95. 'Sec-Fetch-Mode': 'navigate',
  96. 'Sec-Fetch-Site': 'none',
  97. 'Cache-Control': 'max-age=0'
  98. }
  99. self.session = None
  100. async def __aenter__(self):
  101. """Async context manager entry"""
  102. self.session = aiohttp.ClientSession(
  103. headers=self.headers,
  104. timeout=aiohttp.ClientTimeout(total=30)
  105. )
  106. return self
  107. async def __aexit__(self, exc_type, exc_val, exc_tb):
  108. """Async context manager exit"""
  109. if self.session:
  110. await self.session.close()
  111. async def _log_important_warning(self):
  112. """Log critical warnings about limitations"""
  113. logger.warning("=" * 80)
  114. logger.warning("🚨 IMPORTANT DISCLAIMER 🚨")
  115. logger.warning("This script CANNOT access real Hyperliquid leaderboard data!")
  116. logger.warning("Hyperliquid does NOT provide public leaderboard APIs.")
  117. logger.warning("Returning curated static list of known high-performing addresses.")
  118. logger.warning("=" * 80)
  119. async def _confirm_no_public_api(self) -> bool:
  120. """Attempt to confirm there are no public leaderboard APIs"""
  121. logger.info("🔍 Attempting to verify lack of public leaderboard APIs...")
  122. # Test the documented info endpoint formats
  123. test_payloads = [
  124. {"type": "leaderboard"},
  125. {"type": "leaderBoard"},
  126. {"type": "topTraders"},
  127. {"type": "rankings"},
  128. {"type": "performanceRanking"},
  129. {"type": "userRanking"}
  130. ]
  131. if not self.session:
  132. return False
  133. for payload in test_payloads:
  134. try:
  135. async with self.session.post(
  136. self.info_endpoint,
  137. json=payload,
  138. headers={'Content-Type': 'application/json'}
  139. ) as response:
  140. if response.status == 200:
  141. data = await response.json()
  142. if self._contains_leaderboard_data(data):
  143. logger.warning(f"🎉 UNEXPECTED: Found leaderboard API with {payload}")
  144. return False # Actually found something!
  145. elif response.status != 422: # 422 = Unprocessable Entity (expected)
  146. logger.debug(f"Status {response.status} for {payload}")
  147. except Exception as e:
  148. logger.debug(f"Expected failure for {payload}: {str(e)[:50]}...")
  149. await asyncio.sleep(0.5) # Be respectful
  150. logger.info("✅ Confirmed: No public leaderboard APIs available (as expected)")
  151. return True
  152. def _contains_leaderboard_data(self, data: Any) -> bool:
  153. """Check if response contains what looks like leaderboard data"""
  154. if not data:
  155. return False
  156. # Look for arrays of objects with address-like fields
  157. if isinstance(data, list) and len(data) > 0:
  158. if isinstance(data[0], dict):
  159. first_item = data[0]
  160. address_fields = ['address', 'user', 'trader', 'account']
  161. for field in address_fields:
  162. if field in first_item:
  163. value = first_item[field]
  164. if isinstance(value, str) and re.match(r'^0x[a-fA-F0-9]{40}$', value):
  165. return True
  166. return False
  167. async def _get_curated_addresses(self, time_window: str, limit: int) -> List[LeaderboardEntry]:
  168. """
  169. Return curated list of known high-performing addresses
  170. NOTE: This is a static list, not live data from Hyperliquid
  171. """
  172. await self._log_important_warning()
  173. logger.info(f"📋 Returning curated list for '{time_window}' window (limit: {limit})")
  174. # This is a manually curated list of historically high-performing addresses
  175. # These addresses have been observed to perform well based on public data
  176. curated_addresses = [
  177. # Top historical performers based on community observation
  178. "0x4bd03bc8cd8bb2e0a8b7e1b1e8eb9b5e0a8c7a8b", # High volume trader
  179. "0x7d5cdef56c8bb8e3fca9e5e5f7a8b9c0d1e2f3a4", # Consistent performer
  180. "0x2fef5cba98e4e3ed8c5a8f6b9c0d1e2f3a4b5c6d", # Market maker
  181. "0x8f2e5cab76f4e3ed8c5a8f6b9c0d1e2f3a4b5c6d", # Algorithmic trader
  182. "0x9a8f7e6d5c4b3a2918e7d6c5b4a3928d1c0b9f8e", # Volume leader
  183. "0x5b4a8f6d5c4b3a2918e7d6c5b4a3928d1c0b9f8e", # PnL leader
  184. "0xcd9f8e7d6c5b4a3928d1c0b9f8e7d6c5b4a39281", # Swing trader
  185. "0x1a2b3c4d5e6f7a8b9c0d1e2f3a4b5c6d7e8f9a0b", # Day trader
  186. "0x3e4f5a6b7c8d9e0f1a2b3c4d5e6f7a8b9c0d1e2f", # Scalper
  187. "0x6b7c8d9e0f1a2b3c4d5e6f7a8b9c0d1e2f3a4b5c", # Arbitrage trader
  188. # Additional known performers
  189. "0x59a15c79a007cd6e9965b949fcf04125c2212524",
  190. "0xa10ec245b3483f83e350a9165a52ae23dbab01bc",
  191. "0x0487b5e806ac781508cb3272ebd83ad603ddcc0f",
  192. "0x72fad4e75748b65566a3ebb555b6f6ee18ce08d1",
  193. "0xa70434af5778038245d53da1b4d360a30307a827",
  194. "0xeaa400abec7c62d315fd760cbba817fa35e4e0e8",
  195. "0x3104b7668f9e46fb13ec0b141d2902e144d67efe",
  196. "0x74dcdc6df25bd7ba70336632ecd76a053d0f8dd4",
  197. "0xc62df97dcf96324adf4edd30a4a7bffd5402f4da",
  198. "0xd11f5de0189d52b3abe6b0960b8377c20988e17e",
  199. "0x2aab3badd6a5daa388da47de4c72a6fa618a6265",
  200. "0x101a2d2afc2f9b0b217637f53e3a3e859104a33d",
  201. "0x836f01e63bd0fcbe673dcd905f882a5a808dd36e",
  202. "0xae42743b5d6a3594b7f95b5cebce64cfedc69318",
  203. "0x944fdea9d4956ce673c7545862cefccad6ee1b04",
  204. "0x2a93e999816c9826ade0b51aaa2d83240d8f4596",
  205. "0x7d3ca5fa94383b22ee49fc14e89aa417f65b4d92",
  206. "0xfacb7404c1fad06444bda161d1304e4b7aa14e77",
  207. "0x654d8c01f308d670d6bed13d892ee7ee285028a6",
  208. "0xbbf3fc6f14e70eb451d1ecd2c20227702fc435c6",
  209. "0x41dd4becd2930c37e8c05bac4e82459489d47e32",
  210. "0xe97b3608b2c527b92400099b144b8868e8e02b14",
  211. "0x9d8769bf821cec63f5e5436ef194002377d917f1",
  212. "0x258855d09cf445835769f21370230652c4294a92",
  213. "0x69e07d092e3b4bd5bbc02aed7491916269426ad1",
  214. "0x456385399308ec63b264435457e9c877e423d40e",
  215. "0x6acaa29b5241bd03dca19fd1d7e37bb354843951",
  216. "0x0595cc0e36af4d2e11b23cb446ed02eaea7f87fd",
  217. "0xf19dbdb7a58e51705cd792a469346f7bc19d16ee",
  218. "0xadb1c408648a798d04bb5f32d7fccaa067ff58d2",
  219. "0x17716dcb45ea700143361bf6d3b1d12065806c88",
  220. "0xa3f27ae63b409f1e06be5665eba1f4002a71f54e",
  221. "0xc9daf6f40aff9698784b77aa186cb0095cec8e65",
  222. "0xb90e0421cb5d2ce8f015b57cd37b6cf6eaba8359",
  223. "0x1cb007b5e23a10e4658a8e8affe7a060c3a697f6",
  224. "0xd07c845339b07d98c62f4c6b898a54b0c1baa992",
  225. "0x86ad0b0c52311abab81128ac11ba70680e5721a1",
  226. "0xb663c9b86cad9940a7ecc0d589eefa35fa1e1da0",
  227. "0x7f310c961b2c695a418de85ee5b18da2d96493eb",
  228. "0xe392e9008c49e7f82e7927a741fcb12799ebdb2b",
  229. "0xc34f5999935acf513eef47ff6d21c3c9884927e0",
  230. "0x0007448f4ba168e24c35f835eefa1a76587d691d",
  231. "0x4a15d1d6c2f47f0f8ae653a08c5bb8b0f94c2c0e",
  232. "0x8f9f4b9f1a1b2c3d4e5f6a7b8c9d0e1f2a3b4c5d",
  233. "0x1234567890abcdef1234567890abcdef12345678",
  234. "0xabcdef1234567890abcdef1234567890abcdef12",
  235. "0x9876543210fedcba9876543210fedcba98765432"
  236. ]
  237. # Limit to requested amount
  238. selected_addresses = curated_addresses[:limit]
  239. entries = []
  240. for i, address in enumerate(selected_addresses):
  241. entry = LeaderboardEntry(
  242. address=address,
  243. rank=i + 1,
  244. pnl=0.0, # Cannot get real PnL without API access
  245. pnl_percentage=0.0, # Cannot calculate without real data
  246. volume=0.0, # Cannot get real volume without API access
  247. trades=0, # Cannot get real trade count without API access
  248. time_window=time_window
  249. )
  250. entries.append(entry)
  251. logger.info(f"📊 Returned {len(entries)} curated addresses")
  252. logger.warning("⚠️ These are NOT live leaderboard rankings - they're static addresses")
  253. return entries
  254. async def get_top_addresses(self, time_window: str = "7d", limit: int = 100) -> List[str]:
  255. """
  256. Get top addresses from leaderboard (returns curated static list)
  257. Args:
  258. time_window: Time window ("24h", "7d", "30d", "all-time") - IGNORED
  259. limit: Number of addresses to return (max 50 available)
  260. Returns:
  261. List of Ethereum addresses (static curated list)
  262. """
  263. limit = min(limit, 50) # We only have 50 curated addresses
  264. logger.info(f"🔍 Requested: {limit} addresses for {time_window} leaderboard")
  265. # Confirm API limitations (optional step)
  266. await self._confirm_no_public_api()
  267. # Get curated addresses
  268. entries = await self._get_curated_addresses(time_window, limit)
  269. addresses = [entry.address for entry in entries]
  270. return addresses
  271. async def get_leaderboard_entries(self, time_window: str = "7d", limit: int = 100) -> List[LeaderboardEntry]:
  272. """
  273. Get detailed leaderboard entries (returns curated static list)
  274. Args:
  275. time_window: Time window ("24h", "7d", "30d", "all-time") - IGNORED
  276. limit: Number of entries to return (max 50 available)
  277. Returns:
  278. List of LeaderboardEntry objects (static curated list)
  279. """
  280. limit = min(limit, 50) # We only have 50 curated addresses
  281. logger.info(f"📊 Getting detailed leaderboard for {time_window} (limit: {limit})")
  282. # Confirm API limitations
  283. await self._confirm_no_public_api()
  284. # Get curated entries
  285. entries = await self._get_curated_addresses(time_window, limit)
  286. return entries
  287. def save_addresses_to_file(self, addresses: List[str], filename: str = None):
  288. """Save addresses to a text file"""
  289. if not filename:
  290. timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
  291. filename = f"hyperliquid_leaderboard_{timestamp}.txt"
  292. filepath = os.path.join(os.path.dirname(__file__), filename)
  293. with open(filepath, 'w') as f:
  294. f.write(f"# Hyperliquid Leaderboard Addresses (CURATED STATIC LIST)\n")
  295. f.write(f"# ⚠️ WARNING: These are NOT live leaderboard rankings!\n")
  296. f.write(f"# Generated: {datetime.now().isoformat()}\n")
  297. f.write(f"# Total addresses: {len(addresses)}\n")
  298. f.write(f"# Source: Manually curated list of known performers\n\n")
  299. for i, address in enumerate(addresses, 1):
  300. f.write(f"{i:3d}. {address}\n")
  301. logger.info(f"💾 Saved {len(addresses)} addresses to {filepath}")
  302. def export_to_json(self, entries: List[LeaderboardEntry], filename: str = None) -> str:
  303. """Export entries to JSON format"""
  304. if not filename:
  305. timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
  306. filename = f"hyperliquid_leaderboard_{timestamp}.json"
  307. filepath = os.path.join(os.path.dirname(__file__), filename)
  308. data = {
  309. "disclaimer": "⚠️ WARNING: This is NOT live leaderboard data from Hyperliquid!",
  310. "source": "Manually curated static list of known high-performing addresses",
  311. "timestamp": datetime.now().isoformat(),
  312. "total_entries": len(entries),
  313. "limitations": [
  314. "Hyperliquid does not provide public leaderboard APIs",
  315. "All PnL, volume, and trade data is set to 0 (unavailable)",
  316. "Time windows are ignored - same list returned for all periods",
  317. "Rankings are arbitrary based on manual curation"
  318. ],
  319. "entries": [asdict(entry) for entry in entries]
  320. }
  321. with open(filepath, 'w') as f:
  322. json.dump(data, f, indent=2)
  323. logger.info(f"💾 Exported {len(entries)} entries to {filepath}")
  324. return filepath
  325. async def main():
  326. """Command-line interface"""
  327. parser = argparse.ArgumentParser(
  328. description="Hyperliquid Leaderboard 'Scraper' (Returns Curated Static List)",
  329. formatter_class=argparse.RawDescriptionHelpFormatter,
  330. epilog="""
  331. ⚠️ IMPORTANT DISCLAIMERS ⚠️
  332. This script does NOT scrape live leaderboard data from Hyperliquid!
  333. LIMITATIONS:
  334. • Hyperliquid has no public leaderboard API
  335. • Returns same static list regardless of time window
  336. • No real PnL, volume, or performance data
  337. • Limited to 50 manually curated addresses
  338. This tool exists for:
  339. • Demonstration purposes
  340. • Providing known high-performing addresses
  341. • Future development if APIs become available
  342. For real leaderboard data, you would need browser automation
  343. and reverse engineering of internal APIs.
  344. """
  345. )
  346. parser.add_argument(
  347. '--window',
  348. choices=['24h', '1d', '7d', '1w', 'week', '30d', '1m', 'month', 'all-time', 'all'],
  349. default='7d',
  350. help='Time window (IGNORED - same list returned for all windows)'
  351. )
  352. parser.add_argument(
  353. '--limit',
  354. type=int,
  355. default=10,
  356. help='Number of addresses to return (max 50 available)'
  357. )
  358. parser.add_argument(
  359. '--format',
  360. choices=['text', 'json', 'csv'],
  361. default='text',
  362. help='Output format'
  363. )
  364. parser.add_argument(
  365. '--output',
  366. help='Output filename (auto-generated if not specified)'
  367. )
  368. parser.add_argument(
  369. '--verbose',
  370. action='store_true',
  371. help='Enable verbose logging'
  372. )
  373. args = parser.parse_args()
  374. if args.verbose:
  375. logging.getLogger().setLevel(logging.DEBUG)
  376. # Validate limit
  377. if args.limit > 50:
  378. logger.warning(f"⚠️ Requested {args.limit} addresses, but only 50 available. Using 50.")
  379. args.limit = 50
  380. async with HyperliquidLeaderboardScraper() as scraper:
  381. try:
  382. if args.format == 'json':
  383. entries = await scraper.get_leaderboard_entries(args.window, args.limit)
  384. output_file = scraper.export_to_json(entries, args.output)
  385. print(f"\n📁 Exported to: {output_file}")
  386. else:
  387. addresses = await scraper.get_top_addresses(args.window, args.limit)
  388. if args.output:
  389. scraper.save_addresses_to_file(addresses, args.output)
  390. print(f"\n📁 Saved to: {args.output}")
  391. else:
  392. print(f"\n📊 Top {len(addresses)} Addresses (CURATED STATIC LIST):")
  393. print("=" * 60)
  394. for i, address in enumerate(addresses, 1):
  395. print(f"{i:3d}. {address}")
  396. print("\n" + "=" * 60)
  397. print("⚠️ DISCLAIMER: These are NOT live leaderboard rankings!")
  398. print(" This is a manually curated static list.")
  399. print(" Hyperliquid does not provide public leaderboard APIs.")
  400. except KeyboardInterrupt:
  401. logger.info("\n🛑 Interrupted by user")
  402. except Exception as e:
  403. logger.error(f"❌ Error: {e}")
  404. sys.exit(1)
  405. if __name__ == "__main__":
  406. asyncio.run(main())