Issue
I have the following snippet, which already works. However, I want to clean up the formatting a bit by removing the duplicated data in the first column, to make the output more readable.
from urllib.request import Request, urlopen
from bs4 import BeautifulSoup
import re, random, ctypes
import requests
from time import sleep
# Scrape the token-transfer table on BscScan and print one line per transfer
# in the form "<txn hash> <value> <token>".
from urllib.parse import urljoin  # needed below to build absolute token links

url = 'https://bscscan.com/tokentxns'

# Pool of browser User-Agent strings; one is chosen at random for each run.
user_agent_list = [
    'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:86.0) Gecko/20100101 Firefox/86.0',
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36',
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_5) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.1.1 Safari/605.1.15',
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36',
    'Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1667.0 Safari/537.36',
    'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1985.67 Safari/537.36',
]

# requests only sends custom headers when they are given as a dict through the
# ``headers=`` keyword; the second positional argument of requests.get() is
# ``params`` (the query string), not the headers.
header = {'User-Agent': random.choice(user_agent_list)}
pausesleep = random.randint(10000, 30000) / 10000  # random value in 1.0-3.0; not used in this snippet

req = requests.get(url, headers=header, timeout=10)
req.raise_for_status()  # fail loudly on HTTP errors instead of parsing an error page
soup = BeautifulSoup(req.content, 'html.parser')

table = soup.find('table')
if table is None:
    raise SystemExit(f'No <table> found in the response from {url}')

rows = table.find_all('tr')
for row in rows[1:]:  # skip the first <tr> (presumably the table's header row)
    tds = row.find_all('td')
    if len(tds) < 9:
        # Not a full data row (columns 1, 2, 7 and 8 are read below).
        continue
    txnhash = tds[1].text
    age = tds[2].text  # extracted but not printed in this snippet
    value = tds[7].text
    token = tds[8].text
    anchor = tds[8].find('a')
    # Absolute URL of the token link, or None when the cell has no usable <a href>.
    link = urljoin(url, anchor['href']) if anchor is not None and anchor.get('href') else None
    print(f'{txnhash} {value} {token}')
Current Output:
0x70e16e1cbcd30d1c3a2abb03a3d3c43fc324aa794c45b10cd5ef1001e9af0915 899.885819768 TrusterCoin (TSC)
0x70e16e1cbcd30d1c3a2abb03a3d3c43fc324aa794c45b10cd5ef1001e9af0915 0.62679168 Wrapped BNB (WBNB)
0x52d862d3f920370d84039f2dccb40edc7343699310d3436b71738d4176997398 388,214,984,514.909719227 WoofCoin (WOOF)
0x52d862d3f920370d84039f2dccb40edc7343699310d3436b71738d4176997398 0.003 Wrapped BNB (WBNB)
0x4fe83f2ebad772b4292e81f418a6f54572f7462934358a356787f8d777c58c8b 26.737674146727101117 Binance-Peg ... (BUSD)
0x4fe83f2ebad772b4292e81f418a6f54572f7462934358a356787f8d777c58c8b 1.251364193609566793 Binance-Peg ... (ADA)
0x4fe83f2ebad772b4292e81f418a6f54572f7462934358a356787f8d777c58c8b 0.03997685638568537 Binance-Peg ... (ADA)
0x4fe83f2ebad772b4292e81f418a6f54572f7462934358a356787f8d777c58c8b 0.041171860015645402 Binance-Peg ... (ADA)
0x4fe83f2ebad772b4292e81f418a6f54572f7462934358a356787f8d777c58c8b 0.089939749761843203 Wrapped BNB (WBNB)
Wanted Improvement:
0x70e16e1cbcd30d1c3a2abb03a3d3c43fc324aa794c45b10cd5ef1001e9af0915 899.885819768 TrusterCoin (TSC)
0.62679168 Wrapped BNB (WBNB)
0x52d862d3f920370d84039f2dccb40edc7343699310d3436b71738d4176997398 388,214,984,514.909719227 WoofCoin (WOOF)
0.003 Wrapped BNB (WBNB)
0x4fe83f2ebad772b4292e81f418a6f54572f7462934358a356787f8d777c58c8b 26.737674146727101117 Binance-Peg ... (BUSD)
1.251364193609566793 Binance-Peg ... (ADA)
0.03997685638568537 Binance-Peg ... (ADA)
0.041171860015645402 Binance-Peg ... (ADA)
0.089939749761843203 Wrapped BNB (WBNB)
Solution
Try this:
from urllib.request import Request, urlopen,urljoin
from bs4 import BeautifulSoup
import re, random, ctypes
import requests
from time import sleep
# Scrape the token-transfer table on BscScan and print "<txn hash> <value> <token>"
# per transfer, showing each transaction hash only the first time it appears.
url = 'https://bscscan.com/tokentxns'

# Pool of browser User-Agent strings; one is chosen at random for each run.
user_agent_list = [
    'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:86.0) Gecko/20100101 Firefox/86.0',
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36',
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_5) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.1.1 Safari/605.1.15',
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36',
    'Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1667.0 Safari/537.36',
    'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1985.67 Safari/537.36',
]

# requests only sends custom headers when they are given as a dict through the
# ``headers=`` keyword; the second positional argument of requests.get() is
# ``params`` (the query string), not the headers.
header = {'User-Agent': random.choice(user_agent_list)}
pausesleep = random.randint(10000, 30000) / 10000  # random value in 1.0-3.0; not used in this snippet

req = requests.get(url, headers=header, timeout=10)
req.raise_for_status()  # fail loudly on HTTP errors instead of parsing an error page
soup = BeautifulSoup(req.content, 'html.parser')

table = soup.find('table')
if table is None:
    raise SystemExit(f'No <table> found in the response from {url}')

rows = table.find_all('tr')
ne = []  # list of the transaction hashes that have already been printed
for row in rows[1:]:  # skip the first <tr> (presumably the table's header row)
    tds = row.find_all('td')
    if len(tds) < 9:
        # Not a full data row (columns 1, 2, 7 and 8 are read below).
        continue
    txnhash = tds[1].text
    age = tds[2].text  # extracted but not printed in this snippet
    value = tds[7].text
    token = tds[8].text
    anchor = tds[8].find('a')
    # Absolute URL of the token link, or None when the cell has no usable <a href>.
    link = urljoin(url, anchor['href']) if anchor is not None and anchor.get('href') else None
    if txnhash not in ne:
        # First row of this transaction: remember the hash and print it.
        ne.append(txnhash)
        print(txnhash, end=" ")
    else:
        # Repeated hash: print blanks of the same width so the value column
        # stays aligned. Remove this else branch if no indentation is wanted.
        print(" " * len(txnhash), end=" ")
    print(f"{value} {token}")
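We keep a list, ne, of the txnhash values that have already been printed, and for every row we check whether its txnhash is already in that list. If it is not, the hash is printed and added to ne; otherwise the hash is left out and only the value and token are printed.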
Answered By - Xitiz
0 comments:
Post a Comment
Note: Only a member of this blog may post a comment.