Issue
I'm trying to get details about YouTube videos based on their IDs. I have a list of 40k IDs, and I divide it into a list of lists containing 50 IDs each, since the YouTube API can handle 50 IDs at once.
My code just freezes at task number 101.
import asyncio
import time
import aiohttp
from aiohttp.client import ClientSession
import requests
# Collects the decoded JSON body of each response; filled in by get_data().
results = []
# Placeholder text from the original post, not valid Python: substitute the
# real list of ID chunks here. Each item is sent as the `id` parameter of one
# request built in get_tasks().
chunks = [click the link for IDs]
# Start timestamp; the elapsed time is printed once the run has finished.
start = time.time()
def get_tasks(session, id_chunks=None):
    """Build one pending ``videos`` request per chunk of video IDs.

    Args:
        session: Object whose ``get(url=..., params=..., ssl=...)`` starts a
            request (an ``aiohttp.ClientSession`` in this script).
        id_chunks: Optional iterable of ID chunks. Each chunk is either a
            sequence of video-ID strings or an already comma-separated
            string. Defaults to the module-level ``chunks``.

    Returns:
        A list holding the result of ``session.get(...)`` for every chunk,
        in chunk order.
    """
    if id_chunks is None:
        id_chunks = chunks
    url = "https://www.googleapis.com/youtube/v3/videos"
    tasks = []
    for chunk in id_chunks:
        params = {
            'key': 'yourapikey',
            'part': 'statistics,contentDetails,snippet',
            # The API documents `id` as one comma-separated string, so the
            # chunk is joined instead of being passed as a raw list.
            'id': chunk if isinstance(chunk, str) else ",".join(chunk),
        }
        # NOTE(review): ssl=False disables certificate verification; kept
        # as in the original, but consider passing a real SSL context.
        tasks.append(session.get(url=url, params=params, ssl=False))
    print(len(tasks))
    return tasks
async def _fetch_json(request):
    """Await one pending request and return its decoded JSON body.

    Reading the body inside the ``async with`` hands the connection back to
    the session's pool as soon as this single request is finished.
    """
    async with request as response:
        return await response.json()


async def get_data():
    """Run every request from ``get_tasks`` concurrently and store the JSON.

    The decoded bodies are added to the module-level ``results`` list in the
    same order as the tasks.

    Each body is read inside its own task. Gathering the bare responses first
    keeps every connection checked out until all requests have answered, so
    once the connector's connection limit is reached the remaining requests
    can never start and the program hangs.
    """
    async with aiohttp.ClientSession() as session:
        tasks = get_tasks(session)
        print("len ", len(tasks))
        payloads = await asyncio.gather(*(_fetch_json(task) for task in tasks))
    results.extend(payloads)
# WindowsSelectorEventLoopPolicy only exists in asyncio on Windows, so select
# it only when it is available; on other platforms the default loop is used.
if hasattr(asyncio, "WindowsSelectorEventLoopPolicy"):
    asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())
asyncio.run(get_data())
# Elapsed wall-clock seconds since `start` was taken.
print(time.time() - start)
I'm not sure what is happening here. How can I debug this problem? Link to the chunks lists:
Solution
I don't know why you had errors; I just know that if you do it like below, you won't have errors (as long as your params are correct). In essence, we are just taking this example from the aiohttp docs, sticking the session part in a loop, and modifying the response for your needs.
import asyncio, aiohttp, ssl, certifi, requests, random, time
# Chrome 106 user-agent strings (macOS, Windows, Linux); one is chosen at
# random for the session headers below.
CHROME = (
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/106.0.0.0 Safari/537.36',
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/106.0.0.0 Safari/537.36',
    'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/106.0.0.0 Safari/537.36',
)

# TLS context that verifies certificates against certifi's CA bundle.
SSL_CONTEXT = ssl.create_default_context(cafile=certifi.where())

# Start from requests' default headers and override only the user agent.
HEADERS = requests.utils.default_headers()
HEADERS.update({'User-Agent': random.choice(CHROME)})

URL = "https://www.googleapis.com/youtube/v3/videos"

# Start timestamp; the elapsed time is printed after the run.
start = time.time()

# Each inner list is sent as the `id` parameter of one request.
chunks = [
    ["Ul3w34H-byY", "OJdiW61Q9bM"],
    ["R3En-aLGwhw", "nrwXOn1rITY"],
]

# Query parameters shared by every request; get_data() adds `id` per chunk.
params = dict(key='APIKEY', part='statistics,contentDetails,snippet')
async def get_data():
    """Fetch video details for every chunk in ``chunks``, one request at a time.

    Returns:
        A list with the decoded JSON body of each request that answered with
        HTTP 200, in chunk order. Responses with any other status are
        skipped, so the list can be shorter than ``chunks``.
    """
    results = []
    async with aiohttp.ClientSession(headers=HEADERS) as session:
        for chunk in chunks:
            # Build a per-request copy so the shared module-level `params`
            # is not mutated, and send the IDs as the single comma-separated
            # string the API documents for `id`.
            query = {
                **params,
                'id': chunk if isinstance(chunk, str) else ",".join(chunk),
            }
            async with session.get(url=URL, params=query, ssl=SSL_CONTEXT) as resp:
                if resp.status != 200:
                    continue
                results.append(await resp.json())
    # Previously the collected data was dropped when the function ended.
    return results
# WindowsSelectorEventLoopPolicy only exists in asyncio on Windows, so select
# it only when it is available; on other platforms the default loop is used.
if hasattr(asyncio, "WindowsSelectorEventLoopPolicy"):
    asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())
asyncio.run(get_data())
# Elapsed wall-clock seconds since `start` was taken.
print(time.time() - start)
Answered By - OneMadGypsy
0 comments:
Post a Comment
Note: Only a member of this blog may post a comment.