Issue
I think the error occurs because bs is getting the coroutine from page.content, I'm unsure how to fix it. Any help is appreciated.
This is all the relevant code in the program:
async def find_song_on_youtube(song_name, session):
# TODO: Make this fx async
song_name_query = song_name.replace(" ", "+")
page = await session.request(method='GET', url=
f"https://www.youtube.com/results?search_query={song_name_query}")
page = page.content
return sort_return_final_result(page, song_name)
not sorting these because unsure how to check if it's the actual artist page, all this transformation is to grab the area of html where the result json is - could break very easily
def sort_return_final_result(page, song_name):
page = bs(page, 'html5lib')
page = str(page.find_all("script")[-3]).split("=", 1)[1].strip()[:-1].split("\n")[0][:-1]
page = json.loads(page)
# drilling down to where the video contents are
full_page = (page["contents"]["twoColumnSearchResultsRenderer"]["primaryContents"]
["sectionListRenderer"]["contents"][0]["itemSectionRenderer"]["contents"])
# sometimes the video is not in the first position, this should drill down until it finds
# the first video item, should be in the videoRenderer key
first_two_results = []
for item in full_page:
if len(first_two_results) >= 2:
break
try:
page = item["videoRenderer"]
first_two_results.append(parse_video_info(page))
except KeyError:
continue
Sort by views first, then grab the highest viewed video by the official artist if it's available
first_two_results.sort(key=itemgetter("Views"), reverse=True)
first_two_results.sort(key=itemgetter("Official Artist"), reverse=True)
final_result = {}
for item in first_two_results:
if fuzz.partial_ratio(item["Name"], song_name.split('+')[1]) > 50:
final_result = item
break
print(final_result)
return final_result
def parse_video_info(page):
# name of video
name = page["title"]["runs"][0]["text"]
# url of video
url = page["navigationEndpoint"]["commandMetadata"]["webCommandMetadata"]["url"]
url = f'https://youtube.com{url}'
# views
views = int(page["viewCountText"]["simpleText"].split()[0].replace(",", ""))
# official artist check
try:
official_artist = page["ownerBadges"][0]["metadataBadgeRenderer"]["tooltip"]
if official_artist != "Official Artist Channel":
raise KeyError
official_artist = True
except KeyError:
official_artist = False
return {
"Name": name, # encoding issues here might be a problem later
"Url": url,
"Views": views,
"Official Artist": official_artist
}
async def get_song_urls(playlist_or_album, resource_id):
song_list = [] # to get rid of pycharm error
rsrc_name = ""
if playlist_or_album == "Playlist": # rsrc id could be playlist or album id
song_list = return_songs_from_playlist(resource_id)
rsrc_name = spotfy_obj.playlist(resource_id)['name']
elif playlist_or_album == "Album":
song_list = return_songs_from_album(resource_id)
rsrc = spotfy_obj.album(resource_id)
rsrc_name = rsrc['name']
rsrc_name += f" - by {rsrc['artists'][0]['name']}"
print("Collected Songs from Playlist")
t1 = time()
async with ClientSession() as session:
playlist = await asyncio.gather(*[find_song_on_youtube(song, session)
for song in song_list])
t2 = time()
print(t2 - t1)
dump_playlist_to_db(playlist, rsrc_name)
print(playlist)
asyncio.run(get_song_urls("Album", "6a4HHZe13SySfC50BGy8Hm"))
How do I get the actual content from the request? Instead of the coroutine object?
Solution
Just use your_file_object.file.read()
instead of your_file_object.content
.
So, in your case it should be page.file.read()
.
If you are using flask it will be your_file_object.read()
.
If you are using fastapi it will be your_file_object.file.raed()
.
It works in my case.
Answered By - Shubham Tomar
0 comments:
Post a Comment
Note: Only a member of this blog may post a comment.