Issue
from bs4 import BeautifulSoup
import re
text = """<div class="content">
<div class="body">
<img src="https:/document-images/leasing/mynd-logo.png" alt="j0b" width="96px" height="auto" class="kjhjhv">
<div class="title">
You've received a new message.
<div class="subtitle">
Service request <a href="https://-2B06XEJ0r8DVmc_kuX2cI8baDAaOcj-2Fp3iIjU6R7PXKa3dAYAr0B7iMyKwz-2FaV0nnIuCVP1pcf8DEy1UidQbR2IywCV5ueXy1TowXMzFcIPYG2hp7HjP1WzHYI-2FJNMGLZMtC4LXybcCZ4cUOV4DnC6s-2FCIJ-2FrumGmdnE2leBJgM3rWJaEyXOwi4JiHjBHr4rtNh-2BPeP3JFBHpGNp5KWrZkxg-2F9zrih4tp7-2BUUrBo0g8hlG5It1yEVfz9Im2iRVAvdjqHvAqxn63TsV2OFNp8M8DMjuS6aRL3Vki8HfkXx0kD8fGJ6GAKUAOZv-2BCSAgxtcdnIpR8sQU6Jkcm9vxdjG2zmDYFEmVykg0-2BY3uD1ZbGl79dsB68mqJdbbQlgb8ERSRAW3t8cYTXsegrGd5-2Fox-2B9Yo-2FPmUC9cnqEaA-3D-3D" style="color:#5A88AA; text-decoration:underline;" target="_blank">#2819138</a>
</div>
</div> <!-- CONTENT OF EMAIL -->
<p> May 17, 2023 05:33 PDT <b> Charisse </b> wrote: </p>
<p style="white-space: pre-wrap;">Hi team. Is this completed? Please advise. Thank you, Charisse</p>
</div>"""
# Parse the HTML fragment with Python's built-in "html.parser" backend.
soup = BeautifulSoup(text,"html.parser")
# `text=` (the older spelling of `string=`) matches text nodes, so this returns
# the matching NavigableString objects themselves -- not the <a> tag or its href.
scores = soup.find_all(text=re.compile('Service request'))
print(scores)
# With a tag name plus `text=`, the pattern is tested against each <div>'s
# `.string`, which is None for a tag that has more than one child. The
# "subtitle" <div> holds both text and an <a>, so nothing matches and this
# prints None.
score2 = soup.find('div',text=re.compile('Service request'))
print(score2)
Expected output (the URL I need to extract):
https://-2B06XEJ0r8DVmc_kuX2cI8baDAaOcj-2Fp3iIjU6R7PXKa3dAYAr0B7iMyKwz-2FaV0nnIuCVP1pcf8DEy1UidQbR2IywCV5ueXy1TowXMzFcIPYG2hp7HjP1WzHYI-2FJNMGLZMtC4LXybcCZ4cUOV4DnC6s-2FCIJ-2FrumGmdnE2leBJgM3rWJaEyXOwi4JiHjBHr4rtNh-2BPeP3JFBHpGNp5KWrZkxg-2F9zrih4tp7-2BUUrBo0g8hlG5It1yEVfz9Im2iRVAvdjqHvAqxn63TsV2OFNp8M8DMjuS6aRL3Vki8HfkXx0kD8fGJ6GAKUAOZv-2BCSAgxtcdnIpR8sQU6Jkcm9vxdjG2zmDYFEmVykg0-2BY3uD1ZbGl79dsB68mqJdbbQlgb8ERSRAW3t8cYTXsegrGd5-2Fox-2B9Yo-2FPmUC9cnqEaA-3D-3D
I need to extract that URL by locating the text "Service request" that appears immediately before the link.
Solution
Looking at the HTML, you want to search for the <a> tag whose previous sibling is a text node containing the text "Service request":
import re
from bs4 import BeautifulSoup
text = """<div class="content">
<div class="body">
<img src="https:/document-images/leasing/mynd-logo.png" alt="j0b" width="96px" height="auto" class="kjhjhv">
<div class="title">
You've received a new message.
<div class="subtitle">
Service request <a href="https://-2B06XEJ0r8DVmc_kuX2cI8baDAaOcj-2Fp3iIjU6R7PXKa3dAYAr0B7iMyKwz-2FaV0nnIuCVP1pcf8DEy1UidQbR2IywCV5ueXy1TowXMzFcIPYG2hp7HjP1WzHYI-2FJNMGLZMtC4LXybcCZ4cUOV4DnC6s-2FCIJ-2FrumGmdnE2leBJgM3rWJaEyXOwi4JiHjBHr4rtNh-2BPeP3JFBHpGNp5KWrZkxg-2F9zrih4tp7-2BUUrBo0g8hlG5It1yEVfz9Im2iRVAvdjqHvAqxn63TsV2OFNp8M8DMjuS6aRL3Vki8HfkXx0kD8fGJ6GAKUAOZv-2BCSAgxtcdnIpR8sQU6Jkcm9vxdjG2zmDYFEmVykg0-2BY3uD1ZbGl79dsB68mqJdbbQlgb8ERSRAW3t8cYTXsegrGd5-2Fox-2B9Yo-2FPmUC9cnqEaA-3D-3D" style="color:#5A88AA; text-decoration:underline;" target="_blank">#2819138</a>
</div>
</div> <!-- CONTENT OF EMAIL -->
<p> May 17, 2023 05:33 PDT <b> Charisse </b> wrote: </p>
<p style="white-space: pre-wrap;">Hi team. Is this completed? Please advise. Thank you, Charisse</p>
</div>"""
# Parse the HTML fragment with Python's built-in "html.parser" backend.
soup = BeautifulSoup(text, "html.parser")

# Compile the label pattern once instead of on every tag the predicate visits.
label_pattern = re.compile("Service request")


def follows_service_request_label(tag):
    """Return True for an <a href=...> preceded by a sibling text node matching the label."""
    return (
        tag.name == "a"
        # Skip anchors without an href so the lookup below cannot raise KeyError.
        and tag.has_attr("href")
        # `string=` restricts the sibling search to text nodes.
        and tag.find_previous_sibling(string=label_pattern) is not None
    )


a = soup.find(follows_service_request_label)
if a is None:
    # Fail with a clear message instead of the opaque
    # "TypeError: 'NoneType' object is not subscriptable".
    raise LookupError('no <a href="..."> found after the text "Service request"')
print(a["href"])
Prints:
https://-2B06XEJ0r8DVmc_kuX2cI8baDAaOcj-2Fp3iIjU6R7PXKa3dAYAr0B7iMyKwz-2FaV0nnIuCVP1pcf8DEy1UidQbR2IywCV5ueXy1TowXMzFcIPYG2hp7HjP1WzHYI-2FJNMGLZMtC4LXybcCZ4cUOV4DnC6s-2FCIJ-2FrumGmdnE2leBJgM3rWJaEyXOwi4JiHjBHr4rtNh-2BPeP3JFBHpGNp5KWrZkxg-2F9zrih4tp7-2BUUrBo0g8hlG5It1yEVfz9Im2iRVAvdjqHvAqxn63TsV2OFNp8M8DMjuS6aRL3Vki8HfkXx0kD8fGJ6GAKUAOZv-2BCSAgxtcdnIpR8sQU6Jkcm9vxdjG2zmDYFEmVykg0-2BY3uD1ZbGl79dsB68mqJdbbQlgb8ERSRAW3t8cYTXsegrGd5-2Fox-2B9Yo-2FPmUC9cnqEaA-3D-3D
Answered By - Andrej Kesely
0 comments:
Post a Comment
Note: Only a member of this blog may post a comment.