import requests
import random
from libs.scrapper.scrappersites import imgur, reddit, gfy, tumblr
class scrapper():
    def __init__(self):
        pass
    def linkget(self, subreddit, israndom):
        # When israndom is set, pull from either the hot listing or the
        # all-time top listing; otherwise fetch the subreddit's plain json
        # (without the else branch, .json() below would fail on an HTML page).
        if israndom:
            options = [".json?count=100", "/top/.json?sort=top&t=all&count=100"]
            choice = random.choice(options)
            subreddit += choice
        else:
            subreddit += ".json"
        html = requests.get("https://reddit.com/r/" + subreddit, headers={'User-agent': 'RoxBot Discord Bot'})
        # Renamed from "reddit" to avoid shadowing the imported reddit module.
        posts = html.json()["data"]["children"]
        return posts
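    # Usage sketch (hypothetical caller, not from the source): pick a random
    # post URL out of a subreddit listing returned by linkget().
    #   posts = scrapper().linkget("gifs", True)
    #   url = random.choice(posts)["data"]["url"]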
if "imgur" in url: | if "imgur" in url: | ||||
url2 = imgur.imgur().get(url) | url2 = imgur.imgur().get(url) | ||||
elif "gfycat" in url: | elif "gfycat" in url: | ||||
url2 = gfy.gfycat().get(str(url)) | |||||
url2 = gfy.gfycat().get(url) | |||||
elif "eroshare" in url: | elif "eroshare" in url: | ||||
#eroshare.eroshare().get(url) | #eroshare.eroshare().get(url) | ||||
pass | pass | ||||
url2 = reddit.reddit().get(url) | url2 = reddit.reddit().get(url) | ||||
elif "media.tumblr" in url: | elif "media.tumblr" in url: | ||||
url2 = tumblr.tumblr().get(url) | url2 = tumblr.tumblr().get(url) | ||||
print(url) | |||||
return url2 | return url2 |
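    # e.g. "https://gfycat.com/SomeClip" routes through gfy.gfycat().get();
    # a URL from an unrecognised host falls through and returns None.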
# --- gfy module (imported above as libs.scrapper.scrappersites.gfy); the
# class line was missing from this hunk and is reconstructed from the
# gfy.gfycat() call site.
class gfycat():
    def __init__(self):
        pass
    def url_get(self, url):
        # Rewrite a gfycat page link into a direct giant.gfycat.com gif link.
        urlsplit = url.split("/")
        urlsplit[2] = "giant." + urlsplit[2]
        urlsplit[-1] += ".gif"
        urlnew = "/".join(urlsplit)
        return urlnew
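    # Example of the rewrite above (illustrative URL, not from the source):
    #   url_get("https://gfycat.com/SomeClip")
    #   -> "https://giant.gfycat.com/SomeClip.gif"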
    def get(self, url):
        # The direct-gif rewrite is disabled; the URL is passed through as-is.
        #url2 = self.url_get(url)
        url2 = url
        return url2
# --- imgur module (imported above as libs.scrapper.scrappersites.imgur); the
# class line was missing from this hunk, and these imports are needed by the
# methods below.
import requests
from bs4 import BeautifulSoup

class imgur():
    def removed(self, url):
        # Deleted imgur posts serve a "removed.png" placeholder image.
        page = requests.get(url)
        soup = BeautifulSoup(page.content, 'html.parser')
        if "removed.png" in soup.img["src"]:
            return True
        else:
            return False
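    # Sketch (hypothetical URL): removed("https://imgur.com/xxxx") is True when
    # imgur serves its deletion placeholder instead of the uploaded image.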
    def get(self, url):
        if url.split(".")[-1] in ("png", "jpg", "jpeg", "gif", "gifv"):
            # Already a direct image link.
            return url
        else:
            if self.removed(url):
                return False
            # Scrape the page for image links.
            page = requests.get(url)
            soup = BeautifulSoup(page.content, 'html.parser')
            links = []
            # The loop header was missing from this hunk; iterating over the
            # page's <img> tags is an assumption consistent with the body below.
            for img in soup.find_all("img"):
                if not img["src"] in links:
                    links.append(img["src"])
            if not links:
                # Safety guard (added): no image links found, avoid links[0]
                # raising IndexError below.
                return False
            if len(links) > 1:
                return url
            else:
                print(links)  # debug output kept from the original
                if not "http" in links[0]:
                    # Protocol-relative src such as "//i.imgur.com/..."
                    links[0] = "https:" + links[0]
                return links[0]
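# End-to-end sketch (hypothetical wiring; "siteget" is the assumed name above):
#   posts = scrapper().linkget("pics", True)
#   image_url = scrapper().siteget(random.choice(posts)["data"]["url"])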