@@ -1,12 +1,17 @@ | |||
import requests | |||
import random | |||
from libs.scrapper.scrappersites import imgur, reddit, gfy, tumblr | |||
class scrapper(): | |||
def __init__(self): | |||
pass | |||
def linkget(self, subreddit, israndom):
    """Fetch the posts of a subreddit listing as decoded JSON.

    Args:
        subreddit: Text placed after ``/r/`` in the request URL.
        israndom: When truthy, one of two listing suffixes is picked at
            random: the default listing or the all-time ``/top/`` listing.

    Returns:
        The ``["data"]["children"]`` value of the decoded JSON response.
    """
    if israndom:
        options = [".json?count=100", "/top/.json?sort=top&t=all&count=100"]
        subreddit += random.choice(options)
    elif ".json" not in subreddit:
        # The non-random path used to request the bare subreddit page, which
        # is not JSON and makes ``html.json()`` fail. Callers that already
        # pass a ``.json`` path are left untouched.
        subreddit += ".json"
    html = requests.get(
        "https://reddit.com/r/" + subreddit,
        headers={'User-agent': 'RoxBot Discord Bot'},
    )
    # Return directly instead of binding a local named ``reddit``, which
    # shadowed the imported ``reddit`` scrapper module inside this method.
    return html.json()["data"]["children"]
@@ -15,7 +20,7 @@ class scrapper(): | |||
if "imgur" in url: | |||
url2 = imgur.imgur().get(url) | |||
elif "gfycat" in url: | |||
url2 = gfy.gfycat().get(str(url)) | |||
url2 = gfy.gfycat().get(url) | |||
elif "eroshare" in url: | |||
#eroshare.eroshare().get(url) | |||
pass | |||
@@ -23,4 +28,5 @@ class scrapper(): | |||
url2 = reddit.reddit().get(url) | |||
elif "media.tumblr" in url: | |||
url2 = tumblr.tumblr().get(url) | |||
print(url) | |||
return url2 |
@@ -2,18 +2,14 @@ class gfycat(): | |||
def __init__(self): | |||
pass | |||
def url_get(self, url):
    """Build a ``giant.``-prefixed ``.gif`` link from *url*.

    The URL is split on ``/``. The third piece (the host when *url* starts
    with ``scheme://``) gets a ``giant.`` prefix, the last piece gets a
    ``.gif`` suffix, and the pieces are joined back with ``/``.

    Raises:
        IndexError: If *url* has fewer than three ``/``-separated pieces.
    """
    pieces = url.split("/")
    pieces[2] = "giant." + pieces[2]
    pieces[-1] = pieces[-1] + ".gif"
    return "/".join(pieces)
def get(self, url):
    """Return *url* unchanged.

    NOTE: rewriting the link through ``self.url_get(url)`` is currently
    disabled, so the address is passed through as-is.
    """
    return url
@@ -9,18 +9,22 @@ class imgur(): | |||
def removed(self, url):
    """Report whether the page at *url* points at a ``removed.png`` image.

    Downloads the page and inspects the ``src`` attribute of the first
    ``<img>`` tag in the parsed document.

    Args:
        url: Address of the page to download and inspect.

    Returns:
        True if the first ``<img>`` tag's ``src`` contains ``"removed.png"``.
        False otherwise, including when the document has no ``<img>`` tag or
        that tag carries no ``src`` attribute.
    """
    page = requests.get(url)
    soup = BeautifulSoup(page.content, 'html.parser')
    first_img = soup.img
    if first_img is None:
        # No <img> in the document; subscripting None would raise TypeError.
        return False
    # ``.get`` avoids a KeyError when the tag has no ``src`` attribute.
    return "removed.png" in first_img.get("src", "")
def get(self, url): | |||
if self.removed(url): | |||
return False | |||
if url.split(".")[-1] in ("png", "jpg", "jpeg", "gif", "gifv"): | |||
return url | |||
elif url.split("/")[-2] == "a": | |||
#elif url.split(".")[-1] == "gifv": | |||
# urlsplit = url.split(".") | |||
# urlsplit[-1] = "gif" | |||
# url = ".".join(urlsplit) | |||
# return url""" | |||
else: | |||
if self.removed(url): | |||
return False | |||
page = requests.get(url) | |||
soup = BeautifulSoup(page.content, 'html.parser') | |||
links = [] | |||
@@ -29,6 +33,9 @@ class imgur(): | |||
if not img["src"] in links: | |||
links.append(img["src"]) | |||
if len(links) > 1: | |||
return False | |||
return url | |||
else: | |||
print(links) | |||
if not "http" in links[0]: | |||
links[0] = "https:" + links[0] | |||
return links[0] |