Browse Source

scrapper added

tags/v0.4.0
roxie 6 years ago
parent
commit
6777d3efdf
7 changed files with 174 additions and 0 deletions
  1. +85
    -0
      libs/scrapper/scrapper.py
  2. +0
    -0
      libs/scrapper/sites/__init__.py
  3. +28
    -0
      libs/scrapper/sites/eroshare.py
  4. +19
    -0
      libs/scrapper/sites/gfy.py
  5. +36
    -0
      libs/scrapper/sites/imgur.py
  6. +3
    -0
      libs/scrapper/sites/reddit.py
  7. +3
    -0
      libs/scrapper/sites/tumblr.py

+ 85
- 0
libs/scrapper/scrapper.py View File

@@ -0,0 +1,85 @@
import os
from sites import gfy, imgur, tumblr, reddit

class scrapper():
    """Dispatch a media URL to the matching site-specific resolver.

    ``get`` inspects the URL for a known host substring and delegates to
    the corresponding module's resolver class.  Unknown hosts (and the
    currently-disabled eroshare branch) resolve to an empty string.
    """

    def __init__(self):
        pass

    def get(self, url):
        """Return a direct-media URL for *url*, or "" when unhandled."""
        if "imgur" in url:
            return imgur.imgur().get(url)
        if "gfycat" in url:
            return gfy.gfycat().get(str(url))
        if "eroshare" in url:
            # eroshare support is disabled; resolve to the empty default.
            #eroshare.eroshare().get(url)
            return ""
        if "redd.it" in url or "i.reddituploads" in url:
            return reddit.reddit().get(url)
        if "media.tumblr" in url:
            return tumblr.tumblr().get(url)
        return ""


"""def main(choice):
print("====== Menu ======")
print("1: Download Subreddit")
print("2: Download User")
print("3: Download Your Front Page")
print("9: Settings")
while choice == 0:
try:
choice = int(input("Choice: "))
if choice == 1:
reddit().menu()
elif choice == 2 or choice == 3:
print("Feature in development")
choice = 0
elif choice == 9:
schoice = 0
print("")
print("")
print("====== Settings ======")
print("1: Run redditsub in debug mode")
print("2: Run eroshare in debug mode")
print("3: Run imguralbum in debug mode")
print("4: Run gfycatget in debug mode")
print("5: Run Imgur().Get() in debug mode")
print("9: Exit Program")
schoice = int(input("Choice: "))
if schoice == 1:
reddit().menu()
elif schoice == 2:
url = input("Url: ")
eroshare().get(url,"Test")
elif schoice == 3:
id = input("ID: ")
imgur().get_album(id,"Test")
elif schoice == 4:
url = input("Url: ")
gfycat().get(url)
elif schoice == 5:
url = input("URL: ")
imgur().get(url[::-1],"Test")
elif schoice == 6:
reddit().saved()
elif schoice == 9:
return True
else:
print("Can't even fucking select the right shit")
else:
print("Your choice doesn't exist")
choice = 0
except ValueError:
print("Use an interger number to choose from the menu")
choice = 0
"""
if __name__ == "__main__":
varsetup()
#spreadsheetsetup()
print("Setting up Directory")
os.chdir('/home/roxie/Storage_1/Hello')
print("")
exitchoice = False
while exitchoice is False:
exitchoice = main(0)

+ 0
- 0
libs/scrapper/sites/__init__.py View File


+ 28
- 0
libs/scrapper/sites/eroshare.py View File

@@ -0,0 +1,28 @@

class eroshare():
    """Scrape and download media from an eroshare album page.

    NOTE(review): this file declares no imports — ``requests``, ``html``
    (presumably ``lxml.html``), ``wget``, ``os`` and the ``album_create``
    helper are all undefined here; they were presumably meant to be
    imported from a shared module.  TODO: confirm and add the imports.
    """
    def __init__(self):
        pass

    def get(self, url, name):
        # url:  album page URL to scrape.
        # name: album name forwarded to album_create().
        page = requests.get(url)
        tree = html.fromstring(page.content)
        # <source src=...> elements carry the video variants.
        links = tree.xpath('//source[@src]/@src')
        if links:
            album_create(name)
            for link in links:
                # Skip the low-resolution variant of each video.
                if "lowres" not in link:
                    wget.download(link)
                    print("Downloaded ", link)
        # Second pass: every element with a src attribute (images, thumbs, ...).
        links = tree.xpath('//*[@src]/@src')
        # NOTE(review): `hasbeencalled` looks like a flag album_create() sets
        # after chdir'ing into the album directory — verify in its definition.
        if len(links) > 2 and not album_create.hasbeencalled:
            album_create(name)
        for link in links:
            # "i." hosts serve direct media; "thumb" entries are previews.
            if "i." in link and "thumb" not in link:
                # Skip files already present in the current directory.
                if link.split("/")[-1] not in os.listdir("./"):
                    # src values are protocol-relative ("//host/...").
                    wget.download("https:" + link)
                    print("Downloaded ", link)
                else:
                    print("Already exists")
        if album_create.hasbeencalled:
            # Leave the album directory and reset the flag for the next call.
            os.chdir("../")
            album_create.hasbeencalled = False

+ 19
- 0
libs/scrapper/sites/gfy.py View File

@@ -0,0 +1,19 @@
class gfycat():
    """Helpers for turning gfycat page links into direct .webm links."""

    def __init__(self):
        pass

    def url_get(self, url, urladd):
        """Prefix the host of *url* with *urladd* and append ".webm".

        e.g. ("https://gfycat.com/Name", "giant.") ->
        "https://giant.gfycat.com/Name.webm"
        """
        parts = url.split("/")
        parts[2] = urladd + parts[2]
        parts.append(".webm")
        # Scheme//host rejoined with "/"; the remaining path segments and
        # the ".webm" suffix are glued on without separators (matching the
        # original accumulator loop exactly).
        return "/".join(parts[:3]) + "/" + "".join(parts[3:])

    def get(self, url):
        """Currently a pass-through: gfycat URLs are returned unchanged."""
        return url

+ 36
- 0
libs/scrapper/sites/imgur.py View File

@@ -0,0 +1,36 @@
import requests
import os
import wget
from bs4 import BeautifulSoup

class imgur():
    """Class for all interactions with Imgur."""

    # Extensions imgur serves as direct image links.
    _IMAGE_EXTS = ("png", "jpg", "jpeg", "gif", "gifv")

    def __init__(self):
        pass

    def removed(self, url):
        """Return True when imgur serves its 'removed.png' placeholder for *url*."""
        page = requests.get(url)
        soup = BeautifulSoup(page.content, 'html.parser')
        return "removed.png" in soup.a["src"]

    def get(self, url):
        """Resolve *url* to a single direct image URL.

        Returns *url* itself for direct image links, the lone image of a
        single-image album, or False when the image was removed, the album
        holds zero or multiple images, or the URL shape is unrecognised.
        """
        if self.removed(url):
            return False
        if url.split(".")[-1] in self._IMAGE_EXTS:
            return url
        parts = url.split("/")
        # Album URLs look like https://imgur.com/a/<id>; the length guard
        # avoids an IndexError on URLs with no "/" at all.
        if len(parts) >= 2 and parts[-2] == "a":
            page = requests.get(url)
            soup = BeautifulSoup(page.content, 'html.parser')
            # Fix: don't shadow the builtin `list`; dedupe imgur-hosted srcs.
            images = []
            for img in soup.find_all("img"):
                src = img["src"]
                if "imgur" in src and src not in images:
                    images.append(src)
            if len(images) == 1:
                return images[0]
            # Fix: empty album used to raise IndexError on list[0];
            # multiple images were already reported as False.
            return False
        # Fix: unrecognised URL shapes fell through returning an implicit
        # None; return False explicitly (still falsy for callers).
        return False

+ 3
- 0
libs/scrapper/sites/reddit.py View File

@@ -0,0 +1,3 @@
class reddit:
    """Resolver for redd.it / i.reddituploads links."""

    def get(self, url):
        # These URLs already point directly at the media; pass through.
        return url

+ 3
- 0
libs/scrapper/sites/tumblr.py View File

@@ -0,0 +1,3 @@
class tumblr():
    """Resolver for media.tumblr links."""

    def get(self, url):
        # media.tumblr URLs are already direct media links; pass through.
        return url

Loading…
Cancel
Save