
scrappers good for now

tags/v0.4.0
roxie committed 6 years ago
commit 3618014e38
4 changed files with 14 additions and 12 deletions
  1. libs/scrapper/scrapper.py (+8, -3)
  2. libs/scrapper/scrappersites/eroshare.py (+0, -1)
  3. libs/scrapper/scrappersites/imgur.py (+5, -7)
  4. libs/scrapper/scrappersites/tumblr.py (+1, -1)

libs/scrapper/scrapper.py (+8, -3)

@@ -1,10 +1,16 @@
 import requests
 from libs.scrapper.scrappersites import imgur, reddit, gfy, tumblr
 
 class scrapper():
 	def __init__(self):
 		pass
-	def get(self, url):
+
+	def linkget(self, subreddit):
+		html = requests.get("https://reddit.com/r/"+subreddit+".json", headers = {'User-agent': 'RoxBot Discord Bot'})
+		reddit = html.json()["data"]["children"]
+		return reddit
+
+	def retriveurl(self, url):
 		url2 = ""
 		if "imgur" in url:
 			url2 = imgur.imgur().get(url)
@@ -17,5 +23,4 @@ class scrapper():
 			url2 = reddit.reddit().get(url)
 		elif "media.tumblr" in url:
 			url2 = tumblr.tumblr().get(url)
-			return url2
-
+		return url2
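
For orientation, a minimal usage sketch of the two methods this file now exposes, assuming the class is imported from libs/scrapper/scrapper.py as shown above; the subreddit name and the ["data"]["url"] field on each post come from reddit's listing JSON and are assumptions, not part of this commit:

from libs.scrapper.scrapper import scrapper

s = scrapper()
posts = s.linkget("aww")            # post objects from reddit's JSON listing (subreddit name is illustrative)
for post in posts:
    link = post["data"]["url"]      # outbound link of the post (assumed reddit JSON field)
    media = s.retriveurl(link)      # direct media URL, or "" when no site handler matches
    if media:
        print(media)

Anything retriveurl() cannot match falls through to the initial empty string, so a caller only needs a truthiness check on the result.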

libs/scrapper/scrappersites/eroshare.py (+0, -1)

@@ -1,4 +1,3 @@
-
 class eroshare():
 	def __init__(self):
 		pass

libs/scrapper/scrappersites/imgur.py (+5, -7)

@@ -1,6 +1,4 @@
 import requests
-import os
-import wget
 from bs4 import BeautifulSoup
 
 class imgur():
@@ -25,12 +23,12 @@ class imgur():
 		elif url.split("/")[-2] == "a":
 			page = requests.get(url)
 			soup = BeautifulSoup(page.content, 'html.parser')
-			list = []
+			links = []
 			for img in soup.find_all("img"):
 				if "imgur" in img["src"]:
-					if not img["src"] in list:
-						list.append(img["src"])
-			if len(list) > 1:
+					if not img["src"] in links:
+						links.append(img["src"])
+			if len(links) > 1:
 				return False
 			else:
-				return list[0]
+				return links[0]
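
As a rough illustration of the album branch after this rename, assuming bs4 is installed and using a hypothetical album URL: get() returns the lone image source for single-image albums and False once more than one unique src has been collected.

from libs.scrapper.scrappersites import imgur

result = imgur.imgur().get("https://imgur.com/a/abc123")  # hypothetical album URL
if result is False:
    print("album has more than one image, skipping")      # len(links) > 1
elif result:
    print("direct image:", result)                        # the album's only unique <img> src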

libs/scrapper/scrappersites/tumblr.py (+1, -1)

@@ -1,3 +1,3 @@
 class tumblr():
 	def get(self,url):
-	return url
+		return url
