I created a bot for my Discord server, that goes to the Reddit API for a given subreddit, and posts the Top 10 results for the Day in the Discord chat, based on the subreddit(s) that you input. It disregards self posts, and really only posts pictures and GIFs. The Discord message command would look something like this: =get funny awww news
, posting the results for each subreddit as it gets them from the Reddit API. THIS WORKS WITH NO PROBLEM. I know that the bot's ability to hit the API and post to discord works.
THIS IS THE PROBLEM I NEED HELP WITH:
I added another command =getshuffled
which puts all of the results from the subreddits in a large list, and then shuffles them before posting. This works really well with a request of up to ~50 subreddits.
Because it can be such a large list of results, 1000+ results from 100+ subreddits, the bot is crashing on really big requests. Now, I think I know what the problem is: The bot is hosted on PythonAnywhere, where I only get 3GB of RAM. Now, in my head, even with a large list, it shouldn't be chewing up so much memory. No way. So, I think I'm not clearing memory?
I'm sure that doing the data pull from Reddit and creating the big list of results chews up a lot, and then when it tries to shuffle and post the results in the next function, it runs out of memory and PythonAnywhere kills the process.
This is where I get stuck: I'm not really sure how python manages the memory between functions, or even if I'm doing it right. I think it's something simple, like how I'm calling functions or something, or me not understanding how Asyncio is working under the hood.
Here is what PythonAnywhere says about RAM:
Because system RAM is one of the most scarce resources on the PythonAnywhere servers, we limit your processes to a maximum in-memory size of 3GB. This is a per-process limit, not a system-wide one, so if you have larger memory needs, you may be able to do the processing you need by running multiple smaller processes. If a process goes over the memory limit, it will be killed.
Any help in this regard would be appreciated!!!
(Also, if there's anything in the code that makes you shake your head, let me know, I'm sure I've made some bad-practice mistakes! Thanks guys )
EDIT: Python version is 3.6 and Discord.py version is 0.16.12
EDIT 2: I checked the server logs: This is the error that was thrown when it was run with a large amount of results (this is being thrown on the main_post function, I've commented the line):
ERROR:asyncio:Task was destroyed but it is pending!
task: <Task pending coro=<Client._run_event() running at /home/GageBrk/.local/lib/python3.6/site-packages/discord/client.py:307> wait_for=<Future pending cb=[BaseSelectorEventLoop._sock_connect_done(13)(), <TaskWakeupMethWrapper object at 0x7f0aa5bbaa38>()]>>
Code:
import logging
import socket
import sys
import random
import os
from redditBot_auth import reddit
import discord
import asyncio
from discord.ext.commands import Bot
#from discord.ext import commands
import platform
@client.event
async def on_ready():
return await client.change_presence(game=discord.Game(name='Getting The Dank Memes'))
def is_number(s):
try:
int(s)
return True
except:
pass
def show_title(s):
try:
if s == 'TITLE':
return True
if s == 'TITLES':
return True
if s == 'title':
return True
if s == 'titles':
return True
except:
pass
#This gets results for each subreddit and puts into nested list with format
#[[title1, URL1], [title2, URL2], [title3, URL3]]
#It then passes that results list to the function that posts to Discord
async def main_loop(*args, shuffled=False):
q=10
#This takes a integer value argument from the input string.
#It sets the number variable,
#Then deletes the number from the arguments list.
#same with the title variable
title = False
for item in args:
if is_number(item):
q = item
q = int(q)
if q > 15:
q=15
args = [x for x in args if not is_number(x)]
if show_title(item):
title = True
args = [x for x in args if not show_title(x)]
number_of_posts = q * len(args)
results=[]
#results = [[] for x in range(number_of_posts)] #create nested lists for each post. This is ALL the links that will be posted.
TESTING = False #If this is turned to True, the subreddit of each post will be posted. Will use defined list of results
NoGrabResults = False
i = 0
await client.say('*Posting ' + str(number_of_posts) + ' posts from ' + str(len(args))+' subreddits*')
#This pulls the data and creates a list of links for the bot to post
if NoGrabResults == False: #This is for testing, ignore
for item in args:
try:
#subreddit_results = [[] for x in range(q)] #create nested lists for each subreddit.
#This allows for duplicate deletion within the subreddit results,
#rather than going over the entire 'results' list.
subreddit_results=[]
e = 0 #counter for the subreddit_results list.
#await client.say('<'+item+'>')
subreddit = reddit.subreddit(item)
Day = subreddit.top('day', limit= q*2)
Week = subreddit.top('week', limit = q*2)
Month = subreddit.top('month', limit = q*2)
Year = subreddit.top('year', limit = q*2)
AllTime = subreddit.top('all', limit = q*2)
print(item)
for submission in Day:
post = []
if len(subreddit_results) < q :
if submission.is_self is False:
if '/v.redd.it/' not in submission.url:
#print(submission.url)
if '.gif' or 'imgur.com' or 'gfycat' in submission.url:
if submission.url not in subreddit_results:
post.append(submission.title)
post.append(submission.url)
#post.append(item)
subreddit_results.append(post)
#print('pulled posts from Daily')
if len(subreddit_results) < q :
#print('getting posts from Weekly')
for submission in Week:
post = []
if len(subreddit_results) < q :
if submission.is_self is False:
if '/v.redd.it/' not in submission.url:
#print(submission.url)
if '.gif' or 'imgur.com' or 'gfycat' in submission.url:
if submission.url not in subreddit_results:
post.append(submission.title)
post.append(submission.url)
#post.append(item)
subreddit_results.append(post)
if len(subreddit_results) < q :
#print('getting posts from Monthly')
for submission in Month:
post = []
if len(subreddit_results) < q :
if submission.is_self is False:
if '/v.redd.it/' not in submission.url:
#print(submission.url)
if '.gif' or 'imgur.com' or 'gfycat' in submission.url:
if submission.url not in subreddit_results:
post.append(submission.title)
post.append(submission.url)
#post.append(item)
subreddit_results.append(post)
if len(subreddit_results) < q :
#print('getting posts from Yearly')
for submission in Year:
post = []
if len(subreddit_results) < q :
if submission.is_self is False:
if '/v.redd.it/' not in submission.url:
if '.gif' or 'imgur.com' or 'gfycat' in submission.url:
if submission.url not in subreddit_results:
post.append(submission.title)
post.append(submission.url)
#post.append(item)
subreddit_results.append(post)
if len(subreddit_results) < q :
#print('getting posts from All Time')
for submission in AllTime:
post = []
if len(subreddit_results) < q :
if submission.is_self is False:
if '/v.redd.it/' not in submission.url:
if '.gif' or 'imgur.com' or 'gfycat' in submission.url:
if submission.url not in subreddit_results:
post.append(submission.title)
post.append(submission.url)
#post.append(item)
subreddit_results.append(post)
#print (subreddit_results)
# If they don't want shuffled results, it will post results
# to Discord as it gets them, instead of creating the nested list
if shuffled == False:
await client.say('<'+item+'>')
for link in subreddit_results:
if title==True:
await client.say(link[0]) #title
await client.say(link[1]) #post url 1
if TESTING == True:
await client.say(link[2]) #subreddit
if title==True:
await client.say("_") #spacer to seperate title from post above
else:
for link in subreddit_results:
results.append(link)
except Exception as e:
if 'Redirect to /subreddits/search' or '404' in str(e):
await client.say('*'+item+' failed...* '+'`Subreddit Does Not Exist`')
if '403' in str(e):
await client.say('*'+item+' failed...* '+'`Access Denied`')
print(str(e) + ' --> ' + item)
pass
print ('results loaded')
await main_post(results, shuffled, title, TESTING)
else:
from Test_args import LargeResults as results
#print(results)
await main_post(results, shuffled, title, TESTING)
.
#this shuffles the posts and posts to Discord.
async def main_post(results, shuffled, title, TESTING):
try:
if shuffled == True:
print('____SHUFFLED___')
random.shuffle(results)
random.shuffle(results)
random.shuffle(results)
#This posts the links in the 'results' list to Discord
for post in results:
try:
# THIS IS WHERE THE PROGRAM IS FAILING!!
if title==True:
await client.say(post[0]) #title
await client.say(post[1]) #post url
if TESTING == True:
await client.say(post[2]) #subreddit
if title==True:
await client.say("_") #spacer to separate title from post above
except Exception as e:
print(e)
pass
await client.say('ALL DONE! ! !')
except Exception as e:
print (e)
pass
await client.say('`' +str(e) +'`')
.
@client.command()
async def get(*args, brief="say '=get' followed by a list of subreddits", description="To get the 10 Top posts from a subreddit, say '=get' followed by a list of subreddits:\n'=get funny news pubg'\n would get the top 10 posts for today for each subreddit and post to the chat."):
#sr = '+'.join(args)
await main_loop(*args)
#THIS POSTS THE POSTS RANDOMLY
@client.command()
async def getshuffled(*args, brief="say '=getshuffled' followed by a list of subreddits", description="Does the same thing as =get, but grabs ALL of the posts and shuffles them, before posting."):
await main_loop(*args, shuffled=True)
client.run('my ID')