add hacky support for misskey and calckey/firefish

This commit is contained in:
Toad King 2023-07-25 01:02:41 -05:00
parent 213ef57abe
commit c169b2ae30

View file

@ -75,7 +75,7 @@ def add_user_posts(server, access_token, followings, know_followings, all_known_
count = 0 count = 0
failed = 0 failed = 0
for post in posts: for post in posts:
if post.get('reblog') is None and post.get('url') is not None and post.get('url') not in seen_urls: if post.get('reblog') is None and post.get('renoteId') is None and post.get('url') is not None and post.get('url') not in seen_urls:
added = add_post_with_context(post, server, access_token, seen_urls) added = add_post_with_context(post, server, access_token, seen_urls)
if added is True: if added is True:
seen_urls.add(post['url']) seen_urls.add(post['url'])
@ -150,7 +150,11 @@ def get_user_posts(user, know_followings, server):
user_id = get_user_id(parsed_url[0], parsed_url[1]) user_id = get_user_id(parsed_url[0], parsed_url[1])
except Exception as ex: except Exception as ex:
log(f"Error getting user ID for user {user['acct']}: {ex}") log(f"Error getting user ID for user {user['acct']}: {ex}")
return None
## HACK HACK HACK: in the future actually try to detect the correct APIs to use at runtime
log('trying misskey')
return get_user_posts_misskey(user, know_followings, parsed_url[0])
try: try:
url = f"https://{parsed_url[0]}/api/v1/accounts/{user_id}/statuses?limit=40" url = f"https://{parsed_url[0]}/api/v1/accounts/{user_id}/statuses?limit=40"
@ -170,6 +174,56 @@ def get_user_posts(user, know_followings, server):
log(f"Error getting posts for user {user['acct']}: {ex}") log(f"Error getting posts for user {user['acct']}: {ex}")
return None return None
def get_user_posts_misskey(user, know_followings, server):
    """Get the recent posts ("notes") of a user from a Misskey/Firefish server.

    Looks the user up via the search API, fetches their latest 40 notes, and
    patches each note with a Mastodon-style 'url' key so downstream code can
    treat them like statuses.  Returns the list of notes, or None on any error.
    """
    # Query user info via the search API.
    # We could filter by host, but there's no way to limit that to just the
    # main host on Firefish currently; on Misskey it works if you supply '.'
    # as the host, but Firefish does not.
    url = f'https://{server}/api/users/search-by-username-and-host'
    try:
        resp = post(url, { 'username': user['username'] })
    except Exception as ex:
        log(f"Error finding user {user['username']} from {server}. Exception: {ex}")
        return None

    if resp.status_code != 200:
        log(f"Error finding user {user['username']} from {server}. Status Code: {resp.status_code}")
        return None

    # Initialise userId so an empty/unmatched search raises the intended
    # exception below instead of an UnboundLocalError.  Use a distinct loop
    # variable: the original shadowed the 'user' parameter, corrupting the
    # usernames reported in all subsequent log messages.
    userId = None
    try:
        for candidate in resp.json():
            # host is None for accounts local to the queried server
            if candidate['host'] is None:
                userId = candidate['id']
                break
        if userId is None:
            raise Exception('user not found on server in search')
    except Exception as ex:
        log(f"Error finding user {user['username']} from {server}. Exception: {ex}")
        return None

    url = f'https://{server}/api/users/notes'
    try:
        resp = post(url, { 'userId': userId, 'limit': 40 })
    except Exception as ex:
        log(f"Error getting posts by user {user['username']} from {server}. Exception: {ex}")
        return None

    if resp.status_code != 200:
        log(f"Error getting posts by user {user['username']} from {server}. Status Code: {resp.status_code}")
        return None

    try:
        notes = resp.json()
        for note in notes:
            if note.get('url') is None:
                # add this to make it look like Mastodon status objects
                note.update({ 'url': f"https://{server}/notes/{note['id']}" })
        return notes
    except Exception as ex:
        log(f"Error getting posts by user {user['username']} from {server}. Exception: {ex}")
        return None
def get_new_follow_requests(server, access_token, max, known_followings): def get_new_follow_requests(server, access_token, max, known_followings):
"""Get any new follow requests for the specified user, up to the max number provided""" """Get any new follow requests for the specified user, up to the max number provided"""
@ -504,6 +558,11 @@ def parse_url(url, parsed_urls):
if match is not None: if match is not None:
parsed_urls[url] = match parsed_urls[url] = match
if url not in parsed_urls:
match = parse_misskey_url(url)
if match is not None:
parsed_urls[url] = match
if url not in parsed_urls: if url not in parsed_urls:
log(f"Error parsing toot URL {url}") log(f"Error parsing toot URL {url}")
parsed_urls[url] = None parsed_urls[url] = None
@ -560,6 +619,15 @@ def parse_pixelfed_url(url):
return (match.group("server"), match.group("toot_id")) return (match.group("server"), match.group("toot_id"))
return None return None
def parse_misskey_url(url):
    """parse a Misskey URL and return the server and ID"""
    note_pattern = r"https://(?P<server>[^/]+)/notes/(?P<toot_id>[^/]+)"
    match = re.match(note_pattern, url)
    if match is None:
        return None
    return (match.group("server"), match.group("toot_id"))
def parse_pixelfed_profile_url(url): def parse_pixelfed_profile_url(url):
"""parse a Pixelfed Profile URL and return the server and username""" """parse a Pixelfed Profile URL and return the server and username"""
match = re.match(r"https://(?P<server>[^/]+)/(?P<username>[^/]+)", url) match = re.match(r"https://(?P<server>[^/]+)/(?P<username>[^/]+)", url)
@ -623,6 +691,8 @@ def get_toot_context(server, toot_id, toot_url):
return get_comment_context(server, toot_id, toot_url) return get_comment_context(server, toot_id, toot_url)
if toot_url.find("/post/") != -1: if toot_url.find("/post/") != -1:
return get_comments_urls(server, toot_id, toot_url) return get_comments_urls(server, toot_id, toot_url)
if toot_url.find("/notes/") != -1:
return get_misskey_urls(server, toot_id, toot_url)
url = f"https://{server}/api/v1/statuses/{toot_id}/context" url = f"https://{server}/api/v1/statuses/{toot_id}/context"
try: try:
resp = get(url) resp = get(url)
@ -716,6 +786,47 @@ def get_comments_urls(server, post_id, toot_url):
log(f"Error getting comments for post {toot_url}. Status code: {resp.status_code}") log(f"Error getting comments for post {toot_url}. Status code: {resp.status_code}")
return [] return []
def get_misskey_urls(server, post_id, toot_url):
    """get the URLs of the comments of a given misskey post

    Collects both the direct replies ('children') and the ancestor thread
    ('conversation') of the note.  Each request is best-effort: a failure in
    one endpoint no longer discards the URLs already gathered from the other
    (the original returned [] if the conversation request raised, throwing
    away the children URLs it had just collected).
    """

    def _fetch_related(endpoint, description):
        # Both endpoints take the same payload and return a list of notes,
        # so the fetch/parse logic is shared.
        url = f"https://{server}/api/notes/{endpoint}"
        try:
            resp = post(url, { 'noteId': post_id })
        except Exception as ex:
            log(f"Error getting post {post_id} from {toot_url}. Exception: {ex}")
            return []
        if resp.status_code != 200:
            log(f"Error getting post {post_id} from {toot_url}. Status Code: {resp.status_code}")
            return []
        try:
            res = resp.json()
            log(f"Got {description} for misskey post {toot_url}")
            return [f'https://{server}/notes/{comment_info["id"]}' for comment_info in res]
        except Exception as ex:
            log(f"Error parsing post {post_id} from {toot_url}. Exception: {ex}")
            return []

    urls = _fetch_related('children', 'children')
    urls.extend(_fetch_related('conversation', 'conversation'))
    return urls
def add_context_urls(server, access_token, context_urls, seen_urls): def add_context_urls(server, access_token, context_urls, seen_urls):
"""add the given toot URLs to the server""" """add the given toot URLs to the server"""
count = 0 count = 0
@ -845,6 +956,28 @@ def get(url, headers = {}, timeout = 0, max_tries = 5):
raise Exception(f"Maximum number of retries exceeded for rate limited request {url}") raise Exception(f"Maximum number of retries exceeded for rate limited request {url}")
return response return response
def post(url, json, headers = None, timeout = 0, max_tries = 5):
    """A simple wrapper to make a post request while providing our user agent, and respecting rate limits"""
    # None (not {}) as the default avoids the mutable-default-argument pitfall;
    # callers that passed a dict before are unaffected.
    h = {} if headers is None else headers.copy()
    if 'User-Agent' not in h:
        h['User-Agent'] = 'FediFetcher (https://go.thms.uk/mgr)'

    if timeout == 0:
        timeout = arguments.http_timeout

    # Retry loop instead of recursion: same semantics (wait until the server's
    # advertised reset time, up to max_tries retries, then raise) without
    # growing the call stack on long rate-limit stalls.
    while True:
        response = requests.post(url, json=json, headers=h, timeout=timeout)
        if response.status_code != 429:
            return response
        if max_tries <= 0:
            raise Exception(f"Maximum number of retries exceeded for rate limited request {url}")
        reset = parser.parse(response.headers['x-ratelimit-reset'])
        now = datetime.now(datetime.now().astimezone().tzinfo)
        wait = (reset - now).total_seconds() + 1
        log(f"Rate Limit hit requesting {url}. Waiting {wait} sec to retry at {response.headers['x-ratelimit-reset']}")
        time.sleep(wait)
        max_tries -= 1
def log(text): def log(text):
print(f"{datetime.now()} {datetime.now().astimezone().tzinfo}: {text}") print(f"{datetime.now()} {datetime.now().astimezone().tzinfo}: {text}")