diff --git a/resources/scripts/scrapers/twitter.py b/resources/scripts/scrapers/twitter.py
index c096bbdfd..e25ce197d 100755
--- a/resources/scripts/scrapers/twitter.py
+++ b/resources/scripts/scrapers/twitter.py
@@ -1,31 +1,39 @@
 # Generates JSON feed from Twitter timeline URL.
 #
-# This script expects two input parameters:
-# twitter.py [twitter-user-name] [twitter-user-id]
+# This script expects one input parameter:
+# twitter.py [twitter-user-name]
 #
 # For example:
-# twitter.py 'NASA' '11348282'
+# twitter.py 'NASA'
 
 import json
 import re
 import sys
 import time
 import html
-import urllib.request
 import requests
 import distutils.util
 from datetime import datetime
 
 twitter_url = "https://twitter.com/" + sys.argv[1]
-twitter_id = sys.argv[2]
 twitter_username = twitter_url[twitter_url.rfind("/") + 1:]
 twitter_bearer = "Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA"
 
 # Download RAW Twitter HTML data and extract token.
-url_response = urllib.request.urlopen(twitter_url)
-twitter_html = url_response.read().decode("utf-8")
-twitter_token = re.search("gt=(\d+);", twitter_html).group(1)
+url_response = requests.get(twitter_url)
+twitter_html = url_response.text
+twitter_token = re.search(r"gt=(\d+);", twitter_html).group(1)
 
+# Resolve the numeric user ID from the user name given on the command line.
+# "variables" is sent as compact JSON; requests takes care of URL-encoding it.
+url_response = requests.get("https://twitter.com/i/api/graphql/hc-pka9A7gyS3xODIafnrQ/UserByScreenName", params = {
+    "variables": json.dumps({"screen_name": twitter_username, "withHighlightedLabel": True}, separators = (",", ":"))
+}, headers = {
+    "x-guest-token": twitter_token,
+    "Authorization": twitter_bearer
+})
+twitter_id = json.loads(url_response.text)["data"]["user"]["rest_id"]
+
 # Obtain JSON Twitter data with token.
 twitter_json_url = "https://twitter.com/i/api/2/timeline/profile/{user_id}.json?include_profile_interstitial_type=1&include_blocking=1&include_blocked_by=1&include_followed_by=1&include_want_retweets=1&include_mute_edge=1&include_can_dm=1&include_can_media_tag=1&skip_status=1&cards_platform=Web-12&include_cards=1&include_ext_alt_text=true&include_quote_count=true&include_reply_count=1&tweet_mode=extended&include_entities=true&include_user_entities=true&include_ext_media_color=true&include_ext_media_availability=true&send_error_codes=true&simple_quoted_tweet=true&include_tweet_replies=false&count=50&userId={user_id}&ext=mediaStats&2ChighlightedLabel".format(user_id = twitter_id)