The rtweet package makes it easy to scrape Twitter. You first need a Twitter account and your access tokens (see the instructions in the following link).

Getting started instructions
See the vignette for additional example code.

library(rtweet)

library(tidyverse)

Set twitter token

# Twitter app credentials. The values below are placeholders: substitute the
# app name and keys of your own Twitter app before running.
appname <- "stor390"
consumer_key <- "YYYYYYYYYYY"
consumer_secret <- "XXXXXXXXXXXX"

# Build the token object from the app name and key pair defined above.
twitter_token <- create_token(
  app = appname,
  consumer_key = consumer_key,
  consumer_secret = consumer_secret
)

Search twitter

# Search Twitter for tweets matching the query "data science"; the result
# (a tibble with one row per tweet, see the printed output below) is kept in
# `search_results`.
search_results <- search_tweets("data science")

## Searching for tweets...

## Finished collecting tweets!

## # A tibble: 100 × 35
##        screen_name    user_id          created_at          status_id
##              <chr>      <chr>              <dttm>              <chr>
## 1      JasMRussell  414848069 2017-04-04 20:13:00 849354132586733569
## 2       predartbot 3369695584 2017-04-04 20:12:58 849354124068098050
## 3      jomcinerney   22478416 2017-04-04 20:12:41 849354055986139139
## 4        cpaynejr1  126676393 2017-04-04 20:12:41 849354055864520704
## 5        GmbakerNC  634571080 2017-04-04 20:12:19 849353961425567745
## 6     alevergara78  104520454 2017-04-04 20:12:11 849353928290562049
## 7        EGKBJames 3429712161 2017-04-04 20:11:43 849353810749345793
## 8     stephen_oman   84029229 2017-04-04 20:11:41 849353803648421890
## 9  astorino_steven 2176745083 2017-04-04 20:11:36 849353783809363969
## 10        IoTBlogs 1141258524 2017-04-04 20:11:23 849353725776994305
## # ... with 90 more rows, and 31 more variables: text <chr>,
## #   retweet_count <int>, favorite_count <int>, is_quote_status <lgl>,
## #   quote_status_id <chr>, is_retweet <lgl>, retweet_status_id <chr>,
## #   in_reply_to_status_status_id <chr>, in_reply_to_status_user_id <chr>,
## #   in_reply_to_status_screen_name <chr>, lang <chr>, source <chr>,
## #   media_id <chr>, media_url <chr>, media_url_expanded <chr>, urls <chr>,
## #   urls_display <chr>, urls_expanded <chr>, mentions_screen_name <chr>,
## #   mentions_user_id <chr>, symbols <chr>, hashtags <chr>,
## #   coordinates <chr>, place_id <chr>, place_type <chr>, place_name <chr>,
## #   place_full_name <chr>, country_code <chr>, country <chr>,
## #   bounding_box_coordinates <chr>, bounding_box_type <chr>

# Show the text of the first tweet returned by the search.
search_results$text[1]

## [1] "RT @monicatdata: Art Technologies: @ArthenaArt uses data science to find the best investments in art: https://t.co/JOZjloDrLL https://t.co/…"

User’s timeline

# Fetch tweets from the timeline of the UNC_Basketball account; the result is
# a tibble with one row per tweet (printed output below).
timeline <- get_timeline("UNC_Basketball")

## # A tibble: 200 × 35
##       screen_name  user_id          created_at          status_id
##             <chr>    <chr>              <dttm>              <chr>
## 1  UNC_Basketball 45575375 2017-04-04 17:40:48 849315833503502337
## 2  UNC_Basketball 45575375 2017-04-04 16:36:24 849299625127313408
## 3  UNC_Basketball 45575375 2017-04-04 16:12:11 849293532804206592
## 4  UNC_Basketball 45575375 2017-04-04 15:01:01 849275621473607680
## 5  UNC_Basketball 45575375 2017-04-04 14:19:01 849265052116963328
## 6  UNC_Basketball 45575375 2017-04-04 13:47:01 849256996389871616
## 7  UNC_Basketball 45575375 2017-04-04 13:27:01 849251965267656704
## 8  UNC_Basketball 45575375 2017-04-04 13:05:12 849246473573236738
## 9  UNC_Basketball 45575375 2017-04-04 12:00:20 849230152496279553
## 10 UNC_Basketball 45575375 2017-04-04 08:20:59 849174948052516865
## # ... with 190 more rows, and 31 more variables: text <chr>,
## #   retweet_count <int>, favorite_count <int>, is_quote_status <lgl>,
## #   quote_status_id <chr>, is_retweet <lgl>, retweet_status_id <chr>,
## #   in_reply_to_status_status_id <chr>, in_reply_to_status_user_id <chr>,
## #   in_reply_to_status_screen_name <chr>, lang <chr>, source <chr>,
## #   media_id <chr>, media_url <chr>, media_url_expanded <chr>, urls <chr>,
## #   urls_display <chr>, urls_expanded <chr>, mentions_screen_name <chr>,
## #   mentions_user_id <chr>, symbols <lgl>, hashtags <chr>,
## #   coordinates <lgl>, place_id <lgl>, place_type <lgl>, place_name <lgl>,
## #   place_full_name <lgl>, country_code <lgl>, country <lgl>,
## #   bounding_box_coordinates <lgl>, bounding_box_type <lgl>

# text of the first five rows, i.e. the five most recent tweets
# (rows are printed newest-first by created_at above)
timeline$text[1:5]

## [1] "See you soon Chapel Hill!\n\n#GetIntoIt https://t.co/pqWkHBVPpF"                                                                 
## [2] "That happened. \n#GetIntoIt https://t.co/dAmpPCULyn"                                                                             
## [3] "RT @UNC_Basketball: THIS IS CAROLINA BASKETBALL!\n\nNATIONAL CHAMPIONS!\n\n#GetIntoIt\n#OneShiningMoment https://t.co/PUMfEdFYBM"
## [4] "Carolina captured its sixth NCAA championship last night in Phoenix. #GetIntoIt https://t.co/TM50LlZTdG"                         
## [5] "Lucas: Lucky Us https://t.co/bso8J9VZzy #GetIntoIt https://t.co/X6UWUHsh0G"

Look up a user

# Look up the profile record for one account, identified by screen name.
user <- lookup_users("UNC_basketball")

## # A tibble: 1 × 36
##    user_id                name    screen_name          location
##      <chr>               <chr>          <chr>             <chr>
## 1 45575375 Carolina Basketball UNC_Basketball Chapel Hill, N.C.
## # ... with 32 more variables: description <chr>, protected <lgl>,
## #   followers_count <int>, friends_count <int>, listed_count <int>,
## #   created_at <dttm>, favourites_count <int>, utc_offset <int>,
## #   time_zone <chr>, geo_enabled <lgl>, verified <lgl>,
## #   statuses_count <int>, lang <chr>, contributors_enabled <lgl>,
## #   is_translator <lgl>, is_translation_enabled <lgl>,
## #   profile_background_color <chr>, profile_background_image_url <chr>,
## #   profile_background_image_url_https <chr>,
## #   profile_background_tile <lgl>, profile_image_url <chr>,
## #   profile_image_url_https <chr>, profile_image_url.1 <chr>,
## #   profile_image_url_https.1 <chr>, profile_link_color <chr>,
## #   profile_sidebar_border_color <chr>, profile_sidebar_fill_color <chr>,
## #   profile_text_color <chr>, profile_use_background_image <lgl>,
## #   default_profile <lgl>, default_profile_image <lgl>,
## #   profile_banner_url <chr>

Recent trends

# Fetch recent Twitter trends. No arguments are passed, so the package
# defaults apply; the result is a tibble with one row per trend.
trends <- get_trends()

## # A tibble: 50 × 9
##                             trend
## *                           <chr>
## 1                   #LeGrandDebat
## 2                    #EqualPayDay
## 3                      José Mayer
## 4  #لماذا_المراه_السعوديه_مستهدفه
## 5                      #RomaLazio
## 6                         #MUNEVE
## 7                           Syria
## 8                            Acre
## 9                       Tony Romo
## 10                  Ramón Espinar
## # ... with 40 more rows, and 8 more variables: url <chr>,
## #   promoted_content <lgl>, query <chr>, tweet_volume <int>, as_of <dttm>,
## #   created_at <dttm>, place <fctr>, woeid <int>

Scraping twitter with the rtweet package

STOR 390

Set twitter token

Search twitter

User’s timeline

Look up a user

Recent trends