library(readr)
# Part 1 of the tweet dump, read with base read.csv() using the first row as
# column names. TRUE is spelled out because `T` is an ordinary variable that
# can be reassigned.
IRAhandle_tweets_1 <- read.csv("IRAhandle_tweets_1.csv", header = TRUE)

# Part 2 of the tweet dump. When left to type guessing, readr parsed
# `tco3_step1` as logical and replaced every URL in it with NA (the recorded
# output below, which predates this setting, lists 2042 parsing failures).
# `tco3_step1` and `post_type` are therefore pinned to character; all other
# columns are still guessed.
IRAhandle_tweets_2 <- read_csv(
  "IRAhandle_tweets_2.csv",
  col_types = cols(
    post_type = col_character(),
    tco3_step1 = col_character()
  )
)
## Parsed with column specification:
## cols(
##   .default = col_character(),
##   external_author_id = col_double(),
##   following = col_double(),
##   followers = col_double(),
##   updates = col_double(),
##   retweet = col_double(),
##   new_june_2018 = col_double(),
##   alt_external_id = col_double(),
##   tweet_id = col_double(),
##   tco3_step1 = col_logical()
## )
## See spec(...) for full column specifications.
## Warning: 2042 parsing failures.
##   row        col           expected                                                                     actual                     file
##  3345 tco3_step1 1/0/T/F/TRUE/FALSE http://rus.myprintbar.ru                                                   'IRAhandle_tweets_2.csv'
##  4375 tco3_step1 1/0/T/F/TRUE/FALSE http://gamiliel.com/2012/06/05/john-8-the-adulterous-woman/                'IRAhandle_tweets_2.csv'
##  5187 tco3_step1 1/0/T/F/TRUE/FALSE http://www.meadowvalecrc.org/?powerpress_pinw=3162-podcast                 'IRAhandle_tweets_2.csv'
##  6714 tco3_step1 1/0/T/F/TRUE/FALSE http://www.meadowvalecrc.org/?powerpress_pinw=3321-podcast                 'IRAhandle_tweets_2.csv'
## 15483 tco3_step1 1/0/T/F/TRUE/FALSE http://www.wbaltv.com/national/uk-votes-to-leave-in-eu-referendum/40202614 'IRAhandle_tweets_2.csv'
## ..... .......... .................. .......................................................................... ........................
## See problems(...) for more details.
# Part 3 of the tweet dump; column types are left to readr's guessing.
IRAhandle_tweets_3 <- read_csv(file = "IRAhandle_tweets_3.csv")
## Parsed with column specification:
## cols(
##   .default = col_character(),
##   external_author_id = col_double(),
##   following = col_double(),
##   followers = col_double(),
##   updates = col_double(),
##   retweet = col_double(),
##   new_june_2018 = col_double(),
##   alt_external_id = col_double(),
##   tweet_id = col_double()
## )
## See spec(...) for full column specifications.
# Part 4 of the tweet dump. When left to type guessing, readr parsed both
# `post_type` and `tco3_step1` as logical, so every "RETWEET" label and every
# URL became NA (the recorded output below, which predates this setting, lists
# 99326 parsing failures). Both columns are therefore pinned to character; all
# other columns are still guessed.
IRAhandle_tweets_4 <- read_csv(
  "IRAhandle_tweets_4.csv",
  col_types = cols(
    post_type = col_character(),
    tco3_step1 = col_character()
  )
)
## Parsed with column specification:
## cols(
##   .default = col_character(),
##   external_author_id = col_double(),
##   following = col_double(),
##   followers = col_double(),
##   updates = col_double(),
##   post_type = col_logical(),
##   retweet = col_double(),
##   new_june_2018 = col_double(),
##   alt_external_id = col_double(),
##   tweet_id = col_double(),
##   tco3_step1 = col_logical()
## )
## See spec(...) for full column specifications.
## Warning: 99326 parsing failures.
##   row        col           expected                actual                     file
##  4038 tco3_step1 1/0/T/F/TRUE/FALSE http://Dictionary.com 'IRAhandle_tweets_4.csv'
##  4239 tco3_step1 1/0/T/F/TRUE/FALSE http://Backpage.com   'IRAhandle_tweets_4.csv'
##  5554 tco3_step1 1/0/T/F/TRUE/FALSE http://ABC7NEWS.COM   'IRAhandle_tweets_4.csv'
##  9972 post_type  1/0/T/F/TRUE/FALSE RETWEET               'IRAhandle_tweets_4.csv'
## 10183 post_type  1/0/T/F/TRUE/FALSE RETWEET               'IRAhandle_tweets_4.csv'
## ..... .......... .................. ..................... ........................
## See problems(...) for more details.
# Part 5 of the tweet dump. When left to type guessing, readr parsed
# `tco3_step1` as logical and replaced every URL in it with NA (the recorded
# output below, which predates this setting, lists 305 parsing failures).
# `tco3_step1` and `post_type` are therefore pinned to character; all other
# columns are still guessed.
IRAhandle_tweets_5 <- read_csv(
  "IRAhandle_tweets_5.csv",
  col_types = cols(
    post_type = col_character(),
    tco3_step1 = col_character()
  )
)
## Parsed with column specification:
## cols(
##   .default = col_character(),
##   external_author_id = col_double(),
##   following = col_double(),
##   followers = col_double(),
##   updates = col_double(),
##   retweet = col_double(),
##   new_june_2018 = col_double(),
##   alt_external_id = col_double(),
##   tweet_id = col_double(),
##   tco3_step1 = col_logical()
## )
## See spec(...) for full column specifications.
## Warning: 305 parsing failures.
##  row        col           expected                          actual                     file
## 1084 tco3_step1 1/0/T/F/TRUE/FALSE http://vice.com                 'IRAhandle_tweets_5.csv'
## 3026 tco3_step1 1/0/T/F/TRUE/FALSE http://dlvr.it/Nk4PFy           'IRAhandle_tweets_5.csv'
## 3426 tco3_step1 1/0/T/F/TRUE/FALSE http://freecomicsonlinewoke.com 'IRAhandle_tweets_5.csv'
## 3828 tco3_step1 1/0/T/F/TRUE/FALSE https://youtu.be/1vWvl3sMPlg    'IRAhandle_tweets_5.csv'
## 4745 tco3_step1 1/0/T/F/TRUE/FALSE http://Refusefascism.org        'IRAhandle_tweets_5.csv'
## .... .......... .................. ............................... ........................
## See problems(...) for more details.
# Part 6 of the tweet dump. When left to type guessing, readr parsed
# `tco3_step1` as logical and replaced every URL in it with NA (the recorded
# output below, which predates this setting, lists 1708 parsing failures).
# `tco3_step1` and `post_type` are therefore pinned to character; all other
# columns are still guessed.
IRAhandle_tweets_6 <- read_csv(
  "IRAhandle_tweets_6.csv",
  col_types = cols(
    post_type = col_character(),
    tco3_step1 = col_character()
  )
)
## Parsed with column specification:
## cols(
##   .default = col_character(),
##   external_author_id = col_double(),
##   following = col_double(),
##   followers = col_double(),
##   updates = col_double(),
##   retweet = col_double(),
##   new_june_2018 = col_double(),
##   alt_external_id = col_double(),
##   tweet_id = col_double(),
##   tco3_step1 = col_logical()
## )
## See spec(...) for full column specifications.
## Warning: 1708 parsing failures.
##  row        col           expected                                                     actual                     file
## 1605 tco3_step1 1/0/T/F/TRUE/FALSE http://www.meadowvalecrc.org/?powerpress_pinw=3381-podcast 'IRAhandle_tweets_6.csv'
## 2500 tco3_step1 1/0/T/F/TRUE/FALSE http://gamiliel.com/2012/06/06/john-13-the-passover/       'IRAhandle_tweets_6.csv'
## 2819 tco3_step1 1/0/T/F/TRUE/FALSE http://gamiliel.com/2013/01/07/romans-7/                   'IRAhandle_tweets_6.csv'
## 2825 tco3_step1 1/0/T/F/TRUE/FALSE https://500px.com/gamilliell                               'IRAhandle_tweets_6.csv'
## 4061 tco3_step1 1/0/T/F/TRUE/FALSE http://www.meadowvalecrc.org/?powerpress_pinw=3303-podcast 'IRAhandle_tweets_6.csv'
## .... .......... .................. .......................................................... ........................
## See problems(...) for more details.
# Part 7 of the tweet dump. When left to type guessing, readr parsed
# `tco3_step1` as logical and replaced every URL in it with NA (the recorded
# output below, which predates this setting, lists 383 parsing failures).
# `tco3_step1` and `post_type` are therefore pinned to character; all other
# columns are still guessed.
IRAhandle_tweets_7 <- read_csv(
  "IRAhandle_tweets_7.csv",
  col_types = cols(
    post_type = col_character(),
    tco3_step1 = col_character()
  )
)
## Parsed with column specification:
## cols(
##   .default = col_character(),
##   external_author_id = col_double(),
##   following = col_double(),
##   followers = col_double(),
##   updates = col_double(),
##   retweet = col_double(),
##   new_june_2018 = col_double(),
##   alt_external_id = col_double(),
##   tweet_id = col_double(),
##   tco3_step1 = col_logical()
## )
## See spec(...) for full column specifications.
## Warning: 383 parsing failures.
##  row        col           expected                                                                       actual                     file
## 2893 tco3_step1 1/0/T/F/TRUE/FALSE http://bit.ly/2ismFW6                                                        'IRAhandle_tweets_7.csv'
## 3022 tco3_step1 1/0/T/F/TRUE/FALSE http://kNOwBETTERHIPHOP.com                                                  'IRAhandle_tweets_7.csv'
## 3056 tco3_step1 1/0/T/F/TRUE/FALSE http://RightOnTVMobileApp.com                                                'IRAhandle_tweets_7.csv'
## 4200 tco3_step1 1/0/T/F/TRUE/FALSE http://kweliclub.com/products/walter-rodney-how-europe-underdeveloped-africa 'IRAhandle_tweets_7.csv'
## 5294 tco3_step1 1/0/T/F/TRUE/FALSE http://www.loonwatch.com/tag/sam-harris/                                     'IRAhandle_tweets_7.csv'
## .... .......... .................. ............................................................................ ........................
## See problems(...) for more details.
# Part 8 of the tweet dump. When left to type guessing, readr parsed
# `tco3_step1` as logical and replaced every URL in it with NA (the recorded
# output below, which predates this setting, lists 285 parsing failures).
# `tco3_step1` and `post_type` are therefore pinned to character; all other
# columns are still guessed.
IRAhandle_tweets_8 <- read_csv(
  "IRAhandle_tweets_8.csv",
  col_types = cols(
    post_type = col_character(),
    tco3_step1 = col_character()
  )
)
## Parsed with column specification:
## cols(
##   .default = col_character(),
##   external_author_id = col_double(),
##   following = col_double(),
##   followers = col_double(),
##   updates = col_double(),
##   retweet = col_double(),
##   new_june_2018 = col_double(),
##   alt_external_id = col_double(),
##   tweet_id = col_double(),
##   tco3_step1 = col_logical()
## )
## See spec(...) for full column specifications.
## Warning: 285 parsing failures.
##   row        col           expected                                                                                   actual                     file
##  4681 tco3_step1 1/0/T/F/TRUE/FALSE http://rus.myprintbar.ru                                                                 'IRAhandle_tweets_8.csv'
##  5289 tco3_step1 1/0/T/F/TRUE/FALSE https://www.smashwords.com/books/view/474508                                             'IRAhandle_tweets_8.csv'
##  5564 tco3_step1 1/0/T/F/TRUE/FALSE http://patriotsunite.info                                                                'IRAhandle_tweets_8.csv'
## 13745 tco3_step1 1/0/T/F/TRUE/FALSE http://www.rosbalt.ru/piter/2015/06/18/1409874.html                                      'IRAhandle_tweets_8.csv'
## 17910 tco3_step1 1/0/T/F/TRUE/FALSE https://nevnov.ru/457531-aleksandr-perendzhiev-ozhidaem-popytku-gosperevorota-na-ukraine 'IRAhandle_tweets_8.csv'
## ..... .......... .................. ........................................................................................ ........................
## See problems(...) for more details.
# Part 9 of the tweet dump. When left to type guessing, readr parsed both
# `post_type` and `tco3_step1` as logical, so every "RETWEET" label became NA
# (the recorded output below, which predates this setting, lists 67645 parsing
# failures). Both columns are therefore pinned to character; all other columns
# are still guessed.
IRAhandle_tweets_9 <- read_csv(
  "IRAhandle_tweets_9.csv",
  col_types = cols(
    post_type = col_character(),
    tco3_step1 = col_character()
  )
)
## Parsed with column specification:
## cols(
##   .default = col_character(),
##   external_author_id = col_double(),
##   following = col_double(),
##   followers = col_double(),
##   updates = col_double(),
##   post_type = col_logical(),
##   retweet = col_double(),
##   new_june_2018 = col_double(),
##   alt_external_id = col_double(),
##   tweet_id = col_double(),
##   tco3_step1 = col_logical()
## )
## See spec(...) for full column specifications.
## Warning: 67645 parsing failures.
##  row       col           expected  actual                     file
## 1036 post_type 1/0/T/F/TRUE/FALSE RETWEET 'IRAhandle_tweets_9.csv'
## 1037 post_type 1/0/T/F/TRUE/FALSE RETWEET 'IRAhandle_tweets_9.csv'
## 1061 post_type 1/0/T/F/TRUE/FALSE RETWEET 'IRAhandle_tweets_9.csv'
## 1064 post_type 1/0/T/F/TRUE/FALSE RETWEET 'IRAhandle_tweets_9.csv'
## 1069 post_type 1/0/T/F/TRUE/FALSE RETWEET 'IRAhandle_tweets_9.csv'
## .... ......... .................. ....... ........................
## See problems(...) for more details.
# Part 10 of the tweet dump. When left to type guessing, readr parsed both
# `post_type` and `tco3_step1` as logical, so every "RETWEET" label and every
# URL became NA (the recorded output below, which predates this setting, lists
# 89701 parsing failures). Both columns are therefore pinned to character; all
# other columns are still guessed.
IRAhandle_tweets_10 <- read_csv(
  "IRAhandle_tweets_10.csv",
  col_types = cols(
    post_type = col_character(),
    tco3_step1 = col_character()
  )
)
## Parsed with column specification:
## cols(
##   .default = col_character(),
##   external_author_id = col_double(),
##   following = col_double(),
##   followers = col_double(),
##   updates = col_double(),
##   post_type = col_logical(),
##   retweet = col_double(),
##   new_june_2018 = col_double(),
##   alt_external_id = col_double(),
##   tweet_id = col_double(),
##   tco3_step1 = col_logical()
## )
## See spec(...) for full column specifications.
## Warning: 89701 parsing failures.
##  row        col           expected                                            actual                      file
## 1100 post_type  1/0/T/F/TRUE/FALSE RETWEET                                           'IRAhandle_tweets_10.csv'
## 1344 post_type  1/0/T/F/TRUE/FALSE RETWEET                                           'IRAhandle_tweets_10.csv'
## 2019 tco3_step1 1/0/T/F/TRUE/FALSE https://www.ridus.ru/news/248567                  'IRAhandle_tweets_10.csv'
## 2163 tco3_step1 1/0/T/F/TRUE/FALSE https://www.ridus.ru/news/248770                  'IRAhandle_tweets_10.csv'
## 2165 tco3_step1 1/0/T/F/TRUE/FALSE http://vietnam.mid.ru/elektronnaa-viza-vo-v-etnam 'IRAhandle_tweets_10.csv'
## .... .......... .................. ................................................. .........................
## See problems(...) for more details.
# Part 11 of the tweet dump. When left to type guessing, readr parsed both
# `post_type` and `tco3_step1` as logical, so every "RETWEET" label became NA
# (the recorded output below, which predates this setting, lists 66039 parsing
# failures). Both columns are therefore pinned to character; all other columns
# are still guessed.
IRAhandle_tweets_11 <- read_csv(
  "IRAhandle_tweets_11.csv",
  col_types = cols(
    post_type = col_character(),
    tco3_step1 = col_character()
  )
)
## Parsed with column specification:
## cols(
##   .default = col_character(),
##   external_author_id = col_double(),
##   following = col_double(),
##   followers = col_double(),
##   updates = col_double(),
##   post_type = col_logical(),
##   retweet = col_double(),
##   new_june_2018 = col_double(),
##   alt_external_id = col_double(),
##   tweet_id = col_double(),
##   tco3_step1 = col_logical()
## )
## See spec(...) for full column specifications.
## Warning: 66039 parsing failures.
##  row       col           expected  actual                      file
## 5247 post_type 1/0/T/F/TRUE/FALSE RETWEET 'IRAhandle_tweets_11.csv'
## 5274 post_type 1/0/T/F/TRUE/FALSE RETWEET 'IRAhandle_tweets_11.csv'
## 5606 post_type 1/0/T/F/TRUE/FALSE RETWEET 'IRAhandle_tweets_11.csv'
## 5701 post_type 1/0/T/F/TRUE/FALSE RETWEET 'IRAhandle_tweets_11.csv'
## 6011 post_type 1/0/T/F/TRUE/FALSE RETWEET 'IRAhandle_tweets_11.csv'
## .... ......... .................. ....... .........................
## See problems(...) for more details.
# Part 12 of the tweet dump. When left to type guessing, readr parsed both
# `post_type` and `tco3_step1` as logical, so every "RETWEET" label became NA
# (the recorded output below, which predates this setting, lists 110978 parsing
# failures). Both columns are therefore pinned to character; all other columns
# are still guessed.
IRAhandle_tweets_12 <- read_csv(
  "IRAhandle_tweets_12.csv",
  col_types = cols(
    post_type = col_character(),
    tco3_step1 = col_character()
  )
)
## Parsed with column specification:
## cols(
##   .default = col_character(),
##   external_author_id = col_double(),
##   following = col_double(),
##   followers = col_double(),
##   updates = col_double(),
##   post_type = col_logical(),
##   retweet = col_double(),
##   new_june_2018 = col_double(),
##   alt_external_id = col_double(),
##   tweet_id = col_double(),
##   tco3_step1 = col_logical()
## )
## See spec(...) for full column specifications.
## Warning: 110978 parsing failures.
##   row       col           expected  actual                      file
## 14784 post_type 1/0/T/F/TRUE/FALSE RETWEET 'IRAhandle_tweets_12.csv'
## 26336 post_type 1/0/T/F/TRUE/FALSE RETWEET 'IRAhandle_tweets_12.csv'
## 27167 post_type 1/0/T/F/TRUE/FALSE RETWEET 'IRAhandle_tweets_12.csv'
## 27168 post_type 1/0/T/F/TRUE/FALSE RETWEET 'IRAhandle_tweets_12.csv'
## 27169 post_type 1/0/T/F/TRUE/FALSE RETWEET 'IRAhandle_tweets_12.csv'
## ..... ......... .................. ....... .........................
## See problems(...) for more details.
# Part 13 of the tweet dump; column types are left to readr's guessing.
IRAhandle_tweets_13 <- read_csv(file = "IRAhandle_tweets_13.csv")
## Parsed with column specification:
## cols(
##   .default = col_character(),
##   external_author_id = col_double(),
##   following = col_double(),
##   followers = col_double(),
##   updates = col_double(),
##   retweet = col_double(),
##   new_june_2018 = col_double(),
##   alt_external_id = col_double(),
##   tweet_id = col_double()
## )
## See spec(...) for full column specifications.
# Load the test labels (read without a header row) and the predicted labels
# (first row used as header), then give the columns used later explicit names.
y_test <- read.csv(file = "y_test.csv", header = FALSE)
y_pred <- read.csv(file = "y_pred.csv")
names(y_test)[1:2] <- c("author", "X0_test")
names(y_pred)[2] <- "X0_pred"

total_y <- list(y_test, y_pred)

# Split the evaluated accounts by whether the predicted label (column 2 of
# y_pred) agrees with the test label (column 2 of y_test). Rows are paired by
# position, so the row count is taken from the data instead of being
# hard-coded, and a differing row count is reported.
if (nrow(y_test) != nrow(y_pred)) {
  warning(
    "y_test has ", nrow(y_test), " rows but y_pred has ", nrow(y_pred),
    "; only the rows present in both are compared",
    call. = FALSE
  )
}
compared_rows <- seq_len(min(nrow(y_test), nrow(y_pred)))
is_mismatch <- y_test[compared_rows, 2] != y_pred[compared_rows, 2]
if (anyNA(is_mismatch)) {
  stop("missing labels in y_test or y_pred; cannot compare them", call. = FALSE)
}

# Account names come from column 1 of y_pred; missing names are dropped and
# each account is kept once.
predicted_authors <- y_pred[compared_rows, 1]
has_author <- !is.na(predicted_authors)

fail_accounts <- data.frame(
  author = unique(predicted_authors[is_mismatch & has_author])
)
success_accounts <- data.frame(
  author = unique(predicted_authors[!is_mismatch & has_author])
)

# Per-column summary of the accounts whose prediction differs from the test label.
summary(fail_accounts)
##              author  
##  _RUBY_WILLS_   : 1  
##  _YOUR_LIFESTYLE: 1  
##  AFONINMIXAIL   : 1  
##  ALVA_MC_GHEE   : 1  
##  AMBBERTHTT     : 1  
##  ANCARICTRS     : 1  
##  (Other)        :79
# Per-column summary of the accounts whose predicted label matched the test label.
summary(success_accounts)
##            author   
##  _RONBEN      :  1  
##  _SASHALAPIN  :  1  
##  4EVER1937    :  1  
##  AARONALLENALL:  1  
##  ABBYLOPTRT   :  1  
##  ABISADMASST  :  1  
##  (Other)      :466
# One-column ("author") data frames listing, in ascending order, the
# misclassified and the correctly classified accounts; they serve as join keys
# against the tweet data.
fail_authors <- data.frame(
  author = sort(as.character(fail_accounts$author))
)
success_authors <- data.frame(
  author = sort(as.character(success_accounts$author))
)

Se unen todas las partes del dataset original en un único data frame:

# Stack the 13 parts of the original dataset into a single data frame.
total_tweets <- rbind(
  IRAhandle_tweets_1, IRAhandle_tweets_2, IRAhandle_tweets_3,
  IRAhandle_tweets_4, IRAhandle_tweets_5, IRAhandle_tweets_6,
  IRAhandle_tweets_7, IRAhandle_tweets_8, IRAhandle_tweets_9,
  IRAhandle_tweets_10, IRAhandle_tweets_11, IRAhandle_tweets_12,
  IRAhandle_tweets_13
)

# Keep only the author, tweet text and the three account counters. Apart from
# `author`, the columns carry the `total_tweets.` prefix that later code
# refers to (e.g. `total_tweets.content`).
total_tweets <- data.frame(
  author = total_tweets$author,
  total_tweets.content = total_tweets$content,
  total_tweets.following = total_tweets$following,
  total_tweets.followers = total_tweets$followers,
  total_tweets.updates = total_tweets$updates
)

Se hace un cruce entre el dataset original y el que contiene los autores de clases mal predichas para crear un data frame con los respectivos atributos de dichos autores

library(tidyverse)
## -- Attaching packages ------------------------------------------------------------------------------------------------------------- tidyverse 1.2.1 --
## v ggplot2 3.1.1       v purrr   0.3.1  
## v tibble  2.0.1       v dplyr   0.8.0.1
## v tidyr   0.8.3       v stringr 1.4.0  
## v ggplot2 3.1.1       v forcats 0.4.0
## -- Conflicts ---------------------------------------------------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
# Attach the tweet attributes to every misclassified author; right_join()
# keeps all rows of `fail_authors`. Only the two tables and `by` are passed:
# a third positional argument would be matched to right_join()'s `copy`
# parameter.
total_failed_tweets <- total_tweets %>%
  right_join(fail_authors, by = "author")
## Warning: Column `author` joining factors with different levels, coercing to
## character vector
summary(total_failed_tweets)
##     author         
##  Length:155980     
##  Class :character  
##  Mode  :character  
##                    
##                    
##                    
##                    
##                                                    total_tweets.content
##  GUIDE: What's up in Pittsburgh this weekend?  #entertainment:    27   
##  Thursday on the Fan  #news                                  :    15   
##  Wednesday on the Fan  #news                                 :    14   
##  Friday on the Fan  #news                                    :    13   
##  GUIDE: What's up in Pittsburgh this weekend?  #local        :    13   
##  What's up in Pittsburgh this weekend?  #entertainment       :    12   
##  (Other)                                                     :155886   
##  total_tweets.following total_tweets.followers total_tweets.updates
##  Min.   :   -1          Min.   :    -1         Min.   :   -1       
##  1st Qu.:  664          1st Qu.:  1050         1st Qu.: 2466       
##  Median : 3210          Median :  5845         Median : 8770       
##  Mean   : 4297          Mean   : 10699         Mean   :12078       
##  3rd Qu.: 6355          3rd Qu.: 15087         3rd Qu.:18240       
##  Max.   :33666          Max.   :103197         Max.   :56470       
## 

Ahora lo mismo pero con los autores que fueron bien clasificados:

library(tidyverse)
# Attach the tweet attributes to every correctly classified author;
# right_join() keeps all rows of `success_authors`. Only the two tables and
# `by` are passed: a third positional argument would be matched to
# right_join()'s `copy` parameter.
total_successful_tweets <- total_tweets %>%
  right_join(success_authors, by = "author")
## Warning: Column `author` joining factors with different levels, coercing to
## character vector
summary(total_successful_tweets)
##     author         
##  Length:698202     
##  Class :character  
##  Mode  :character  
##                    
##                    
##                    
##                    
##                                                                                                                                                                                                                                                                                                                                                                                          total_tweets.content
##  <U+0412> <U+0433><U+043E><U+0440><U+043E><U+0434><U+0435> <U+0421><U+043E><U+0447><U+0438>. <U+041E><U+043B><U+0438><U+043C><U+043F><U+0438><U+0430><U+0434><U+0430> – <U+043F><U+0440><U+0430><U+0437><U+0434><U+043D><U+0438><U+043A> <U+0438><U+043B><U+0438> <U+0441><U+0442><U+0438><U+0445><U+0438><U+0439><U+043D><U+043E><U+0435>...                                                      :   137   
##  Celebrity style: Red carpet looks  #celebs #news                                                                                                                                                                                                                                                                                                                                                  :    79   
##  Daily Celebrity Watch  #celebs #news                                                                                                                                                                                                                                                                                                                                                              :    72   
##  ...<U+0441><U+0442><U+0430><U+0434><U+0438><U+043E><U+043D>, <U+0423> <U+043D><U+0430><U+0441> <U+0441><U+0432><U+043E><U+044F> <U+043E><U+043B><U+0438><U+043C><U+043F><U+0438><U+0430><U+0434><U+0430> – <U+0417><U+0430> <U+043C><U+0430><U+043B><U+044B><U+0448><U+043E><U+043C> <U+0431><U+0440><U+043E><U+0441><U+043E><U+043A> <U+043F><U+043E><U+0434> <U+0441><U+0442><U+043E><U+043B>...:    53   
##  NewsOne Now Audio Podcast: Bishop E.W. Jackson Calls #BlackLivesMatter Is Movement “Disgraceful”                                                                                                                                                                                                                                                                                                  :    50   
##  <U+041B><U+043E><U+043D><U+0434><U+043E><U+043D> 2012 — <U+041E><U+043B><U+0438><U+043C><U+043F><U+0438><U+0430><U+0434><U+0430> <U+0410><U+043D><U+0442><U+0438><U+0445><U+0440><U+0438><U+0441><U+0442><U+0430>                                                                                                                                                                                 :    39   
##  (Other)                                                                                                                                                                                                                                                                                                                                                                                           :697772   
##  total_tweets.following total_tweets.followers total_tweets.updates
##  Min.   :    0          Min.   :    0          Min.   :    1       
##  1st Qu.:  429          1st Qu.:  291          1st Qu.: 1631       
##  Median : 1838          Median : 1521          Median : 4274       
##  Mean   : 3625          Mean   : 5078          Mean   : 8170       
##  3rd Qu.: 4848          3rd Qu.: 4369          3rd Qu.:11203       
##  Max.   :26371          Max.   :71022          Max.   :46749       
## 

Wordcloud para los autores mal clasificados:

library(tm)
## Loading required package: NLP
## 
## Attaching package: 'NLP'
## The following object is masked from 'package:ggplot2':
## 
##     annotate
library(SnowballC)
library(RColorBrewer)
library(wordcloud)

# Corpus with one document per tweet text of the misclassified authors.
total_failed_tweets.Corpus <- Corpus(
  VectorSource(total_failed_tweets$total_tweets.content)
)

# Text clean-up, one transformation per step: lower-case, drop digits, drop
# English stop words, drop punctuation, collapse whitespace, stem.
# The chain starts directly from the raw corpus; an earlier PlainTextDocument
# step was dropped because its result was overwritten before being used.
# tolower is a base function rather than a tm transformation, so it is wrapped
# in content_transformer().
# In the recorded run every tm_map() call on this corpus emitted the warning
# "transformation drops documents".
total_failed_tweets.Clean <- tm_map(
  total_failed_tweets.Corpus, content_transformer(tolower)
)
total_failed_tweets.Clean <- tm_map(total_failed_tweets.Clean, removeNumbers)
total_failed_tweets.Clean <- tm_map(
  total_failed_tweets.Clean, removeWords, stopwords("english")
)
total_failed_tweets.Clean <- tm_map(total_failed_tweets.Clean, removePunctuation)
total_failed_tweets.Clean <- tm_map(total_failed_tweets.Clean, stripWhitespace)
total_failed_tweets.Clean <- tm_map(total_failed_tweets.Clean, stemDocument)

# Word cloud of at most 50 terms, most frequent first, with a quarter of the
# words rotated.
wordcloud(
  total_failed_tweets.Clean,
  min.freq = 1,
  max.words = 50,
  random.order = FALSE,
  rot.per = 0.25,
  colors = brewer.pal(8, "Dark2")
)

Wordcloud para los autores bien clasificados:

library(tm)
library(SnowballC)
library(RColorBrewer)
library(wordcloud)

# Corpus with one document per tweet text of the correctly classified authors.
total_successful_tweets.Corpus <- Corpus(
  VectorSource(total_successful_tweets$total_tweets.content)
)

# Text clean-up, one transformation per step: lower-case, drop digits, drop
# English stop words, drop punctuation, collapse whitespace, stem.
# The chain starts directly from the raw corpus; an earlier PlainTextDocument
# step was dropped because its result was overwritten before being used.
# tolower is a base function rather than a tm transformation, so it is wrapped
# in content_transformer().
# In the recorded run every tm_map() call on this corpus emitted the warning
# "transformation drops documents".
total_successful_tweets.Clean <- tm_map(
  total_successful_tweets.Corpus, content_transformer(tolower)
)
total_successful_tweets.Clean <- tm_map(
  total_successful_tweets.Clean, removeNumbers
)
total_successful_tweets.Clean <- tm_map(
  total_successful_tweets.Clean, removeWords, stopwords("english")
)
total_successful_tweets.Clean <- tm_map(
  total_successful_tweets.Clean, removePunctuation
)
total_successful_tweets.Clean <- tm_map(
  total_successful_tweets.Clean, stripWhitespace
)
total_successful_tweets.Clean <- tm_map(
  total_successful_tweets.Clean, stemDocument
)

# Word cloud of at most 50 terms, most frequent first, with 40% of the words
# rotated.
wordcloud(
  total_successful_tweets.Clean,
  min.freq = 1,
  max.words = 50,
  random.order = FALSE,
  rot.per = 0.4,
  colors = brewer.pal(8, "Dark2")
)

```

Se puede observar que los datos que se clasifican mal suelen tener palabras rusas con mayor frecuencia.