library(readr)
# Load the first chunk of the tweet dataset with base read.csv().
# TRUE is spelled out because `T` is an ordinary variable that can be reassigned.
IRAhandle_tweets_1 <- read.csv("IRAhandle_tweets_1.csv", header = TRUE)
# Load chunk 2 of the tweet dataset.
# With pure type guessing readr inferred `tco3_step1` as logical and reported
# 2042 parsing failures on URL values, so the text columns are pinned to
# character; all remaining columns are still guessed.
IRAhandle_tweets_2 <- read_csv(
  "IRAhandle_tweets_2.csv",
  col_types = cols(
    post_type = col_character(),
    tco3_step1 = col_character()
  )
)
## Parsed with column specification:
## cols(
## .default = col_character(),
## external_author_id = col_double(),
## following = col_double(),
## followers = col_double(),
## updates = col_double(),
## retweet = col_double(),
## new_june_2018 = col_double(),
## alt_external_id = col_double(),
## tweet_id = col_double(),
## tco3_step1 = col_logical()
## )
## See spec(...) for full column specifications.
## Warning: 2042 parsing failures.
## row col expected actual file
## 3345 tco3_step1 1/0/T/F/TRUE/FALSE http://rus.myprintbar.ru 'IRAhandle_tweets_2.csv'
## 4375 tco3_step1 1/0/T/F/TRUE/FALSE http://gamiliel.com/2012/06/05/john-8-the-adulterous-woman/ 'IRAhandle_tweets_2.csv'
## 5187 tco3_step1 1/0/T/F/TRUE/FALSE http://www.meadowvalecrc.org/?powerpress_pinw=3162-podcast 'IRAhandle_tweets_2.csv'
## 6714 tco3_step1 1/0/T/F/TRUE/FALSE http://www.meadowvalecrc.org/?powerpress_pinw=3321-podcast 'IRAhandle_tweets_2.csv'
## 15483 tco3_step1 1/0/T/F/TRUE/FALSE http://www.wbaltv.com/national/uk-votes-to-leave-in-eu-referendum/40202614 'IRAhandle_tweets_2.csv'
## ..... .......... .................. .......................................................................... ........................
## See problems(...) for more details.
# Load chunk 3 of the tweet dataset.
# `post_type` and `tco3_step1` are pinned to character so this chunk gets the
# same column types as the other chunks instead of depending on what readr
# happens to guess from this file; all remaining columns are still guessed.
IRAhandle_tweets_3 <- read_csv(
  "IRAhandle_tweets_3.csv",
  col_types = cols(
    post_type = col_character(),
    tco3_step1 = col_character()
  )
)
## Parsed with column specification:
## cols(
## .default = col_character(),
## external_author_id = col_double(),
## following = col_double(),
## followers = col_double(),
## updates = col_double(),
## retweet = col_double(),
## new_june_2018 = col_double(),
## alt_external_id = col_double(),
## tweet_id = col_double()
## )
## See spec(...) for full column specifications.
# Load chunk 4 of the tweet dataset.
# With pure type guessing readr inferred `post_type` and `tco3_step1` as
# logical and reported 99326 parsing failures (values such as "RETWEET" and
# URLs), so both columns are pinned to character; the rest are still guessed.
IRAhandle_tweets_4 <- read_csv(
  "IRAhandle_tweets_4.csv",
  col_types = cols(
    post_type = col_character(),
    tco3_step1 = col_character()
  )
)
## Parsed with column specification:
## cols(
## .default = col_character(),
## external_author_id = col_double(),
## following = col_double(),
## followers = col_double(),
## updates = col_double(),
## post_type = col_logical(),
## retweet = col_double(),
## new_june_2018 = col_double(),
## alt_external_id = col_double(),
## tweet_id = col_double(),
## tco3_step1 = col_logical()
## )
## See spec(...) for full column specifications.
## Warning: 99326 parsing failures.
## row col expected actual file
## 4038 tco3_step1 1/0/T/F/TRUE/FALSE http://Dictionary.com 'IRAhandle_tweets_4.csv'
## 4239 tco3_step1 1/0/T/F/TRUE/FALSE http://Backpage.com 'IRAhandle_tweets_4.csv'
## 5554 tco3_step1 1/0/T/F/TRUE/FALSE http://ABC7NEWS.COM 'IRAhandle_tweets_4.csv'
## 9972 post_type 1/0/T/F/TRUE/FALSE RETWEET 'IRAhandle_tweets_4.csv'
## 10183 post_type 1/0/T/F/TRUE/FALSE RETWEET 'IRAhandle_tweets_4.csv'
## ..... .......... .................. ..................... ........................
## See problems(...) for more details.
# Load chunk 5 of the tweet dataset.
# With pure type guessing readr inferred `tco3_step1` as logical and reported
# 305 parsing failures on URL values, so the text columns are pinned to
# character; all remaining columns are still guessed.
IRAhandle_tweets_5 <- read_csv(
  "IRAhandle_tweets_5.csv",
  col_types = cols(
    post_type = col_character(),
    tco3_step1 = col_character()
  )
)
## Parsed with column specification:
## cols(
## .default = col_character(),
## external_author_id = col_double(),
## following = col_double(),
## followers = col_double(),
## updates = col_double(),
## retweet = col_double(),
## new_june_2018 = col_double(),
## alt_external_id = col_double(),
## tweet_id = col_double(),
## tco3_step1 = col_logical()
## )
## See spec(...) for full column specifications.
## Warning: 305 parsing failures.
## row col expected actual file
## 1084 tco3_step1 1/0/T/F/TRUE/FALSE http://vice.com 'IRAhandle_tweets_5.csv'
## 3026 tco3_step1 1/0/T/F/TRUE/FALSE http://dlvr.it/Nk4PFy 'IRAhandle_tweets_5.csv'
## 3426 tco3_step1 1/0/T/F/TRUE/FALSE http://freecomicsonlinewoke.com 'IRAhandle_tweets_5.csv'
## 3828 tco3_step1 1/0/T/F/TRUE/FALSE https://youtu.be/1vWvl3sMPlg 'IRAhandle_tweets_5.csv'
## 4745 tco3_step1 1/0/T/F/TRUE/FALSE http://Refusefascism.org 'IRAhandle_tweets_5.csv'
## .... .......... .................. ............................... ........................
## See problems(...) for more details.
# Load chunk 6 of the tweet dataset.
# With pure type guessing readr inferred `tco3_step1` as logical and reported
# 1708 parsing failures on URL values, so the text columns are pinned to
# character; all remaining columns are still guessed.
IRAhandle_tweets_6 <- read_csv(
  "IRAhandle_tweets_6.csv",
  col_types = cols(
    post_type = col_character(),
    tco3_step1 = col_character()
  )
)
## Parsed with column specification:
## cols(
## .default = col_character(),
## external_author_id = col_double(),
## following = col_double(),
## followers = col_double(),
## updates = col_double(),
## retweet = col_double(),
## new_june_2018 = col_double(),
## alt_external_id = col_double(),
## tweet_id = col_double(),
## tco3_step1 = col_logical()
## )
## See spec(...) for full column specifications.
## Warning: 1708 parsing failures.
## row col expected actual file
## 1605 tco3_step1 1/0/T/F/TRUE/FALSE http://www.meadowvalecrc.org/?powerpress_pinw=3381-podcast 'IRAhandle_tweets_6.csv'
## 2500 tco3_step1 1/0/T/F/TRUE/FALSE http://gamiliel.com/2012/06/06/john-13-the-passover/ 'IRAhandle_tweets_6.csv'
## 2819 tco3_step1 1/0/T/F/TRUE/FALSE http://gamiliel.com/2013/01/07/romans-7/ 'IRAhandle_tweets_6.csv'
## 2825 tco3_step1 1/0/T/F/TRUE/FALSE https://500px.com/gamilliell 'IRAhandle_tweets_6.csv'
## 4061 tco3_step1 1/0/T/F/TRUE/FALSE http://www.meadowvalecrc.org/?powerpress_pinw=3303-podcast 'IRAhandle_tweets_6.csv'
## .... .......... .................. .......................................................... ........................
## See problems(...) for more details.
# Load chunk 7 of the tweet dataset.
# With pure type guessing readr inferred `tco3_step1` as logical and reported
# 383 parsing failures on URL values, so the text columns are pinned to
# character; all remaining columns are still guessed.
IRAhandle_tweets_7 <- read_csv(
  "IRAhandle_tweets_7.csv",
  col_types = cols(
    post_type = col_character(),
    tco3_step1 = col_character()
  )
)
## Parsed with column specification:
## cols(
## .default = col_character(),
## external_author_id = col_double(),
## following = col_double(),
## followers = col_double(),
## updates = col_double(),
## retweet = col_double(),
## new_june_2018 = col_double(),
## alt_external_id = col_double(),
## tweet_id = col_double(),
## tco3_step1 = col_logical()
## )
## See spec(...) for full column specifications.
## Warning: 383 parsing failures.
## row col expected actual file
## 2893 tco3_step1 1/0/T/F/TRUE/FALSE http://bit.ly/2ismFW6 'IRAhandle_tweets_7.csv'
## 3022 tco3_step1 1/0/T/F/TRUE/FALSE http://kNOwBETTERHIPHOP.com 'IRAhandle_tweets_7.csv'
## 3056 tco3_step1 1/0/T/F/TRUE/FALSE http://RightOnTVMobileApp.com 'IRAhandle_tweets_7.csv'
## 4200 tco3_step1 1/0/T/F/TRUE/FALSE http://kweliclub.com/products/walter-rodney-how-europe-underdeveloped-africa 'IRAhandle_tweets_7.csv'
## 5294 tco3_step1 1/0/T/F/TRUE/FALSE http://www.loonwatch.com/tag/sam-harris/ 'IRAhandle_tweets_7.csv'
## .... .......... .................. ............................................................................ ........................
## See problems(...) for more details.
# Load chunk 8 of the tweet dataset.
# With pure type guessing readr inferred `tco3_step1` as logical and reported
# 285 parsing failures on URL values, so the text columns are pinned to
# character; all remaining columns are still guessed.
IRAhandle_tweets_8 <- read_csv(
  "IRAhandle_tweets_8.csv",
  col_types = cols(
    post_type = col_character(),
    tco3_step1 = col_character()
  )
)
## Parsed with column specification:
## cols(
## .default = col_character(),
## external_author_id = col_double(),
## following = col_double(),
## followers = col_double(),
## updates = col_double(),
## retweet = col_double(),
## new_june_2018 = col_double(),
## alt_external_id = col_double(),
## tweet_id = col_double(),
## tco3_step1 = col_logical()
## )
## See spec(...) for full column specifications.
## Warning: 285 parsing failures.
## row col expected actual file
## 4681 tco3_step1 1/0/T/F/TRUE/FALSE http://rus.myprintbar.ru 'IRAhandle_tweets_8.csv'
## 5289 tco3_step1 1/0/T/F/TRUE/FALSE https://www.smashwords.com/books/view/474508 'IRAhandle_tweets_8.csv'
## 5564 tco3_step1 1/0/T/F/TRUE/FALSE http://patriotsunite.info 'IRAhandle_tweets_8.csv'
## 13745 tco3_step1 1/0/T/F/TRUE/FALSE http://www.rosbalt.ru/piter/2015/06/18/1409874.html 'IRAhandle_tweets_8.csv'
## 17910 tco3_step1 1/0/T/F/TRUE/FALSE https://nevnov.ru/457531-aleksandr-perendzhiev-ozhidaem-popytku-gosperevorota-na-ukraine 'IRAhandle_tweets_8.csv'
## ..... .......... .................. ........................................................................................ ........................
## See problems(...) for more details.
# Load chunk 9 of the tweet dataset.
# With pure type guessing readr inferred `post_type` and `tco3_step1` as
# logical and reported 67645 parsing failures (e.g. the value "RETWEET"), so
# both columns are pinned to character; the rest are still guessed.
IRAhandle_tweets_9 <- read_csv(
  "IRAhandle_tweets_9.csv",
  col_types = cols(
    post_type = col_character(),
    tco3_step1 = col_character()
  )
)
## Parsed with column specification:
## cols(
## .default = col_character(),
## external_author_id = col_double(),
## following = col_double(),
## followers = col_double(),
## updates = col_double(),
## post_type = col_logical(),
## retweet = col_double(),
## new_june_2018 = col_double(),
## alt_external_id = col_double(),
## tweet_id = col_double(),
## tco3_step1 = col_logical()
## )
## See spec(...) for full column specifications.
## Warning: 67645 parsing failures.
## row col expected actual file
## 1036 post_type 1/0/T/F/TRUE/FALSE RETWEET 'IRAhandle_tweets_9.csv'
## 1037 post_type 1/0/T/F/TRUE/FALSE RETWEET 'IRAhandle_tweets_9.csv'
## 1061 post_type 1/0/T/F/TRUE/FALSE RETWEET 'IRAhandle_tweets_9.csv'
## 1064 post_type 1/0/T/F/TRUE/FALSE RETWEET 'IRAhandle_tweets_9.csv'
## 1069 post_type 1/0/T/F/TRUE/FALSE RETWEET 'IRAhandle_tweets_9.csv'
## .... ......... .................. ....... ........................
## See problems(...) for more details.
# Load chunk 10 of the tweet dataset.
# With pure type guessing readr inferred `post_type` and `tco3_step1` as
# logical and reported 89701 parsing failures (values such as "RETWEET" and
# URLs), so both columns are pinned to character; the rest are still guessed.
IRAhandle_tweets_10 <- read_csv(
  "IRAhandle_tweets_10.csv",
  col_types = cols(
    post_type = col_character(),
    tco3_step1 = col_character()
  )
)
## Parsed with column specification:
## cols(
## .default = col_character(),
## external_author_id = col_double(),
## following = col_double(),
## followers = col_double(),
## updates = col_double(),
## post_type = col_logical(),
## retweet = col_double(),
## new_june_2018 = col_double(),
## alt_external_id = col_double(),
## tweet_id = col_double(),
## tco3_step1 = col_logical()
## )
## See spec(...) for full column specifications.
## Warning: 89701 parsing failures.
## row col expected actual file
## 1100 post_type 1/0/T/F/TRUE/FALSE RETWEET 'IRAhandle_tweets_10.csv'
## 1344 post_type 1/0/T/F/TRUE/FALSE RETWEET 'IRAhandle_tweets_10.csv'
## 2019 tco3_step1 1/0/T/F/TRUE/FALSE https://www.ridus.ru/news/248567 'IRAhandle_tweets_10.csv'
## 2163 tco3_step1 1/0/T/F/TRUE/FALSE https://www.ridus.ru/news/248770 'IRAhandle_tweets_10.csv'
## 2165 tco3_step1 1/0/T/F/TRUE/FALSE http://vietnam.mid.ru/elektronnaa-viza-vo-v-etnam 'IRAhandle_tweets_10.csv'
## .... .......... .................. ................................................. .........................
## See problems(...) for more details.
# Load chunk 11 of the tweet dataset.
# With pure type guessing readr inferred `post_type` and `tco3_step1` as
# logical and reported 66039 parsing failures (e.g. the value "RETWEET"), so
# both columns are pinned to character; the rest are still guessed.
IRAhandle_tweets_11 <- read_csv(
  "IRAhandle_tweets_11.csv",
  col_types = cols(
    post_type = col_character(),
    tco3_step1 = col_character()
  )
)
## Parsed with column specification:
## cols(
## .default = col_character(),
## external_author_id = col_double(),
## following = col_double(),
## followers = col_double(),
## updates = col_double(),
## post_type = col_logical(),
## retweet = col_double(),
## new_june_2018 = col_double(),
## alt_external_id = col_double(),
## tweet_id = col_double(),
## tco3_step1 = col_logical()
## )
## See spec(...) for full column specifications.
## Warning: 66039 parsing failures.
## row col expected actual file
## 5247 post_type 1/0/T/F/TRUE/FALSE RETWEET 'IRAhandle_tweets_11.csv'
## 5274 post_type 1/0/T/F/TRUE/FALSE RETWEET 'IRAhandle_tweets_11.csv'
## 5606 post_type 1/0/T/F/TRUE/FALSE RETWEET 'IRAhandle_tweets_11.csv'
## 5701 post_type 1/0/T/F/TRUE/FALSE RETWEET 'IRAhandle_tweets_11.csv'
## 6011 post_type 1/0/T/F/TRUE/FALSE RETWEET 'IRAhandle_tweets_11.csv'
## .... ......... .................. ....... .........................
## See problems(...) for more details.
# Load chunk 12 of the tweet dataset.
# With pure type guessing readr inferred `post_type` and `tco3_step1` as
# logical and reported 110978 parsing failures (e.g. the value "RETWEET"), so
# both columns are pinned to character; the rest are still guessed.
IRAhandle_tweets_12 <- read_csv(
  "IRAhandle_tweets_12.csv",
  col_types = cols(
    post_type = col_character(),
    tco3_step1 = col_character()
  )
)
## Parsed with column specification:
## cols(
## .default = col_character(),
## external_author_id = col_double(),
## following = col_double(),
## followers = col_double(),
## updates = col_double(),
## post_type = col_logical(),
## retweet = col_double(),
## new_june_2018 = col_double(),
## alt_external_id = col_double(),
## tweet_id = col_double(),
## tco3_step1 = col_logical()
## )
## See spec(...) for full column specifications.
## Warning: 110978 parsing failures.
## row col expected actual file
## 14784 post_type 1/0/T/F/TRUE/FALSE RETWEET 'IRAhandle_tweets_12.csv'
## 26336 post_type 1/0/T/F/TRUE/FALSE RETWEET 'IRAhandle_tweets_12.csv'
## 27167 post_type 1/0/T/F/TRUE/FALSE RETWEET 'IRAhandle_tweets_12.csv'
## 27168 post_type 1/0/T/F/TRUE/FALSE RETWEET 'IRAhandle_tweets_12.csv'
## 27169 post_type 1/0/T/F/TRUE/FALSE RETWEET 'IRAhandle_tweets_12.csv'
## ..... ......... .................. ....... .........................
## See problems(...) for more details.
# Load chunk 13 of the tweet dataset.
# `post_type` and `tco3_step1` are pinned to character so this chunk gets the
# same column types as the other chunks instead of depending on what readr
# happens to guess from this file; all remaining columns are still guessed.
IRAhandle_tweets_13 <- read_csv(
  "IRAhandle_tweets_13.csv",
  col_types = cols(
    post_type = col_character(),
    tco3_step1 = col_character()
  )
)
## Parsed with column specification:
## cols(
## .default = col_character(),
## external_author_id = col_double(),
## following = col_double(),
## followers = col_double(),
## updates = col_double(),
## retweet = col_double(),
## new_june_2018 = col_double(),
## alt_external_id = col_double(),
## tweet_id = col_double()
## )
## See spec(...) for full column specifications.
# Load the reference labels (y_test) and the predicted labels (y_pred).
# NOTE(review): y_test is read with header = FALSE while y_pred relies on the
# default header handling - confirm the two files really differ in that
# respect, otherwise the rows of the two tables are shifted by one.
y_test <- read.csv(file = "y_test.csv", header = FALSE)
y_pred <- read.csv(file = "y_pred.csv")

# Name the first two columns of y_test and the second column of y_pred.
names(y_test)[1:2] <- c("author", "X0_test")
names(y_pred)[2] <- "X0_pred"

# Both tables bundled into one list.
total_y <- list(y_test, y_pred)

# Empty data frames that the comparison step below fills in.
success_accounts <- data.frame()
fail_accounts <- data.frame()
# Split the accounts into misclassified ("fail") and correctly classified
# ("success") ones by comparing column 2 of y_test with column 2 of y_pred,
# row by row. The account name is taken from column 1 of y_pred.
# The comparison is vectorised and covers every row the two tables share, so
# it does not depend on a fixed row count.
n_accounts <- min(nrow(y_test), nrow(y_pred))
if (nrow(y_test) != nrow(y_pred)) {
  warning(
    "y_test and y_pred differ in row count; comparing the first ",
    n_accounts, " rows only",
    call. = FALSE
  )
}
account_rows <- seq_len(n_accounts)
is_mismatch <- y_test[account_rows, 2] != y_pred[account_rows, 2]

# A row with a missing label cannot be placed in either group, so fail loudly.
stopifnot(!anyNA(is_mismatch))

account_names <- as.character(y_pred[account_rows, 1])
fail_accounts <- data.frame(author = account_names[is_mismatch])
success_accounts <- data.frame(author = account_names[!is_mismatch])

# Drop duplicated and missing account names.
fail_accounts <- na.omit(unique(fail_accounts))
success_accounts <- na.omit(unique(success_accounts))
summary(fail_accounts)
## author
## _RUBY_WILLS_ : 1
## _YOUR_LIFESTYLE: 1
## AFONINMIXAIL : 1
## ALVA_MC_GHEE : 1
## AMBBERTHTT : 1
## ANCARICTRS : 1
## (Other) :79
summary(success_accounts)
## author
## _RONBEN : 1
## _SASHALAPIN : 1
## 4EVER1937 : 1
## AARONALLENALL: 1
## ABBYLOPTRT : 1
## ABISADMASST : 1
## (Other) :466
# One-column data frames ("author") holding the account names of each group,
# sorted in ascending order; they serve as the join keys further down.
fail_authors <- data.frame(
  author = sort(as.character(fail_accounts$author))
)
success_authors <- data.frame(
  author = sort(as.character(success_accounts$author))
)
Se une todo el dataset original
# Stack the thirteen chunks into a single table.
total_tweets <- rbind(
  IRAhandle_tweets_1,
  IRAhandle_tweets_2,
  IRAhandle_tweets_3,
  IRAhandle_tweets_4,
  IRAhandle_tweets_5,
  IRAhandle_tweets_6,
  IRAhandle_tweets_7,
  IRAhandle_tweets_8,
  IRAhandle_tweets_9,
  IRAhandle_tweets_10,
  IRAhandle_tweets_11,
  IRAhandle_tweets_12,
  IRAhandle_tweets_13
)

# Keep only the columns used later. The join key is called "author"; the other
# columns keep the "total_tweets." prefix that later code refers to.
total_tweets <- data.frame(
  author = total_tweets$author,
  total_tweets.content = total_tweets$content,
  total_tweets.following = total_tweets$following,
  total_tweets.followers = total_tweets$followers,
  total_tweets.updates = total_tweets$updates
)
Se hace un cruce (join) entre el dataset original y el data frame que contiene los autores cuyas clases fueron mal predichas, para crear un data frame con los atributos de los tweets de dichos autores:
library(tidyverse)
## -- Attaching packages ------------------------------------------------------------------------------------------------------------- tidyverse 1.2.1 --
## v ggplot2 3.1.1 v purrr 0.3.1
## v tibble 2.0.1 v dplyr 0.8.0.1
## v tidyr 0.8.3 v stringr 1.4.0
## v ggplot2 3.1.1 v forcats 0.4.0
## -- Conflicts ---------------------------------------------------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
# Attach the tweet attributes to every misclassified author. right_join()
# keeps all rows of fail_authors and the matching rows of total_tweets.
# Only the two tables and the key are passed; a further positional argument
# would be taken as right_join()'s `copy` parameter.
total_failed_tweets <- total_tweets %>%
  right_join(fail_authors, by = "author")
## Warning: Column `author` joining factors with different levels, coercing to
## character vector
summary(total_failed_tweets)
## author
## Length:155980
## Class :character
## Mode :character
##
##
##
##
## total_tweets.content
## GUIDE: What's up in Pittsburgh this weekend? #entertainment: 27
## Thursday on the Fan #news : 15
## Wednesday on the Fan #news : 14
## Friday on the Fan #news : 13
## GUIDE: What's up in Pittsburgh this weekend? #local : 13
## What's up in Pittsburgh this weekend? #entertainment : 12
## (Other) :155886
## total_tweets.following total_tweets.followers total_tweets.updates
## Min. : -1 Min. : -1 Min. : -1
## 1st Qu.: 664 1st Qu.: 1050 1st Qu.: 2466
## Median : 3210 Median : 5845 Median : 8770
## Mean : 4297 Mean : 10699 Mean :12078
## 3rd Qu.: 6355 3rd Qu.: 15087 3rd Qu.:18240
## Max. :33666 Max. :103197 Max. :56470
##
Ahora lo mismo pero con los autores que fueron bien clasificados:
library(tidyverse)
# Attach the tweet attributes to every correctly classified author.
# right_join() keeps all rows of success_authors and the matching rows of
# total_tweets. Only the two tables and the key are passed; a further
# positional argument would be taken as right_join()'s `copy` parameter.
total_successful_tweets <- total_tweets %>%
  right_join(success_authors, by = "author")
## Warning: Column `author` joining factors with different levels, coercing to
## character vector
summary(total_successful_tweets)
## author
## Length:698202
## Class :character
## Mode :character
##
##
##
##
## total_tweets.content
## <U+0412> <U+0433><U+043E><U+0440><U+043E><U+0434><U+0435> <U+0421><U+043E><U+0447><U+0438>. <U+041E><U+043B><U+0438><U+043C><U+043F><U+0438><U+0430><U+0434><U+0430> – <U+043F><U+0440><U+0430><U+0437><U+0434><U+043D><U+0438><U+043A> <U+0438><U+043B><U+0438> <U+0441><U+0442><U+0438><U+0445><U+0438><U+0439><U+043D><U+043E><U+0435>... : 137
## Celebrity style: Red carpet looks #celebs #news : 79
## Daily Celebrity Watch #celebs #news : 72
## ...<U+0441><U+0442><U+0430><U+0434><U+0438><U+043E><U+043D>, <U+0423> <U+043D><U+0430><U+0441> <U+0441><U+0432><U+043E><U+044F> <U+043E><U+043B><U+0438><U+043C><U+043F><U+0438><U+0430><U+0434><U+0430> – <U+0417><U+0430> <U+043C><U+0430><U+043B><U+044B><U+0448><U+043E><U+043C> <U+0431><U+0440><U+043E><U+0441><U+043E><U+043A> <U+043F><U+043E><U+0434> <U+0441><U+0442><U+043E><U+043B>...: 53
## NewsOne Now Audio Podcast: Bishop E.W. Jackson Calls #BlackLivesMatter Is Movement “Disgraceful” : 50
## <U+041B><U+043E><U+043D><U+0434><U+043E><U+043D> 2012 — <U+041E><U+043B><U+0438><U+043C><U+043F><U+0438><U+0430><U+0434><U+0430> <U+0410><U+043D><U+0442><U+0438><U+0445><U+0440><U+0438><U+0441><U+0442><U+0430> : 39
## (Other) :697772
## total_tweets.following total_tweets.followers total_tweets.updates
## Min. : 0 Min. : 0 Min. : 1
## 1st Qu.: 429 1st Qu.: 291 1st Qu.: 1631
## Median : 1838 Median : 1521 Median : 4274
## Mean : 3625 Mean : 5078 Mean : 8170
## 3rd Qu.: 4848 3rd Qu.: 4369 3rd Qu.:11203
## Max. :26371 Max. :71022 Max. :46749
##
Wordcloud para los autores mal clasificados:
library(tm)
## Loading required package: NLP
##
## Attaching package: 'NLP'
## The following object is masked from 'package:ggplot2':
##
## annotate
library(SnowballC)
library(RColorBrewer)
library(wordcloud)
# Word cloud of the tweets written by misclassified authors.
# Build a corpus with one document per tweet text.
total_failed_tweets.Corpus <- Corpus(
  VectorSource(total_failed_tweets$total_tweets.content)
)

# Normalise the text: lower-case, drop digits, English stop words and
# punctuation, collapse whitespace, then stem.
# tolower() is not a tm transformation, so it is wrapped in
# content_transformer(). No PlainTextDocument conversion is applied: its
# result was never used by the steps that follow.
# In the recorded run each tm_map() step on this corpus emitted a
# "transformation drops documents" warning.
total_failed_tweets.Clean <- total_failed_tweets.Corpus %>%
  tm_map(content_transformer(tolower)) %>%
  tm_map(removeNumbers) %>%
  tm_map(removeWords, stopwords("english")) %>%
  tm_map(removePunctuation) %>%
  tm_map(stripWhitespace) %>%
  tm_map(stemDocument)

# Plot at most 50 terms, most frequent first, with a quarter of them rotated.
wordcloud(
  total_failed_tweets.Clean,
  min.freq = 1,
  max.words = 50,
  random.order = FALSE,
  rot.per = 0.25,
  colors = brewer.pal(8, "Dark2")
)
Wordcloud para los autores bien clasificados:
library(tm)
library(SnowballC)
library(RColorBrewer)
library(wordcloud)
# Word cloud of the tweets written by correctly classified authors.
# Build a corpus with one document per tweet text.
total_successful_tweets.Corpus <- Corpus(
  VectorSource(total_successful_tweets$total_tweets.content)
)

# Normalise the text: lower-case, drop digits, English stop words and
# punctuation, collapse whitespace, then stem.
# tolower() is not a tm transformation, so it is wrapped in
# content_transformer(). No PlainTextDocument conversion is applied: its
# result was never used by the steps that follow.
# In the recorded run each tm_map() step on this corpus emitted a
# "transformation drops documents" warning.
total_successful_tweets.Clean <- total_successful_tweets.Corpus %>%
  tm_map(content_transformer(tolower)) %>%
  tm_map(removeNumbers) %>%
  tm_map(removeWords, stopwords("english")) %>%
  tm_map(removePunctuation) %>%
  tm_map(stripWhitespace) %>%
  tm_map(stemDocument)

# Plot at most 50 terms, most frequent first, with 40% of them rotated.
wordcloud(
  total_successful_tweets.Clean,
  min.freq = 1,
  max.words = 50,
  random.order = FALSE,
  rot.per = 0.4,
  colors = brewer.pal(8, "Dark2")
)
```
Se puede observar que los tweets de los autores mal clasificados suelen contener palabras rusas con mayor frecuencia.