/*
 * Monthly counts of headline results for Myspace, Facebook, and Twitter
 * (in that order within each row).
 *
 * The table holds 204 rows = 17 years x 12 months. setup() treats row 0 as
 * January 2005 (it adds 2005 to the year index), so the last row is
 * December 2021. The first six rows are empty: Myspace is first mentioned
 * in July 2005 (row 6).
 */
int[][] raw_data = {
  // 2005
  {0, 0, 0}, {0, 0, 0}, {0, 0, 0}, {0, 0, 0}, {0, 0, 0}, {0, 0, 0},
  {1, 0, 0}, {0, 0, 0}, {0, 0, 0}, {0, 0, 0}, {0, 0, 0}, {0, 0, 0},
  // 2006
  {0, 0, 0}, {0, 0, 0}, {0, 0, 0}, {0, 0, 0}, {0, 0, 0}, {0, 0, 0},
  {0, 0, 0}, {1, 0, 0}, {0, 0, 0}, {0, 0, 0}, {0, 0, 0}, {1, 0, 0},
  // 2007
  {1, 0, 0}, {0, 0, 0}, {0, 0, 0}, {3, 0, 0}, {1, 0, 0}, {1, 1, 0},
  {3, 2, 0}, {1, 2, 0}, {0, 5, 0}, {2, 3, 0}, {1, 1, 0}, {0, 3, 0},
  // 2008
  {1, 1, 0}, {1, 2, 0}, {0, 1, 0}, {1, 0, 0}, {0, 2, 0}, {0, 2, 0},
  {1, 3, 0}, {2, 4, 0}, {0, 1, 0}, {0, 2, 0}, {1, 0, 0}, {0, 5, 1},
  // 2009
  {0, 1, 1}, {1, 7, 0}, {0, 3, 4}, {0, 4, 2}, {0, 3, 2}, {0, 6, 5},
  {0, 5, 5}, {0, 0, 3}, {0, 11, 0}, {1, 8, 6}, {0, 7, 2}, {0, 5, 2},
  // 2010
  {0, 6, 1}, {0, 8, 0}, {0, 7, 0}, {0, 6, 2}, {0, 13, 4}, {0, 6, 3},
  {0, 15, 4}, {0, 5, 5}, {0, 4, 7}, {0, 6, 3}, {0, 4, 1}, {0, 8, 4},
  // 2011
  {0, 14, 5}, {0, 2, 3}, {0, 9, 3}, {0, 6, 1}, {0, 7, 6}, {0, 10, 2},
  {0, 4, 6}, {0, 4, 3}, {0, 3, 4}, {0, 2, 4}, {0, 5, 7}, {0, 6, 6},
  // 2012
  {0, 6, 4}, {0, 9, 2}, {0, 6, 6}, {0, 9, 3}, {0, 24, 2}, {0, 4, 2},
  {0, 5, 4}, {0, 7, 3}, {1, 1, 3}, {0, 11, 4}, {0, 5, 3}, {0, 5, 7},
  // 2013
  {0, 2, 3}, {0, 2, 4}, {0, 3, 2}, {0, 5, 5}, {0, 6, 9}, {0, 4, 4},
  {0, 3, 3}, {0, 4, 5}, {0, 2, 8}, {0, 8, 9}, {0, 4, 8}, {0, 6, 7},
  // 2014
  {0, 7, 3}, {0, 7, 3}, {0, 5, 4}, {0, 6, 4}, {0, 4, 5}, {0, 5, 1},
  {0, 4, 0}, {0, 3, 3}, {0, 4, 3}, {0, 7, 3}, {0, 4, 2}, {0, 5, 1},
  // 2015
  {0, 1, 3}, {0, 1, 6}, {0, 2, 3}, {0, 5, 5}, {0, 13, 2}, {0, 3, 1},
  {0, 5, 7}, {0, 5, 1}, {0, 11, 7}, {0, 10, 4}, {0, 7, 5}, {0, 11, 5},
  // 2016
  {0, 13, 8}, {0, 9, 7}, {0, 12, 4}, {0, 13, 0}, {0, 14, 4}, {1, 14, 5},
  {0, 13, 6}, {0, 10, 4}, {0, 10, 1}, {0, 7, 5}, {0, 14, 3}, {0, 7, 4},
  // 2017
  {0, 7, 4}, {0, 8, 6}, {0, 7, 5}, {0, 11, 1}, {0, 14, 3}, {0, 4, 1},
  {0, 9, 2}, {0, 7, 5}, {0, 11, 9}, {0, 11, 8}, {0, 19, 9}, {0, 10, 3},
  // 2018
  {0, 11, 5}, {1, 15, 2}, {0, 35, 7}, {0, 31, 3}, {0, 17, 6}, {0, 8, 3},
  {0, 18, 2}, {0, 11, 2}, {0, 9, 7}, {0, 8, 2}, {0, 8, 5}, {0, 8, 4},
  // 2019
  {0, 6, 2}, {0, 15, 3}, {1, 16, 3}, {0, 8, 3}, {0, 14, 4}, {0, 11, 0},
  {0, 7, 3}, {0, 3, 1}, {0, 2, 3}, {0, 6, 1}, {0, 9, 3}, {0, 7, 0},
  // 2020
  {0, 5, 0}, {0, 7, 1}, {0, 1, 1}, {0, 6, 1}, {0, 7, 5}, {0, 9, 5},
  {0, 3, 7}, {0, 7, 5}, {0, 15, 5}, {0, 8, 6}, {0, 7, 5}, {0, 9, 5},
  // 2021
  {0, 12, 9}, {0, 68, 5}, {0, 6, 2}, {0, 3, 0}, {0, 0, 0}, {0, 0, 0},
  {0, 1, 0}, {0, 0, 0}, {0, 0, 0}, {0, 9, 0}, {0, 0, 0}, {0, 0, 0}
};

/**
 * Drops the leading rows whose three counts are all zero.
 *
 * @param data rows of three counts each
 * @return the rows from the first non-empty row to the end (the rows
 *         themselves are shared with data, not copied)
 */
int[][] sanitize(int[][] data) {
  int first = 0;
  while (first < data.length
    && data[first][0] == 0 && data[first][1] == 0 && data[first][2] == 0) {
    first++;
  }
  return sublist(data, first, data.length - first);
}

/**
 * Extracts one column of a 2D array into a 1D array.
 *
 * @param data  rows to read from
 * @param index column to take from every row
 * @return a new array with one element per row
 */
int[] splitList(int[][] data, int index) {
  int[] column = new int[data.length];
  for (int row = 0; row < data.length; row++) {
    column[row] = data[row][index];
  }
  return column;
}

/**
 * Returns the run of rows starting at min and spanning sublength rows.
 *
 * @param data      rows to slice
 * @param min       index of the first row to keep
 * @param sublength number of rows to keep
 * @return a new outer array referencing the selected rows of data
 */
int[][] sublist(int[][] data, int min, int sublength) {
  // Only the outer array is allocated: every slot is assigned a row of data.
  int[][] rows = new int[sublength][];
  for (int i = 0; i < sublength; i++) {
    rows[i] = data[min + i];
  }
  return rows;
}

/**
 * Converts an index into the sanitized data (index 0 = July 2005) into a
 * zero-padded "MM/YY" string.
 *
 * @param index months elapsed since July 2005
 * @return the date, e.g. "07/05" for index 0
 */
String getDate(int index) {
  int year = (index + 6) / 12 + 5;
  int month = (index + 7) % 12;
  if (month == 0) {
    month = 12;  // a remainder of 0 is December, not month zero
  }

  String date = "";
  if (month < 10) {
    date += "0";
  }
  date += month;
  date += "/";
  if (year < 10) {
    date += "0";
  }
  date += year;
  return date;
}

/**
 * Finds the LAST index at which a value occurs.
 *
 * @param array values to search
 * @param value value to look for
 * @return the highest matching index, or -1 if value is absent
 */
int searchArrayReverse(int[] array, int value) {
  for (int i = array.length - 1; i >= 0; i--) {
    if (array[i] == value) {
      return i;
    }
  }
  return -1;
}

/**
 * Totals one company's counts for every year.
 *
 * @param years counts indexed as [year][month][company]
 * @param index company column to total
 * @return one total per year
 */
int[] yearArray(int[][][] years, int index) {
  int[] perYear = new int[years.length];
  for (int y = 0; y < years.length; y++) {
    int total = 0;
    for (int m = 0; m < years[y].length; m++) {
      total += years[y][m][index];
    }
    perYear[y] = total;
  }
  return perYear;
}

/**
 * Sums the elements of an array.
 *
 * @param list values to add up
 * @return the total (0 for an empty array)
 */
int sum(int[] list) {
  int total = 0;
  for (int i = 0; i < list.length; i++) {
    total += list[i];
  }
  return total;
}

/**
 * Arithmetic mean of an array.
 *
 * @param list values to average; must not be empty
 * @return the mean as a float
 */
float mean(int[] list) {
  return float(sum(list)) / list.length;
}

/**
 * Median of an array. The input is not modified.
 *
 * For an even number of elements the result is the integer (truncated)
 * mean of the two middle values.
 *
 * @param list values in any order; must not be empty
 * @return the median value
 */
int median(int[] list) {
  // The middle element is only the median once the values are ordered;
  // Processing's sort() returns a sorted copy and leaves list untouched.
  int[] ordered = sort(list);
  int mid = ordered.length / 2;
  if (ordered.length % 2 == 1) {
    return ordered[mid];
  }
  return (ordered[mid] + ordered[mid - 1]) / 2;
}

/**
 * Population standard deviation of an array.
 *
 * @param list values to measure; must not be empty
 * @return the square root of the mean squared deviation from the mean
 */
float stDev(int[] list) {
  float average = mean(list);
  float sumSqDev = 0;
  for (int i = 0; i < list.length; i++) {
    float deviation = list[i] - average;
    sumSqDev += deviation * deviation;
  }
  return sqrt(sumSqDev / list.length);
}

/**
 * Maps a data value to a screen y coordinate.
 *
 * @param value data value to place
 * @param baseY screen y of value 0 (the x-axis)
 * @param topY  screen y of the value peak
 * @param peak  largest data value on the graph
 * @return the y coordinate, computed in floating point
 */
float valueToY(float value, int baseY, int topY, int peak) {
  return baseY + value * (topY - baseY) / peak;
}

/**
 * Draws one series as an open polyline using the current fill and stroke
 * weight.
 *
 * @param series      one value per month
 * @param strokeColor line color
 * @param startX      screen x of the first value
 * @param step        horizontal distance between consecutive months
 * @param baseY       screen y of value 0
 * @param topY        screen y of the value peak
 * @param peak        largest data value on the graph
 */
void drawSeries(int[] series, color strokeColor, float startX, float step,
  int baseY, int topY, int peak) {
  stroke(strokeColor);
  beginShape();
  float x = startX;
  for (int i = 0; i < series.length; i++) {
    vertex(x, valueToY(series[i], baseY, topY, peak));
    x += step;
  }
  endShape();
}

/**
 * Draws the whole figure once: the line graph of monthly headline counts,
 * its legend, the per-company statistics table, the notes and the data
 * source.
 */
void setup() {
  fullScreen();
  background(255);

  // Cut off the months before our companies first appear in headlines.
  int[][] mft = sanitize(raw_data);
  // Number of leading months that sanitize() removed; the graph must start
  // this many months to the right of the y-axis to stay aligned with the
  // year ticks, which are measured from the start of raw_data.
  int firstMonth = raw_data.length - mft.length;

  // Split the unsanitized data by year: [year][month][company].
  int[][][] years = new int[raw_data.length / 12][12][];
  int index = 0;
  for (int i = 0; i < years.length; i++) {
    for (int j = 0; j < 12; j++) {
      years[i][j] = raw_data[index];
      index++;
    }
  }

  // Split the sanitized dataset by company.
  int[] myspace = splitList(mft, 0);
  int[] facebook = splitList(mft, 1);
  int[] twitter = splitList(mft, 2);

  // Graph geometry. Note the naming: xaxis is the y coordinate of the
  // x-axis and yaxis is the x coordinate of the y-axis.
  int xaxis = height * 8 / 10;
  int yaxis = width * 4 / 10;
  int horizBound = width * 9 / 10;
  int vertBound = height / 10;
  float xinterval = float(horizBound - yaxis) / raw_data.length;

  // Draw the axes.
  line(yaxis, xaxis, yaxis, vertBound - height / 50);
  line(yaxis, xaxis, horizBound, xaxis);

  textAlign(CENTER);
  fill(0);

  // Label the x-axis: one tick every 12 months, showing two-digit years.
  float tick = yaxis;
  int label = 2006;
  for (int i = 0; i < years.length; i++) {
    tick += xinterval * 12;
    line(tick, xaxis - 5, tick, xaxis + 5);
    text(str(label).substring(2), tick, xaxis + 20);
    label++;
  }

  // Scale the graph to the largest monthly count of any company.
  int peak = max(max(myspace), max(twitter), max(facebook));
  int ticksize;
  if (peak > 100) {
    ticksize = 10;
  } else if (peak > 20) {
    ticksize = 5;
  } else {
    ticksize = 1;
  }

  // Label the y-axis. Each tick is positioned directly from its value, with
  // the same mapping the series use, so rounding never accumulates.
  for (int i = 0; i < peak + ticksize; i += ticksize) {
    float tickY = valueToY(i, xaxis, vertBound, peak);
    line(yaxis - 5, tickY, yaxis + 5, tickY);
    text(i, yaxis - 15, tickY + 2);
  }

  // Title the graph.
  float sizeBound = min(height, width);
  textSize(sizeBound / 20);
  textAlign(CENTER);
  text("Social Media headlines over time", (yaxis + horizBound) / 2, sizeBound / 20);

  // Plot the three series: Facebook, then Twitter, then Myspace on top.
  noFill();
  strokeWeight(2);
  float seriesStart = yaxis + firstMonth * xinterval;
  drawSeries(facebook, #D4C651, seriesStart, xinterval, xaxis, vertBound, peak);
  drawSeries(twitter, #1D9BF0, seriesStart, xinterval, xaxis, vertBound, peak);
  drawSeries(myspace, #003399, seriesStart, xinterval, xaxis, vertBound, peak);

  // Legend: colored swatches first, then their labels.
  textSize(height / 60);
  textAlign(LEFT);
  stroke(255);
  int boxSize = height / 60;
  float x = width / 2;
  int y = vertBound;
  fill(#003399);
  rect(x, y, boxSize, boxSize);
  y += boxSize * 2;
  fill(#D4C651);
  rect(x, y, boxSize, boxSize);
  y += boxSize * 2;
  fill(#1D9BF0);
  rect(x, y, boxSize, boxSize);

  fill(0);
  x += boxSize * 1.5;
  y = vertBound + boxSize;
  text("Myspace", x, y);
  y += boxSize * 2;
  text("Facebook", x, y);
  y += boxSize * 2;
  text("Twitter", x, y);

  // Statistics table: heading and column headers.
  textSize(sizeBound / 15);
  text("Statistics:", sizeBound / 15, sizeBound / 15);
  textSize(sizeBound / 50);
  y = height / 10;
  int myX = width / 12 + width / 20;
  int faX = 2 * width / 12 + width / 20;
  int twX = 3 * width / 12 + width / 20;
  text("Myspace", myX, y);
  text("Facebook", faX, y);
  text("Twitter", twX, y);

  stroke(0);
  strokeWeight(1);
  textSize(max(sizeBound / 50, 12));
  int tableRight = yaxis - 24;
  int tableTop = y + 5;
  int tableLeft = myX - 5;
  line(tableLeft, tableTop, tableRight, tableTop);
  y += height / 20;

  // Most mentions in a single month.
  text("Max/month", 10, y);
  text(max(myspace), myX, y);
  text(max(facebook), faX, y);
  text(max(twitter), twX, y);
  line(tableLeft, y + 5, tableRight, y + 5);
  y += height / 20;

  // LAST month in which the monthly maximum occurred.
  text("Best month", 10, y);
  text(getDate(searchArrayReverse(myspace, max(myspace))), myX, y);
  text(getDate(searchArrayReverse(facebook, max(facebook))), faX, y);
  text(getDate(searchArrayReverse(twitter, max(twitter))), twX, y);
  line(tableLeft, y + 5, tableRight, y + 5);
  y += height / 20;

  // Yearly totals per company.
  int[] myYears = yearArray(years, 0);
  int[] faYears = yearArray(years, 1);
  int[] twYears = yearArray(years, 2);

  // Most mentions in a single year.
  text("Max/year", 10, y);
  text(max(myYears), myX, y);
  text(max(faYears), faX, y);
  text(max(twYears), twX, y);
  line(tableLeft, y + 5, tableRight, y + 5);
  y += height / 20;

  // LAST year in which the yearly maximum occurred (year index 0 is 2005).
  text("Best year", 10, y);
  text(searchArrayReverse(myYears, max(myYears)) + 2005, myX, y);
  text(searchArrayReverse(faYears, max(faYears)) + 2005, faX, y);
  text(searchArrayReverse(twYears, max(twYears)) + 2005, twX, y);
  line(tableLeft, y + 5, tableRight, y + 5);
  y += height / 20;

  // Mean mentions per year.
  text("Mean/year", 10, y);
  text(mean(myYears), myX, y);
  text(mean(faYears), faX, y);
  text(mean(twYears), twX, y);
  line(tableLeft, y + 5, tableRight, y + 5);
  y += height / 20;

  // Standard deviation of mentions per year.
  text("StDev/year", 10, y);
  text(stDev(myYears), myX, y);
  text(stDev(faYears), faX, y);
  text(stDev(twYears), twX, y);
  line(tableLeft, y + 5, tableRight, y + 5);
  y += height / 20;

  // Median mentions per year.
  text("Median/year", 10, y);
  text(median(myYears), myX, y);
  text(median(faYears), faX, y);
  text(median(twYears), twX, y);
  line(tableLeft, y + 5, tableRight, y + 5);

  // Vertical grid lines; the horizontal rules were drawn row by row above.
  line(tableLeft, tableTop, tableLeft, y + 5);
  line(faX - 5, tableTop, faX - 5, y + 5);
  line(twX - 5, tableTop, twX - 5, y + 5);
  line(tableRight, tableTop, tableRight, y + 5);

  // Display some notes below the table.
  String notes = "Notes: Mode not included, since it would have been 0 across the board. \n" +
    "The minima are all likewise 0.\n" +
    "Although the data source included headlines starting from 2003, the first relevant headline appeared in 2005.\n" +
    "The data end in 2021, but it is plausible that Twitter might have overtaken Facebook's headlines " +
    "in 2022, when Elon Musk bought the company.\n" +
    "Interestingly, the spike for Facebook in February of 2021 might have been region-specific, " +
    "when Facebook restricted access to news content in Australia.";
  text(notes, 10, y + 20, tableRight - 10, height - 20);

  // Display the data source under the graph.
  String source = "Source: Rohit Kulkarni, \"A Million News Headlines\"\n " +
    "(19 years of Australian Broadcasting Corporation headlines)\n" +
    "https://www.kaggle.com/datasets/therohk/million-headlines";
  text(source, yaxis, xaxis + 30, width - 20, height);

  // Console output kept from the original sketch.
  print(horizBound);
}