Karl Pearson's Correlation Coefficient: Formula, Property, Video, Example (2024)

Correlation and Regression

There are many situations in our daily life where we know from experience that certain variables are directly associated, but we can’t put a definite measure on that association. For example, you know that the chances of you going out to watch a newly released movie are directly associated with the number of friends who go with you, because the more the merrier!

Suggested videos

Karl Pearson's Correlation Coefficient: Formula, Property, Video, Example (1)

Karl Pearson's Correlation Coefficient: Formula, Property, Video, Example (2)

Karl Pearson's Correlation Coefficient: Formula, Property, Video, Example (3)

Karl Pearson's Correlation Coefficient: Formula, Property, Video, Example (4)

Karl Pearson's Correlation Coefficient: Formula, Property, Video, Example (5)

Karl Pearson's Correlation Coefficient: Formula, Property, Video, Example (6)

Karl Pearson's Correlation Coefficient: Formula, Property, Video, Example (7)Karl Pearson's Correlation Coefficient: Formula, Property, Video, Example (8)

Karl Pearson's Correlation Coefficient: Formula, Property, Video, Example (9)

').appendTo(this.scroller));n2const.rtl.isRtl?(this.previous=this.$widget.find(".nextend-thumbnail-next").on("click",this.previousPane.bind(this)),this.next=this.$widget.find(".nextend-thumbnail-previous").on("click",this.nextPane.bind(this))):(this.previous=this.$widget.find(".nextend-thumbnail-previous").on("click",this.previousPane.bind(this)),this.next=this.$widget.find(".nextend-thumbnail-next").on("click",this.nextPane.bind(this))),this.slider.stages.done("BeforeShow",this.onBeforeShow.bind(this)),this.slider.stages.done("WidgetsReady",this.onWidgetsReady.bind(this))},t.prototype.renderThumbnails=function(){var t;this.parameters.invertGroupDirection&&(t=Math.ceil(this.slider.visibleRealSlides.length/this.group));for(var i=0;i

');if(this.parameters.invertGroupDirection?s.appendTo(this.$groups.eq(Math.floor(i/t))):s.appendTo(this.$groups.eq(i%this.group)),s.data("slide",e),e.$thumbnail=s,this.parameters.thumbnail!==c){var h=e.getThumbnailType(),n=p[h]!==c?p[h]:"";d('

'+n+"

").css("background-image","url('"+e.getThumbnail()+"')").appendTo(s)}if(this.parameters.caption!==c){var r=d('');switch(this.parameters.caption.placement){case"before":r.prependTo(s);break;default:r.appendTo(s)}if(this.parameters.title!==c&&r.append('

'+e.getTitle()+"

"),this.parameters.description!==c){var o=e.getDescription();o&&r.append('

'+o+"

")}}}var a="universalclick",l="onDotClick";"mouseenter"===this.parameters.action?(a="universalenter",l="onDotHover"):this.slider.hasTouch()&&(a="n2click"),this.dots=this.scroller.find(".nextend-thumbnail-scroller-group > div").on(a,this[l].bind(this)),this.images=this.dots.find(".n2-ss-thumb-image")},t.prototype.onTap=function(t){i||(d(t.target).trigger("n2click"),i=!0,setTimeout(function(){i=!1},500))},t.prototype.onBeforeShow=function(){var t=!1;switch(this.parameters.area){case 5:t="left";break;case 8:t="right"}t&&(this.offset=parseFloat(this.$widget.data("offset")),this.slider.responsive.addHorizontalSpacingControl(t,this)),this.renderThumbnails(),this.slider.hasTouch()&&(N2Classes.EventBurrito(this.$widget.get(0),{mouse:!0,axis:"x",start:function(){this.bar.width();this._touch={start:parseInt(this.scroller.css(n2const.rtl.left)),max:0},this.getScrollerWidth()this._touch.start?this.previousPane():this.nextPane(),Math.abs(e.x)<10&&Math.abs(e.y)<10?this.onTap(t):nextend.preventClick(),delete 
this._touch}.bind(this)}),this.slider.parameters.controls.drag||this.$widget.on("click",this.onTap.bind(this))),this.widthPercent=this.$widget.data("width-percent"),this.thumbnailDimension={widthLocal:this.dots.width(),width:this.dots.outerWidth(!0),height:this.dots.outerHeight(!0),widthBorder:parseInt(this.dots.css("borderLeftWidth"))+parseInt(this.dots.css("borderRightWidth"))+parseInt(this.dots.css("paddingLeft"))+parseInt(this.dots.css("paddingRight")),heightBorder:parseInt(this.dots.css("borderTopWidth"))+parseInt(this.dots.css("borderBottomWidth"))+parseInt(this.dots.css("paddingTop"))+parseInt(this.dots.css("paddingBottom"))},this.thumbnailDimension.widthMargin=this.thumbnailDimension.width-this.dots.outerWidth(),this.thumbnailDimension.heightMargin=this.thumbnailDimension.height-this.dots.outerHeight(),this.imageDimension={width:this.images.outerWidth(!0),height:this.images.outerHeight(!0)},this.sideDimension=.25*this.thumbnailDimension.width,this.scroller.height(this.thumbnailDimension.height*this.ratio*this.group+"px"),this.bar.height(this.scroller.outerHeight(!0)+"px"),this.horizontalSpacing=this.bar.outerWidth()-this.bar.width(),this.slider.sliderElement.on({SlideWillChange:this.onSlideSwitch.bind(this),visibleRealSlidesChanged:this.onVisibleRealSlidesChanged.bind(this)})},t.prototype.onWidgetsReady=function(){this.activateDots(this.slider.currentSlide.index),this.slider.sliderElement.on("SliderResize",this.onSliderResize.bind(this)),this.onSliderResize()},t.prototype.filterSliderVerticalCSS=function(t){};var e=!(t.prototype.onSliderResize=function(){if(this.slider.visibleRealSlides.length){if(this.lastScrollerWidth!==this.getScrollerWidth()){var 
t,i=1,e=this.getScrollerWidth(),s=e-2*this.sideDimension;if((t=e/this.thumbnailDimension.width)=t&&(this.localSideDimension=.1*e,i=(s=e-2*this.localSideDimension)/(this.parameters.minimumThumbnailCount*this.thumbnailDimension.width),t=s/(this.thumbnailDimension.width*i),(t=e/(this.thumbnailDimension.width*i))e;e++)i[e].$thumbnail.addClass("n2-active")},t.prototype.resetPane=function(){this.goToDot(this.currentI)},t.prototype.previousPane=function(){this.goToDot(this.currentI-this.itemsPerPane*this.group)},t.prototype.nextPane=function(){this.goToDot(this.currentI+this.itemsPerPane*this.group)},t.prototype.getPaneByIndex=function(t){return t=Math.max(0,Math.min(this.dots.length-1,t)),this.parameters.invertGroupDirection?Math.floor(t%Math.ceil(this.dots.length/this.group)/this.itemsPerPane):Math.floor(t/this.group/this.itemsPerPane)},t.prototype.getScrollerTargetLeft=function(t){this.lastScrollerWidth=this.getScrollerWidth();var i=0;t===Math.floor((this.dots.length-1)/this.group/this.itemsPerPane)?(i=-t*this.itemsPerPane*this.thumbnailDimension.width*this.ratio,0===t?this.previous.removeClass("n2-active"):this.previous.addClass("n2-active"),this.next.removeClass("n2-active")):(0

But there are many other factors too, like your interest in that movie, your budget etc. Thus to analyze the situation in detail, you need to note down your similar past experiences and form a sort of distribution from that data. It is at this point that you require a Correlation Coefficient, which will now provide you with a value, based on which you can calculate the possibility of you not going for the movie this time if your friends don’t turn up! Karl Pearson’s Coefficient of Correlation is one such type of parameter which we’ll be studying in this section.

Learn more about Rank Correlation here in detail

Introduction to Coefficient of Correlation

Karl Pearson’s product-moment correlation coefficient (or simply, Pearson’s correlation coefficient) is a measure of the strength of a linear association between two variables and is denoted by r or \(r_{xy}\) (x and y being the two variables involved).

This method of correlation attempts to draw a line of best fit through the data of two variables, and the value of the Pearson correlation coefficient, r, indicates how far away all these data points are from this line of best fit.

Browse more Topics under Correlation And Regression

  • Scatter Diagram
  • Karl Pearson’s Coefficient of Correlation
  • Rank Correlation
  • Probable Error and Probable Limits
  • Regression Lines, Regression Equations and Regression Coefficients

Karl Pearson Correlation Coefficient Formula

The coefficient of correlation \(r_{xy}\) between two variables x and y, for the bivariate dataset \((x_i, y_i)\) where i = 1, 2, 3, …, N, is given by –

$$r_{(x,y)} = \frac{\text{cov}(x,y)}{\sigma_x \sigma_y}$$

where,

⇒ cov(x,y): the covariance between x and y

– \(\Sigma_{i = 1}^{N}\frac{(x_i - \bar{x})(y_i - \bar{y})}{N} = \frac{\Sigma x_i y_i}{N} - \bar{x}\bar{y}\)

Here, \(\bar{x}\) and \(\bar{y}\) are simply the respective means of the distributions of x and y.

⇒ σx and σy are the standard deviations of the distributions x and y.

– \(\sigma_x = \sqrt{\Sigma \frac{(x_i - \bar{x})^2}{N}} = \sqrt{\frac{\Sigma x_i^2}{N} - \bar{x}^2}\)

– \(\sigma_y = \sqrt{\Sigma \frac{(y_i - \bar{y})^2}{N}} = \sqrt{\frac{\Sigma y_i^2}{N} - \bar{y}^2}\)

Alternate Formula

If some data is given in the form of a class-distributed frequency distribution, you may use the following formulae –

⇒ cov(x,y): the covariance between x and y

– \(\frac{\Sigma_{i,j} x_i y_j f_{ij}}{N} - \bar{x}\bar{y}\)

Here, \(\bar{x}\) and \(\bar{y}\) are simply the respective means of the distributions of x and y.

⇒ σx and σy are the standard deviations of the distributions x and y.

– \(\sigma_x = \sqrt{\frac{\Sigma_i f_{io} x_i^2}{N} - \bar{x}^2}\)

– \(\sigma_y = \sqrt{\frac{\Sigma_j f_{oj} y_j^2}{N} - \bar{y}^2}\)

where,

\(x_i\): The central value of the i’th class of x

\(y_j\): The central value of the j’th class of y

\(f_{io}\), \(f_{oj}\): Marginal frequencies of x and y respectively

\(f_{ij}\): Frequency of the (i,j)th cell

In any case, the following equality must always hold:

Total frequency = N = \(\Sigma_{i,j} f_{ij}\) = \(\Sigma_i f_{io}\) = \(\Sigma_j f_{oj}\)

A Single Formula for Discrete Datasets –

$$r_{xy} = \frac{N\Sigma x_i y_i - \Sigma x_i \Sigma y_i}{\sqrt{N\Sigma x_i^2 - (\Sigma x_i)^2} \sqrt{N\Sigma y_i^2 - (\Sigma y_i)^2}}$$

Let us understand more about Scatter Diagram here

Properties of the Pearson’s Correlation Coefficient

r is unit-less. Thus, we may use it to compare the association between totally different bivariate distributions as well. For example, you may compare how much your not going to a movie is related to your friends not joining you, and how much it is related to your not being very interested in the movie yourself, by comparing the Pearson’s correlation coefficients obtained from the two cases. In economics, therefore, where the cost price or the market shares depend on many different factors, this parameter is of utmost importance in ascertaining the connection between various quantities.

The value of r always lies between +1 and -1. Depending on its exact value, we see the following degrees of association between the variables-

r value variation:

Strength of association | Negative r | Positive r
Weak | -0.1 to -0.3 | 0.1 to 0.3
Average | -0.3 to -0.5 | 0.3 to 0.5
Strong | -0.5 to -1.0 | 0.5 to 1.0

A value greater than 0 indicates a positive association i.e. as the value of one variable increases, so does the value of the other variable. A value less than 0 indicates a negative association i.e. as the value of one variable increases, the value of the other variable decreases.

⇒ The Pearson product-moment correlation does not take into consideration whether a variable has been classified as a dependent or independent variable. It treats all variables equally.

A change of origin of the system, or any scaling of the variables, doesn’t affect the magnitude of r. Only the sign of r may change, and that happens when the two scaling factors have opposite signs.

Basically, if the bivariate system (x, y) is converted to another bivariate system (u, v) by a change of origin or scaling or both, in the following way –

$$u = \frac{x - a}{b}, \quad v = \frac{y - c}{d}$$

Then the correlation coefficient takes on the following value – $$r_{(u,v)} = \frac{bd}{|b||d|} \ r_{(x,y)}$$

Assumptions

While calculating the Pearson’s Correlation Coefficient, we make the following assumptions –

  • There is a linear relationship (or any linear component of the relationship) between the two variables
  • We keep Outliers either to a minimum or remove them entirely

An outlier is a data point that does not fit the general trend of your data but appears to be an extreme value, not what you would expect compared to the rest of your data points. You can detect outliers by plotting the two variables against each other on a graph and visually inspecting the graph for extreme points.

You can then either remove or manipulate that particular point as long as you can justify why you did so. Outliers can have a very large effect on the line of best fit and the Pearson correlation coefficient, which can lead to very different conclusions regarding your data. Both of the above points for a given pair of variables can be analyzed easily by studying their scatter plots.

Solved Example on Coefficient of Correlation

Question: An experiment conducted on 9 different cigarette smoking subjects resulted in the following data –

Subject Number | Cigarettes smoked per week (averaged over the last 5 years of their life) | Number of years lived
1 | 25 | 63
2 | 35 | 68
3 | 10 | 72
4 | 40 | 62
5 | 85 | 65
6 | 75 | 46
7 | 60 | 51
8 | 45 | 60
9 | 50 | 55

Calculate the coefficient of correlation between the number of cigarettes smoked and the longevity of a test subject.

Solution

Let us first assign random variables to our data in the following way –

x – the number of cigarettes smoked

y – years lived

We’ll be using the single formula for discrete data points here –

\(r_{xy} = \frac{N\Sigma x_i y_i - \Sigma x_i \Sigma y_i}{\sqrt{N\Sigma x_i^2 - (\Sigma x_i)^2} \sqrt{N\Sigma y_i^2 - (\Sigma y_i)^2}}\)

Let us now construct a table to compute all the values we are going to use in our correlation formula. Note that N here = 9

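x | x² | y | y² | xy
25 | 625 | 63 | 3969 | 1575
35 | 1225 | 68 | 4624 | 2380
10 | 100 | 72 | 5184 | 720
40 | 1600 | 62 | 3844 | 2480
85 | 7225 | 65 | 4225 | 5525
75 | 5625 | 46 | 2116 | 3450
60 | 3600 | 51 | 2601 | 3060
45 | 2025 | 60 | 3600 | 2700
50 | 2500 | 55 | 3025 | 2750
\(\Sigma x_i = 425\) | \(\Sigma x_i^2 = 24525\) | \(\Sigma y_i = 542\) | \(\Sigma y_i^2 = 33188\) | \(\Sigma x_i y_i = 24640\)
\((\Sigma x_i)^2 = 425^2 = 180625\) | | \((\Sigma y_i)^2 = 542^2 = 293764\) | |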

Using the values in the formula, we get – $$ r_{xy} = \frac{N\Sigma x_i y_i - \Sigma x_i \Sigma y_i}{\sqrt{N\Sigma x_i^2 - (\Sigma x_i)^2} \sqrt{N\Sigma y_i^2 - (\Sigma y_i)^2}}$$ $$ = \frac{9 \times 24640 - 425 \times 542}{\sqrt{9 \times 24525 - 180625} \sqrt{9 \times 33188 - 293764}}$$ $$ = \frac{-8590}{\sqrt{40100}\sqrt{4928}} $$ $$ \approx -0.61 $$

This implies a negative correlation between the considered variables, i.e. the higher the number of cigarettes smoked per week in the last 5 years, the lower the number of years lived. Note that it DOES NOT mean that smoking cigarettes decreases the life span, because many other factors might be responsible for one’s death. Still, it is an important conclusion nevertheless.

This way you can solve for other datasets similarly.

PreviousScatter Diagram
NextCorrelation and Regression

Customize your course in 30 seconds

Which class are you in?

5th

6th

7th

8th

9th

10th

11th

12th

Karl Pearson's Correlation Coefficient: Formula, Property, Video, Example (2024)
Top Articles
Latest Posts
Recommended Articles
Article information

Author: Kerri Lueilwitz

Last Updated:

Views: 5916

Rating: 4.7 / 5 (47 voted)

Reviews: 94% of readers found this page helpful

Author information

Name: Kerri Lueilwitz

Birthday: 1992-10-31

Address: Suite 878 3699 Chantelle Roads, Colebury, NC 68599

Phone: +6111989609516

Job: Chief Farming Manager

Hobby: Mycology, Stone skipping, Dowsing, Whittling, Taxidermy, Sand art, Roller skating

Introduction: My name is Kerri Lueilwitz, I am a courageous, gentle, quaint, thankful, outstanding, brave, vast person who loves writing and wants to share my knowledge and understanding with you.