%!PS-Adobe-3.0 %%Title: (Microsoft Word - notre dame) %%Creator: (Microsoft Word: LaserWriter 8 8.3.4) %%CreationDate: (11:17 PM Wednesday, January 22, 1997) %%For: (peter) %%Pages: 16 %%DocumentFonts: Times-Bold Symbol Times-Roman Times-Italic TimesNewRomanPSMT TimesNewRomanPS-BoldMT %%DocumentNeededFonts: Times-Bold Symbol Times-Roman Times-Italic TimesNewRomanPSMT TimesNewRomanPS-BoldMT %%DocumentSuppliedFonts: %%DocumentData: Clean7Bit %%PageOrder: Ascend %%Orientation: Portrait %%DocumentMedia: Default 612 792 0 () () %ADO_ImageableArea: 31 31 583 761 %%EndComments userdict begin/dscInfo 5 dict dup begin /Title(Microsoft Word - notre dame)def /Creator(Microsoft Word: LaserWriter 8 8.3.4)def /CreationDate(11:17 PM Wednesday, January 22, 1997)def /For(peter)def /Pages 16 def end def end save /version23-manualfeedpatch where { pop false } { true }ifelse % we don't do an explicit 'get' since product and version MAY % be in systemdict or statusdict - this technique gets the lookup % without failure statusdict begin product (LaserWriter) eq % true if LaserWriter version cvr 23.0 eq % true if version 23 end and % only install this patch if both are true and % true only if patch is not installed and is for this printer % save object and boolean on stack dup { exch restore }if % either true OR saveobject false dup { /version23-manualfeedpatch true def /oldversion23-showpage /showpage load def /showpage % this showpage will wait extra time if manualfeed is true {% statusdict /manualfeed known {% manualfeed known in statusdict statusdict /manualfeed get {% if true then we loop for 5 seconds usertime 5000 add % target usertime { % loop dup usertime sub 0 lt { exit }if }loop pop % pop the usertime off the stac }if }if oldversion23-showpage }bind def }if not{ restore }if /md 214 dict def md begin/currentpacking where {pop /sc_oldpacking currentpacking def true setpacking}if %%BeginFile: adobe_psp_basic %%Copyright: Copyright 1990-1993 Adobe Systems Incorporated. 
All Rights Reserved. /bd{bind def}bind def /xdf{exch def}bd /xs{exch store}bd /ld{load def}bd /Z{0 def}bd /T/true /F/false /:L/lineto /lw/setlinewidth /:M/moveto /rl/rlineto /rm/rmoveto /:C/curveto /:T/translate /:K/closepath /:mf/makefont /gS/gsave /gR/grestore /np/newpath 14{ld}repeat /$m matrix def /av 83 def /por true def /normland false def /psb-nosave{}bd /pse-nosave{}bd /us Z /psb{/us save store}bd /pse{us restore}bd /level2 /languagelevel where { pop languagelevel 2 ge }{ false }ifelse def /featurecleanup { stopped cleartomark countdictstack exch sub dup 0 gt { {end}repeat }{ pop }ifelse }bd /noload Z /startnoload { {/noload save store}if }bd /endnoload { {noload restore}if }bd level2 startnoload /setjob { statusdict/jobname 3 -1 roll put }bd /setcopies { userdict/#copies 3 -1 roll put }bd level2 endnoload level2 not startnoload /setjob { 1 dict begin/JobName xdf currentdict end setuserparams }bd /setcopies { 1 dict begin/NumCopies xdf currentdict end setpagedevice }bd level2 not endnoload /pm Z /mT Z /sD Z /realshowpage Z /initializepage { /pm save store mT concat }bd /endp { pm restore showpage }def /$c/DeviceRGB def /rectclip where { pop/rC/rectclip ld }{ /rC { np 4 2 roll :M 1 index 0 rl 0 exch rl neg 0 rl :K clip np }bd }ifelse /rectfill where { pop/rF/rectfill ld }{ /rF { gS np 4 2 roll :M 1 index 0 rl 0 exch rl neg 0 rl fill gR }bd }ifelse /rectstroke where { pop/rS/rectstroke ld }{ /rS { gS np 4 2 roll :M 1 index 0 rl 0 exch rl neg 0 rl :K stroke gR }bd }ifelse %%EndFile %%BeginFile: adobe_psp_colorspace_level1 %%Copyright: Copyright 1991-1993 Adobe Systems Incorporated. All Rights Reserved. 
/G/setgray ld /:F1/setgray ld /:F/setrgbcolor ld /:F4/setcmykcolor where { pop /setcmykcolor ld }{ { 3 { dup 3 -1 roll add dup 1 gt{pop 1}if 1 exch sub 4 1 roll }repeat pop setrgbcolor }bd }ifelse /:Fx { counttomark {0{G}0{:F}{:F4}} exch get exec pop }bd /:rg{/DeviceRGB :ss}bd /:sc{$cs :ss}bd /:dc{/$cs xdf}bd /:sgl{}def /:dr{}bd /:fCRD{pop}bd /:ckcs{}bd /:ss{/$c xdf}bd /$cs Z %%EndFile %%BeginFile: adobe_psp_uniform_graphics %%Copyright: Copyright 1990-1993 Adobe Systems Incorporated. All Rights Reserved. /@a { np :M 0 rl :L 0 exch rl 0 rl :L fill }bd /@b { np :M 0 rl 0 exch rl :L 0 rl 0 exch rl fill }bd /arct where { pop }{ /arct { arcto pop pop pop pop }bd }ifelse /x1 Z /x2 Z /y1 Z /y2 Z /rad Z /@q { /rad xs /y2 xs /x2 xs /y1 xs /x1 xs np x2 x1 add 2 div y1 :M x2 y1 x2 y2 rad arct x2 y2 x1 y2 rad arct x1 y2 x1 y1 rad arct x1 y1 x2 y1 rad arct fill }bd /@s { /rad xs /y2 xs /x2 xs /y1 xs /x1 xs np x2 x1 add 2 div y1 :M x2 y1 x2 y2 rad arct x2 y2 x1 y2 rad arct x1 y2 x1 y1 rad arct x1 y1 x2 y1 rad arct :K stroke }bd /@i { np 0 360 arc fill }bd /@j { gS np :T scale 0 0 .5 0 360 arc fill gR }bd /@e { np 0 360 arc :K stroke }bd /@f { np $m currentmatrix pop :T scale 0 0 .5 0 360 arc :K $m setmatrix stroke }bd /@k { gS np :T 0 0 :M 0 0 5 2 roll arc fill gR }bd /@l { gS np :T 0 0 :M scale 0 0 .5 5 -2 roll arc fill gR }bd /@m { np arc stroke }bd /@n { np $m currentmatrix pop :T scale 0 0 .5 5 -2 roll arc $m setmatrix stroke }bd %%EndFile %%BeginFile: adobe_psp_basic_text %%Copyright: Copyright 1990-1993 Adobe Systems Incorporated. All Rights Reserved. 
/S/show ld /A{ 0.0 exch ashow }bd /R{ 0.0 exch 32 exch widthshow }bd /W{ 0.0 3 1 roll widthshow }bd /J{ 0.0 32 4 2 roll 0.0 exch awidthshow }bd /V{ 0.0 4 1 roll 0.0 exch awidthshow }bd /fcflg true def /fc{ fcflg{ vmstatus exch sub 50000 lt{ (%%[ Warning: Running out of memory ]%%\r)print flush/fcflg false store }if pop }if }bd /$f[1 0 0 -1 0 0]def /:ff{$f :mf}bd /MacEncoding StandardEncoding 256 array copy def MacEncoding 39/quotesingle put MacEncoding 96/grave put /Adieresis/Aring/Ccedilla/Eacute/Ntilde/Odieresis/Udieresis/aacute /agrave/acircumflex/adieresis/atilde/aring/ccedilla/eacute/egrave /ecircumflex/edieresis/iacute/igrave/icircumflex/idieresis/ntilde/oacute /ograve/ocircumflex/odieresis/otilde/uacute/ugrave/ucircumflex/udieresis /dagger/degree/cent/sterling/section/bullet/paragraph/germandbls /registered/copyright/trademark/acute/dieresis/notequal/AE/Oslash /infinity/plusminus/lessequal/greaterequal/yen/mu/partialdiff/summation /product/pi/integral/ordfeminine/ordmasculine/Omega/ae/oslash /questiondown/exclamdown/logicalnot/radical/florin/approxequal/Delta/guillemotleft /guillemotright/ellipsis/space/Agrave/Atilde/Otilde/OE/oe /endash/emdash/quotedblleft/quotedblright/quoteleft/quoteright/divide/lozenge /ydieresis/Ydieresis/fraction/currency/guilsinglleft/guilsinglright/fi/fl /daggerdbl/periodcentered/quotesinglbase/quotedblbase/perthousand /Acircumflex/Ecircumflex/Aacute/Edieresis/Egrave/Iacute/Icircumflex/Idieresis/Igrave /Oacute/Ocircumflex/apple/Ograve/Uacute/Ucircumflex/Ugrave/dotlessi/circumflex/tilde /macron/breve/dotaccent/ring/cedilla/hungarumlaut/ogonek/caron MacEncoding 128 128 getinterval astore pop level2 startnoload /copyfontdict { findfont dup length dict begin { 1 index/FID ne{def}{pop pop}ifelse }forall }bd level2 endnoload level2 not startnoload /copyfontdict { findfont dup length dict copy begin }bd level2 not endnoload md/fontname known not{ /fontname/customfont def }if /Encoding Z /:mre { copyfontdict /Encoding MacEncoding def 
fontname currentdict end definefont :ff def }bd /:bsr { copyfontdict /Encoding Encoding 256 array copy def Encoding dup }bd /pd{put dup}bd /:esr { pop pop fontname currentdict end definefont :ff def }bd /scf { scalefont def }bd /scf-non { $m scale :mf setfont }bd /ps Z /fz{/ps xs}bd /sf/setfont ld /cF/currentfont ld /mbf { /makeblendedfont where { pop makeblendedfont /ABlend exch definefont }{ pop }ifelse def }def %%EndFile %%BeginFile: adobe_psp_derived_styles %%Copyright: Copyright 1990-1993 Adobe Systems Incorporated. All Rights Reserved. /wi version(23.0)eq { { gS 0 0 0 0 rC stringwidth gR }bind }{ /stringwidth load }ifelse def /$o 1. def /gl{$o G}bd /ms{:M S}bd /condensedmtx[.82 0 0 1 0 0]def /:mc { condensedmtx :mf def }bd /extendedmtx[1.18 0 0 1 0 0]def /:me { extendedmtx :mf def }bd /basefont Z /basefonto Z /dxa Z /dxb Z /dxc Z /dxd Z /dsdx2 Z /bfproc Z /:fbase { dup/FontType get 0 eq{ dup length dict begin dup{1 index/FID ne 2 index/UniqueID ne and{def}{pop pop}ifelse}forall /FDepVector exch/FDepVector get[exch/:fbase load forall]def }/bfproc load ifelse /customfont currentdict end definefont }bd /:mo { /bfproc{ dup dup length 2 add dict begin { 1 index/FID ne 2 index/UniqueID ne and{def}{pop pop}ifelse }forall /PaintType 2 def /StrokeWidth .012 0 FontMatrix idtransform pop def /customfont currentdict end definefont 8 dict begin /basefonto xdf /basefont xdf /FontType 3 def /FontMatrix[1 0 0 1 0 0]def /FontBBox[0 0 1 1]def /Encoding StandardEncoding def /BuildChar { exch begin basefont setfont ( )dup 0 4 -1 roll put dup wi setcharwidth 0 0 :M gS gl dup show gR basefonto setfont show end }def }store :fbase }bd /:mso { /bfproc{ 7 dict begin /basefont xdf /FontType 3 def /FontMatrix[1 0 0 1 0 0]def /FontBBox[0 0 1 1]def /Encoding StandardEncoding def /BuildChar { exch begin sD begin /dxa 1 ps div def basefont setfont ( )dup 0 4 -1 roll put dup wi 1 index 0 ne { exch dxa add exch }if setcharwidth dup 0 0 ms dup dxa 0 ms dup dxa dxa ms dup 0 dxa ms gl dxa 2. 
div dup ms end end }def }store :fbase }bd /:ms { /bfproc{ dup dup length 2 add dict begin { 1 index/FID ne 2 index/UniqueID ne and{def}{pop pop}ifelse }forall /PaintType 2 def /StrokeWidth .012 0 FontMatrix idtransform pop def /customfont currentdict end definefont 8 dict begin /basefonto xdf /basefont xdf /FontType 3 def /FontMatrix[1 0 0 1 0 0]def /FontBBox[0 0 1 1]def /Encoding StandardEncoding def /BuildChar { exch begin sD begin /dxb .05 def basefont setfont ( )dup 0 4 -1 roll put dup wi exch dup 0 ne { dxb add }if exch setcharwidth dup dxb .01 add 0 ms 0 dxb :T gS gl dup 0 0 ms gR basefonto setfont 0 0 ms end end }def }store :fbase }bd /:mss { /bfproc{ 7 dict begin /basefont xdf /FontType 3 def /FontMatrix[1 0 0 1 0 0]def /FontBBox[0 0 1 1]def /Encoding StandardEncoding def /BuildChar { exch begin sD begin /dxc 1 ps div def /dsdx2 .05 dxc 2 div add def basefont setfont ( )dup 0 4 -1 roll put dup wi exch dup 0 ne { dsdx2 add }if exch setcharwidth dup dsdx2 .01 add 0 ms 0 .05 dxc 2 div sub :T dup 0 0 ms dup dxc 0 ms dup dxc dxc ms dup 0 dxc ms gl dxc 2 div dup ms end end }def }store :fbase }bd /:msb { /bfproc{ 7 dict begin /basefont xdf /FontType 3 def /FontMatrix[1 0 0 1 0 0]def /FontBBox[0 0 1 1]def /Encoding StandardEncoding def /BuildChar { exch begin sD begin /dxd .03 def basefont setfont ( )dup 0 4 -1 roll put dup wi 1 index 0 ne { exch dxd add exch }if setcharwidth dup 0 0 ms dup dxd 0 ms dup dxd dxd ms 0 dxd ms end end }def }store :fbase }bd /italicmtx[1 0 -.212557 1 0 0]def /:mi { italicmtx :mf def }bd /:v { [exch dup/FontMatrix get exch dup/FontInfo known { /FontInfo get dup/UnderlinePosition known { dup/UnderlinePosition get 2 index 0 3 1 roll transform exch pop }{ .1 }ifelse 3 1 roll dup/UnderlineThickness known { /UnderlineThickness get exch 0 3 1 roll transform exch pop abs }{ pop pop .067 }ifelse }{ pop pop .1 .067 }ifelse ] }bd /$t Z /$p Z /$s Z /:p { aload pop 2 index mul/$t xs 1 index mul/$p xs .012 mul/$s xs }bd /:m {gS 0 $p rm $t lw 0 rl 
stroke gR }bd /:n { gS 0 $p rm $t lw 0 rl gS gl stroke gR strokepath $s lw /setstrokeadjust where{pop currentstrokeadjust true setstrokeadjust stroke setstrokeadjust }{ stroke }ifelse gR }bd /:o {gS 0 $p rm $t 2 div dup rm $t lw dup 0 rl stroke gR :n }bd %%EndFile /currentpacking where {pop sc_oldpacking setpacking}if end %%EndProlog %%BeginSetup md begin countdictstack[{ %%BeginFeature: *ManualFeed False statusdict /manualfeed false put %%EndFeature }featurecleanup countdictstack[{ %%BeginFeature: *InputSlot Cassette %%EndFeature }featurecleanup countdictstack[{ %%BeginFeature: *PageRegion LetterSmall lettersmall %%EndFeature }featurecleanup (peter)setjob /mT[1 0 0 -1 31 761]def /sD 16 dict def 300 level2{1 dict dup/WaitTimeout 4 -1 roll put setuserparams}{statusdict/waittimeout 3 -1 roll put}ifelse %%IncludeFont: Times-Bold %%IncludeFont: Symbol %%IncludeFont: Times-Roman %%IncludeFont: Times-Italic %%IncludeFont: TimesNewRomanPSMT %%IncludeFont: TimesNewRomanPS-BoldMT /f0_1/Times-Bold :mre /f0_14 f0_1 14 scf /f0_12 f0_1 12 scf /f0_11 f0_1 11 scf /f1_1/Symbol :bsr 240/apple pd :esr /f1_12 f1_1 12 scf /f1_11 f1_1 11 scf /f1_10 f1_1 10 scf /f1_9 f1_1 9 scf /f1_8 f1_1 8 scf /f1_7 f1_1 7 scf /f1_6 f1_1 6 scf /f2_1 f1_1 def /f2_12 f2_1 12 scf /f2_11 f2_1 11 scf /f3_1/Times-Roman :mre /f3_12 f3_1 12 scf /f3_11 f3_1 11 scf /f3_10 f3_1 10 scf /f3_9 f3_1 9 scf /f3_8 f3_1 8 scf /f3_7 f3_1 7 scf /f3_6 f3_1 6 scf /f4_1/Times-Italic :mre /f4_12 f4_1 12 scf /f5_1 f1_1 :mi /f5_12 f5_1 12 scf /f6_1 f3_1 :v def /f7_1 f1_1 :v def /f8_1/TimesNewRomanPSMT :mre /f8_10 f8_1 10 scf /f9_1/TimesNewRomanPS-BoldMT :mre /f9_10 f9_1 10 scf /Courier findfont[10 0 0 -10 0 0]:mf setfont %%EndSetup %%Page: 1 1 %%BeginPageSetup initializepage (peter; page: 1 of 16)setjob %%EndPageSetup gS 0 0 552 730 rC 229 54 :M f0_14 sf (An Introduction to)S 235 70 :M (Causal Inference)S 234 102 :M (Richard Scheines)S 63 136 :M f3_12 sf .363 .036(In )J f4_12 sf .842 .084(Causation, Prediction, and Search )J 252 
136 :M f3_12 sf <28>S 256 136 :M f4_12 sf .27(CPS)A f3_12 sf .729 .073( hereafter\), Peter Spirtes, Clark Glymour and I)J 41 152 :M 2.014 .201(developed a theory of statistical causal inference. In his presentation at the Notre Dame)J 41 168 :M .918 .092(conference \(and in his paper, this volume\), Glymour discussed the assumptions on which this)J 41 184 :M .135 .014(theory is built, traced some of the mathematical consequences of the assumptions, and pointed to)J 41 200 :M .893 .089(situations in which the assumptions might fail. Nevertheless, many at Notre Dame found the)J 41 216 :M .053 .005(theory difficult to understand and/or assess. As a result I was asked to write this paper to provide)J 41 232 :M .07 .007(a more intuitive introduction to the theory. In what follows I shun almost all formality and avoid)J 41 248 :M 1.694 .169(the numerous and complicated qualifiers that typically accompany definitions or important)J 41 264 :M .393 .039(philosophical concepts. They can be all be found in Glymour's paper or in )J f4_12 sf .159(CPS)A f3_12 sf .386 .039(, which are clear)J 41 280 :M .32 .032(although sometimes dense. Here I attempt to fix intuitions by highlighting a few of the essential)J 41 296 :M (ideas and by providing extremely simple examples throughout.)S 63 312 :M .895 .089(The route I take is a response to the core concern of many I talked to at the Notre Dame)J 41 328 :M 2.267 .227(conference. Our techniques take statistical data and output sets of directed graphs. Most)J 41 344 :M 1.572 .157(everyone saw how that worked-but they could not easily assess the additional assumptions)J 41 360 :M .457 .046(necessary to give such output a causal interpretation, that is an interpretation that would inform)J 41 376 :M .217 .022(us about how systems would )J 184 376 :M f4_12 sf .154 .015(respond to interventions)J 302 376 :M f3_12 sf .228 .023(. 
I will try to present in the simplest terms)J 41 392 :M .653 .065(the assumptions that allow us to move from probabilistic independence relations to the kind of)J 41 408 :M 1.708 .171(causal relations that involve counterfactuals about manipulations and interventions. I first)J 41 424 :M .37 .037(separate out the various parts of the theory: directed graphs, probability, and causality, and then)J 41 440 :M 2.078 .208(clarify the assumptions that connect causal structure to probability. Finally, I discuss the)J 41 456 :M (additional assumptions needed to make inferences from statistical data to causal structure.)S 63 488 :M f0_12 sf (1. DAGs and d-separation)S 63 520 :M f3_12 sf .989 .099(The theory we developed unites two pieces of mathematics and one piece of philosophy.)J 41 536 :M .941 .094(The mathematical pieces are directed acyclic graphs \(DAGs\) and probability theory \(with the)J 41 552 :M (focus on conditional independence\), and the philosophy involves causation among variables.)S 63 568 :M .594 .059(A DAG is a set of vertices and a set of edges \(arrows\) that connect pairs of these vertices.)J 41 584 :M .3 .03(For example, we might have a set of three vertices: {X)J 309 586 :M f3_8 sf .098(1)A f3_12 sf 0 -2 rm .277 .028(, X)J 0 2 rm f3_8 sf .098(2)A f3_12 sf 0 -2 rm .3 .03(, X)J 0 2 rm 348 586 :M f3_8 sf .081(3)A f3_12 sf 0 -2 rm .313 .031(}, and a set of two edges among)J 0 2 rm 41 600 :M .503 .05(these vertices: {X)J 129 602 :M f3_8 sf (1)S f1_12 sf 0 -2 rm S 0 2 rm 145 600 :M f3_12 sf (X)S 154 602 :M f3_8 sf .344(2)A f3_12 sf 0 -2 rm .66 .066( , X)J 0 2 rm 178 602 :M f3_8 sf (2)S f1_12 sf 0 -2 rm S 0 2 rm 194 600 :M f3_12 sf (X)S 203 602 :M f3_8 sf .138(3)A f3_12 sf 0 -2 rm .601 .06(}. 
We almost always represent DAGs with a picture, or path)J 0 2 rm 41 616 :M (diagram, e.g., this DAG looks like: X)S 227 618 :M f3_8 sf (1)S f1_12 sf 0 -2 rm S 0 2 rm 243 616 :M f3_12 sf ( X)S 255 618 :M f3_8 sf (2)S f3_12 sf 0 -2 rm ( )S 0 2 rm f1_12 sf 0 -2 rm S 0 2 rm 274 616 :M f3_12 sf ( X)S 286 618 :M f3_8 sf (3)S 63 632 :M f3_12 sf 1.458 .146(Prior to any interpretation, a DAG is a completely abstract mathematical object. In our)J 41 648 :M 1.281 .128(theory, DAGs are given two distinct functions. In the first they represent sets of probability)J 41 664 :M 2.163 .216(distributions and in the second they represent causal structures. The way they represent)J 41 680 :M 1.199 .12(probability distributions is given by the Markov condition, which \(in DAGs\) turns out to be)J endp %%Page: 2 2 %%BeginPageSetup initializepage (peter; page: 2 of 16)setjob %%EndPageSetup gS 0 0 552 730 rC 41 54 :M f3_12 sf 2.216 .222(equivalent to a more generally useful graphical relation: d-separation \(Pearl 1988\).)J 479 49 :M f3_7 sf (1)S 482 54 :M f3_12 sf 3.526 .353( D-)J 41 70 :M .419 .042(separation is a relation between three disjoint sets of vertices in a directed graph. Although too)J 41 86 :M .187 .019(complicated to explain or define here,)J 225 81 :M f3_7 sf (2)S 228 86 :M f3_12 sf .206 .021( the basic idea involves checking whether a set of vertices)J 41 102 :M .106 .011(Z blocks all connections of a certain type between X and Y in a graph G. If so, then X and Y are)J 41 118 :M .68 .068(d-separated by Z in G. In the DAG on the left side of Fig. 
1, for example, X)J 425 120 :M f3_9 sf (2)S 430 118 :M f3_12 sf .645 .065( blocks the only)J 41 134 :M .99 .099(directed path connecting X)J 177 136 :M f3_9 sf (1)S 182 134 :M f3_12 sf 1.224 .122( and X)J f3_9 sf 0 2 rm (3)S 0 -2 rm 222 134 :M f3_12 sf 1.452 .145(, so X)J 254 136 :M f3_9 sf (1)S 259 134 :M f3_12 sf 1.452 .145( and X)J 295 136 :M f3_9 sf (3)S 300 134 :M f3_12 sf 1.21 .121( are d-separated by X)J 410 136 :M f3_9 sf (2)S 415 134 :M f3_12 sf 1.375 .137( in this DAG. By)J 41 150 :M .349 .035(choosing d-separation to connect DAGs to probability distributions, we assume that in all of the)J 41 166 :M .434 .043(distributions P a DAG G can represent, if sets of vertices X and Y are d-separated by a set Z in)J 41 182 :M 1.031 .103(the DAG G, then X and Y are independent conditional on Z in P. For example, applying d-)J 41 198 :M .094 .009(separation to the DAG in Fig. 1 gives us: X)J 252 200 :M f3_9 sf (1)S 257 198 :M f3_12 sf .093 .009( and X)J f3_9 sf 0 2 rm (3)S 0 -2 rm 294 198 :M f3_12 sf .087 .009( are d-separated by X)J f3_9 sf 0 2 rm (2)S 0 -2 rm 402 198 :M f3_12 sf .092 .009(. We then assume that)J 41 214 :M .249 .025(in all distributions this DAG can represent, X)J 262 216 :M f3_8 sf .065(1)A f3_12 sf 0 -2 rm .267 .027( is independent of X)J 0 2 rm 365 216 :M f3_8 sf .056(3)A f3_12 sf 0 -2 rm .255 .026( conditional on X)J 0 2 rm 454 216 :M f3_8 sf .091(2)A f3_12 sf 0 -2 rm .292 .029(. 
We use a)J 0 2 rm 41 230 :M .457 .046(notation for independence introduced by Phil Dawid \(1979\); X)J f3_9 sf 0 2 rm (1)S 0 -2 rm 354 230 :M f3_12 sf .526 .053( _||_ X)J f3_9 sf 0 2 rm (3)S 0 -2 rm 392 230 :M f3_12 sf .706 .071( | X)J 411 232 :M f3_9 sf (2)S 416 230 :M f3_12 sf .738 .074( means: X)J f3_9 sf 0 2 rm .27 .027(1 )J 0 -2 rm 474 230 :M f3_12 sf .58 .058(and X)J 504 232 :M f3_9 sf (3)S 41 246 :M f3_12 sf (are independent conditional on X)S 201 248 :M f3_9 sf (2)S 206 246 :M f3_12 sf (.)S 63 265 424 48 rC gS 1 1.021 scale 96 276.127 :M f8_10 sf (DAG)S gR gS 1 1.021 scale 71 292.773 :M f8_10 sf (X)S gR gS 1 1.021 scale 80 296.689 :M f8_10 sf (1)S gR gS 1 1.021 scale 125 292.773 :M f8_10 sf (X)S gR gS 1 1.021 scale 134 296.689 :M f8_10 sf (2)S gR gS 1 1.021 scale 176 293.752 :M f8_10 sf (X)S gR gS 1 1.021 scale 185 297.668 :M f8_10 sf (3)S gR 13.5 180 270 119.5 294.5 @k 89 295 -1 1 114 294 1 89 294 @a 180 270 26 27 168 294.5 @l 141 295 -1 1 163 294 1 141 294 @a 63.5 266.5 192.5 312.5 11 @s gS 1 1.021 scale 242 276.127 :M f8_10 sf (d-separation)S gR 180 270 40 41 348 291.5 @l 201 294 -3 3 339 291 3 201 291 @a gS 1 1.021 scale 366 276.127 :M f8_10 sf (Set of Independencies)S gR gS 1 1.021 scale 368 292.773 :M f8_10 sf (X)S gR gS 1 1.021 scale 377 296.689 :M f8_10 sf (1)S gR gS 1 1.021 scale 437 292.773 :M f8_10 sf (X)S gR gS 1 1.021 scale 446 296.689 :M f8_10 sf (2)S gR gS 1 1.021 scale 411 293.752 :M f8_10 sf (X)S gR gS 1 1.021 scale 420 297.668 :M f8_10 sf (3)S gR 353.5 267.5 486.5 312.5 11 @s -1 -1 430 300 1 1 429 288 @b -1 -1 399 297 1 1 398 287 @b 386 298 -1 1 406 297 1 386 297 @a -1 -1 395 297 1 1 394 287 @b gS 1 1.021 scale 360 291.793 :M f9_10 sf ({)S gR gS 1 1.021 scale 455 292.773 :M f9_10 sf (})S gR gR gS 0 0 552 730 rC 259 338 :M f0_12 sf ( Fig. 
1)S 41 398 :M f3_12 sf .094 .009(It should be stressed that as long as we remain agnostic and give no interpretation to DAGs, then)J 41 414 :M .392 .039(they are just mathematical objects which we can connect to probability distributions in any way)J 41 430 :M 1.916 .192(we like. We could just as easily define and then use e-separation, or f-separation, or any)J 41 446 :M .448 .045(graphical relation we please, as long as it produced consistent sets of independencies. When we)J 41 462 :M .864 .086(give DAGs a causal interpretation, it then becomes necessary to argue that d-separation is the)J 41 478 :M .576 .058(correct connection between a causal DAG and probability distributions. Let us put off that task)J 41 494 :M (for a few more pages, however.)S 63 510 :M 1.029 .103(There are often many distinct DAGs that represent exactly the same set of independence)J 41 526 :M .841 .084(relations, and thus the same set of distributions. And just as one might want a procedure that)J 41 542 :M .471 .047(computes d-separation for any graph, one might want an algorithm that computes all the DAGs)J 41 558 :M (that represent a given set of independence relations \(Fig. 2\).)S 41 650 :M ( )S 161 650 :M ( )S 41 647.48 -.48 .48 185.48 647 .48 41 647 @a 63 659 :M f3_6 sf (1)S 66 663 :M f3_10 sf 1.563 .156( If directed graphs have cycles, or chains of arrows that lead from a variable back to itself, then this)J 41 675 :M (equivalence breaks down.)S 63 683 :M f3_6 sf (2)S 66 687 :M f3_10 sf ( We try to explain it in CPS, pp. 
71-74.)S endp %%Page: 3 3 %%BeginPageSetup initializepage (peter; page: 3 of 16)setjob %%EndPageSetup gS 0 0 552 730 rC 75 41 399 96 rC 163 68 :M f3_11 sf (Any )S 163 80 :M (DAG )S 154.5 53.5 195.5 88.5 2.5 @s 265 65 :M (d-separation)S 20 163 209 371 81 @k 192 70 -3 3 363 80 3 192 67 @a 386 83 :M ( Set of )S 386 95 :M (independencies )S 374.5 65.5 472.5 105.5 2.5 @s 110 111 :M (All DAGs that imply )S 110 123 :M (these independencies)S 97.5 46.5 214.5 135.5 5 @s 20 -30 18 219 106 @k -3 -3 231 108 3 3 369 92 @b 255 117 :M (Inference Algorithm )S gR gS 0 0 552 730 rC 260 162 :M f0_12 sf (Fig. 2)S 63 194 :M f3_12 sf .273 .027(We have developed several such algorithms, one of which is called the PC algorithm and is)J 41 210 :M .338 .034(computed by the TETRAD II program.)J 232 205 :M f3_7 sf (3)S 235 210 :M f3_12 sf .379 .038( Its input is a set of independence relations over a set of)J 41 226 :M .593 .059(variables and its output is a set of DAGs over these variables that are d-separation, or Markov,)J 41 242 :M (equivalent.)S 94 237 :M f3_7 sf (4)S 97 242 :M f3_12 sf .48 .048( Applying the PC algorithm to the same set of independence relations shown on the)J 41 258 :M .6 .06(right side of Fig. 1, you can see \(in Fig. 3\) that there are two other DAGs that are d-separation)J 41 274 :M .042 .004(equivalent to the DAG in Fig. 1. 
PC is known to be complete in the sense that its output contains)J 41 290 :M (all and only those DAGs that are d-separation equivalent.)S 60 305 430 120 rC 95 326 :M f3_11 sf (DAGs)S 72 344 :M (X )S 81 348 :M (1 )S 126 343 :M (X )S 135 347 :M (2 )S 177 345 :M (X )S 186 349 :M (3 )S 13 158 206 120 341 @k 90 342 -1 1 115 341 1 90 341 @a 13 158 206 169 341 @k 142 342 -1 1 164 341 1 142 341 @a 64.5 312.5 200.5 423.5 2.5 @s 241 352 :M (PC Algorithm)S 20 -26 24 203 361 @k 212 363 -3 3 351 360 3 212 360 @a 368 351 :M (Set of Independencies )S 370 367 :M (X )S 379 371 :M (1 )S 439 367 :M (X )S 448 371 :M (2 )S 413 368 :M (X )S 422 372 :M (3 )S 354.5 336.5 488.5 382.5 2.5 @s -1 -1 432 370 1 1 431 358 @b -1 -1 401 367 1 1 400 357 @b 388 368 -1 1 408 367 1 388 367 @a -1 -1 397 367 1 1 396 357 @b 362 366 :M f0_11 sf ({ )S 457 367 :M (} )S 72 373 :M f3_11 sf (X )S 81 377 :M (1 )S 126 372 :M (X )S 135 376 :M (2 )S 177 374 :M (X )S 186 378 :M (3 )S 13 -28 24 92 369 @k 98 370 -1 1 123 369 1 98 369 @a 13 158 206 169 370 @k 142 371 -1 1 164 370 1 142 370 @a 72 403 :M (X )S 81 407 :M (1 )S 126 402 :M (X )S 135 406 :M (2 )S 177 404 :M (X )S 186 408 :M (3 )S 13 -28 24 90 400 @k 96 401 -1 1 121 400 1 96 400 @a 13 -28 24 142 400 @k 148 401 -1 1 170 400 1 148 400 @a gR gS 0 0 552 730 rC 260 450 :M f0_12 sf (Fig. 3)S 63 482 :M (2. 
Causal Graphs)S 63 498 :M f3_12 sf .453 .045(If taken no further, d-separation and the Markov condition are just mathematics connecting)J 41 514 :M .275 .028(DAGs and probability distributions and need not involve causation at all.)J 397 509 :M f3_7 sf (5)S 400 514 :M f3_12 sf .316 .032( One might be content)J 41 530 :M 2.021 .202(to use this mathematical theory solely to produce compact and elegant representations of)J 41 546 :M .613 .061(independence structures,)J 162 541 :M f3_7 sf (6)S 165 546 :M f3_12 sf 1.267 .127( or one might take a further step by assuming that when )J f4_12 sf 2.348 .235(DAGs are)J 41 578 :M f3_12 sf ( )S 161 578 :M ( )S 41 575.48 -.48 .48 185.48 575 .48 41 575 @a 63 587 :M f3_6 sf (3)S 66 591 :M f3_10 sf .457 .046(For those interested in getting acquainted with the ideas as they are embodied in the program that computes)J 41 603 :M .252 .025(many of the discovery algorithms presented in CPS, the TETRAD II program is available from Lawrence Erlbaum)J 41 615 :M (Associates, Hillsdale, NJ.)S 63 623 :M f3_6 sf (4)S 66 627 :M f3_10 sf ( Sometimes there are no DAGs that can represent a given set of independence relations.)S 63 635 :M f3_6 sf (5)S 66 639 :M f3_10 sf 1.033 .103( In fact, Judea Pearl originally developed the theory connecting DAGs and probability in order to afford)J 41 651 :M .609 .061(robots or other AI agents an efficient way to store and use probability distributions which represented the agent's)J 41 663 :M (uncertainty over states of the world.)S 63 671 :M f3_6 sf (6)S 66 675 :M f3_10 sf .925 .092( In fact we have often heard just such a purpose endorsed explicitly in public by able statisticians, but in)J 41 687 :M .295 .029(almost every case these same people over beer later confess their heresy by concurring that their real ambitions are)J endp %%Page: 4 4 %%BeginPageSetup initializepage (peter; page: 4 of 16)setjob %%EndPageSetup gS 0 0 552 730 rC 41 54 :M f4_12 sf .564 .056(interpreted causally)J 139 54 :M f3_12 sf 
.933 .093( the Markov condition and d-separation are in fact the )J f4_12 sf .32(correct)A 451 54 :M f3_12 sf .818 .082( connection)J 41 70 :M 1.699 .17(between causal structure and probabilistic independence. We call the latter assumption the)J 41 86 :M (Causal Markov condition, and it is a stronger assumption than the Markov condition.)S 63 102 :M .312 .031(DAGs that are interpreted causally are called )J f4_12 sf .486 .049(causal graphs)J f3_12 sf .229 .023(. There is an arrow from X to Y)J 41 118 :M .12 .012(in a causal graph involving a set of variables )J 260 118 :M f0_12 sf (V)S 269 118 :M f3_12 sf .126 .013( just in case X is a direct cause of Y relative to )J f0_12 sf (V)S 506 118 :M f3_12 sf (.)S 41 134 :M 1.257 .126(For example, if S is a variable that codes for smoking behavior, Y a variable that codes for)J 41 150 :M 1.314 .131(yellowed, or nicotaine stained, fingers, and C a variable that codes for the presence of lung)J 41 166 :M 1.728 .173(cancer, then the following causal graph \(Fig. 4\) represents what I believe to be the causal)J 41 182 :M (structure among these variables.)S 157 197 236 95 rC 258 214 :M f3_11 sf (\(Smoking\) )S 258 226 :M ( )S 258 238 :M ( )S 273 232 :M f0_11 sf ( S )S 168 287 :M f3_11 sf (\(Yellowed Fingers\))S 320 288 :M (\(Lung Cancer\) )S 212 269 :M f0_11 sf (Y )S 342 271 :M (C )S 13 -54 -4 228 260 @k -1 -1 234 258 1 1 273 236 @b 13 188 234 340 259 @k 297 236 -1 1 336 256 1 297 235 @a gR gS 0 0 552 730 rC 260 317 :M f0_12 sf (Fig. 4)S 63 349 :M f3_12 sf 1.869 .187(Causal graphs are assumed to be complete in one sense and not in another. They are)J 41 365 :M (incomplete in that they do not necessarily include )S f4_12 sf (all)S 295 365 :M f3_12 sf -.001( of the causes of each variable in the system.)A 41 381 :M 1.15 .115(Thus many of the causes of lung cancer have been left out, e.g., asbestos inhalation, genetic)J 41 397 :M .381 .038(factors, etc. 
They also leave out many variables that might lie in between a specified cause and)J 41 413 :M .561 .056(its effect, e.g., cillia trauma in the bronchial lining might lie on the \322true\323 causal pathway from)J 41 429 :M .096 .01(smoking to lung cancer. But a causal graph is assumed to be complete in the sense that all of the)J 41 445 :M f4_12 sf (common causes)S 117 445 :M f3_12 sf -.002( of specified variables have been included. For example, if there is some variable)A 41 461 :M .569 .057(that is a cause of both smoking behavior and lung cancer, e.g., a genetic factor, then the causal)J 41 477 :M .082 .008(graph above is not an accurate depiction of the causal structure among these three variables. The)J 41 493 :M .609 .061(causal graph is also assumed to be complete in the sense that )J 347 493 :M f4_12 sf (all)S 360 493 :M f3_12 sf .581 .058( of the causal relations among)J 41 509 :M .539 .054(the specified variables are included in the graph. For example, the graph in Fig. 4 has no edge)J 41 525 :M 1.285 .128(from Y to S, so it is only accurate if the level of nicotine stains does not in any way cause)J 41 541 :M (smoking behavior.)S 63 557 :M 2.157 .216(The semantics of a causal graph involve ideal manipulations and the changes in the)J 41 573 :M .645 .064(probability distribution that follow such manipulations. Such an account is circular, because to)J 41 589 :M 2.913 .291(manipulate )J f4_12 sf .435(is)A f3_12 sf 1.897 .19( to cause. 
Our purpose, however, is not to provide a reductive definition of)J 41 605 :M .108 .011(causation, but rather to connect it to probability in a way that accords with scientific practice and)J 41 621 :M (allows a systematic investigation of causal inference.)S 41 662 :M ( )S 161 662 :M ( )S 281 662 :M ( )S 401 662 :M ( )S 41 659.48 -.48 .48 509.48 659 .48 41 659 @a 41 675 :M f3_10 sf .662 .066(causal and their public agnosticism is a prophylactic against the abuse of statistics by their clients or less careful)J 41 687 :M (practitioners.)S endp %%Page: 5 5 %%BeginPageSetup initializepage (peter; page: 5 of 16)setjob %%EndPageSetup gS 0 0 552 730 rC 63 54 :M f3_12 sf ( )S 66 54 :M .054 .005(To manipulate a variable ideally is to change it in a way that, at least for the moment, leaves)J 41 70 :M .65 .065(every other variable undisturbed. Such a manipulation must directly change only its target and)J 41 86 :M .685 .068(leave changes in the other variables to be produced by these targets or not at all. For example,)J 41 102 :M 1.308 .131(suppose we are attempting to experimentally test hypotheses concerning the causal relations)J 41 118 :M 2.623 .262(between athletic performance and confidence. Suppose we intervene to inhibit athletic)J 41 134 :M .924 .092(performance by administering a drug that blocks nutrient uptake in muscle cells, but that this)J 41 150 :M .637 .064(drug also imitates the chemical structure of neurotransmitters that inhibit feelings of insecurity)J 41 166 :M .993 .099(and anxiety, thus serving to directly increase anxiety and lower confidence. This intervention)J 41 182 :M .044 .004(provides little help in trying to reason about the sort of causal relation that exists between athletic)J 41 198 :M .243 .024(performance and confidence, because it directly alters both variables. 
It is an intervention with a)J 41 214 :M (\322fat hand.\323)S 94 209 :M f3_7 sf (7)S 97 214 :M f3_12 sf ( Ideal interventions are perfectly selective in the variables they )S 401 214 :M f4_12 sf (directly)S 438 214 :M f3_12 sf ( change.)S 63 230 :M 1.887 .189(The causal graph tells us, for any ideal manipulation we might consider, which other)J 41 246 :M .044 .004(variables we would expect to change in some way and which we would not. Put simply, the only)J 41 262 :M 3.045 .304(variables we can hope to change must be causally "downstream" of the variables we)J 41 278 :M .1 .01(manipulated. Although we can make inferences upstream, that is from effects to their causes, we)J 41 294 :M .38 .038(cannot manipulate an effect and hope to change its other causes. In Fig. 4, for example, after an)J 41 310 :M .102 .01(ideal manipulation of the level of nicotine stains, nothing at all would happen to the probabilities)J 41 326 :M .928 .093(of smoking and lung cancer. They would take on the same values they would have if we had)J 41 342 :M 1.583 .158(done no manipulation at all. If we could manipulate the lung cancer level of an individual)J 41 358 :M .709 .071(without directly perturbing his or her smoking behavior or finger stains, then again, we would)J 41 374 :M 1.42 .142(not expect to change the probability of smoking or of finger stains. If, however, we could)J 41 390 :M .428 .043(manipulate smoking behavior in a way that did not directly perturb any other variable, then \(for)J 41 406 :M 1.084 .108(at least some of these manipulations\) we would perturb the probability of the other variables)J 41 422 :M (through the direct causal route from smoking to the other variables.)S 63 438 :M .183 .018(If the causal graph changes, so does the set of counterfactuals about ideal manipulations. If,)J 41 454 :M .659 .066(for example, the causal graph is as I picture it in Fig. 
5 \(absurd as it may seem\), then only the)J 41 470 :M .257 .026(statement concerning manipulations of lung cancer remains unchanged. Any ideal manipulation)J 41 486 :M .124 .012(of smoking will result in no change in Y, but some will result in a change in C\325s probability, and)J 41 502 :M (\(some\) manipulations of Y will result in changes in S\325s probability.)S 157 533 236 95 rC 258 550 :M f3_11 sf (\(Smoking\) )S 258 562 :M ( )S 258 574 :M ( )S 273 568 :M f0_11 sf ( S )S 168 623 :M f3_11 sf (\(Yellowed Fingers\))S 320 624 :M (\(Lung Cancer\) )S 212 605 :M f0_11 sf (Y )S 342 607 :M (C )S 13 132 180 273 572 @k -1 -1 229 597 1 1 268 575 @b 13 188 234 340 595 @k 297 572 -1 1 336 592 1 297 571 @a gR gS 0 0 552 730 rC 260 649 :M f0_12 sf (Fig. 5)S 41 674 :M f3_12 sf ( )S 161 674 :M ( )S 41 671.48 -.48 .48 185.48 671 .48 41 671 @a 63 683 :M f3_6 sf (7)S 66 687 :M f3_10 sf ( Kevin Kelly suggested this nomenclature.)S endp %%Page: 6 6 %%BeginPageSetup initializepage (peter; page: 6 of 16)setjob %%EndPageSetup gS 0 0 552 730 rC 63 54 :M f3_12 sf .636 .064(Ignoring all sorts of subtleties, the point should be clear: the sort of causation we are after)J 41 70 :M (involves the response of a system to interventions.)S 63 102 :M f0_12 sf (3. The Causal Markov Condition)S 63 134 :M f3_12 sf (The Causal Markov assumption can be stated simply:)S 95 166 :M .646 .065(A variable X is independent of every other variable \(except X\325s effects\) conditional)J 95 182 :M (on all of its direct causes.)S 41 214 :M 2.783 .278(Applying this to each variable in the causal graph in Fig. 
4 yields the following)J 41 230 :M (independence relations:)S 155 225 :M f3_7 sf (8)S 63 262 :M f3_12 sf (For Y: Y is independent of C conditional on S)S 63 278 :M (For S: All of the other variables are S\325s effects, so the condition is vacuous)S 63 294 :M (For C: C is independent of Y conditional on S)S 41 326 :M 1.529 .153(By probability theory, the first and last of these independences are equivalent, so this)J 41 342 :M .022 .002(causal graph entails one independence by the Causal Markov assumption. You can see that)J 41 358 :M .706 .071(Fig. 5 implies the same independence relations as does Fig. 4, even though it is different)J 41 374 :M (causally and thus entails different counterfactuals about interventions.)S 63 390 :M .315 .031(The independence relations entailed by applying the Causal Markov assumption to a causal)J 41 406 :M -.002(graph is the same as those obtained from applying d-separation to a causal graph, but it is simpler)A 41 422 :M .611 .061(to justify the connection between causal graphs and probability when stated in a Markov form.)J 41 438 :M .559 .056(The intuition behind the Causal Markov assumption is simple: ignoring a variable\325s effects, all)J 41 454 :M 1.199 .12(the relevant probabilistic information about a variable that can be obtained from a system is)J 41 470 :M .412 .041(contained in its direct causes. In a Markov process, knowing a system\325s current state is relevant)J 41 486 :M 2.474 .247(to its future, but knowing how it got to its current state is completely irrelevant. Hans)J 41 502 :M 1.389 .139(Reichenbach \(1956\) was the first philosopher to explicitly discuss the Markov properties of)J 41 518 :M .599 .06(causal systems, but variants have been discussed by Nancy Cartwright \(1989\), Wesley Salmon)J 41 534 :M (\(1984\), Brian Skyrms \(1970\), Patrick Suppes \(1970\), and many other philosophers.)S 63 550 :M 1.075 .107(How does such an assumption capture the asymmetry of causation? 
For systems of two)J 41 566 :M .02 .002(variables it cannot. The two causal graphs in Fig. 6 imply the same independencies by the Causal)J 41 582 :M 3.328 .333(Markov condition, and are thus indistinguishable solely on the basis of probabilistic)J 41 598 :M (independence.)S 41 662 :M ( )S 161 662 :M ( )S 41 659.48 -.48 .48 185.48 659 .48 41 659 @a 63 671 :M f3_6 sf (8)S 66 675 :M f3_10 sf 1.448 .145( A variable X is always independent of Y conditional on Y, so in this list I do not include the trivial)J 41 687 :M (independences between each variable and its direct causes when we condition on these direct causes.)S endp %%Page: 7 7 %%BeginPageSetup initializepage (peter; page: 7 of 16)setjob %%EndPageSetup gS 0 0 552 730 rC 163 41 223 37 rC gS 1.005 1 scale 184.171 73 :M f0_11 sf ( X )S gR gS 1.005 1 scale 246.889 74 :M f0_11 sf (Y )S gR 13 158 206 243 69 @k 210 70 -1 1 238 69 1 210 69 @a gS 1.005 1 scale 308.611 73 :M f0_11 sf ( X )S gR gS 1.005 1 scale 371.329 74 :M f0_11 sf (Y )S gR 13 -28 24 334 69 @k 340 70 -1 1 368 69 1 340 69 @a gR gS 0 0 552 730 rC 260 103 :M f0_12 sf (Fig. 6)S 41 131 :M f3_12 sf .802 .08(But leaping from this simple indistinguishability to the conclusion that probabilities can never)J 41 147 :M 1.973 .197(give us information about causal structure is patently fallacious. As Hausman \(1984\) and)J 41 163 :M .496 .05(Papineau \(1985\) realized, adding a third variable changes the story entirely. The two graphs in)J 41 179 :M .276 .028(Fig. 
7 are )J 91 179 :M f4_12 sf .045(not)A f3_12 sf .197 .02( Markov or d-separation equivalent, and the difference between their independence)J 41 195 :M (implications underlies the connection between independence and causal priority more generally.)S 128 214 294 105 rC 13 158 206 185 229 @k 155 230 -1 1 180 229 1 155 229 @a 13 -28 24 207 229 @k 213 230 -1 1 235 229 1 213 229 @a 169 270 :M f3_11 sf (Independence Relations Entailed by d-separation )S -1 -1 380 310 1 1 379 298 @b -1 -1 349 307 1 1 348 297 @b 336 308 -1 1 356 307 1 336 307 @a -1 -1 345 307 1 1 344 297 @b 318 307 :M (X )S 363 307 :M (Z )S 387 307 :M (Y )S -1 -1 196 309 1 1 195 299 @b 183 310 -1 1 203 309 1 183 309 @a -1 -1 192 309 1 1 191 299 @b 165 309 :M (X )S 210 309 :M (Z )S 137 232 :M (X )S 192 233 :M (Y )S 241 233 :M (Z )S 13 -28 24 323 231 @k 329 232 -1 1 354 231 1 329 231 @a 13 -28 24 375 231 @k 381 232 -1 1 403 231 1 381 231 @a 305 234 :M (X )S 360 235 :M (Y )S 409 235 :M (Z )S gR gS 0 0 552 730 rC 260 344 :M f0_12 sf (Fig. 7)S 63 376 :M f3_12 sf .419 .042(We see three main lines of justification for the Causal Markov assumption, although surely)J 41 392 :M .249 .025(there are others. First, versions of the assumption are used, perhaps implicitly, in making causal)J 41 408 :M 2.252 .225(inferences from controlled experiments. Second, philosophical treatments of probabilistic)J 41 424 :M .801 .08(causality embrace it \(Suppes, 1970; Reichenbach, 1956\), and third, structural equation models)J 41 440 :M 1.234 .123(\(Bollen, 1989\), which are perhaps the most widely used class of statistical causal models in)J 41 456 :M .24 .024(social science are Causally Markov. Elaborating on the first two lines of support are beyond the)J 41 472 :M .459 .046(scope of this paper; they are covered in )J f4_12 sf .206(CPS)A f3_12 sf .441 .044( or in Glymour\325s paper. 
Here I will try to make the)J 41 488 :M .435 .043(connection between structural equation models and the Causal Markov assumption a little more)J 41 504 :M (explicit.)S 63 520 :M .293 .029(In a structural equation model, each variable is equal to a linear function of its direct causes)J 41 536 :M .056 .006(plus an \322error\323 term. Thus the causal graph in Fig. 4 would translate into the following structural)J 41 552 :M (equation model:)S 221 584 :M (Y = )S f1_12 sf (b)S 249 586 :M f3_8 sf (1)S f3_12 sf 0 -2 rm ( S + )S 0 2 rm f1_12 sf 0 -2 rm (e)S 0 2 rm f3_8 sf (y)S 221 600 :M f3_12 sf (C = )S 242 600 :M f1_12 sf (b)S 249 602 :M f3_8 sf (2)S f3_12 sf 0 -2 rm ( S + )S 0 2 rm f1_12 sf 0 -2 rm (e)S 0 2 rm f3_8 sf (c)S 41 632 :M f3_12 sf .741 .074(where )J f1_12 sf (b)S 81 634 :M f3_8 sf 1.297 .13(1 )J 89 632 :M f3_12 sf .318(and)A f3_8 sf 0 2 rm .1 .01( )J 0 -2 rm f1_12 sf (b)S 116 634 :M f3_8 sf .269 .027(2 )J f3_12 sf 0 -2 rm 1.004 .1(are real valued coefficients and )J 0 2 rm 284 632 :M f1_12 sf .695(e)A f3_8 sf 0 2 rm .665 .067(c )J 0 -2 rm 296 632 :M f3_12 sf 1.311 .131(and )J f1_12 sf .476(e)A f3_8 sf 0 2 rm .493 .049(y )J 0 -2 rm 330 632 :M f3_12 sf .82 .082(are error terms with strictly positive)J 41 648 :M .788 .079(variance. If the system in Fig. 4 is complete as specified, that is, its causal graph is complete)J 41 664 :M .364 .036(with respect to common causes, then a structural equation modeller would assume that )J 468 664 :M f1_12 sf .203(e)A f3_8 sf 0 2 rm .178 .018(c )J 0 -2 rm f3_12 sf .602 .06(and )J 500 664 :M f1_12 sf (e)S f3_8 sf 0 2 rm (y)S 0 -2 rm 41 680 :M f3_12 sf .234 .023(are independent of each other and of S. Indeed, in structural equation models in which all of the)J endp %%Page: 8 8 %%BeginPageSetup initializepage (peter; page: 8 of 16)setjob %%EndPageSetup gS 0 0 552 730 rC 41 54 :M f3_12 sf .909 .091(common causes are included the error terms are assumed to be independent. 
It turns out that)J 41 70 :M 1.97 .197(such models )J 111 70 :M f4_12 sf 1.09 .109(necessarily satisfy)J 202 70 :M f3_12 sf 1.809 .181( the Causal Markov assumption \(Kiiveri and Speed, 1982\).)J 41 86 :M .38 .038(Spirtes \(1994\) has generalized the result to models in which each effect is an )J f4_12 sf .123(arbitrary)A 465 86 :M f3_12 sf .37 .037( function)J 41 102 :M .532 .053(of its immediate causes and an independent error.)J 286 97 :M f3_7 sf (9)S 289 102 :M f3_12 sf .556 .056( The nature of the function connecting cause)J 41 118 :M (and effect is not so important as the independence of the error terms.)S 63 134 :M 2.197 .22(The connection between structural equation models and causation \(as it involves the)J 41 150 :M .586 .059(response of a system to interventions\) arises through the connection between independent error)J 41 166 :M .893 .089(terms and ideal manipulations. Although ideal manipulations provide the semantic ground for)J 41 182 :M .711 .071(causal claims, such manipulations are sometimes only ideal and cannot be practically realized.)J 41 198 :M .949 .095(For example, although poverty may cause crime, we cannot ethically intervene to impoverish)J 41 214 :M 1.173 .117(people. In such situations we resort to collecting data passively. Since experimental science)J 41 230 :M .963 .096(specializes in creating arrangements in which ideal manipulations exist and are subject to our)J 41 246 :M .55 .055(will, it is no surprise that critics of causal inference from statistical data insist that experiments)J 41 262 :M .805 .08(are the only means of establishing causal claims. But besides: \322I can\325t imagine how it can be)J 41 278 :M (done,\323 what is their argument?)S 63 294 :M 1.348 .135(In the first place, there might well be ideally selective sources of variation that exist in)J 41 310 :M -.002(nature but which we cannot now or ever hope to control. 
For example, the moon\325s position exerts)A 41 326 :M .378 .038(a direct effect on the gravitational field over the oceans, which causes the tides. But though the)J 41 342 :M (moon is a source that we cannot control, at least we can measure it.)S 63 358 :M .146 .015(In other systems, such ideal sources of variation might exist, but be both beyond our control)J 41 374 :M .201 .02(and unobservable. In fact a natural interpretation of the error terms in structural equation models)J 41 390 :M .369 .037(gives them precisely these properties. There is a unique error term )J 369 390 :M f1_12 sf .091(e)A f3_8 sf 0 2 rm .086 .009(x )J 0 -2 rm f3_12 sf .39 .039(for each specified variable)J 41 406 :M 1.179 .118(X, and in systems which include all the common causes )J f1_12 sf .353(e)A f3_8 sf 0 2 rm .365 .037(x )J 0 -2 rm 342 406 :M f3_12 sf 1.191 .119(is assumed to be a source of X\325s)J 41 422 :M .779 .078(variation that )J 111 422 :M f4_12 sf (directly)S 148 422 :M f3_12 sf .845 .085( affects only X. And although we cannot measure them or control them,)J 41 438 :M .074 .007(structural equation modellers assume that such error terms exist. It is this assumption that makes)J 41 454 :M (such systems Causally Markov.)S 193 449 :M f3_7 sf (1)S 196 449 :M (0)S 63 470 :M f3_12 sf ( )S 69 470 :M .918 .092(It is tempting to think that even if we do interpret error terms as unobservable but ideal)J 41 486 :M .666 .067(manipulations, then we are then stopped dead for purposes of causal inference just because we)J 41 502 :M .028 .003(cannot observe them. But this is a fallacy. It is true that we cannot learn as much about the causal)J 41 518 :M 1.323 .132(structure of such systems as we could if the error terms were observable \(the experimenters)J 41 534 :M 1.225 .123(world\), but by no means does it follow that we can learn nothing about them. 
In fact this is)J 41 550 :M (precisely where causal inference starts, with systems that are assumed to be Causally Markov.)S 41 626 :M ( )S 161 626 :M ( )S 41 623.48 -.48 .48 185.48 623 .48 41 623 @a 63 635 :M f3_6 sf (9)S 66 639 :M f3_10 sf .082 .008( In fact it is only necessary for the proof that the error term is a function of the variable for which it is an error)J 41 651 :M (and that variable's immediate causes_.)S 63 659 :M f3_6 sf (1)S 66 659 :M (0)S 69 663 :M f3_10 sf .275 .028( In certain contexts the detrimental effect on causal inference of violating this assumption is well undersood.)J 41 675 :M .016 .002(For example, in a regression model in which some of the regressors are correlated with the error term, then the result)J 41 687 :M (is a bias in estimating the causal effect of these regressors.)S endp %%Page: 9 9 %%BeginPageSetup initializepage (peter; page: 9 of 16)setjob %%EndPageSetup gS 0 0 552 730 rC 63 54 :M f0_12 sf (4. Inference)S 63 86 :M f3_12 sf .831 .083(Accepting the Causal Markov assumption, I now turn to the subject of inference: moving)J 41 102 :M 1.032 .103(from statistical data to conclusions about causal structure. Beginning with statistical data and)J 41 118 :M 1.775 .178(background knowledge, we want to find all the possible causal structures that might have)J 41 134 :M .435 .043(generated these data. The fewer assumptions we make constraining the class of possible causal)J 41 150 :M .553 .055(structures, the weaker our inferential purchase. I should note, however, that it is not the job of)J 41 166 :M 1.075 .108(our theory to dictate which assumptions a practicing investigator should endorse, but only to)J 41 182 :M .85 .085(characterize what can and cannot be learned about the world given the particular assumptions)J 41 198 :M (chosen.)S 63 214 :M 1.172 .117(In this section I discuss a few of the assumptions that we have studied. 
There are many)J 41 230 :M .94 .094(others that we are now studying or that would be interesting to study. An enormous class of)J 41 246 :M .715 .071(problems I will not deal with at all involves statistical inference about independence: inferring)J 41 262 :M .692 .069(the set of independence relations in a population from a sample. In what follows I assume that)J 41 278 :M 1.904 .19(the data are statistically ideal and that in effect the population lies before us, so that any)J 41 294 :M (probabilistic independence claim can be decided with perfect reliability.)S 63 0 7 730 rC 63 326 :M 12 f6_1 :p 6 :m ( )S 67 326 :M 6 :m ( )S gR gS 0 0 552 730 rC 63 326 :M f3_12 sf 12 f6_1 :p 58.652 :m (Faithfulness)S 63 358 :M 1.309 .131(The first assumption I will discuss is )J f4_12 sf .394(Faithfulness)A f3_12 sf 1.203 .12(. By assuming that a causal graph is)J 41 374 :M 2.247 .225(Causally Markov, we assume that any population produced by this causal graph has the)J 41 390 :M .176 .018(independence relations obtained by applying d-separation to it. It does not follow, however, that)J 41 406 :M .383 .038(the population has exactly these and no additional independencies. For example, suppose Fig. 8)J 41 422 :M .69 .069(is a causal graph that truly describes the relations among exercise, smoking, and health, where)J 41 438 :M (the + and - signs indicate positive and inhibitory relations respectively.)S 381 433 :M f3_7 sf (1)S 384 433 :M (1)S 200 457 150 117 rC 216 480 :M f3_11 sf (Smoking )S 306 522 :M (Exercise)S 262 570 :M (Health)S 278 490 :M (+ )S 307 541 :M (+ )S 244 516 :M (_ )S 13 188 234 304 509 @k 264 486 -1 1 300 506 1 264 485 @a 13 -77 -28 287 555 @k -1 -1 292 551 1 1 310 526 @b 13 219 266 277 554 @k 246 494 -1 1 276 549 1 246 493 @a gR gS 0 0 552 730 rC 260 599 :M f0_12 sf (Fig. 
8)S 41 674 :M f3_12 sf ( )S 161 674 :M ( )S 41 671.48 -.48 .48 185.48 671 .48 41 671 @a 63 683 :M f3_6 sf (1)S 66 683 :M (1)S 69 687 :M f3_10 sf ( This example is from Cartwright \(1983\).)S endp %%Page: 10 10 %%BeginPageSetup initializepage (peter; page: 10 of 16)setjob %%EndPageSetup gS 0 0 552 730 rC 63 54 :M f3_12 sf .024 .002(In this case the Causal Markov assumption alone puts no constraints on the distributions that)J 41 70 :M .353 .035(this structure could produce, because we obtain no independencies whatsoever from applying d-)J 41 86 :M 1.643 .164(separation or the Markov condition to its DAG. But in some of the distributions that this)J 41 102 :M .038 .004(structure could produce, Smoking might be independent of Health \322by coincidence.\323 If Smoking)J 41 118 :M .475 .048(has a negative direct effect on Health, but Smoking has a positive effect on Exercise \(absurd as)J 41 134 :M .988 .099(this may seem\) and Exercise has a positive effect on Health, then Smoking serves to directly)J 41 150 :M .764 .076(inhibit Health and indirectly improve it. If the two effects happen to exactly balance and thus)J 41 166 :M .114 .011(cancel, then there might be no association at all between Smoking and Health. In such a case we)J 41 182 :M (say that the population is )S 164 182 :M f4_12 sf (unfaithful)S 211 182 :M f3_12 sf ( to the causal graph that generated it.)S 63 198 :M .616 .062(If there are any independence relations in the population that are not a consequence of the)J 41 214 :M 1.461 .146(Causal Markov condition \(or d-separation\), then the population is unfaithful. By assuming)J 41 230 :M .481 .048(Faithfulness we eliminate all such cases from consideration. Although at first this seems like a)J 41 246 :M 1.938 .194(hefty assumption, it really isn\325t. Assuming that a population is Faithful is to assume that)J 41 262 :M 1.816 .182(whatever independencies occur in it arise not from incredible coincidence but rather from)J 41 278 :M .363 .036(structure. 
Some form of this assumption is used in every science. When a theory cannot explain)J 41 294 :M .847 .085(an empirical regularity save by invoking a special parameterization, then scientists are uneasy)J 41 310 :M .07 .007(with the theory and look for an alternative that can explain the same regularity with structure and)J 41 326 :M .654 .065(not luck. In the causal modeling case, the regularities are \(conditional\) independence relations,)J 41 342 :M .059 .006(and the Faithfulness assumption is just one very clear codification of a preference for models that)J 41 358 :M .878 .088(explain these regularities by invoking structure and not by invoking luck. By no means is it a)J 41 374 :M .839 .084(guarantee; nature might indeed be capricious. But the existence of cases in which a procedure)J 41 390 :M .215 .021(that assumes Faithfulness fails seems an awfully weak argument against the possibility of causal)J 41 406 :M 1.026 .103(inference. Nevertheless, critics continue to create unfaithful cases and display them \(see, for)J 41 422 :M (example, David Freedman\325s long paper in this volume\).)S 63 438 :M .834 .083(Assuming Faithfulness seems reasonable and is widely embraced by practicing scientists.)J 41 454 :M .261 .026(The inferential advantage gained from the assumption in causal inference is enormous. Without)J 41 470 :M .684 .068(it, all we can say on the basis of independence data is that whatever causal structure generated)J 41 486 :M .646 .065(the data, it cannot imply any independence relations by d-separation that are not present in the)J 41 502 :M 1.303 .13(population. With it, we can say that whatever structure generated the data, it implies by d-)J 41 518 :M .502 .05(separation )J f4_12 sf .114(exactly)A 129 518 :M f3_12 sf 1.033 .103( the independence relations that are present in the population. 
For example,)J 41 534 :M 2.947 .295(suppose we have a population involving three variables X)J 359 536 :M f3_8 sf 1.066(1)A f3_12 sf 0 -2 rm 3.256 .326(, X)J 0 2 rm 383 536 :M f3_8 sf 1.066(2)A f3_12 sf 0 -2 rm 3.256 .326(, X)J 0 2 rm 407 536 :M f3_8 sf .689(3)A f3_12 sf 0 -2 rm 3.093 .309(, and suppose the)J 0 2 rm 41 550 :M (independence relations in this population are as given below.)S endp %%Page: 11 11 %%BeginPageSetup initializepage (peter; page: 11 of 16)setjob %%EndPageSetup gS 0 0 552 730 rC 77 54 :M f3_12 sf (All Possible)S 135 49 :M f3_7 sf (1)S 138 49 :M (2)S 141 54 :M f3_12 sf ( Independences )S 257 54 :M (In)S 365 54 :M (Not In)S 77 70 :M (among X)S f3_8 sf 0 2 rm (1)S 0 -2 rm f3_12 sf (, X)S 140 72 :M f3_8 sf (2)S f3_12 sf 0 -2 rm (, X)S 0 2 rm 159 72 :M f3_8 sf (3)S 257 70 :M f3_12 sf (Population)S 365 70 :M (Population)S 77 86 :M (_________________________________________________________)S 77 102 :M (X)S 86 104 :M f3_9 sf (1)S 91 102 :M f3_12 sf ( _||_ X)S f3_9 sf 0 2 rm (2)S 0 -2 rm 257 104 :M f1_12 sf S 77 118 :M f3_12 sf (X)S 86 120 :M f3_9 sf (1)S 91 118 :M f3_12 sf ( _||_ X)S f3_9 sf 0 2 rm (3)S 0 -2 rm 365 120 :M f1_12 sf S 77 134 :M f3_12 sf (X)S 86 136 :M f3_9 sf (2)S 91 134 :M f3_12 sf ( _||_ X)S f3_9 sf 0 2 rm (3)S 0 -2 rm 365 136 :M f1_12 sf S 77 150 :M f3_12 sf (X)S 86 152 :M f3_9 sf (1)S 91 150 :M f3_12 sf ( _||_ X)S f3_9 sf 0 2 rm (2 )S 0 -2 rm 129 150 :M f3_12 sf (| X)S f3_9 sf 0 2 rm (3)S 0 -2 rm 365 152 :M f1_12 sf S 77 166 :M f3_12 sf (X)S 86 168 :M f3_9 sf (1)S 91 166 :M f3_12 sf ( _||_ X)S f3_9 sf 0 2 rm (3 )S 0 -2 rm 129 166 :M f3_12 sf (| X)S f3_9 sf 0 2 rm (2)S 0 -2 rm 365 168 :M f1_12 sf S 77 182 :M f3_12 sf (X)S 86 184 :M f3_9 sf (2)S 91 182 :M f3_12 sf ( _||_ X)S f3_9 sf 0 2 rm (3 )S 0 -2 rm 129 182 :M f3_12 sf (| X)S f3_9 sf 0 2 rm (1)S 0 -2 rm 365 184 :M f1_12 sf S 63 214 :M f3_12 sf .317 .032(Even if we assume that all the Causally Markov graphs that might have produced data with)J 41 230 :M .645 .065(these 
independencies involve only X)J 223 232 :M f3_9 sf (1)S 228 230 :M f3_12 sf .995 .1(, X)J 244 232 :M f3_9 sf (2)S 249 230 :M f3_12 sf .784 .078(, and X)J f3_9 sf 0 2 rm (3)S 0 -2 rm 291 230 :M f3_12 sf .764 .076(, then there are still nine such graphs. Their)J 41 246 :M .341 .034(only shared feature is that each has some direct connection between X)J f3_9 sf 0 2 rm (1)S 0 -2 rm 389 246 :M f3_12 sf .379 .038( and X)J f3_9 sf 0 2 rm (3)S 0 -2 rm 427 246 :M f3_12 sf .392 .039( and between X)J 504 248 :M f3_9 sf (2)S 41 262 :M f3_12 sf (and X)S 70 264 :M f3_9 sf (3)S 75 262 :M f3_12 sf (. Adding Faithfulness reduces the set of nine to a singleton \(Fig. 9\).)S 243 291 15 12 rC 243 299 :M f3_11 sf (X)S gR gS 252 296 13 12 rC 252 304 :M f3_11 sf (1)S gR gS 232 281 107 86 rC 236.5 285.5 25 23 rS 278 347 15 12 rC 278 355 :M f3_11 sf (X)S gR gS 287 352 13 12 rC 287 360 :M f3_11 sf (3)S gR gS 232 281 107 86 rC 270.5 341.5 26 23 rS 319 291 14 12 rC 319 299 :M f3_11 sf (X)S gR gS 327 296 12 12 rC 327 304 :M f3_11 sf (2)S gR gS 232 281 107 86 rC 311.5 285.5 25 23 rS np 276 336 :M 264 330 :L 265 330 :L 265 330 :L 265 329 :L 265 329 :L 265 329 :L 265 329 :L 265 329 :L 266 329 :L 266 328 :L 266 328 :L 266 328 :L 266 328 :L 266 328 :L 266 328 :L 267 327 :L 267 327 :L 267 327 :L 267 327 :L 267 327 :L 267 327 :L 268 326 :L 268 326 :L 268 326 :L 268 326 :L 268 326 :L 268 326 :L 269 326 :L 269 326 :L 269 325 :L 269 325 :L 269 325 :L 270 325 :L 270 325 :L 270 325 :L 270 325 :L 270 325 :L 271 325 :L 271 324 :L 271 324 :L 271 324 :L 271 324 :L 272 324 :L 272 324 :L 272 324 :L 272 324 :L 273 324 :L 276 336 :L 276 336 :L eofill 258 313 -1 1 274 332 1 258 312 @a np 294 335 :M 296 323 :L 297 323 :L 297 323 :L 297 323 :L 297 323 :L 297 323 :L 298 323 :L 298 323 :L 298 323 :L 298 323 :L 299 323 :L 299 323 :L 299 323 :L 299 323 :L 299 324 :L 300 324 :L 300 324 :L 300 324 :L 300 324 :L 300 324 :L 301 324 :L 301 324 :L 301 324 :L 301 325 :L 301 325 :L 302 325 :L 302 325 :L 302 325 :L 302 325 :L 
302 325 :L 302 325 :L 303 326 :L 303 326 :L 303 326 :L 303 326 :L 303 326 :L 303 326 :L 304 327 :L 304 327 :L 304 327 :L 304 327 :L 304 327 :L 304 327 :L 304 328 :L 305 328 :L 305 328 :L 305 328 :L 305 328 :L 305 328 :L 294 335 :L 294 335 :L eofill -1 -1 299 332 1 1 315 310 @b gR gS 0 0 552 730 rC 260 392 :M f0_12 sf (Fig. 9)S 63 0 8 730 rC 63 424 :M f3_12 sf 12 f6_1 :p 6 :m ( )S 68 424 :M 6 :m ( )S gR gS 0 0 552 730 rC 63 424 :M f3_12 sf 12 f6_1 :p 90.961 :m (Causal Sufficiency)S 63 456 :M -.004(In this example we have managed to infer that both X)A 321 458 :M f3_9 sf (1)S 326 456 :M f3_12 sf ( and X)S 358 458 :M f3_9 sf (2)S 363 456 :M f3_12 sf -.004( are direct causes of X)A 470 458 :M f3_9 sf (3)S 475 456 :M f3_12 sf ( from a)S 41 472 :M .53 .053(single marginal independence between X)J 243 474 :M f3_9 sf (1)S 248 472 :M f3_12 sf .688 .069( and X)J f3_9 sf 0 2 rm (2)S 0 -2 rm 287 472 :M f3_12 sf .658 .066(. This gives many people pause, as it should.)J 41 488 :M 2.126 .213(We have achieved such enormous inferential leverage in this case not only by assuming)J 41 504 :M .479 .048(Faithfulness, but also by assuming Causal Sufficiency, which I noted above by writing: \322all the)J 41 520 :M (Causally Markov graphs that . . . involve only X)S 273 522 :M f3_9 sf (1)S 278 520 :M f3_12 sf (, X)S 293 522 :M f3_9 sf (2)S 298 520 :M f3_12 sf (, and X)S 333 522 :M f3_9 sf (3)S 338 520 :M f3_12 sf <2ED3>S 63 536 :M 1.329 .133(The assumption of Causal Sufficiency is satisfied if we have )J 378 536 :M f4_12 sf (measured)S 425 536 :M f3_12 sf 1.45 .145( all the common)J 41 552 :M .581 .058(causes of the measured variables. Although this sounds quite similar to the assumptions about)J 41 568 :M 1.16 .116(the completeness of causal graphs, it is not exactly the same thing. 
When we assume that a)J 41 584 :M .985 .098(causal graph is complete with respect to common causes, it is in service of being clear about)J 41 600 :M 1.498 .15(what sorts of systems we are representing with such graphs. In the inferential case we are)J 41 616 :M .339 .034(making two assumptions: one involves the existence of some causal graph that is complete with)J 41 632 :M .192 .019(respect to common causes and that is Causally Markov, and the other is an assumption about the)J 41 648 :M .727 .073(variables we have measured as opposed to those we have not. For example, we might build a)J 41 674 :M ( )S 161 674 :M ( )S 41 671.48 -.48 .48 185.48 671 .48 41 671 @a 63 683 :M f3_6 sf (1)S 66 683 :M (2)S 69 687 :M f3_10 sf ( All possible non-trivial independences, that is.)S endp %%Page: 12 12 %%BeginPageSetup initializepage (peter; page: 12 of 16)setjob %%EndPageSetup gS 0 0 552 730 rC 41 54 :M f3_12 sf .648 .065(model in which we specify a variable called Intelligence which we cannot directly or perfectly)J 41 70 :M (measure, but four of whose effects we can measure, say test scores X)S 373 72 :M f3_9 sf (1)S 378 70 :M f3_12 sf ( - X)S 397 72 :M f3_9 sf (4)S 402 70 :M f3_12 sf ( \(Fig. 
10\).)S 248 91 58 12 rC 248 99 :M f3_11 sf (I)S 252 99 :M (n)S 258 99 :M (t)S 261 99 :M (e)S 266 99 :M (l)S 269 99 :M (l)S 272 99 :M (i)S 275 99 :M (g)S 281 99 :M (e)S 286 99 :M (n)S 292 99 :M (c)S 297 99 :M (e)S gR gS 209 159 15 12 rC 209 167 :M f3_11 sf (X)S gR gS 218 164 13 12 rC 218 172 :M f3_11 sf (1)S gR gS 192 73 165 106 rC 201.5 150.5 25 26 rS 254 158 15 12 rC 254 166 :M f3_11 sf (X)S gR gS 263 163 13 12 rC 263 171 :M f3_11 sf (2)S gR gS 192 73 165 106 rC 246.5 149.5 25 26 rS 296 158 15 12 rC 296 166 :M f3_11 sf (X)S gR gS 305 163 13 12 rC 305 171 :M f3_11 sf (3)S gR gS 192 73 165 106 rC 288.5 149.5 25 26 rS 338 159 15 12 rC 338 167 :M f3_11 sf (X)S gR gS 347 164 10 12 rC 347 172 :M f3_11 sf (4)S gR gS 192 73 165 106 rC 330.5 150.5 25 26 rS 69 28 275 95.5 @f np 224 146 :M 227 133 :L 227 133 :L 227 133 :L 227 133 :L 228 134 :L 228 134 :L 228 134 :L 228 134 :L 228 134 :L 229 134 :L 229 134 :L 229 134 :L 229 134 :L 229 134 :L 230 134 :L 230 134 :L 230 135 :L 230 135 :L 230 135 :L 231 135 :L 231 135 :L 231 135 :L 231 135 :L 231 135 :L 232 136 :L 232 136 :L 232 136 :L 232 136 :L 232 136 :L 232 136 :L 233 136 :L 233 137 :L 233 137 :L 233 137 :L 233 137 :L 233 137 :L 233 137 :L 234 138 :L 234 138 :L 234 138 :L 234 138 :L 234 138 :L 234 138 :L 234 139 :L 235 139 :L 224 146 :L 224 146 :L eofill -1 -1 228 142 1 1 254 108 @b np 260 146 :M 257 133 :L 257 133 :L 257 133 :L 257 133 :L 258 133 :L 258 133 :L 258 133 :L 258 133 :L 258 133 :L 259 133 :L 259 133 :L 259 133 :L 259 133 :L 260 133 :L 260 133 :L 260 133 :L 260 133 :L 260 133 :L 261 133 :L 261 133 :L 261 133 :L 261 133 :L 262 133 :L 262 133 :L 262 133 :L 262 133 :L 262 133 :L 263 133 :L 263 133 :L 263 133 :L 263 134 :L 264 134 :L 264 134 :L 264 134 :L 264 134 :L 264 134 :L 265 134 :L 265 134 :L 265 134 :L 265 134 :L 265 134 :L 266 134 :L 266 135 :L 266 135 :L 266 135 :L 266 135 :L 260 146 :L 260 146 :L eofill -1 -1 262 141 1 1 267 111 @b np 298 146 :M 289 137 :L 289 137 :L 290 136 :L 290 136 :L 290 136 :L 290 136 
:L 290 136 :L 290 136 :L 291 136 :L 291 135 :L 291 135 :L 291 135 :L 291 135 :L 292 135 :L 292 135 :L 292 135 :L 292 135 :L 292 134 :L 293 134 :L 293 134 :L 293 134 :L 293 134 :L 293 134 :L 294 134 :L 294 134 :L 294 134 :L 294 134 :L 294 134 :L 295 134 :L 295 133 :L 295 133 :L 295 133 :L 295 133 :L 296 133 :L 296 133 :L 296 133 :L 296 133 :L 297 133 :L 297 133 :L 297 133 :L 297 133 :L 298 133 :L 298 133 :L 298 133 :L 298 133 :L 298 146 :L 298 146 :L eofill 285 110 -1 1 298 140 1 285 109 @a np 337 147 :M 326 140 :L 326 140 :L 326 140 :L 326 140 :L 326 140 :L 327 139 :L 327 139 :L 327 139 :L 327 139 :L 327 139 :L 327 139 :L 327 138 :L 328 138 :L 328 138 :L 328 138 :L 328 138 :L 328 138 :L 328 137 :L 329 137 :L 329 137 :L 329 137 :L 329 137 :L 329 137 :L 329 137 :L 330 136 :L 330 136 :L 330 136 :L 330 136 :L 330 136 :L 331 136 :L 331 136 :L 331 136 :L 331 135 :L 331 135 :L 332 135 :L 332 135 :L 332 135 :L 332 135 :L 332 135 :L 333 135 :L 333 135 :L 333 135 :L 333 135 :L 337 147 :L 337 147 :L eofill 305 106 -1 1 336 142 1 305 105 @a gR gS 0 0 552 730 rC 257 204 :M f0_12 sf (Fig. 
10)S 41 236 :M f3_12 sf 1.051 .105(Supposing that the causal graph among Intelligence and X)J 336 238 :M f3_9 sf (1)S 341 236 :M f3_12 sf 1.191 .119( - X)J f3_9 sf 0 2 rm (4)S 0 -2 rm 368 236 :M f3_12 sf 1.152 .115( is complete with respect to)J 41 252 :M .44 .044(common causes, and that it is Causally Markov and Faithful to whatever population it produces)J 41 268 :M .589 .059(over {Intelligence, X)J 145 270 :M f3_9 sf (1)S 150 268 :M f3_12 sf .791 .079( - X)J f3_9 sf 0 2 rm (4)S 0 -2 rm 176 268 :M f3_12 sf .728 .073(}, then the following list of the independence relations will hold in)J 41 284 :M (this population.)S 185 316 :M (X)S 194 318 :M f3_9 sf (1)S 199 316 :M f3_12 sf ( _||_ X)S f3_9 sf 0 2 rm (2 )S 0 -2 rm 237 316 :M f3_12 sf (| Intelligence)S 185 332 :M (X)S 194 334 :M f3_9 sf (1)S 199 332 :M f3_12 sf ( _||_ X)S f3_9 sf 0 2 rm (3 )S 0 -2 rm 237 332 :M f3_12 sf (| Intelligence)S 185 348 :M (X)S 194 350 :M f3_9 sf (1)S 199 348 :M f3_12 sf ( _||_ X)S f3_9 sf 0 2 rm (4 )S 0 -2 rm 237 348 :M f3_12 sf (| Intelligence)S 185 364 :M (X)S 194 366 :M f3_9 sf (2)S 199 364 :M f3_12 sf ( _||_ X)S f3_9 sf 0 2 rm (3 )S 0 -2 rm 237 364 :M f3_12 sf (| Intelligence)S 185 380 :M (X)S 194 382 :M f3_9 sf (2)S 199 380 :M f3_12 sf ( _||_ X)S f3_9 sf 0 2 rm (4 )S 0 -2 rm 237 380 :M f3_12 sf (| Intelligence)S 185 396 :M (X)S 194 398 :M f3_9 sf (3)S 199 396 :M f3_12 sf ( _||_ X)S f3_9 sf 0 2 rm (4 )S 0 -2 rm 237 396 :M f3_12 sf (| Intelligence)S 63 428 :M .648 .065(Since Intelligence is unmeasured, however, our data will only include independencies that)J 41 444 :M 2.162 .216(do not involve it, which in this case is the empty set. 
Thus the causal graph involving)J 41 460 :M .223 .022(Intelligence and X)J 131 462 :M f3_9 sf (1)S 136 460 :M f3_12 sf .379 .038( - X)J f3_9 sf 0 2 rm .299 .03(4 )J 0 -2 rm 163 460 :M f3_12 sf .246 .025(is complete with respect to common causes, but the measured variables)J 41 476 :M (X)S 50 478 :M f3_9 sf (1)S 55 476 :M f3_12 sf .738 .074( - X)J 76 478 :M f3_9 sf .771 .077(4 )J 84 476 :M f3_12 sf .484 .048(are not Causally Sufficient. To summarize, the Causal Markov assumption, although it)J 41 492 :M .258 .026(involves a representational form of causal sufficiency, is an assumption about the way causation)J 41 508 :M .841 .084(and probability are connected, while Causal Sufficiency is an assumption about what we have)J 41 524 :M (managed to measure. I have so far discussed three different assumptions:)S 77 554 :M (1\) the Causal Markov assumption: upon accurately specifying a causal graph G among)S 77 566 :M (some set of variables V \(in which V includes all the common causes of pairs in V\), )S f4_12 sf (at)S 77 578 :M (least)S 100 578 :M f3_12 sf ( the independence relations obtained by applying d-separation to G hold in the)S 77 590 :M (population probability distribution over V.)S 77 614 :M (2\) the Faithfulness assumption: )S f4_12 sf (exactly)S 267 614 :M f3_12 sf ( the independence relations obtained by applying)S 77 626 :M (d-separation to G hold in the probability distribution over V.)S 77 650 :M (3\) the Causal Sufficiency assumption: the set of )S 310 650 :M f4_12 sf (measured)S 357 650 :M f3_12 sf ( variables M include all of the)S 77 662 :M (common causes of pairs in M.)S endp %%Page: 13 13 %%BeginPageSetup initializepage (peter; page: 13 of 16)setjob %%EndPageSetup gS 0 0 552 730 rC 63 54 :M f3_12 sf .417 .042(In the example concerning Faithfulness, we managed to infer the unique causal structure in)J 41 70 :M .258 .026(Fig. 
9 from the single marginal independence X)J 274 72 :M f3_9 sf (1)S 279 70 :M f3_12 sf .275 .027( _||_ X)J f3_9 sf 0 2 rm (2)S 0 -2 rm 316 70 :M f3_12 sf .293 .029( by making all three assumptions. It is)J 41 86 :M .871 .087(still possible to make inferences about the structure\(s\) underlying the data without the Causal)J 41 102 :M (Sufficiency assumption, but of course we can learn less.)S 63 118 :M 1.047 .105(When we do not assume Causal Sufficiency, we still assume that there )J 425 118 :M f4_12 sf 1.216 .122(is some )J 467 118 :M f3_12 sf (structure)S 41 134 :M .631 .063(involving the measured variables \(and perhaps other variables\) that is complete with respect to)J 41 150 :M .236 .024(common causes and that satisfies the Causal Markov assumption, but we must acknowledge that)J 41 166 :M 1.072 .107(we might not have measured all the common causes. So whatever algorithm we use to move)J 41 182 :M 3.193 .319(from independence relations to all the causal graphs that might have produced these)J 41 198 :M .818 .082(independence relations, the set of graphs must include members that have common causes we)J 41 214 :M .579 .058(have not measured. In the example in Fig. 9, we have measured X)J 373 216 :M f3_9 sf (1)S 378 214 :M f3_12 sf (-X)S 391 216 :M f3_9 sf (3)S 396 214 :M f3_12 sf .555 .055(, and observed a single)J 41 230 :M .811 .081(independence: X)J f3_9 sf 0 2 rm (1)S 0 -2 rm 129 230 :M f3_12 sf 1.494 .149( _||_ X)J f3_9 sf 0 2 rm (2)S 0 -2 rm 174 230 :M f3_12 sf 1.411 .141(. If we assume Causal Markov and Faithfulness, but not Causal)J 41 246 :M .832 .083(Sufficiency, the set of 10 causal graphs that would produce exactly this independence appears)J 41 262 :M .386 .039(below \(Fig. 
11\), where the T variables in circles are the common causes that we might not have)J 41 278 :M (measured.)S 57 297 436 358 rC 13 -86 -38 247 350 @k -1 -1 251 346 1 1 260 327 @b 200 321 :M f3_11 sf (X )S 209 326 :M (1 )S 261 322 :M (X )S 270 327 :M (2 )S 235 359 :M (X )S 244 364 :M (3 )S 152 405 12.5 @e 146 407 :M (T )S 153 410 :M (1 )S 13 201 249 234 349 @k 214 329 -1 1 231 345 1 214 328 @a 13 -42 9 116 423 @k -1 -1 123 423 1 1 142 416 @b 69 394 :M (X )S 78 399 :M (1 )S 103 393 :M (X )S 112 398 :M (2 )S 102 435 :M (X )S 111 440 :M (3 )S 13 201 249 103 422 @k 83 402 -1 1 100 418 1 83 401 @a 13 -77 -28 271 428 @k -1 -1 276 424 1 1 289 406 @b 248 400 :M (X )S 257 405 :M (1 )S 289 401 :M (X )S 298 406 :M (2 )S 256 436 :M (X )S 265 441 :M (3 )S 13 144 193 244 397 @k -1 -1 222 404 1 1 238 398 @b 377 398 :M (X )S 386 403 :M (1 )S 402 398 :M (X )S 411 403 :M (2 )S 395 439 :M (X )S 404 444 :M (3 )S 13 -9 41 120 390 @k 126 393 -1 1 141 397 1 126 392 @a 205 404 12.5 @e 199 406 :M (T )S 206 409 :M (1 )S 13 193 240 252 435 @k 218 411 -1 1 248 432 1 218 410 @a 13 -42 9 412 433 @k -1 -1 419 433 1 1 438 426 @b 13 -4 45 421 401 @k 427 404 -1 1 437 407 1 427 403 @a 13 142 188 376 399 @k -1 -1 356 407 1 1 370 401 @b 339 407 12.5 @e 333 409 :M (T )S 340 412 :M (1 )S 13 184 230 391 434 @k 352 414 -1 1 387 431 1 352 413 @a 448 415 12.5 @e 442 417 :M (T )S 449 420 :M (2 )S 407 489 :M (X )S 416 494 :M (1 )S 432 489 :M (X )S 441 494 :M (2 )S 425 530 :M (X )S 434 535 :M (3 )S 13 -42 9 442 524 @k -1 -1 449 524 1 1 468 517 @b 13 -4 45 451 492 @k 457 495 -1 1 467 498 1 457 494 @a 13 142 188 406 490 @k -1 -1 386 498 1 1 400 492 @b 369 498 12.5 @e 363 500 :M (T )S 370 503 :M (1 )S 13 184 230 421 525 @k 382 505 -1 1 417 522 1 382 504 @a 478 506 12.5 @e 472 508 :M (T )S 479 511 :M (2 )S 260 493 :M (X )S 269 498 :M (1 )S 285 493 :M (X )S 294 498 :M (2 )S 278 534 :M (X )S 287 539 :M (3 )S 13 -42 9 295 528 @k -1 -1 302 528 1 1 321 521 @b 13 -4 45 304 496 @k 310 499 -1 1 320 502 1 310 498 @a 13 142 
188 259 494 @k -1 -1 239 502 1 1 253 496 @b 222 502 12.5 @e 216 504 :M (T )S 223 507 :M (1 )S 13 184 230 274 529 @k 235 509 -1 1 270 526 1 235 508 @a 331 510 12.5 @e 325 512 :M (T )S 332 515 :M (2 )S 111 485 :M (X )S 120 490 :M (1 )S 136 485 :M (X )S 145 490 :M (2 )S 129 526 :M (X )S 138 531 :M (3 )S 13 -42 9 146 520 @k -1 -1 153 520 1 1 172 513 @b 13 -4 45 155 488 @k 161 491 -1 1 171 494 1 161 490 @a 13 142 188 110 486 @k -1 -1 90 494 1 1 104 488 @b 73 494 12.5 @e 67 496 :M (T )S 74 499 :M (1 )S 13 184 230 125 521 @k 86 501 -1 1 121 518 1 86 500 @a 182 502 12.5 @e 176 504 :M (T )S 184 508 :M (2 )S 80 458 -3 3 457 455 3 80 455 @a -3 -3 350 553 3 3 347 456 @b -3 -3 203 555 3 3 200 458 @b -3 -3 182 457 3 3 179 372 @b -3 -3 321 458 3 3 318 372 @b 81 373 -3 3 458 370 3 81 370 @a 13 216 261 131 513 @k 119 495 -1 1 129 508 1 119 494 @a 13 216 261 428 518 @k 416 500 -1 1 426 513 1 416 499 @a 13 261 309 284 523 @k -1 -1 286 518 1 1 291 500 @b 13 261 309 432 518 @k -1 -1 434 513 1 1 439 495 @b 408 588 :M (X )S 417 593 :M (1 )S 433 588 :M (X )S 442 593 :M (2 )S 426 629 :M (X )S 435 634 :M (3 )S 13 -42 9 443 623 @k -1 -1 450 623 1 1 469 616 @b 13 -4 45 452 591 @k 458 594 -1 1 468 597 1 458 593 @a 13 142 188 407 589 @k -1 -1 387 597 1 1 401 591 @b 370 597 12.5 @e 364 599 :M (T )S 371 602 :M (1 )S 13 184 230 422 624 @k 383 604 -1 1 418 621 1 383 603 @a 479 605 12.5 @e 473 607 :M (T )S 480 610 :M (2 )S 237 583 :M (X )S 246 588 :M (1 )S 269 584 :M (X )S 278 589 :M (2 )S 262 625 :M (X )S 271 630 :M (3 )S 13 -42 9 279 619 @k -1 -1 286 619 1 1 305 612 @b 13 -4 45 288 587 @k 294 590 -1 1 304 593 1 294 589 @a 13 210 257 260 614 @k 244 591 -1 1 258 609 1 244 590 @a 315 601 12.5 @e 309 603 :M (T )S 316 606 :M (2 )S 131 580 :M (X )S 140 585 :M (1 )S 165 581 :M (X )S 174 586 :M (2 )S 149 621 :M (X )S 158 626 :M (3 )S 13 266 312 159 610 @k -1 -1 162 605 1 1 167 588 @b 13 142 188 130 581 @k -1 -1 110 589 1 1 124 583 @b 93 589 12.5 @e 87 591 :M (T )S 94 594 :M (1 )S 13 184 230 145 616 @k 106 
596 -1 1 141 613 1 106 595 @a 81 557 -3 3 458 554 3 81 554 @a -3 -3 351 652 3 3 348 555 @b -3 -3 204 654 3 3 201 557 @b 13 216 261 151 608 @k 139 590 -1 1 149 603 1 139 589 @a 13 216 261 429 617 @k 417 599 -1 1 427 612 1 417 598 @a 13 261 309 268 614 @k -1 -1 270 609 1 1 275 591 @b 13 261 309 433 617 @k -1 -1 435 612 1 1 440 594 @b gR gS 0 0 552 730 rC 257 680 :M f0_12 sf (Fig. 11)S endp %%Page: 14 14 %%BeginPageSetup initializepage (peter; page: 14 of 16)setjob %%EndPageSetup gS 0 0 552 730 rC 63 70 :M f3_12 sf .486 .049(In fact this set is still too small, for where we have specified a single unmeasured common)J 41 86 :M .695 .069(cause of two variables \(such as X)J 208 88 :M f3_9 sf (1)S 213 86 :M f3_12 sf .718 .072( and X)J f3_9 sf 0 2 rm (3)S 0 -2 rm 252 86 :M f3_12 sf .761 .076(\) and named it "T)J 341 88 :M f3_9 sf (1)S 346 86 :M f3_12 sf .687 .069(," in actuality there might be any)J 41 102 :M 1.008 .101(number of distinct unmeasured common causes of X)J 306 104 :M f3_9 sf (1)S 311 102 :M f3_12 sf 1.136 .114( and X)J f3_9 sf 0 2 rm (3)S 0 -2 rm 351 102 :M f3_12 sf 1.225 .123(. So wherever T)J 438 104 :M f3_9 sf (1)S 443 102 :M f3_12 sf 1.203 .12( appears as a)J 41 118 :M 2.531 .253(common cause \(of say X)J 176 120 :M f3_9 sf (1)S 181 118 :M f3_12 sf 2.603 .26( and X)J f3_9 sf 0 2 rm (3)S 0 -2 rm 225 118 :M f3_12 sf 2.412 .241(\) this is really an abbreviation for: there exists some)J 41 134 :M (unmeasured common cause of X)S f3_9 sf 0 2 rm (1)S 0 -2 rm 203 134 :M f3_12 sf ( and X)S 235 136 :M f3_9 sf (3)S 240 134 :M f3_12 sf (.)S 63 150 :M .415 .042(Although dropping the assumption of Causal Sufficiency has reduced our inferential power)J 41 166 :M .395 .04(considerably, it has not completely eliminated it. Notice that in none of the structures in Fig. 11)J 41 182 :M .112 .011(is X)J 61 184 :M f3_9 sf (3)S 66 182 :M f3_12 sf .093 .009( a cause of any other variable. 
So we have learned something about what causal relations do)J 41 198 :M -.005(not exist: X)A 100 200 :M f3_9 sf (3)S 105 198 :M f3_12 sf -.006( is not a cause of X)A 197 200 :M f3_9 sf (1)S 202 198 :M f3_12 sf -.008( or of X)A 240 200 :M f3_9 sf (2)S 245 198 :M f3_12 sf -.003(, even though it is associated with both. In other words,)A 41 214 :M .908 .091(we have inferred from independence data the following: if we were to ideally manipulate X)J f3_9 sf 0 2 rm (3)S 0 -2 rm 506 214 :M f3_12 sf (,)S 41 230 :M (then we would do nothing to alter X)S 215 232 :M f3_9 sf (1)S 220 230 :M f3_12 sf ( or X)S 245 232 :M f3_9 sf (2)S 250 230 :M f3_12 sf (.)S 63 246 :M .41 .041(Can we ever gain knowledge about what causal relations do exist without assuming Causal)J 41 262 :M .869 .087(Sufficiency? Yes, but not unless we either measure at least four variables or make additional)J 41 278 :M 1.164 .116(assumptions. For example, if the following independencies are observed among X)J 457 280 :M f3_9 sf (1)S 462 278 :M f3_12 sf (-X)S 475 280 :M f3_9 sf (4)S 480 278 :M f3_12 sf 1.373 .137(, then)J 41 294 :M 1.714 .171(assuming Causal Markov and Faithfulness we can conclude that in every graph that could)J 41 310 :M (possibly have generated this data, X)S f3_9 sf 0 2 rm (3)S 0 -2 rm 219 310 :M f3_12 sf ( is a cause of X)S 293 312 :M f3_9 sf (4)S 298 310 :M f3_12 sf (.)S 221 342 :M (X)S 230 344 :M f3_9 sf (1)S 235 342 :M f3_12 sf ( _||_ X)S f3_9 sf 0 2 rm (2)S 0 -2 rm 221 358 :M f3_12 sf (X)S 230 360 :M f3_9 sf (1)S 235 358 :M f3_12 sf ( _||_ X)S f3_9 sf 0 2 rm (4)S 0 -2 rm 271 358 :M f3_12 sf ( | X)S f3_9 sf 0 2 rm (3)S 0 -2 rm 221 374 :M f3_12 sf (X)S 230 376 :M f3_9 sf (2)S 235 374 :M f3_12 sf ( _||_ X)S f3_9 sf 0 2 rm (4)S 0 -2 rm 271 374 :M f3_12 sf ( | X)S f3_9 sf 0 2 rm (3)S 0 -2 rm 41 406 :M f3_12 sf .347 .035(That is: if Causal Markov and Faithfulness are satisfied, then from these independence relations)J 41 422 :M (we can conclude that a manipulation of X)S f3_9 sf 0 2 rm (3)S 
0 -2 rm 246 422 :M f3_12 sf ( would change the probability of X)S 414 424 :M f3_9 sf (4)S 419 422 :M f3_12 sf (.)S 63 438 :M .997 .1(Adding other sorts of knowledge often improves the situation, e.g., knowledge about the)J 41 454 :M 1.344 .134(time order of the variables. In the following case from James Robins, for example, we can)J 41 470 :M 1.154 .115(obtain knowledge that one variable is a cause of another when we have only measured three)J 41 486 :M -.005(variables, and we do so by assuming Causal Markov, Faithfulness, but not Causal Sufficiency. If)A 41 502 :M .103 .01(we know that X)J 118 504 :M f3_9 sf (1)S 123 502 :M f3_12 sf .091 .009( occurs before X)J f3_9 sf 0 2 rm (2)S 0 -2 rm 208 502 :M f3_12 sf .099 .01( and X)J f3_9 sf 0 2 rm (2)S 0 -2 rm 245 502 :M f3_12 sf .105 .011( before X)J 291 504 :M f3_9 sf (3)S 296 502 :M f3_12 sf .097 .01(, and we know that in the population X)J f3_9 sf 0 2 rm (1)S 0 -2 rm 489 502 :M f3_12 sf .105 .011( _||_)J 41 518 :M (X)S 50 520 :M f3_9 sf (3)S 55 518 :M f3_12 sf .734 .073( | X)J 78 520 :M f3_9 sf (2)S 83 518 :M f3_12 sf .515 .051(, then under these assumptions we can conclude that X)J f3_9 sf 0 2 rm (2)S 0 -2 rm 358 518 :M f3_12 sf .64 .064( is a cause of X)J 437 520 :M f3_9 sf (3)S 442 518 :M f3_12 sf .594 .059(. We can also)J 41 534 :M (conclude that there is no unmeasured common cause of X)S 319 536 :M f3_9 sf (2)S 324 534 :M f3_12 sf ( and X)S 356 536 :M f3_9 sf (3)S 361 534 :M f3_12 sf (.)S 63 566 :M f0_12 sf (5. Conclusion)S 63 598 :M f3_12 sf 1.011 .101(Contrary to what some take to be our purpose, we are not about trying to magically pull)J 41 614 :M .777 .078(causal rabbits out of a statistical hat. Our theory of causal inference investigates what can and)J 41 630 :M 1.899 .19(cannot be learned about causal structure from a set of assumptions that seem to be made)J 41 646 :M .916 .092(commonly in scientific practice. 
It is thus a theory about the inferential effect of a variety of)J 41 662 :M .101 .01(assumptions far more than it is an endorsement of particular assumptions. There are situations in)J 41 678 :M .233 .023(which it is unreasonable to endorse the Causal Markov assumption \(e.g., in quantum mechanical)J endp %%Page: 15 15 %%BeginPageSetup initializepage (peter; page: 15 of 16)setjob %%EndPageSetup gS 0 0 552 730 rC 41 54 :M f3_12 sf .39 .039(settings\), Causal Sufficiency rarely seems reasonable, and there are certain situations where one)J 41 70 :M 1.144 .114(might not want to assume Faithfulness \(e.g., if some variables are completely determined by)J 41 86 :M 1.532 .153(others\). In the Robins case above, for example, we inferred that there was no unmeasured)J 41 102 :M .181 .018(common cause, or "confounder," of X)J 226 104 :M f3_9 sf (2)S 231 102 :M f3_12 sf .204 .02( and X)J f3_9 sf 0 2 rm (3)S 0 -2 rm 268 102 :M f3_12 sf .167 .017(. Robins believes that in epidemiological contexts)J 41 118 :M 1.037 .104(there are always unmeasured confounders, and thus makes an informal Bayesian argument in)J 41 134 :M 1.141 .114(which he decides that his degrees of belief favor giving up Faithfulness before accepting the)J 41 150 :M (conclusion that in this case it forced.)S 63 166 :M .158 .016(If our theory has any good effect on practice, it will be as much to cast doubt on pet theories)J 41 182 :M 1.101 .11(by making it easy to show that reasonable and equivalent alternatives exist than it will be to)J 41 198 :M .163 .016(extract causal conclusions from statistical data. If it succeeds in clarifying the scientific rationale)J 41 214 :M .667 .067(that underlies causal inference, which is our real goal, then its most important effect will be to)J 41 230 :M (change the way studies are designed and data is collected.)S 258 262 :M f0_12 sf (References)S 41 298 :M f3_12 sf (Bollen, K. 1989. Structural equations with latent variables. Wiley, New York.)S 41 334 :M (Cartwright, N. 
1983. How the Laws of Physics Lie. Oxford University Press, New York.)S 41 368 :M (Cartwright, N. 1989. Nature's capacities and Their Measurement. Clarendon Press, Oxford.)S 41 404 :M .732 .073(Dawid, A. 1979. \322Conditional independence in statistical theory \(with discussion\).\323 Journal of)J 59 422 :M (the Royal Statistical Society B, 41, 1-31.)S 41 458 :M 1.882 .188(Glymour, C., Scheines, R., Spirtes, P., and Kelly, K. 1987. Discovering causal structure.)J 59 476 :M (Academic Press, San Diego, CA.)S 41 512 :M (Hausman, D. \(1984\). \322Causal priority.\323 Nous 18, 261-279.)S 41 548 :M .086 .009(Kiiveri, H. and Speed, T. 1982. "Structural analysis of multivariate data: A review." Sociological)J 59 566 :M (Methodology, Leinhardt, S. \(ed.\). Jossey-Bass, San Francisco.)S 41 602 :M (Papineau, D. 1985. \322Causal Asymmetry.\323 British Journal of Philosophy of Science, 36: 273-289.)S 41 638 :M .472 .047(Pearl, J. 1988. Probabilistic reasoning in intelligent systems)J 335 638 :M f0_12 sf .13 .013(. )J f3_12 sf .627 .063(Morgan and Kaufman, San Mateo)J 59 656 :M (CA.)S endp %%Page: 16 16 %%BeginPageSetup initializepage (peter; page: 16 of 16)setjob %%EndPageSetup gS 0 0 552 730 rC 41 56 :M f3_12 sf (Reichenbach, H. 1956. The direction of time. Univ. of California Press, Berkeley, CA.)S 41 92 :M .561 .056(Salmon, W. 1984. Scientific explanation and the causal structure of the world. Princeton Univ.)J 59 110 :M (Press, Princeton, NJ.)S 41 146 :M 2.225 .223(Scheines, R., Spirtes, P., Glymour, C., and Meek, C. 1994. TETRAD II: User's Manual,)J 59 164 :M (Lawrence Erlbaum and Associates, Hillsdale, NJ.)S 41 200 :M 1.372 .137(Skyrms, B. 1980. Causal necessity: a pragmatic investigation of the necessity of laws. Yale)J 59 218 :M (University Press, New Haven.)S 41 254 :M -.005(Spirtes, P., Glymour, C., and Scheines, R. 1993. Causation, prediction, and search. Lecture Notes)A 59 272 :M (in Statistics, V. 81, Springer-Verlag, New York.)S 41 308 :M .064 .006(Spirtes, P. 
1994. "Conditional Independence in Directed Cyclic Graphical Models for Feedback.")J 59 326 :M .647 .065(Technical Report CMU-PHIL-54, Department of Philosophy, Carnegie Mellon University,)J 59 344 :M (Pittsburgh, PA.)S 41 380 :M (Suppes, P. 1970. A probabilistic theory of causality. North-Holland, Amsterdam.)S endp %%Trailer end %%EOF