本文主要是介绍PostgreSQL attr correlation for values(logical order) ctid (physcial order),希望对大家解决编程问题提供一定的参考价值,需要的开发者们随着小编来一起学习吧!

Postgres2015全国用户大会将于11月20至21日在北京丽亭华苑酒店召开。本次大会嘉宾阵容强大,国内顶级PostgreSQL数据库专家将悉数到场,并特邀欧洲、俄罗斯、日本、美国等国家和地区的数据库方面专家助阵:
- Postgres-XC项目的发起人铃木市一(SUZUKI Koichi)
- Postgres-XL的项目发起人Mason Sharp
- pgpool的作者石井达夫(Tatsuo Ishii)
- PG-Strom的作者海外浩平(Kaigai Kohei)
- Greenplum研发总监姚延栋
- 周正中(德哥), PostgreSQL中国用户会创始人之一
- 汪洋,平安科技数据库技术部经理
- ……
|
![]() |
postgres=# \d pg_statsView "pg_catalog.pg_stats"Column | Type | Modifiers------------------------+----------+-----------schemaname | name |tablename | name |attname | name |inherited | boolean |null_frac | real |avg_width | integer |n_distinct | real |most_common_vals | anyarray |most_common_freqs | real[] |histogram_bounds | anyarray |correlation | real |most_common_elems | anyarray |most_common_elem_freqs | real[] |elem_count_histogram | real[] |
年广告费投入 | 月均销售额 |
---|---|
12.5 15.3 23.2 26.4 33.5 34.4 39.4 45.2 55.4 60.9 | 21.2 23.9 32.9 34.1 42.5 43.2 49.0 52.8 59.4 63.5 |
序号 | 广告投入(万元) x | 月均销售额(万元) y | | | |
---|---|---|---|---|---|
1 2 3 4 5 6 7 8 9 10 | 12.5 15.3 23.2 26.4 33.5 34.4 39.4 45.2 55.4 60.9 | 21.2 23.9 32.9 34.1 42.5 43.2 49.0 52.8 59.4 63.5 | 156.25 234.09 538.24 696.96 1122.25 1183.36 1552.36 2043.04 3069.16 3708.81 | 449.44 571.21 1082.41 1162.81 1806.25 1866.24 2401.00 2787.84 3528.36 4032.25 | 265.00 365.67 763.28 900.24 1423.75 1486.08 1930.60 2386.56 3290.76 3867.15 |
合计 | 346.2 | 422.5 | 14304.52 | 19687.81 | 16679.09 |
- =0.9942

postgres=# \d+ pg_statsCASEWHEN s.stakind1 = 3 THEN s.stanumbers1[1]WHEN s.stakind2 = 3 THEN s.stanumbers2[1]WHEN s.stakind3 = 3 THEN s.stanumbers3[1]WHEN s.stakind4 = 3 THEN s.stanumbers4[1]WHEN s.stakind5 = 3 THEN s.stanumbers5[1]ELSE NULL::realEND AS correlation,。。。FROM pg_statistic s
JOIN pg_class c ON c.oid = s.starelid
JOIN pg_attribute a ON c.oid = a.attrelid AND a.attnum = s.staattnum
LEFT JOIN pg_namespace n ON n.oid = c.relnamespace
WHERE NOT a.attisdropped AND has_column_privilege(c.oid, a.attnum, 'select'::text);
/** Now scan the values in order, find the most common ones, and also * accumulate ordering-correlation statistics.** To determine which are most common, we first have to count the* number of duplicates of each value. The duplicates are adjacent in* the sorted list, so a brute-force approach is to compare successive* datum values until we find two that are not equal. However, that* requires N-1 invocations of the datum comparison routine, which are* completely redundant with work that was done during the sort. (The* sort algorithm must at some point have compared each pair of items* that are adjacent in the sorted order; otherwise it could not know* that it's ordered the pair correctly.) We exploit this by having* compare_scalars remember the highest tupno index that each* ScalarItem has been found equal to. At the end of the sort, a* ScalarItem's tupnoLink will still point to itself if and only if it * is the last item of its group of duplicates (since the group will* be ordered by tupno).*/corr_xysum = 0;ndistinct = 0;nmultiple = 0;dups_cnt = 0;for (i = 0; i < values_cnt; i++){int tupno = values[i].tupno;corr_xysum += ((double) i) * ((double) tupno);dups_cnt++;if (tupnoLink[tupno] == tupno){/* Reached end of duplicates of this value */ndistinct++;if (dups_cnt > 1){nmultiple++;if (track_cnt < num_mcv ||dups_cnt > track[track_cnt - 1].count){/** Found a new item for the mcv list; find its* position, bubbling down old items if needed. Loop* invariant is that j points at an empty/ replaceable* slot.*/int j;if (track_cnt < num_mcv)track_cnt++;for (j = track_cnt - 1; j > 0; j--){if (dups_cnt <= track[j - 1].count)break;track[j].count = track[j - 1].count;track[j].first = track[j - 1].first;}track[j].count = dups_cnt;track[j].first = i + 1 - dups_cnt;}}dups_cnt = 0;}}........................./* Generate a correlation entry if there are multiple values */if (values_cnt > 1){MemoryContext old_context;float4 *corrs;double corr_xsum,corr_x2sum;/* Must copy the target values into anl_context */old_context = MemoryContextSwitchTo(stats->anl_context);corrs = (float4 *) palloc(sizeof(float4));MemoryContextSwitchTo(old_context);/*----------* Since we know the x and y value sets are both* 0, 1, ..., values_cnt-1* we have sum(x) = sum(y) =* (values_cnt-1)*values_cnt / 2* and sum(x^2) = sum(y^2) =* (values_cnt-1)*values_cnt*(2*values_cnt-1) / 6.*----------*/corr_xsum = ((double) (values_cnt - 1)) *((double) values_cnt) / 2.0;corr_x2sum = ((double) (values_cnt - 1)) *((double) values_cnt) * (double) (2 * values_cnt - 1) / 6.0;/* And the correlation coefficient reduces to */corrs[0] = (values_cnt * corr_xysum - corr_xsum * corr_xsum) /(values_cnt * corr_x2sum - corr_xsum * corr_xsum);stats->stakind[slot_idx] = STATISTIC_KIND_CORRELATION;stats->staop[slot_idx] = mystats->ltopr;stats->stanumbers[slot_idx] = corrs;stats->numnumbers[slot_idx] = 1;slot_idx++;}
Datumfloat8_corr(PG_FUNCTION_ARGS){ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0);float8 *transvalues;float8 N,sumX,sumX2,sumY,sumY2,sumXY,numeratorX,numeratorY,numeratorXY;transvalues = check_float8_array(transarray, "float8_corr", 6);N = transvalues[0];sumX = transvalues[1];sumX2 = transvalues[2];sumY = transvalues[3];sumY2 = transvalues[4];sumXY = transvalues[5];/* if N is 0 we should return NULL */if (N < 1.0)PG_RETURN_NULL();numeratorX = N * sumX2 - sumX * sumX;CHECKFLOATVAL(numeratorX, isinf(sumX2) || isinf(sumX), true);numeratorY = N * sumY2 - sumY * sumY;CHECKFLOATVAL(numeratorY, isinf(sumY2) || isinf(sumY), true);numeratorXY = N * sumXY - sumX * sumY;CHECKFLOATVAL(numeratorXY, isinf(sumXY) || isinf(sumX) ||isinf(sumY), true);if (numeratorX <= 0 || numeratorY <= 0)PG_RETURN_NULL();PG_RETURN_FLOAT8(numeratorXY / sqrt(numeratorX * numeratorY));}
postgres=# create table t(id int);CREATE TABLEpostgres=# insert into t values (2),(5),(8),(3),(4),(6),(9),(7),(1);INSERT 0 9
postgres=# select ctid,* from t;ctid | id-------+----(0,1) | 2(0,2) | 5(0,3) | 8(0,4) | 3(0,5) | 4(0,6) | 6(0,7) | 9(0,8) | 7(0,9) | 1(9 rows)
postgres=# select * from (select row_number() over(order by ctid) as rn, * from t) as t(rn,id);rn | id----+----1 | 22 | 53 | 84 | 35 | 46 | 67 | 98 | 79 | 1(9 rows)
postgres=# analyze t;ANALYZE
postgres=# select * from pg_stats where attname ='id' and tablename='t';-[ RECORD 1 ]----------+--------------------schemaname | publictablename | tattname | idinherited | fnull_frac | 0avg_width | 4n_distinct | -1most_common_vals |most_common_freqs |histogram_bounds | {1,2,3,4,5,6,7,8,9}correlation | 0.116667most_common_elems |most_common_elem_freqs |elem_count_histogram |
postgres=# select corr(rn,id) from (select row_number() over(order by ctid) as rn, * from t) as t(rn,id);-[ RECORD 1 ]-----------corr | 0.116666666666667
postgres=# insert into t select * from generate_series(1,100000) order by random();INSERT 0 100000
postgres=# select ctid,* from t limit 100;ctid | id---------+-------(0,1) | 2(0,2) | 5(0,3) | 8(0,4) | 3(0,5) | 4(0,6) | 6(0,7) | 9(0,8) | 7(0,9) | 1(0,10) | 4607(0,11) | 39521(0,12) | 92869(0,13) | 80094(0,14) | 13214(0,15) | 15509(0,16) | 8380(0,17) | 22281(0,18) | 99252(0,19) | 60018(0,20) | 55716....postgres=# analyze t;ANALYZEpostgres=# select correlation from pg_stats where attname ='id' and tablename='t';correlation---------------0.000263469(1 row)
postgres=# select corr(rn,id) from (select row_number() over(order by ctid) as rn, * from t) as t(rn,id);corr---------------------0.00110293570728894(1 row)
postgres=# alter table t alter column id SET STATISTICS 10000;ALTER TABLEpostgres=# analyze t;ANALYZEpostgres=# select correlation from pg_stats where attname ='id' and tablename='t';correlation-------------0.00110296(1 row)
这篇关于PostgreSQL attr correlation for values(logical order) ctid (physcial order)的文章就介绍到这儿,希望我们推荐的文章对编程师们有所帮助!