#standardSQL
WITH historic_views AS ( SELECT *, '201703' q FROM `fh-bigquery.stackoverflow_archive.201703_posts_questions`
UNION ALL
SELECT *, '201706' q
FROM `fh-bigquery.stackoverflow_archive.201706_posts_questions`
UNION ALL
SELECT *, '201709' q
FROM `fh-bigquery.stackoverflow_archive.201709_posts_questions`
UNION ALL
SELECT *, '201712' q
FROM `fh-bigquery.stackoverflow_archive.201712_posts_questions`
UNION ALL
SELECT *, '201803' q
FROM `fh-bigquery.stackoverflow_archive.201803_posts_questions`
UNION ALL
SELECT *, '201806' q
FROM `fh-bigquery.stackoverflow_archive.201806_posts_questions` ), top_tags AS ( SELECT value FROM UNNEST(( SELECT APPROX_TOP_COUNT(tags, 5000)top
FROM `fh-bigquery.stackoverflow_archive.201803_posts_questions`
WHERE NOT tags LIKE '%|%')) WHERE count > 10), views_per_tag AS ( SELECT tag, COUNTIF(DATE(creation_date) BETWEEN DATE_SUB(PARSE_DATE('%Y%m', q), INTERVAL 3 MONTH) AND PARSE_DATE('%Y%m', q)) questions
, SUM(view_count) views_cum, PARSE_DATE('%Y%m', q) q FROM historic_views, UNNEST(SPLIT(tags, '|')) tag GROUP BY tag, q
), views_per_q AS ( SELECT *, views_cum - LAG(views_cum, 1) OVER(PARTITION BY tag ORDER BY q) views FROM views_per_tag
), percents AS ( SELECT *
, 100*views/SUM(views) OVER(PARTITION BY q) views_percent
, 100*questions/SUM(questions) OVER(PARTITION BY q) questions_percent FROM views_per_q WHERE views IS NOT null
AND tag IN (SELECT value FROM top_tags)
)SELECT *FROM percents
值得留意的趋向差异#standardSQL
WITH toptags AS (
SELECT *
FROM `fh-bigquery.stackoverflow.201806_stacko_tags_views`
WHERE q='2018-06-01'
AND tag NOT IN ('string', 'git', 'sql')
AND views_percent>0.05
)
SELECT a.tag, b.tag btag, a.views_percent - b.views_percent
FROM toptags a
JOIN toptags b
ON a.tag!=b.tag
AND a.views_percent>b.views_percent
AND a.questions_percent< b.questions_percent
ORDER by 3 DESC
参考:https://towardsdatascience.com/these-are-the-real-stack-overflow-trends-use-the-pageviews-c439903cd1a
(责任编辑:admin)