Tom committed on
Commit
7a00b95
·
1 Parent(s): 1876b60

filtering for dead posts

Browse files
513935c4d2db2d2d/query_results_661f24f3.csv ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ id,title,source_url,author,published_date,image_url,type
2
+ 1242,These preteen go-kart drivers are spending millions for a shot at F1 racing,https://www.washingtonpost.com/world/interactive/2024/formula-1-karting-children-parents-racing-costs/,The Washington Post,2025-07-17,,spotlight
3
+ 1912,A Formula 1 pistop: 2 seconds of adrenaline and pressure,https://www.washingtonpost.com/sports/interactive/2023/formula-one-pitstop-haas-red-bull/,The Washington Post,2023-07-17,,spotlight
4
+ 7047,Racing Against History,http://www.nytimes.com/interactive/2012/08/01/sports/olympics/racing-against-history.html?gwh=2D12538F1CD4F05B39F50285EFA1313E,The New York Times,2012-07-17,,spotlight
5
+ 442,75 years of innovation: How F1 has evolved since 1950 and where it's headed,https://www.espn.com/espn/feature/story/_/id/43832710/how-f1-evolved-1950-where-headed-2026,ESPN,,,spotlight
513935c4d2db2d2d/query_results_8b61c5d0.csv ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ id,title,source_url,author,published_date,image_url,type
2
+ 391,Our World | Justdiggit,https://ourworld.justdiggit.org/en/,Just Digg It,2024-01-19,https://towumekminbldlabbyss.supabase.co/storage/v1/object/public/images/posts/share-ourworld-justdiggit.jpg,spotlight
513935c4d2db2d2d/query_results_c6e0aed3.csv ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ id,title,source_url,author,published_date,image_url,type
2
+ 1242,These preteen go-kart drivers are spending millions for a shot at F1 racing,https://www.washingtonpost.com/world/interactive/2024/formula-1-karting-children-parents-racing-costs/,The Washington Post,2025-07-17,,spotlight
3
+ 925,Weed drinks are everywhere in Minnesota. Other states are now embracing them.,https://www.politico.com/news/2024/07/10/minnesota-weed-drinks-00165375,POLITICO,2025-07-17,,spotlight
4
+ 1912,A Formula 1 pistop: 2 seconds of adrenaline and pressure,https://www.washingtonpost.com/sports/interactive/2023/formula-one-pitstop-haas-red-bull/,The Washington Post,2023-07-17,,spotlight
5
+ 3122,Rising Reality: A look at the difficulties facing communities bracing for climate change all along San Francisco Bay,https://www.sfchronicle.com/projects/2021/san-francisco-bay-area-sea-level-rise-2021/mission-creek,San Francisco Chronicle,2021-07-17,,spotlight
6
+ 7047,Racing Against History,http://www.nytimes.com/interactive/2012/08/01/sports/olympics/racing-against-history.html?gwh=2D12538F1CD4F05B39F50285EFA1313E,The New York Times,2012-07-17,,spotlight
7
+ 3754,For embracing responsive design,http://www.bostonglobe.com/arts/specials/gardner,Boston Globe,2011-07-17,,spotlight
8
+ 46,Privacy Preserving Proximity Tracing,https://tracing.ft0.ch/#/,Privacy Preserving Proximity Tracing,,,spotlight
9
+ 442,75 years of innovation: How F1 has evolved since 1950 and where it's headed,https://www.espn.com/espn/feature/story/_/id/43832710/how-f1-evolved-1950-where-headed-2026,ESPN,,,spotlight
src/vanna.py CHANGED
@@ -68,7 +68,7 @@ class CustomSQLSystemPromptBuilder(SystemPromptBuilder):
68
  prompt += (
69
  "\n## Database Schema\n"
70
  "Tables:\n"
71
- "- posts (id, title, source_url, author, published_date, image_url, type, provider_id, created_at, updated_at)\n"
72
  "- providers (id, name)\n"
73
  "- provider_attributes (id, provider_id, type, name)\n"
74
  "- post_provider_attributes (post_id, attribute_id)\n"
@@ -96,6 +96,7 @@ class CustomSQLSystemPromptBuilder(SystemPromptBuilder):
96
  "- `providers.name`: name of the publishing organization (e.g., 'Nuanced', 'SND').\n"
97
  "- `tags.name`: thematic keyword or topic (e.g., '3D', 'AI', 'Design').\n"
98
  "- `post_tags.weight`: relevance score between a post and a tag.\n"
 
99
  )
100
 
101
  # ======================
@@ -103,6 +104,7 @@ class CustomSQLSystemPromptBuilder(SystemPromptBuilder):
103
  # ======================
104
  prompt += (
105
  "\n## Business Logic\n"
 
106
  "- A query mentioning an organization (e.g., 'New York Times') should search both `posts.author` and `providers.name`.\n"
107
  "- Return all post types (spotlight, resource, insight) unless the user specifies otherwise.\n"
108
  "- Tags link posts to specific themes or disciplines.\n"
@@ -165,23 +167,23 @@ class CustomSQLSystemPromptBuilder(SystemPromptBuilder):
165
  "LEFT JOIN post_tags pt ON p.id = pt.post_id "
166
  "LEFT JOIN tags t ON pt.tag_id = t.id "
167
  "LEFT JOIN providers pr ON p.provider_id = pr.id "
168
- "WHERE t.name ~* '\\\\mf1\\\\M' OR t.name ~* '\\\\mformula\\\\M' "
169
  "OR p.title ~* '\\\\mf1\\\\M' OR p.title ~* '\\\\mformula\\\\M' "
170
- "OR p.author ~* '\\\\mf1\\\\M' "
171
  "ORDER BY p.published_date DESC NULLS LAST LIMIT 9;\"]\n"
172
  "\nUser: 'Show me posts from The New York Times'\n"
173
  "Assistant: [call run_sql with \"SELECT DISTINCT p.id, p.title, p.source_url, p.author, p.published_date, p.image_url, p.type "
174
  "FROM posts p "
175
  "LEFT JOIN providers pr ON p.provider_id = pr.id "
176
- "WHERE p.author ~* '\\\\mnew\\\\M.*\\\\myork\\\\M.*\\\\mtimes\\\\M' OR pr.name ~* '\\\\mnew\\\\M.*\\\\myork\\\\M.*\\\\mtimes\\\\M' "
177
  "ORDER BY p.published_date DESC NULLS LAST LIMIT 9;\"]\n"
178
  "\nUser: 'interactive visualizations'\n"
179
  "Assistant: [call run_sql with \"SELECT DISTINCT p.id, p.title, p.source_url, p.author, p.published_date, p.image_url, p.type "
180
  "FROM posts p "
181
  "LEFT JOIN post_tags pt ON p.id = pt.post_id "
182
  "LEFT JOIN tags t ON pt.tag_id = t.id "
183
- "WHERE t.name ~* '\\\\minteractive\\\\M' OR p.title ~* '\\\\minteractive\\\\M' "
184
- "OR p.title ~* '\\\\mvisualization\\\\M' OR t.name ~* '\\\\mdataviz\\\\M' "
185
  "ORDER BY p.published_date DESC NULLS LAST LIMIT 9;\"]\n"
186
  )
187
 
 
68
  prompt += (
69
  "\n## Database Schema\n"
70
  "Tables:\n"
71
+ "- posts (id, title, source_url, author, published_date, image_url, type, provider_id, created_at, updated_at, dead)\n"
72
  "- providers (id, name)\n"
73
  "- provider_attributes (id, provider_id, type, name)\n"
74
  "- post_provider_attributes (post_id, attribute_id)\n"
 
96
  "- `providers.name`: name of the publishing organization (e.g., 'Nuanced', 'SND').\n"
97
  "- `tags.name`: thematic keyword or topic (e.g., '3D', 'AI', 'Design').\n"
98
  "- `post_tags.weight`: relevance score between a post and a tag.\n"
99
+ "- `posts.dead`: boolean flag indicating if the post is dead/removed (true = dead, false = active).\n"
100
  )
101
 
102
  # ======================
 
104
  # ======================
105
  prompt += (
106
  "\n## Business Logic\n"
107
+ "- **ALWAYS filter out dead posts**: Include `WHERE p.dead = false` (or `AND p.dead = false`) in every query. Never return posts where dead = true.\n"
108
  "- A query mentioning an organization (e.g., 'New York Times') should search both `posts.author` and `providers.name`.\n"
109
  "- Return all post types (spotlight, resource, insight) unless the user specifies otherwise.\n"
110
  "- Tags link posts to specific themes or disciplines.\n"
 
167
  "LEFT JOIN post_tags pt ON p.id = pt.post_id "
168
  "LEFT JOIN tags t ON pt.tag_id = t.id "
169
  "LEFT JOIN providers pr ON p.provider_id = pr.id "
170
+ "WHERE p.dead = false AND (t.name ~* '\\\\mf1\\\\M' OR t.name ~* '\\\\mformula\\\\M' "
171
  "OR p.title ~* '\\\\mf1\\\\M' OR p.title ~* '\\\\mformula\\\\M' "
172
+ "OR p.author ~* '\\\\mf1\\\\M') "
173
  "ORDER BY p.published_date DESC NULLS LAST LIMIT 9;\"]\n"
174
  "\nUser: 'Show me posts from The New York Times'\n"
175
  "Assistant: [call run_sql with \"SELECT DISTINCT p.id, p.title, p.source_url, p.author, p.published_date, p.image_url, p.type "
176
  "FROM posts p "
177
  "LEFT JOIN providers pr ON p.provider_id = pr.id "
178
+ "WHERE p.dead = false AND (p.author ~* '\\\\mnew\\\\M.*\\\\myork\\\\M.*\\\\mtimes\\\\M' OR pr.name ~* '\\\\mnew\\\\M.*\\\\myork\\\\M.*\\\\mtimes\\\\M') "
179
  "ORDER BY p.published_date DESC NULLS LAST LIMIT 9;\"]\n"
180
  "\nUser: 'interactive visualizations'\n"
181
  "Assistant: [call run_sql with \"SELECT DISTINCT p.id, p.title, p.source_url, p.author, p.published_date, p.image_url, p.type "
182
  "FROM posts p "
183
  "LEFT JOIN post_tags pt ON p.id = pt.post_id "
184
  "LEFT JOIN tags t ON pt.tag_id = t.id "
185
+ "WHERE p.dead = false AND (t.name ~* '\\\\minteractive\\\\M' OR p.title ~* '\\\\minteractive\\\\M' "
186
+ "OR p.title ~* '\\\\mvisualization\\\\M' OR t.name ~* '\\\\mdataviz\\\\M') "
187
  "ORDER BY p.published_date DESC NULLS LAST LIMIT 9;\"]\n"
188
  )
189