Spaces:
Running
Running
Tom
committed on
Commit
·
7a00b95
1
Parent(s):
1876b60
filtering for dead posts
Browse files
513935c4d2db2d2d/query_results_661f24f3.csv
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
id,title,source_url,author,published_date,image_url,type
|
| 2 |
+
1242,These preteen go-kart drivers are spending millions for a shot at F1 racing,https://www.washingtonpost.com/world/interactive/2024/formula-1-karting-children-parents-racing-costs/,The Washington Post,2025-07-17,,spotlight
|
| 3 |
+
1912,A Formula 1 pistop: 2 seconds of adrenaline and pressure,https://www.washingtonpost.com/sports/interactive/2023/formula-one-pitstop-haas-red-bull/,The Washington Post,2023-07-17,,spotlight
|
| 4 |
+
7047,Racing Against History,http://www.nytimes.com/interactive/2012/08/01/sports/olympics/racing-against-history.html?gwh=2D12538F1CD4F05B39F50285EFA1313E,The New York Times,2012-07-17,,spotlight
|
| 5 |
+
442,75 years of innovation: How F1 has evolved since 1950 and where it's headed,https://www.espn.com/espn/feature/story/_/id/43832710/how-f1-evolved-1950-where-headed-2026,ESPN,,,spotlight
|
513935c4d2db2d2d/query_results_8b61c5d0.csv
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
id,title,source_url,author,published_date,image_url,type
|
| 2 |
+
391,Our World | Justdiggit,https://ourworld.justdiggit.org/en/,Just Digg It,2024-01-19,https://towumekminbldlabbyss.supabase.co/storage/v1/object/public/images/posts/share-ourworld-justdiggit.jpg,spotlight
|
513935c4d2db2d2d/query_results_c6e0aed3.csv
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
id,title,source_url,author,published_date,image_url,type
|
| 2 |
+
1242,These preteen go-kart drivers are spending millions for a shot at F1 racing,https://www.washingtonpost.com/world/interactive/2024/formula-1-karting-children-parents-racing-costs/,The Washington Post,2025-07-17,,spotlight
|
| 3 |
+
925,Weed drinks are everywhere in Minnesota. Other states are now embracing them.,https://www.politico.com/news/2024/07/10/minnesota-weed-drinks-00165375,POLITICO,2025-07-17,,spotlight
|
| 4 |
+
1912,A Formula 1 pistop: 2 seconds of adrenaline and pressure,https://www.washingtonpost.com/sports/interactive/2023/formula-one-pitstop-haas-red-bull/,The Washington Post,2023-07-17,,spotlight
|
| 5 |
+
3122,Rising Reality: A look at the difficulties facing communities bracing for climate change all along San Francisco Bay,https://www.sfchronicle.com/projects/2021/san-francisco-bay-area-sea-level-rise-2021/mission-creek,San Francisco Chronicle,2021-07-17,,spotlight
|
| 6 |
+
7047,Racing Against History,http://www.nytimes.com/interactive/2012/08/01/sports/olympics/racing-against-history.html?gwh=2D12538F1CD4F05B39F50285EFA1313E,The New York Times,2012-07-17,,spotlight
|
| 7 |
+
3754,For embracing responsive design,http://www.bostonglobe.com/arts/specials/gardner,Boston Globe,2011-07-17,,spotlight
|
| 8 |
+
46,Privacy Preserving Proximity Tracing,https://tracing.ft0.ch/#/,Privacy Preserving Proximity Tracing,,,spotlight
|
| 9 |
+
442,75 years of innovation: How F1 has evolved since 1950 and where it's headed,https://www.espn.com/espn/feature/story/_/id/43832710/how-f1-evolved-1950-where-headed-2026,ESPN,,,spotlight
|
src/vanna.py
CHANGED
|
@@ -68,7 +68,7 @@ class CustomSQLSystemPromptBuilder(SystemPromptBuilder):
|
|
| 68 |
prompt += (
|
| 69 |
"\n## Database Schema\n"
|
| 70 |
"Tables:\n"
|
| 71 |
-
"- posts (id, title, source_url, author, published_date, image_url, type, provider_id, created_at, updated_at)\n"
|
| 72 |
"- providers (id, name)\n"
|
| 73 |
"- provider_attributes (id, provider_id, type, name)\n"
|
| 74 |
"- post_provider_attributes (post_id, attribute_id)\n"
|
|
@@ -96,6 +96,7 @@ class CustomSQLSystemPromptBuilder(SystemPromptBuilder):
|
|
| 96 |
"- `providers.name`: name of the publishing organization (e.g., 'Nuanced', 'SND').\n"
|
| 97 |
"- `tags.name`: thematic keyword or topic (e.g., '3D', 'AI', 'Design').\n"
|
| 98 |
"- `post_tags.weight`: relevance score between a post and a tag.\n"
|
|
|
|
| 99 |
)
|
| 100 |
|
| 101 |
# ======================
|
|
@@ -103,6 +104,7 @@ class CustomSQLSystemPromptBuilder(SystemPromptBuilder):
|
|
| 103 |
# ======================
|
| 104 |
prompt += (
|
| 105 |
"\n## Business Logic\n"
|
|
|
|
| 106 |
"- A query mentioning an organization (e.g., 'New York Times') should search both `posts.author` and `providers.name`.\n"
|
| 107 |
"- Return all post types (spotlight, resource, insight) unless the user specifies otherwise.\n"
|
| 108 |
"- Tags link posts to specific themes or disciplines.\n"
|
|
@@ -165,23 +167,23 @@ class CustomSQLSystemPromptBuilder(SystemPromptBuilder):
|
|
| 165 |
"LEFT JOIN post_tags pt ON p.id = pt.post_id "
|
| 166 |
"LEFT JOIN tags t ON pt.tag_id = t.id "
|
| 167 |
"LEFT JOIN providers pr ON p.provider_id = pr.id "
|
| 168 |
-
"WHERE t.name ~* '\\\\mf1\\\\M' OR t.name ~* '\\\\mformula\\\\M' "
|
| 169 |
"OR p.title ~* '\\\\mf1\\\\M' OR p.title ~* '\\\\mformula\\\\M' "
|
| 170 |
-
"OR p.author ~* '\\\\mf1\\\\M' "
|
| 171 |
"ORDER BY p.published_date DESC NULLS LAST LIMIT 9;\"]\n"
|
| 172 |
"\nUser: 'Show me posts from The New York Times'\n"
|
| 173 |
"Assistant: [call run_sql with \"SELECT DISTINCT p.id, p.title, p.source_url, p.author, p.published_date, p.image_url, p.type "
|
| 174 |
"FROM posts p "
|
| 175 |
"LEFT JOIN providers pr ON p.provider_id = pr.id "
|
| 176 |
-
"WHERE p.author ~* '\\\\mnew\\\\M.*\\\\myork\\\\M.*\\\\mtimes\\\\M' OR pr.name ~* '\\\\mnew\\\\M.*\\\\myork\\\\M.*\\\\mtimes\\\\M' "
|
| 177 |
"ORDER BY p.published_date DESC NULLS LAST LIMIT 9;\"]\n"
|
| 178 |
"\nUser: 'interactive visualizations'\n"
|
| 179 |
"Assistant: [call run_sql with \"SELECT DISTINCT p.id, p.title, p.source_url, p.author, p.published_date, p.image_url, p.type "
|
| 180 |
"FROM posts p "
|
| 181 |
"LEFT JOIN post_tags pt ON p.id = pt.post_id "
|
| 182 |
"LEFT JOIN tags t ON pt.tag_id = t.id "
|
| 183 |
-
"WHERE t.name ~* '\\\\minteractive\\\\M' OR p.title ~* '\\\\minteractive\\\\M' "
|
| 184 |
-
"OR p.title ~* '\\\\mvisualization\\\\M' OR t.name ~* '\\\\mdataviz\\\\M' "
|
| 185 |
"ORDER BY p.published_date DESC NULLS LAST LIMIT 9;\"]\n"
|
| 186 |
)
|
| 187 |
|
|
|
|
| 68 |
prompt += (
|
| 69 |
"\n## Database Schema\n"
|
| 70 |
"Tables:\n"
|
| 71 |
+
"- posts (id, title, source_url, author, published_date, image_url, type, provider_id, created_at, updated_at, dead)\n"
|
| 72 |
"- providers (id, name)\n"
|
| 73 |
"- provider_attributes (id, provider_id, type, name)\n"
|
| 74 |
"- post_provider_attributes (post_id, attribute_id)\n"
|
|
|
|
| 96 |
"- `providers.name`: name of the publishing organization (e.g., 'Nuanced', 'SND').\n"
|
| 97 |
"- `tags.name`: thematic keyword or topic (e.g., '3D', 'AI', 'Design').\n"
|
| 98 |
"- `post_tags.weight`: relevance score between a post and a tag.\n"
|
| 99 |
+
"- `posts.dead`: boolean flag indicating if the post is dead/removed (true = dead, false = active).\n"
|
| 100 |
)
|
| 101 |
|
| 102 |
# ======================
|
|
|
|
| 104 |
# ======================
|
| 105 |
prompt += (
|
| 106 |
"\n## Business Logic\n"
|
| 107 |
+
"- **ALWAYS filter out dead posts**: Include `WHERE p.dead = false` (or `AND p.dead = false`) in every query. Never return posts where dead = true.\n"
|
| 108 |
"- A query mentioning an organization (e.g., 'New York Times') should search both `posts.author` and `providers.name`.\n"
|
| 109 |
"- Return all post types (spotlight, resource, insight) unless the user specifies otherwise.\n"
|
| 110 |
"- Tags link posts to specific themes or disciplines.\n"
|
|
|
|
| 167 |
"LEFT JOIN post_tags pt ON p.id = pt.post_id "
|
| 168 |
"LEFT JOIN tags t ON pt.tag_id = t.id "
|
| 169 |
"LEFT JOIN providers pr ON p.provider_id = pr.id "
|
| 170 |
+
"WHERE p.dead = false AND (t.name ~* '\\\\mf1\\\\M' OR t.name ~* '\\\\mformula\\\\M' "
|
| 171 |
"OR p.title ~* '\\\\mf1\\\\M' OR p.title ~* '\\\\mformula\\\\M' "
|
| 172 |
+
"OR p.author ~* '\\\\mf1\\\\M') "
|
| 173 |
"ORDER BY p.published_date DESC NULLS LAST LIMIT 9;\"]\n"
|
| 174 |
"\nUser: 'Show me posts from The New York Times'\n"
|
| 175 |
"Assistant: [call run_sql with \"SELECT DISTINCT p.id, p.title, p.source_url, p.author, p.published_date, p.image_url, p.type "
|
| 176 |
"FROM posts p "
|
| 177 |
"LEFT JOIN providers pr ON p.provider_id = pr.id "
|
| 178 |
+
"WHERE p.dead = false AND (p.author ~* '\\\\mnew\\\\M.*\\\\myork\\\\M.*\\\\mtimes\\\\M' OR pr.name ~* '\\\\mnew\\\\M.*\\\\myork\\\\M.*\\\\mtimes\\\\M') "
|
| 179 |
"ORDER BY p.published_date DESC NULLS LAST LIMIT 9;\"]\n"
|
| 180 |
"\nUser: 'interactive visualizations'\n"
|
| 181 |
"Assistant: [call run_sql with \"SELECT DISTINCT p.id, p.title, p.source_url, p.author, p.published_date, p.image_url, p.type "
|
| 182 |
"FROM posts p "
|
| 183 |
"LEFT JOIN post_tags pt ON p.id = pt.post_id "
|
| 184 |
"LEFT JOIN tags t ON pt.tag_id = t.id "
|
| 185 |
+
"WHERE p.dead = false AND (t.name ~* '\\\\minteractive\\\\M' OR p.title ~* '\\\\minteractive\\\\M' "
|
| 186 |
+
"OR p.title ~* '\\\\mvisualization\\\\M' OR t.name ~* '\\\\mdataviz\\\\M') "
|
| 187 |
"ORDER BY p.published_date DESC NULLS LAST LIMIT 9;\"]\n"
|
| 188 |
)
|
| 189 |
|